mirror of https://github.com/citusdata/citus.git
1566 lines
59 KiB
Plaintext
1566 lines
59 KiB
Plaintext
--
|
|
-- MULTI_SELECT_DISTINCT
|
|
--
|
|
-- Tests select distinct, and select distinct on features.
|
|
--
|
|
ANALYZE lineitem_hash_part;
|
|
-- function calls are supported
|
|
SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0;
|
|
l_orderkey | now
|
|
---------------------------------------------------------------------
|
|
(0 rows)
|
|
|
|
SELECT DISTINCT l_orderkey, avg(l_linenumber)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_orderkey
|
|
HAVING avg(l_linenumber) = (select avg(distinct l_linenumber))
|
|
LIMIT 10;
|
|
ERROR: Subqueries in HAVING cannot refer to outer query
|
|
SELECT DISTINCT l_orderkey
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_orderkey
|
|
HAVING (select avg(distinct l_linenumber) = l_orderkey)
|
|
LIMIT 10;
|
|
ERROR: Subqueries in HAVING cannot refer to outer query
|
|
SELECT DISTINCT l_partkey, 1 + (random() * 0)::int FROM lineitem_hash_part ORDER BY 1 DESC LIMIT 3;
|
|
l_partkey | ?column?
|
|
---------------------------------------------------------------------
|
|
199973 | 1
|
|
199946 | 1
|
|
199943 | 1
|
|
(3 rows)
|
|
|
|
-- const expressions are supported
|
|
SELECT DISTINCT l_orderkey, 1+1 FROM lineitem_hash_part ORDER BY 1 LIMIT 5;
|
|
l_orderkey | ?column?
|
|
---------------------------------------------------------------------
|
|
1 | 2
|
|
2 | 2
|
|
3 | 2
|
|
4 | 2
|
|
5 | 2
|
|
(5 rows)
|
|
|
|
-- non const expressions are also supported
|
|
SELECT DISTINCT l_orderkey, l_partkey + 1 FROM lineitem_hash_part ORDER BY 1, 2 LIMIT 5;
|
|
l_orderkey | ?column?
|
|
---------------------------------------------------------------------
|
|
1 | 2133
|
|
1 | 15636
|
|
1 | 24028
|
|
1 | 63701
|
|
1 | 67311
|
|
(5 rows)
|
|
|
|
-- column expressions are supported
|
|
SELECT DISTINCT l_orderkey, l_shipinstruct || l_shipmode FROM lineitem_hash_part ORDER BY 2 , 1 LIMIT 5;
|
|
l_orderkey | ?column?
|
|
---------------------------------------------------------------------
|
|
32 | COLLECT CODAIR
|
|
39 | COLLECT CODAIR
|
|
66 | COLLECT CODAIR
|
|
70 | COLLECT CODAIR
|
|
98 | COLLECT CODAIR
|
|
(5 rows)
|
|
|
|
-- function calls with const input are supported
|
|
SELECT DISTINCT l_orderkey, strpos('AIR', 'A') FROM lineitem_hash_part ORDER BY 1,2 LIMIT 5;
|
|
l_orderkey | strpos
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 | 1
|
|
3 | 1
|
|
4 | 1
|
|
5 | 1
|
|
(5 rows)
|
|
|
|
-- function calls with non-const input are supported
|
|
SELECT DISTINCT l_orderkey, strpos(l_shipmode, 'I')
|
|
FROM lineitem_hash_part
|
|
WHERE strpos(l_shipmode, 'I') > 1
|
|
ORDER BY 2, 1
|
|
LIMIT 5;
|
|
l_orderkey | strpos
|
|
---------------------------------------------------------------------
|
|
1 | 2
|
|
3 | 2
|
|
5 | 2
|
|
32 | 2
|
|
33 | 2
|
|
(5 rows)
|
|
|
|
-- row types are supported
|
|
SELECT DISTINCT (l_orderkey, l_partkey) AS pair FROM lineitem_hash_part ORDER BY 1 LIMIT 5;
|
|
pair
|
|
---------------------------------------------------------------------
|
|
(1,2132)
|
|
(1,15635)
|
|
(1,24027)
|
|
(1,63700)
|
|
(1,67310)
|
|
(5 rows)
|
|
|
|
-- distinct on partition column
|
|
-- verify counts match with respect to count(distinct)
|
|
CREATE TEMP TABLE temp_orderkeys AS SELECT DISTINCT l_orderkey FROM lineitem_hash_part;
|
|
SELECT COUNT(*) FROM temp_orderkeys;
|
|
count
|
|
---------------------------------------------------------------------
|
|
2985
|
|
(1 row)
|
|
|
|
SELECT COUNT(DISTINCT l_orderkey) FROM lineitem_hash_part;
|
|
count
|
|
---------------------------------------------------------------------
|
|
2985
|
|
(1 row)
|
|
|
|
SELECT DISTINCT l_orderkey FROM lineitem_hash_part WHERE l_orderkey < 500 and l_partkey < 5000 order by 1;
|
|
l_orderkey
|
|
---------------------------------------------------------------------
|
|
1
|
|
3
|
|
32
|
|
35
|
|
39
|
|
65
|
|
129
|
|
130
|
|
134
|
|
164
|
|
194
|
|
228
|
|
261
|
|
290
|
|
320
|
|
321
|
|
354
|
|
418
|
|
(18 rows)
|
|
|
|
-- distinct on non-partition column
|
|
SELECT DISTINCT l_partkey FROM lineitem_hash_part WHERE l_orderkey > 5 and l_orderkey < 20 order by 1;
|
|
l_partkey
|
|
---------------------------------------------------------------------
|
|
79251
|
|
94780
|
|
139636
|
|
145243
|
|
151894
|
|
157238
|
|
163073
|
|
182052
|
|
(8 rows)
|
|
|
|
SELECT DISTINCT l_shipmode FROM lineitem_hash_part ORDER BY 1 DESC;
|
|
l_shipmode
|
|
---------------------------------------------------------------------
|
|
TRUCK
|
|
SHIP
|
|
REG AIR
|
|
RAIL
|
|
MAIL
|
|
FOB
|
|
AIR
|
|
(7 rows)
|
|
|
|
-- distinct with multiple columns
|
|
SELECT DISTINCT l_orderkey, o_orderdate
|
|
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
|
|
WHERE l_orderkey < 10
|
|
ORDER BY l_orderkey;
|
|
l_orderkey | o_orderdate
|
|
---------------------------------------------------------------------
|
|
1 | 01-02-1996
|
|
2 | 12-01-1996
|
|
3 | 10-14-1993
|
|
4 | 10-11-1995
|
|
5 | 07-30-1994
|
|
6 | 02-21-1992
|
|
7 | 01-10-1996
|
|
(7 rows)
|
|
|
|
-- distinct on partition column with aggregate
|
|
-- this is the same as the one without distinct due to group by
|
|
SELECT DISTINCT l_orderkey, count(*)
|
|
FROM lineitem_hash_part
|
|
WHERE l_orderkey < 200
|
|
GROUP BY 1
|
|
HAVING count(*) > 5
|
|
ORDER BY 2 DESC, 1;
|
|
l_orderkey | count
|
|
---------------------------------------------------------------------
|
|
7 | 7
|
|
68 | 7
|
|
129 | 7
|
|
164 | 7
|
|
194 | 7
|
|
1 | 6
|
|
3 | 6
|
|
32 | 6
|
|
35 | 6
|
|
39 | 6
|
|
67 | 6
|
|
69 | 6
|
|
70 | 6
|
|
71 | 6
|
|
134 | 6
|
|
135 | 6
|
|
163 | 6
|
|
192 | 6
|
|
197 | 6
|
|
(19 rows)
|
|
|
|
-- explain the query to see actual plan
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT l_orderkey, count(*)
|
|
FROM lineitem_hash_part
|
|
WHERE l_orderkey < 200
|
|
GROUP BY 1
|
|
HAVING count(*) > 5
|
|
ORDER BY 2 DESC, 1;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Sort
|
|
Sort Key: remote_scan.count DESC, remote_scan.l_orderkey
|
|
-> HashAggregate
|
|
Group Key: remote_scan.count, remote_scan.l_orderkey
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_orderkey
|
|
Filter: (count(*) > 5)
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
Filter: (l_orderkey < 200)
|
|
(14 rows)
|
|
|
|
-- check the plan if the hash aggregate is disabled
|
|
SET enable_hashagg TO off;
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT l_orderkey, count(*)
|
|
FROM lineitem_hash_part
|
|
WHERE l_orderkey < 200
|
|
GROUP BY 1
|
|
HAVING count(*) > 5
|
|
ORDER BY 2 DESC, 1;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.count DESC, remote_scan.l_orderkey
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_orderkey
|
|
Filter: (count(*) > 5)
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
Filter: (l_orderkey < 200)
|
|
(13 rows)
|
|
|
|
SET enable_hashagg TO on;
|
|
-- distinct on aggregate of group by columns, we try to check whether we handle
|
|
-- queries which do not have any group by column in distinct columns properly.
|
|
SELECT DISTINCT count(*)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY 1;
|
|
count
|
|
---------------------------------------------------------------------
|
|
1
|
|
2
|
|
3
|
|
4
|
|
(4 rows)
|
|
|
|
-- explain the query to see actual plan. We expect to see Aggregate node having
|
|
-- group by key on count(*) column, since columns in the Group By don't guarantee
|
|
-- the uniqueness of the result.
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT count(*)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY 1;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Unique
|
|
-> Sort
|
|
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
|
|
-> HashAggregate
|
|
Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_suppkey, l_linenumber
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(13 rows)
|
|
|
|
-- check the plan if the hash aggregate is disabled. We expect to see sort+unique
|
|
-- instead of aggregate plan node to handle distinct.
|
|
SET enable_hashagg TO off;
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT count(*)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY 1;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Unique
|
|
-> Sort
|
|
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
|
|
-> GroupAggregate
|
|
Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3
|
|
-> Sort
|
|
Sort Key: remote_scan.worker_column_2, remote_scan.worker_column_3
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_suppkey, l_linenumber
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(15 rows)
|
|
|
|
SET enable_hashagg TO on;
|
|
-- Now we have only part of group clause columns in distinct, yet it is still not
|
|
-- enough to use Group By columns to guarantee uniqueness of result list.
|
|
SELECT DISTINCT l_suppkey, count(*)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY 1
|
|
LIMIT 10;
|
|
l_suppkey | count
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 | 1
|
|
3 | 1
|
|
4 | 1
|
|
5 | 1
|
|
7 | 1
|
|
10 | 1
|
|
12 | 1
|
|
13 | 1
|
|
14 | 1
|
|
(10 rows)
|
|
|
|
-- explain the query to see actual plan. Similar to the explain of the query above.
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT l_suppkey, count(*)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY 1
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.l_suppkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
|
|
-> HashAggregate
|
|
Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_suppkey, l_linenumber
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(14 rows)
|
|
|
|
-- check the plan if the hash aggregate is disabled. Similar to the explain of
|
|
-- the query above.
|
|
SET enable_hashagg TO off;
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT l_suppkey, count(*)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY 1
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.l_suppkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
|
|
-> GroupAggregate
|
|
Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3
|
|
-> Sort
|
|
Sort Key: remote_scan.l_suppkey, remote_scan.worker_column_3
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_suppkey, l_linenumber
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(16 rows)
|
|
|
|
SET enable_hashagg TO on;
|
|
-- Similar to the above query, not with count but avg. Only difference with the
|
|
-- above query is that we run two aggregate functions in workers.
|
|
SELECT DISTINCT l_suppkey, avg(l_partkey)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
l_suppkey | avg
|
|
---------------------------------------------------------------------
|
|
1 | 190000.000000000000
|
|
2 | 172450.000000000000
|
|
3 | 112469.000000000000
|
|
3 | 134976.000000000000
|
|
4 | 112470.000000000000
|
|
4 | 142461.000000000000
|
|
5 | 182450.000000000000
|
|
7 | 137493.000000000000
|
|
10 | 150009.000000000000
|
|
12 | 17510.0000000000000000
|
|
(10 rows)
|
|
|
|
-- explain the query to see actual plan. Similar to the explain of the query above.
|
|
-- Only aggregate functions will be changed.
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT l_suppkey, avg(l_partkey)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
|
|
-> HashAggregate
|
|
Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_suppkey, l_linenumber
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(14 rows)
|
|
|
|
-- check the plan if the hash aggregate is disabled. This explain was noted to error
|
|
-- out due to a bug; the expected output below now shows a plan instead of an error.
|
|
SET enable_hashagg TO off;
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT l_suppkey, avg(l_partkey)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
|
|
-> GroupAggregate
|
|
Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4
|
|
-> Sort
|
|
Sort Key: remote_scan.l_suppkey, remote_scan.worker_column_4
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_suppkey, l_linenumber
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(16 rows)
|
|
|
|
SET enable_hashagg TO on;
|
|
-- Similar to the above query but with distinct on
|
|
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY l_suppkey,1
|
|
LIMIT 10;
|
|
avg
|
|
---------------------------------------------------------------------
|
|
190000.000000000000
|
|
172450.000000000000
|
|
112469.000000000000
|
|
112470.000000000000
|
|
182450.000000000000
|
|
137493.000000000000
|
|
150009.000000000000
|
|
17510.0000000000000000
|
|
87504.000000000000
|
|
77506.000000000000
|
|
(10 rows)
|
|
|
|
-- explain the query to see actual plan. We expect to see sort+unique to handle
|
|
-- distinct on.
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY l_suppkey,1
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.worker_column_3, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
|
|
-> HashAggregate
|
|
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_suppkey, l_linenumber
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(14 rows)
|
|
|
|
-- check the plan if the hash aggregate is disabled. We expect to see sort+unique to
|
|
-- handle distinct on.
|
|
SET enable_hashagg TO off;
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY l_suppkey,1
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.worker_column_3, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
|
|
-> GroupAggregate
|
|
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
|
-> Sort
|
|
Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_suppkey, l_linenumber
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(16 rows)
|
|
|
|
SET enable_hashagg TO on;
|
|
-- distinct with expression and aggregation
|
|
SELECT DISTINCT avg(ceil(l_partkey / 2))
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY 1
|
|
LIMIT 10;
|
|
avg
|
|
---------------------------------------------------------------------
|
|
9
|
|
39
|
|
74
|
|
87
|
|
89
|
|
91
|
|
97
|
|
102
|
|
111
|
|
122
|
|
(10 rows)
|
|
|
|
-- explain the query to see actual plan
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT avg(ceil(l_partkey / 2))
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY 1
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision))
|
|
-> HashAggregate
|
|
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_suppkey, l_linenumber
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(14 rows)
|
|
|
|
-- check the plan if the hash aggregate is disabled. This explain was noted to error
|
|
-- out due to a bug; the expected output below now shows a plan instead of an error.
|
|
SET enable_hashagg TO off;
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT avg(ceil(l_partkey / 2))
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY 1
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision))
|
|
-> GroupAggregate
|
|
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
|
-> Sort
|
|
Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_suppkey, l_linenumber
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(16 rows)
|
|
|
|
SET enable_hashagg TO on;
|
|
-- expression among aggregations.
|
|
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY 1
|
|
LIMIT 10;
|
|
dis
|
|
---------------------------------------------------------------------
|
|
2
|
|
3
|
|
4
|
|
5
|
|
6
|
|
8
|
|
11
|
|
13
|
|
14
|
|
15
|
|
(10 rows)
|
|
|
|
-- explain the query to see actual plan
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY 1
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint)))
|
|
-> HashAggregate
|
|
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_suppkey, l_linenumber
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(14 rows)
|
|
|
|
-- check the plan if the hash aggregate is disabled. This explain was noted to error
|
|
-- out due to a bug; the expected output below now shows a plan instead of an error.
|
|
SET enable_hashagg TO off;
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey, l_linenumber
|
|
ORDER BY 1
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint)))
|
|
-> GroupAggregate
|
|
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
|
-> Sort
|
|
Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_suppkey, l_linenumber
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(16 rows)
|
|
|
|
SET enable_hashagg TO on;
|
|
-- distinct on all columns, note Group By columns guarantee uniqueness of the
|
|
-- result list.
|
|
SELECT DISTINCT *
|
|
FROM lineitem_hash_part
|
|
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
l_orderkey | l_partkey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate | l_shipinstruct | l_shipmode | l_comment
|
|
---------------------------------------------------------------------
|
|
1 | 2132 | 4633 | 4 | 28.00 | 28955.64 | 0.09 | 0.06 | N | O | 04-21-1996 | 03-30-1996 | 05-16-1996 | NONE | AIR | lites. fluffily even de
|
|
1 | 15635 | 638 | 6 | 32.00 | 49620.16 | 0.07 | 0.02 | N | O | 01-30-1996 | 02-07-1996 | 02-03-1996 | DELIVER IN PERSON | MAIL | arefully slyly ex
|
|
1 | 24027 | 1534 | 5 | 24.00 | 22824.48 | 0.10 | 0.04 | N | O | 03-30-1996 | 03-14-1996 | 04-01-1996 | NONE | FOB | pending foxes. slyly re
|
|
1 | 63700 | 3701 | 3 | 8.00 | 13309.60 | 0.10 | 0.02 | N | O | 01-29-1996 | 03-05-1996 | 01-31-1996 | TAKE BACK RETURN | REG AIR | riously. regular, express dep
|
|
1 | 67310 | 7311 | 2 | 36.00 | 45983.16 | 0.09 | 0.06 | N | O | 04-12-1996 | 02-28-1996 | 04-20-1996 | TAKE BACK RETURN | MAIL | ly final dependencies: slyly bold
|
|
1 | 155190 | 7706 | 1 | 17.00 | 21168.23 | 0.04 | 0.02 | N | O | 03-13-1996 | 02-12-1996 | 03-22-1996 | DELIVER IN PERSON | TRUCK | egular courts above the
|
|
2 | 106170 | 1191 | 1 | 38.00 | 44694.46 | 0.00 | 0.05 | N | O | 01-28-1997 | 01-14-1997 | 02-02-1997 | TAKE BACK RETURN | RAIL | ven requests. deposits breach a
|
|
3 | 4297 | 1798 | 1 | 45.00 | 54058.05 | 0.06 | 0.00 | R | F | 02-02-1994 | 01-04-1994 | 02-23-1994 | NONE | AIR | ongside of the furiously brave acco
|
|
3 | 19036 | 6540 | 2 | 49.00 | 46796.47 | 0.10 | 0.00 | R | F | 11-09-1993 | 12-20-1993 | 11-24-1993 | TAKE BACK RETURN | RAIL | unusual accounts. eve
|
|
3 | 29380 | 1883 | 4 | 2.00 | 2618.76 | 0.01 | 0.06 | A | F | 12-04-1993 | 01-07-1994 | 01-01-1994 | NONE | TRUCK | y. fluffily pending d
|
|
(10 rows)
|
|
|
|
-- explain the query to see actual plan. We expect to see only one aggregation
|
|
-- node since group by columns guarantee the uniqueness.
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT *
|
|
FROM lineitem_hash_part
|
|
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Sort
|
|
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
|
|
-> HashAggregate
|
|
Group Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> Limit
|
|
-> Unique
|
|
-> Group
|
|
Group Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
|
-> Sort
|
|
Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(17 rows)
|
|
|
|
-- check the plan if the hash aggregate is disabled. We expect to see only one
|
|
-- aggregation node since group by columns guarantee the uniqueness.
|
|
SET enable_hashagg TO off;
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT *
|
|
FROM lineitem_hash_part
|
|
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> Limit
|
|
-> Unique
|
|
-> Group
|
|
Group Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
|
-> Sort
|
|
Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(16 rows)
|
|
|
|
SET enable_hashagg TO on;
|
|
-- distinct on count distinct
|
|
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_orderkey
|
|
ORDER BY 1,2;
|
|
count | count
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 | 1
|
|
2 | 2
|
|
3 | 1
|
|
3 | 2
|
|
3 | 3
|
|
4 | 1
|
|
4 | 2
|
|
4 | 3
|
|
4 | 4
|
|
5 | 2
|
|
5 | 3
|
|
5 | 4
|
|
5 | 5
|
|
6 | 2
|
|
6 | 3
|
|
6 | 4
|
|
6 | 5
|
|
6 | 6
|
|
7 | 2
|
|
7 | 3
|
|
7 | 4
|
|
7 | 5
|
|
7 | 6
|
|
7 | 7
|
|
(25 rows)
|
|
|
|
-- explain the query to see actual plan. We expect to see aggregation plan for
|
|
-- the outer distinct.
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_orderkey
|
|
ORDER BY 1,2;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Sort
|
|
Sort Key: remote_scan.count, remote_scan.count_1
|
|
-> HashAggregate
|
|
Group Key: remote_scan.count, remote_scan.count_1
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> GroupAggregate
|
|
Group Key: l_orderkey
|
|
-> Sort
|
|
Sort Key: l_orderkey
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(14 rows)
|
|
|
|
-- check the plan if the hash aggregate is disabled. We expect to see sort + unique
|
|
-- plans for the outer distinct.
|
|
SET enable_hashagg TO off;
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_orderkey
|
|
ORDER BY 1,2;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.count, remote_scan.count_1
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> GroupAggregate
|
|
Group Key: l_orderkey
|
|
-> Sort
|
|
Sort Key: l_orderkey
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(13 rows)
|
|
|
|
SET enable_hashagg TO on;
|
|
-- distinct on aggregation with filter and expression
|
|
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey
|
|
ORDER BY 1;
|
|
count
|
|
---------------------------------------------------------------------
|
|
0
|
|
1
|
|
2
|
|
3
|
|
4
|
|
(5 rows)
|
|
|
|
-- explain the query to see actual plan
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey
|
|
ORDER BY 1;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Unique
|
|
-> Sort
|
|
Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))
|
|
-> HashAggregate
|
|
Group Key: remote_scan.worker_column_2
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_suppkey
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(13 rows)
|
|
|
|
-- check the plan if the hash aggregate is disabled
|
|
SET enable_hashagg TO off;
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_suppkey
|
|
ORDER BY 1;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Unique
|
|
-> Sort
|
|
Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))
|
|
-> GroupAggregate
|
|
Group Key: remote_scan.worker_column_2
|
|
-> Sort
|
|
Sort Key: remote_scan.worker_column_2
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_suppkey
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(15 rows)
|
|
|
|
SET enable_hashagg TO on;
|
|
-- explain the query to see actual plan with array_agg aggregation.
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_orderkey
|
|
ORDER BY 2
|
|
LIMIT 15;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Sort
|
|
Sort Key: remote_scan.array_length
|
|
-> HashAggregate
|
|
Group Key: remote_scan.array_length, remote_scan.array_agg
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_orderkey
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(13 rows)
|
|
|
|
-- check the plan if the hash aggregate is disabled.
|
|
SET enable_hashagg TO off;
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
|
|
FROM lineitem_hash_part
|
|
GROUP BY l_orderkey
|
|
ORDER BY 2
|
|
LIMIT 15;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.array_length, remote_scan.array_agg
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_orderkey
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(12 rows)
|
|
|
|
SET enable_hashagg TO on;
|
|
-- distinct on non-partition column with aggregate
|
|
-- this is the same as non-distinct version due to group by
|
|
SELECT DISTINCT l_partkey, count(*)
|
|
FROM lineitem_hash_part
|
|
GROUP BY 1
|
|
HAVING count(*) > 2
|
|
ORDER BY 1;
|
|
l_partkey | count
|
|
---------------------------------------------------------------------
|
|
1051 | 3
|
|
1927 | 3
|
|
6983 | 3
|
|
15283 | 3
|
|
87761 | 3
|
|
136884 | 3
|
|
149926 | 3
|
|
160895 | 3
|
|
177771 | 3
|
|
188804 | 3
|
|
199146 | 3
|
|
(11 rows)
|
|
|
|
-- explain the query to see actual plan
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT l_partkey, count(*)
|
|
FROM lineitem_hash_part
|
|
GROUP BY 1
|
|
HAVING count(*) > 2
|
|
ORDER BY 1;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.l_partkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
|
|
-> HashAggregate
|
|
Group Key: remote_scan.l_partkey
|
|
Filter: (COALESCE((pg_catalog.sum(remote_scan.worker_column_3))::bigint, '0'::bigint) > 2)
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_partkey
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(14 rows)
|
|
|
|
-- distinct on non-partition column and avg
|
|
SELECT DISTINCT l_partkey, avg(l_linenumber)
|
|
FROM lineitem_hash_part
|
|
WHERE l_partkey < 500
|
|
GROUP BY 1
|
|
HAVING avg(l_linenumber) > 2
|
|
ORDER BY 1;
|
|
l_partkey | avg
|
|
---------------------------------------------------------------------
|
|
18 | 7.0000000000000000
|
|
79 | 6.0000000000000000
|
|
149 | 4.5000000000000000
|
|
175 | 5.0000000000000000
|
|
179 | 6.0000000000000000
|
|
182 | 3.0000000000000000
|
|
222 | 4.0000000000000000
|
|
278 | 3.0000000000000000
|
|
299 | 7.0000000000000000
|
|
308 | 7.0000000000000000
|
|
309 | 5.0000000000000000
|
|
321 | 3.0000000000000000
|
|
337 | 6.0000000000000000
|
|
364 | 3.0000000000000000
|
|
403 | 4.0000000000000000
|
|
(15 rows)
|
|
|
|
-- distinct on multiple non-partition columns
|
|
SELECT DISTINCT l_partkey, l_suppkey
|
|
FROM lineitem_hash_part
|
|
WHERE l_shipmode = 'AIR' AND l_orderkey < 100
|
|
ORDER BY 1, 2;
|
|
l_partkey | l_suppkey
|
|
---------------------------------------------------------------------
|
|
2132 | 4633
|
|
4297 | 1798
|
|
37531 | 35
|
|
44161 | 6666
|
|
44706 | 4707
|
|
67831 | 5350
|
|
85811 | 8320
|
|
94368 | 6878
|
|
108338 | 849
|
|
108570 | 8571
|
|
137267 | 4807
|
|
137469 | 9983
|
|
173489 | 3490
|
|
196156 | 1195
|
|
197921 | 441
|
|
(15 rows)
|
|
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT l_partkey, l_suppkey
|
|
FROM lineitem_hash_part
|
|
WHERE l_shipmode = 'AIR' AND l_orderkey < 100
|
|
ORDER BY 1, 2;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Sort
|
|
Sort Key: remote_scan.l_partkey, remote_scan.l_suppkey
|
|
-> HashAggregate
|
|
Group Key: remote_scan.l_partkey, remote_scan.l_suppkey
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: l_partkey, l_suppkey
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
Filter: ((l_orderkey < 100) AND (l_shipmode = 'AIR'::bpchar))
|
|
(14 rows)
|
|
|
|
-- distinct on partition column
|
|
SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey, l_suppkey
|
|
FROM lineitem_hash_part
|
|
WHERE l_orderkey < 35
|
|
ORDER BY 1, 2, 3;
|
|
l_orderkey | l_partkey | l_suppkey
|
|
---------------------------------------------------------------------
|
|
1 | 2132 | 4633
|
|
2 | 106170 | 1191
|
|
3 | 4297 | 1798
|
|
4 | 88035 | 5560
|
|
5 | 37531 | 35
|
|
6 | 139636 | 2150
|
|
7 | 79251 | 1759
|
|
32 | 2743 | 7744
|
|
33 | 33918 | 3919
|
|
34 | 88362 | 871
|
|
(10 rows)
|
|
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey, l_suppkey
|
|
FROM lineitem_hash_part
|
|
WHERE l_orderkey < 35
|
|
ORDER BY 1, 2, 3;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: l_orderkey, l_partkey, l_suppkey
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
Filter: (l_orderkey < 35)
|
|
(13 rows)
|
|
|
|
-- distinct on non-partition column
|
|
-- note order by is required here
|
|
-- otherwise query results will be different since
|
|
-- distinct on clause is on non-partition column
|
|
SELECT DISTINCT ON (l_partkey) l_partkey, l_orderkey
|
|
FROM lineitem_hash_part
|
|
ORDER BY 1,2
|
|
LIMIT 20;
|
|
l_partkey | l_orderkey
|
|
---------------------------------------------------------------------
|
|
18 | 12005
|
|
79 | 5121
|
|
91 | 2883
|
|
149 | 807
|
|
175 | 4102
|
|
179 | 2117
|
|
182 | 548
|
|
195 | 2528
|
|
204 | 10048
|
|
222 | 9413
|
|
245 | 9446
|
|
278 | 1287
|
|
299 | 1122
|
|
308 | 11137
|
|
309 | 2374
|
|
318 | 321
|
|
321 | 5984
|
|
337 | 10403
|
|
350 | 13698
|
|
358 | 4323
|
|
(20 rows)
|
|
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ON (l_partkey) l_partkey, l_orderkey
|
|
FROM lineitem_hash_part
|
|
ORDER BY 1,2
|
|
LIMIT 20;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.l_partkey, remote_scan.l_orderkey
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: l_partkey, l_orderkey
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(14 rows)
|
|
|
|
-- distinct on with joins
|
|
-- each customer's first order key
|
|
SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey
|
|
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
|
|
WHERE o_custkey < 15
|
|
ORDER BY 1,2;
|
|
o_custkey | l_orderkey
|
|
---------------------------------------------------------------------
|
|
1 | 9154
|
|
2 | 10563
|
|
4 | 320
|
|
5 | 11682
|
|
7 | 10402
|
|
8 | 102
|
|
10 | 1602
|
|
11 | 12800
|
|
13 | 994
|
|
14 | 11011
|
|
(10 rows)
|
|
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey
|
|
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
|
|
WHERE o_custkey < 15
|
|
ORDER BY 1,2;
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.o_custkey, remote_scan.l_orderkey
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(5 rows)
|
|
|
|
-- explain without order by
|
|
-- notice that the coordinator plan still sorts on the DISTINCT ON column
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey
|
|
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
|
|
WHERE o_custkey < 15;
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.o_custkey
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(5 rows)
|
|
|
|
-- each customer's each order's first l_partkey
|
|
SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey
|
|
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
|
|
WHERE o_custkey < 20
|
|
ORDER BY 1,2,3;
|
|
o_custkey | l_orderkey | l_linenumber | l_partkey
|
|
---------------------------------------------------------------------
|
|
1 | 9154 | 1 | 86513
|
|
1 | 14656 | 1 | 59539
|
|
2 | 10563 | 1 | 147459
|
|
4 | 320 | 1 | 4415
|
|
4 | 739 | 1 | 84489
|
|
4 | 10688 | 1 | 45037
|
|
4 | 10788 | 1 | 50814
|
|
4 | 13728 | 1 | 86216
|
|
5 | 11682 | 1 | 31634
|
|
5 | 11746 | 1 | 180724
|
|
5 | 14308 | 1 | 157430
|
|
7 | 10402 | 1 | 53661
|
|
7 | 13031 | 1 | 112161
|
|
7 | 14145 | 1 | 138729
|
|
7 | 14404 | 1 | 143034
|
|
8 | 102 | 1 | 88914
|
|
8 | 164 | 1 | 91309
|
|
8 | 13601 | 1 | 40504
|
|
10 | 1602 | 1 | 182806
|
|
10 | 9862 | 1 | 86241
|
|
10 | 11431 | 1 | 62112
|
|
10 | 13124 | 1 | 29414
|
|
11 | 12800 | 1 | 152806
|
|
13 | 994 | 1 | 64486
|
|
13 | 1603 | 1 | 38191
|
|
13 | 4704 | 1 | 77934
|
|
13 | 9927 | 1 | 875
|
|
14 | 11011 | 1 | 172485
|
|
17 | 896 | 1 | 38675
|
|
17 | 5507 | 1 | 9600
|
|
19 | 353 | 1 | 119305
|
|
19 | 1504 | 1 | 81389
|
|
19 | 1669 | 1 | 78373
|
|
19 | 5893 | 1 | 133707
|
|
19 | 9954 | 1 | 92138
|
|
19 | 14885 | 1 | 36154
|
|
(36 rows)
|
|
|
|
-- explain without order by
|
|
SELECT coordinator_plan($Q$
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey
|
|
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
|
|
WHERE o_custkey < 20;
|
|
$Q$);
|
|
coordinator_plan
|
|
---------------------------------------------------------------------
|
|
Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.o_custkey, remote_scan.l_orderkey
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
(5 rows)
|
|
|
|
-- each customer's each order's last l_partkey
|
|
SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey
|
|
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
|
|
WHERE o_custkey < 15
|
|
ORDER BY 1,2,3 DESC;
|
|
o_custkey | l_orderkey | l_linenumber | l_partkey
|
|
---------------------------------------------------------------------
|
|
1 | 9154 | 7 | 173448
|
|
1 | 14656 | 1 | 59539
|
|
2 | 10563 | 4 | 110741
|
|
4 | 320 | 2 | 192158
|
|
4 | 739 | 5 | 187523
|
|
4 | 10688 | 2 | 132574
|
|
4 | 10788 | 4 | 196473
|
|
4 | 13728 | 3 | 12450
|
|
5 | 11682 | 3 | 177152
|
|
5 | 11746 | 7 | 193807
|
|
5 | 14308 | 3 | 140916
|
|
7 | 10402 | 2 | 64514
|
|
7 | 13031 | 6 | 7761
|
|
7 | 14145 | 6 | 130723
|
|
7 | 14404 | 7 | 35349
|
|
8 | 102 | 4 | 61158
|
|
8 | 164 | 7 | 3037
|
|
8 | 13601 | 5 | 12470
|
|
10 | 1602 | 1 | 182806
|
|
10 | 9862 | 5 | 135675
|
|
10 | 11431 | 7 | 8563
|
|
10 | 13124 | 3 | 67055
|
|
11 | 12800 | 5 | 179110
|
|
13 | 994 | 4 | 130471
|
|
13 | 1603 | 2 | 65209
|
|
13 | 4704 | 3 | 63081
|
|
13 | 9927 | 6 | 119356
|
|
14 | 11011 | 7 | 95939
|
|
(28 rows)
|
|
|
|
-- subqueries
|
|
SELECT DISTINCT l_orderkey, l_partkey
|
|
FROM (
|
|
SELECT l_orderkey, l_partkey
|
|
FROM lineitem_hash_part
|
|
) q
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
l_orderkey | l_partkey
|
|
---------------------------------------------------------------------
|
|
1 | 2132
|
|
1 | 15635
|
|
1 | 24027
|
|
1 | 63700
|
|
1 | 67310
|
|
1 | 155190
|
|
2 | 106170
|
|
3 | 4297
|
|
3 | 19036
|
|
3 | 29380
|
|
(10 rows)
|
|
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT l_orderkey, l_partkey
|
|
FROM (
|
|
SELECT l_orderkey, l_partkey
|
|
FROM lineitem_hash_part
|
|
) q
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Sort
|
|
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
|
|
-> HashAggregate
|
|
Group Key: remote_scan.l_orderkey, remote_scan.l_partkey
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> Limit
|
|
-> Sort
|
|
Sort Key: l_orderkey, l_partkey
|
|
-> HashAggregate
|
|
Group Key: l_orderkey, l_partkey
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(16 rows)
|
|
|
|
SELECT DISTINCT l_orderkey, cnt
|
|
FROM (
|
|
SELECT l_orderkey, count(*) as cnt
|
|
FROM lineitem_hash_part
|
|
GROUP BY 1
|
|
) q
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
l_orderkey | cnt
|
|
---------------------------------------------------------------------
|
|
1 | 6
|
|
2 | 1
|
|
3 | 6
|
|
4 | 1
|
|
5 | 3
|
|
6 | 1
|
|
7 | 7
|
|
32 | 6
|
|
33 | 4
|
|
34 | 3
|
|
(10 rows)
|
|
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT l_orderkey, cnt
|
|
FROM (
|
|
SELECT l_orderkey, count(*) as cnt
|
|
FROM lineitem_hash_part
|
|
GROUP BY 1
|
|
) q
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Sort
|
|
Sort Key: remote_scan.l_orderkey, remote_scan.cnt
|
|
-> HashAggregate
|
|
Group Key: remote_scan.l_orderkey, remote_scan.cnt
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> Limit
|
|
-> Sort
|
|
Sort Key: lineitem_hash_part.l_orderkey, (count(*))
|
|
-> HashAggregate
|
|
Group Key: lineitem_hash_part.l_orderkey, count(*)
|
|
-> HashAggregate
|
|
Group Key: lineitem_hash_part.l_orderkey
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(18 rows)
|
|
|
|
-- distinct on partition column
|
|
-- random() is added to inner query to prevent flattening
|
|
SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey
|
|
FROM (
|
|
SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r
|
|
FROM lineitem_hash_part
|
|
) q
|
|
WHERE r > 1
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
l_orderkey | l_partkey
|
|
---------------------------------------------------------------------
|
|
1 | 2132
|
|
2 | 106170
|
|
3 | 4297
|
|
4 | 88035
|
|
5 | 37531
|
|
6 | 139636
|
|
7 | 79251
|
|
32 | 2743
|
|
33 | 33918
|
|
34 | 88362
|
|
(10 rows)
|
|
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey
|
|
FROM (
|
|
SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r
|
|
FROM lineitem_hash_part
|
|
) q
|
|
WHERE r > 1
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: q.l_orderkey, q.l_partkey
|
|
-> Subquery Scan on q
|
|
Filter: (q.r > 1)
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(16 rows)
|
|
|
|
-- distinct on non-partition column
|
|
SELECT DISTINCT ON (l_partkey) l_orderkey, l_partkey
|
|
FROM (
|
|
SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r
|
|
FROM lineitem_hash_part
|
|
) q
|
|
WHERE r > 1
|
|
ORDER BY 2,1
|
|
LIMIT 10;
|
|
l_orderkey | l_partkey
|
|
---------------------------------------------------------------------
|
|
12005 | 18
|
|
5121 | 79
|
|
2883 | 91
|
|
807 | 149
|
|
4102 | 175
|
|
2117 | 179
|
|
548 | 182
|
|
2528 | 195
|
|
10048 | 204
|
|
9413 | 222
|
|
(10 rows)
|
|
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ON (l_partkey) l_orderkey, l_partkey
|
|
FROM (
|
|
SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r
|
|
FROM lineitem_hash_part
|
|
) q
|
|
WHERE r > 1
|
|
ORDER BY 2,1
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.l_partkey, remote_scan.l_orderkey
|
|
-> Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: q.l_partkey, q.l_orderkey
|
|
-> Subquery Scan on q
|
|
Filter: (q.r > 1)
|
|
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
|
|
(16 rows)
|
|
|