citus/src/test/regress/expected/multi_select_distinct.out

1566 lines
59 KiB
Plaintext

--
-- MULTI_SELECT_DISTINCT
--
-- Tests select distinct, and select distinct on features.
--
ANALYZE lineitem_hash_part;
-- function calls are supported
SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0;
l_orderkey | now
---------------------------------------------------------------------
(0 rows)
SELECT DISTINCT l_orderkey, avg(l_linenumber)
FROM lineitem_hash_part
GROUP BY l_orderkey
HAVING avg(l_linenumber) = (select avg(distinct l_linenumber))
LIMIT 10;
ERROR: Subqueries in HAVING cannot refer to outer query
SELECT DISTINCT l_orderkey
FROM lineitem_hash_part
GROUP BY l_orderkey
HAVING (select avg(distinct l_linenumber) = l_orderkey)
LIMIT 10;
ERROR: Subqueries in HAVING cannot refer to outer query
SELECT DISTINCT l_partkey, 1 + (random() * 0)::int FROM lineitem_hash_part ORDER BY 1 DESC LIMIT 3;
l_partkey | ?column?
---------------------------------------------------------------------
199973 | 1
199946 | 1
199943 | 1
(3 rows)
-- const expressions are supported
SELECT DISTINCT l_orderkey, 1+1 FROM lineitem_hash_part ORDER BY 1 LIMIT 5;
l_orderkey | ?column?
---------------------------------------------------------------------
1 | 2
2 | 2
3 | 2
4 | 2
5 | 2
(5 rows)
-- non const expressions are also supported
SELECT DISTINCT l_orderkey, l_partkey + 1 FROM lineitem_hash_part ORDER BY 1, 2 LIMIT 5;
l_orderkey | ?column?
---------------------------------------------------------------------
1 | 2133
1 | 15636
1 | 24028
1 | 63701
1 | 67311
(5 rows)
-- column expressions are supported
SELECT DISTINCT l_orderkey, l_shipinstruct || l_shipmode FROM lineitem_hash_part ORDER BY 2 , 1 LIMIT 5;
l_orderkey | ?column?
---------------------------------------------------------------------
32 | COLLECT CODAIR
39 | COLLECT CODAIR
66 | COLLECT CODAIR
70 | COLLECT CODAIR
98 | COLLECT CODAIR
(5 rows)
-- function calls with const input are supported
SELECT DISTINCT l_orderkey, strpos('AIR', 'A') FROM lineitem_hash_part ORDER BY 1,2 LIMIT 5;
l_orderkey | strpos
---------------------------------------------------------------------
1 | 1
2 | 1
3 | 1
4 | 1
5 | 1
(5 rows)
-- function calls with non-const input are supported
SELECT DISTINCT l_orderkey, strpos(l_shipmode, 'I')
FROM lineitem_hash_part
WHERE strpos(l_shipmode, 'I') > 1
ORDER BY 2, 1
LIMIT 5;
l_orderkey | strpos
---------------------------------------------------------------------
1 | 2
3 | 2
5 | 2
32 | 2
33 | 2
(5 rows)
-- row types are supported
SELECT DISTINCT (l_orderkey, l_partkey) AS pair FROM lineitem_hash_part ORDER BY 1 LIMIT 5;
pair
---------------------------------------------------------------------
(1,2132)
(1,15635)
(1,24027)
(1,63700)
(1,67310)
(5 rows)
-- distinct on partition column
-- verify counts match with respect to count(distinct)
CREATE TEMP TABLE temp_orderkeys AS SELECT DISTINCT l_orderkey FROM lineitem_hash_part;
SELECT COUNT(*) FROM temp_orderkeys;
count
---------------------------------------------------------------------
2985
(1 row)
SELECT COUNT(DISTINCT l_orderkey) FROM lineitem_hash_part;
count
---------------------------------------------------------------------
2985
(1 row)
SELECT DISTINCT l_orderkey FROM lineitem_hash_part WHERE l_orderkey < 500 and l_partkey < 5000 order by 1;
l_orderkey
---------------------------------------------------------------------
1
3
32
35
39
65
129
130
134
164
194
228
261
290
320
321
354
418
(18 rows)
-- distinct on non-partition column
SELECT DISTINCT l_partkey FROM lineitem_hash_part WHERE l_orderkey > 5 and l_orderkey < 20 order by 1;
l_partkey
---------------------------------------------------------------------
79251
94780
139636
145243
151894
157238
163073
182052
(8 rows)
SELECT DISTINCT l_shipmode FROM lineitem_hash_part ORDER BY 1 DESC;
l_shipmode
---------------------------------------------------------------------
TRUCK
SHIP
REG AIR
RAIL
MAIL
FOB
AIR
(7 rows)
-- distinct with multiple columns
SELECT DISTINCT l_orderkey, o_orderdate
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
WHERE l_orderkey < 10
ORDER BY l_orderkey;
l_orderkey | o_orderdate
---------------------------------------------------------------------
1 | 01-02-1996
2 | 12-01-1996
3 | 10-14-1993
4 | 10-11-1995
5 | 07-30-1994
6 | 02-21-1992
7 | 01-10-1996
(7 rows)
-- distinct on partition column with aggregate
-- this is the same as the one without distinct due to group by
SELECT DISTINCT l_orderkey, count(*)
FROM lineitem_hash_part
WHERE l_orderkey < 200
GROUP BY 1
HAVING count(*) > 5
ORDER BY 2 DESC, 1;
l_orderkey | count
---------------------------------------------------------------------
7 | 7
68 | 7
129 | 7
164 | 7
194 | 7
1 | 6
3 | 6
32 | 6
35 | 6
39 | 6
67 | 6
69 | 6
70 | 6
71 | 6
134 | 6
135 | 6
163 | 6
192 | 6
197 | 6
(19 rows)
-- explain the query to see actual plan
EXPLAIN (COSTS FALSE)
SELECT DISTINCT l_orderkey, count(*)
FROM lineitem_hash_part
WHERE l_orderkey < 200
GROUP BY 1
HAVING count(*) > 5
ORDER BY 2 DESC, 1;
QUERY PLAN
---------------------------------------------------------------------
Sort
Sort Key: remote_scan.count DESC, remote_scan.l_orderkey
-> HashAggregate
Group Key: remote_scan.count, remote_scan.l_orderkey
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_orderkey
Filter: (count(*) > 5)
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
Filter: (l_orderkey < 200)
(14 rows)
-- check the plan if the hash aggregate is disabled
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
SELECT DISTINCT l_orderkey, count(*)
FROM lineitem_hash_part
WHERE l_orderkey < 200
GROUP BY 1
HAVING count(*) > 5
ORDER BY 2 DESC, 1;
QUERY PLAN
---------------------------------------------------------------------
Unique
-> Sort
Sort Key: remote_scan.count DESC, remote_scan.l_orderkey
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_orderkey
Filter: (count(*) > 5)
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
Filter: (l_orderkey < 200)
(13 rows)
SET enable_hashagg TO on;
-- distinct on aggregate of group by columns; we check whether we properly handle
-- queries that do not have any group by column among the distinct columns.
SELECT DISTINCT count(*)
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY 1;
count
---------------------------------------------------------------------
1
2
3
4
(4 rows)
-- explain the query to see actual plan. We expect to see an extra step that
-- deduplicates on the count(*) column (Unique over Sort in the plan below), since
-- the columns in the Group By don't guarantee the uniqueness of the result.
EXPLAIN (COSTS FALSE)
SELECT DISTINCT count(*)
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY 1;
QUERY PLAN
---------------------------------------------------------------------
Unique
-> Sort
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
-> HashAggregate
Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_suppkey, l_linenumber
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(13 rows)
-- check the plan if the hash aggregate is disabled. We expect to see sort+unique
-- instead of aggregate plan node to handle distinct.
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
SELECT DISTINCT count(*)
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY 1;
QUERY PLAN
---------------------------------------------------------------------
Unique
-> Sort
Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
-> GroupAggregate
Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3
-> Sort
Sort Key: remote_scan.worker_column_2, remote_scan.worker_column_3
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_suppkey, l_linenumber
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(15 rows)
SET enable_hashagg TO on;
-- Now we have only part of group clause columns in distinct, yet it is still not
-- enough to use Group By columns to guarantee uniqueness of result list.
SELECT DISTINCT l_suppkey, count(*)
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY 1
LIMIT 10;
l_suppkey | count
---------------------------------------------------------------------
1 | 1
2 | 1
3 | 1
4 | 1
5 | 1
7 | 1
10 | 1
12 | 1
13 | 1
14 | 1
(10 rows)
-- explain the query to see actual plan. Similar to the explain of the query above.
EXPLAIN (COSTS FALSE)
SELECT DISTINCT l_suppkey, count(*)
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY 1
LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: remote_scan.l_suppkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
-> HashAggregate
Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_suppkey, l_linenumber
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(14 rows)
-- check the plan if the hash aggregate is disabled. Similar to the explain of
-- the query above.
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
SELECT DISTINCT l_suppkey, count(*)
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY 1
LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: remote_scan.l_suppkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
-> GroupAggregate
Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3
-> Sort
Sort Key: remote_scan.l_suppkey, remote_scan.worker_column_3
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_suppkey, l_linenumber
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)
SET enable_hashagg TO on;
-- Similar to the above query, but with avg instead of count. The only difference
-- from the above query is that we run two aggregate functions in workers.
SELECT DISTINCT l_suppkey, avg(l_partkey)
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY 1,2
LIMIT 10;
l_suppkey | avg
---------------------------------------------------------------------
1 | 190000.000000000000
2 | 172450.000000000000
3 | 112469.000000000000
3 | 134976.000000000000
4 | 112470.000000000000
4 | 142461.000000000000
5 | 182450.000000000000
7 | 137493.000000000000
10 | 150009.000000000000
12 | 17510.0000000000000000
(10 rows)
-- explain the query to see actual plan. Similar to the explain of the query above.
-- Only aggregate functions will be changed.
EXPLAIN (COSTS FALSE)
SELECT DISTINCT l_suppkey, avg(l_partkey)
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY 1,2
LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
-> HashAggregate
Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_suppkey, l_linenumber
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(14 rows)
-- check the plan if the hash aggregate is disabled. The explain is expected to
-- succeed, with a GroupAggregate replacing the HashAggregate above the Custom Scan.
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
SELECT DISTINCT l_suppkey, avg(l_partkey)
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY 1,2
LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
-> GroupAggregate
Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4
-> Sort
Sort Key: remote_scan.l_suppkey, remote_scan.worker_column_4
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_suppkey, l_linenumber
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)
SET enable_hashagg TO on;
-- Similar to the above query but with distinct on
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY l_suppkey,1
LIMIT 10;
avg
---------------------------------------------------------------------
190000.000000000000
172450.000000000000
112469.000000000000
112470.000000000000
182450.000000000000
137493.000000000000
150009.000000000000
17510.0000000000000000
87504.000000000000
77506.000000000000
(10 rows)
-- explain the query to see actual plan. We expect to see sort+unique to handle
-- distinct on.
EXPLAIN (COSTS FALSE)
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY l_suppkey,1
LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: remote_scan.worker_column_3, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
-> HashAggregate
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_suppkey, l_linenumber
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(14 rows)
-- check the plan if the hash aggregate is disabled. We expect to see sort+unique to
-- handle distinct on.
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
SELECT DISTINCT ON (l_suppkey) avg(l_partkey)
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY l_suppkey,1
LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: remote_scan.worker_column_3, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)))
-> GroupAggregate
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
-> Sort
Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_suppkey, l_linenumber
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)
SET enable_hashagg TO on;
-- distinct with expression and aggregation
SELECT DISTINCT avg(ceil(l_partkey / 2))
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY 1
LIMIT 10;
avg
---------------------------------------------------------------------
9
39
74
87
89
91
97
102
111
122
(10 rows)
-- explain the query to see actual plan
EXPLAIN (COSTS FALSE)
SELECT DISTINCT avg(ceil(l_partkey / 2))
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY 1
LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision))
-> HashAggregate
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_suppkey, l_linenumber
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(14 rows)
-- check the plan if the hash aggregate is disabled. The explain is expected to
-- succeed, with a GroupAggregate replacing the HashAggregate above the Custom Scan.
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
SELECT DISTINCT avg(ceil(l_partkey / 2))
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY 1
LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision))
-> GroupAggregate
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
-> Sort
Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_suppkey, l_linenumber
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)
SET enable_hashagg TO on;
-- expression among aggregations.
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY 1
LIMIT 10;
dis
---------------------------------------------------------------------
2
3
4
5
6
8
11
13
14
15
(10 rows)
-- explain the query to see actual plan
EXPLAIN (COSTS FALSE)
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY 1
LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint)))
-> HashAggregate
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_suppkey, l_linenumber
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(14 rows)
-- check the plan if the hash aggregate is disabled. The explain is expected to
-- succeed, with a GroupAggregate replacing the HashAggregate above the Custom Scan.
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis
FROM lineitem_hash_part
GROUP BY l_suppkey, l_linenumber
ORDER BY 1
LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint)))
-> GroupAggregate
Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4
-> Sort
Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_suppkey, l_linenumber
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)
SET enable_hashagg TO on;
-- distinct on all columns, note Group By columns guarantee uniqueness of the
-- result list.
SELECT DISTINCT *
FROM lineitem_hash_part
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
ORDER BY 1,2
LIMIT 10;
l_orderkey | l_partkey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate | l_shipinstruct | l_shipmode | l_comment
---------------------------------------------------------------------
1 | 2132 | 4633 | 4 | 28.00 | 28955.64 | 0.09 | 0.06 | N | O | 04-21-1996 | 03-30-1996 | 05-16-1996 | NONE | AIR | lites. fluffily even de
1 | 15635 | 638 | 6 | 32.00 | 49620.16 | 0.07 | 0.02 | N | O | 01-30-1996 | 02-07-1996 | 02-03-1996 | DELIVER IN PERSON | MAIL | arefully slyly ex
1 | 24027 | 1534 | 5 | 24.00 | 22824.48 | 0.10 | 0.04 | N | O | 03-30-1996 | 03-14-1996 | 04-01-1996 | NONE | FOB | pending foxes. slyly re
1 | 63700 | 3701 | 3 | 8.00 | 13309.60 | 0.10 | 0.02 | N | O | 01-29-1996 | 03-05-1996 | 01-31-1996 | TAKE BACK RETURN | REG AIR | riously. regular, express dep
1 | 67310 | 7311 | 2 | 36.00 | 45983.16 | 0.09 | 0.06 | N | O | 04-12-1996 | 02-28-1996 | 04-20-1996 | TAKE BACK RETURN | MAIL | ly final dependencies: slyly bold
1 | 155190 | 7706 | 1 | 17.00 | 21168.23 | 0.04 | 0.02 | N | O | 03-13-1996 | 02-12-1996 | 03-22-1996 | DELIVER IN PERSON | TRUCK | egular courts above the
2 | 106170 | 1191 | 1 | 38.00 | 44694.46 | 0.00 | 0.05 | N | O | 01-28-1997 | 01-14-1997 | 02-02-1997 | TAKE BACK RETURN | RAIL | ven requests. deposits breach a
3 | 4297 | 1798 | 1 | 45.00 | 54058.05 | 0.06 | 0.00 | R | F | 02-02-1994 | 01-04-1994 | 02-23-1994 | NONE | AIR | ongside of the furiously brave acco
3 | 19036 | 6540 | 2 | 49.00 | 46796.47 | 0.10 | 0.00 | R | F | 11-09-1993 | 12-20-1993 | 11-24-1993 | TAKE BACK RETURN | RAIL | unusual accounts. eve
3 | 29380 | 1883 | 4 | 2.00 | 2618.76 | 0.01 | 0.06 | A | F | 12-04-1993 | 01-07-1994 | 01-01-1994 | NONE | TRUCK | y. fluffily pending d
(10 rows)
-- explain the query to see actual plan. We expect to see only one aggregation
-- node since group by columns guarantee the uniqueness.
EXPLAIN (COSTS FALSE)
SELECT DISTINCT *
FROM lineitem_hash_part
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
ORDER BY 1,2
LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Sort
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
-> HashAggregate
Group Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Limit
-> Unique
-> Group
Group Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
-> Sort
Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(17 rows)
-- check the plan if the hash aggregate is disabled. We expect to see only one
-- aggregation node since group by columns guarantee the uniqueness.
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
SELECT DISTINCT *
FROM lineitem_hash_part
GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
ORDER BY 1,2
LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Limit
-> Unique
-> Group
Group Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
-> Sort
Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)
SET enable_hashagg TO on;
-- distinct on count distinct
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
FROM lineitem_hash_part
GROUP BY l_orderkey
ORDER BY 1,2;
count | count
---------------------------------------------------------------------
1 | 1
2 | 1
2 | 2
3 | 1
3 | 2
3 | 3
4 | 1
4 | 2
4 | 3
4 | 4
5 | 2
5 | 3
5 | 4
5 | 5
6 | 2
6 | 3
6 | 4
6 | 5
6 | 6
7 | 2
7 | 3
7 | 4
7 | 5
7 | 6
7 | 7
(25 rows)
-- explain the query to see actual plan. We expect to see aggregation plan for
-- the outer distinct.
EXPLAIN (COSTS FALSE)
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
FROM lineitem_hash_part
GROUP BY l_orderkey
ORDER BY 1,2;
QUERY PLAN
---------------------------------------------------------------------
Sort
Sort Key: remote_scan.count, remote_scan.count_1
-> HashAggregate
Group Key: remote_scan.count, remote_scan.count_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> GroupAggregate
Group Key: l_orderkey
-> Sort
Sort Key: l_orderkey
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(14 rows)
-- check the plan if the hash aggregate is disabled. We expect to see sort + unique
-- plans for the outer distinct.
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
FROM lineitem_hash_part
GROUP BY l_orderkey
ORDER BY 1,2;
QUERY PLAN
---------------------------------------------------------------------
Unique
-> Sort
Sort Key: remote_scan.count, remote_scan.count_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> GroupAggregate
Group Key: l_orderkey
-> Sort
Sort Key: l_orderkey
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(13 rows)
SET enable_hashagg TO on;
-- distinct on aggregation with filter and expression
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
FROM lineitem_hash_part
GROUP BY l_suppkey
ORDER BY 1;
count
---------------------------------------------------------------------
0
1
2
3
4
(5 rows)
-- explain the query to see actual plan
EXPLAIN (COSTS FALSE)
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
FROM lineitem_hash_part
GROUP BY l_suppkey
ORDER BY 1;
QUERY PLAN
---------------------------------------------------------------------
Unique
-> Sort
Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))
-> HashAggregate
Group Key: remote_scan.worker_column_2
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_suppkey
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(13 rows)
-- check the plan if the hash aggregate is disabled
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count
FROM lineitem_hash_part
GROUP BY l_suppkey
ORDER BY 1;
QUERY PLAN
---------------------------------------------------------------------
Unique
-> Sort
Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision))
-> GroupAggregate
Group Key: remote_scan.worker_column_2
-> Sort
Sort Key: remote_scan.worker_column_2
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_suppkey
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(15 rows)
SET enable_hashagg TO on;
-- explain the query to see actual plan with array_agg aggregation.
EXPLAIN (COSTS FALSE)
SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
FROM lineitem_hash_part
GROUP BY l_orderkey
ORDER BY 2
LIMIT 15;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Sort
Sort Key: remote_scan.array_length
-> HashAggregate
Group Key: remote_scan.array_length, remote_scan.array_agg
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_orderkey
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(13 rows)
-- check the plan if the hash aggregate is disabled.
SET enable_hashagg TO off;
EXPLAIN (COSTS FALSE)
SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1)
FROM lineitem_hash_part
GROUP BY l_orderkey
ORDER BY 2
LIMIT 15;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: remote_scan.array_length, remote_scan.array_agg
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_orderkey
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(12 rows)
SET enable_hashagg TO on;
-- distinct on non-partition column with aggregate
-- this is the same as non-distinct version due to group by
SELECT DISTINCT l_partkey, count(*)
FROM lineitem_hash_part
GROUP BY 1
HAVING count(*) > 2
ORDER BY 1;
l_partkey | count
---------------------------------------------------------------------
1051 | 3
1927 | 3
6983 | 3
15283 | 3
87761 | 3
136884 | 3
149926 | 3
160895 | 3
177771 | 3
188804 | 3
199146 | 3
(11 rows)
-- explain the query to see actual plan
EXPLAIN (COSTS FALSE)
SELECT DISTINCT l_partkey, count(*)
FROM lineitem_hash_part
GROUP BY 1
HAVING count(*) > 2
ORDER BY 1;
QUERY PLAN
---------------------------------------------------------------------
Unique
-> Sort
Sort Key: remote_scan.l_partkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
-> HashAggregate
Group Key: remote_scan.l_partkey
Filter: (COALESCE((pg_catalog.sum(remote_scan.worker_column_3))::bigint, '0'::bigint) > 2)
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: l_partkey
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(14 rows)
-- distinct on non-partition column and avg
SELECT DISTINCT l_partkey, avg(l_linenumber)
FROM lineitem_hash_part
WHERE l_partkey < 500
GROUP BY 1
HAVING avg(l_linenumber) > 2
ORDER BY 1;
l_partkey | avg
---------------------------------------------------------------------
18 | 7.0000000000000000
79 | 6.0000000000000000
149 | 4.5000000000000000
175 | 5.0000000000000000
179 | 6.0000000000000000
182 | 3.0000000000000000
222 | 4.0000000000000000
278 | 3.0000000000000000
299 | 7.0000000000000000
308 | 7.0000000000000000
309 | 5.0000000000000000
321 | 3.0000000000000000
337 | 6.0000000000000000
364 | 3.0000000000000000
403 | 4.0000000000000000
(15 rows)
-- distinct on multiple non-partition columns
SELECT DISTINCT l_partkey, l_suppkey
FROM lineitem_hash_part
WHERE l_shipmode = 'AIR' AND l_orderkey < 100
ORDER BY 1, 2;
l_partkey | l_suppkey
---------------------------------------------------------------------
2132 | 4633
4297 | 1798
37531 | 35
44161 | 6666
44706 | 4707
67831 | 5350
85811 | 8320
94368 | 6878
108338 | 849
108570 | 8571
137267 | 4807
137469 | 9983
173489 | 3490
196156 | 1195
197921 | 441
(15 rows)
EXPLAIN (COSTS FALSE)
SELECT DISTINCT l_partkey, l_suppkey
FROM lineitem_hash_part
WHERE l_shipmode = 'AIR' AND l_orderkey < 100
ORDER BY 1, 2;
QUERY PLAN
---------------------------------------------------------------------
Sort
Sort Key: remote_scan.l_partkey, remote_scan.l_suppkey
-> HashAggregate
Group Key: remote_scan.l_partkey, remote_scan.l_suppkey
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Unique
-> Sort
Sort Key: l_partkey, l_suppkey
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
Filter: ((l_orderkey < 100) AND (l_shipmode = 'AIR'::bpchar))
(14 rows)
-- distinct on partition column
-- DISTINCT ON (l_orderkey) keeps a single row per l_orderkey; ORDER BY 1, 2, 3
-- makes that row the one with the smallest (l_partkey, l_suppkey) pair.
SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey, l_suppkey
FROM lineitem_hash_part
WHERE l_orderkey < 35
ORDER BY 1, 2, 3;
l_orderkey | l_partkey | l_suppkey
---------------------------------------------------------------------
1 | 2132 | 4633
2 | 106170 | 1191
3 | 4297 | 1798
4 | 88035 | 5560
5 | 37531 | 35
6 | 139636 | 2150
7 | 79251 | 1759
32 | 2743 | 7744
33 | 33918 | 3919
34 | 88362 | 871
(10 rows)
-- plan for the DISTINCT ON (l_orderkey) query: the recorded plan applies
-- Sort + Unique inside each task and again above the Custom Scan.
EXPLAIN (COSTS FALSE)
SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey, l_suppkey
FROM lineitem_hash_part
WHERE l_orderkey < 35
ORDER BY 1, 2, 3;
QUERY PLAN
---------------------------------------------------------------------
Unique
-> Sort
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Unique
-> Sort
Sort Key: l_orderkey, l_partkey, l_suppkey
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
Filter: (l_orderkey < 35)
(13 rows)
-- distinct on non-partition column
-- note: ORDER BY is required here. Without it the l_orderkey kept for each
-- l_partkey is not fixed, so results could differ between runs, because the
-- DISTINCT ON clause is on a non-partition column.
-- With ORDER BY 1,2 the row kept per l_partkey is the one with the smallest
-- l_orderkey.
SELECT DISTINCT ON (l_partkey) l_partkey, l_orderkey
FROM lineitem_hash_part
ORDER BY 1,2
LIMIT 20;
l_partkey | l_orderkey
---------------------------------------------------------------------
18 | 12005
79 | 5121
91 | 2883
149 | 807
175 | 4102
179 | 2117
182 | 548
195 | 2528
204 | 10048
222 | 9413
245 | 9446
278 | 1287
299 | 1122
308 | 11137
309 | 2374
318 | 321
321 | 5984
337 | 10403
350 | 13698
358 | 4323
(20 rows)
-- plan for the DISTINCT ON (l_partkey) ... LIMIT 20 query: the recorded plan
-- has Sort, Unique and Limit inside each task, and the same three steps again
-- above the Custom Scan.
EXPLAIN (COSTS FALSE)
SELECT DISTINCT ON (l_partkey) l_partkey, l_orderkey
FROM lineitem_hash_part
ORDER BY 1,2
LIMIT 20;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: remote_scan.l_partkey, remote_scan.l_orderkey
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Limit
-> Unique
-> Sort
Sort Key: l_partkey, l_orderkey
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(14 rows)
-- distinct on with joins
-- each customer's first order key: one row per o_custkey, and because of
-- ORDER BY 1,2 the l_orderkey kept is the smallest one for that customer
SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
WHERE o_custkey < 15
ORDER BY 1,2;
o_custkey | l_orderkey
---------------------------------------------------------------------
1 | 9154
2 | 10563
4 | 320
5 | 11682
7 | 10402
8 | 102
10 | 1602
11 | 12800
13 | 994
14 | 11011
(10 rows)
-- plan for the join query above, requested through coordinator_plan(); the
-- recorded output stops at the Custom Scan's "Task Count" line and contains no
-- per-task plans.
-- NOTE(review): coordinator_plan() is defined outside this section; presumably
-- it truncates the EXPLAIN output after the task count -- confirm against its
-- definition.
SELECT coordinator_plan($Q$
EXPLAIN (COSTS FALSE)
SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
WHERE o_custkey < 15
ORDER BY 1,2;
$Q$);
coordinator_plan
---------------------------------------------------------------------
Unique
-> Sort
Sort Key: remote_scan.o_custkey, remote_scan.l_orderkey
-> Custom Scan (Citus Adaptive)
Task Count: 4
(5 rows)
-- explain without order by
-- notice that the coordinator plan still sorts on the DISTINCT ON column
-- (Sort Key: remote_scan.o_custkey in the recorded output) even though the
-- query itself has no ORDER BY
SELECT coordinator_plan($Q$
EXPLAIN (COSTS FALSE)
SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
WHERE o_custkey < 15;
$Q$);
coordinator_plan
---------------------------------------------------------------------
Unique
-> Sort
Sort Key: remote_scan.o_custkey
-> Custom Scan (Citus Adaptive)
Task Count: 4
(5 rows)
-- for every (customer, order) pair, the l_partkey of that order's first line:
-- DISTINCT ON (o_custkey, l_orderkey) keeps one row per pair and ORDER BY 1,2,3
-- makes it the row with the lowest l_linenumber
SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
WHERE o_custkey < 20
ORDER BY 1,2,3;
o_custkey | l_orderkey | l_linenumber | l_partkey
---------------------------------------------------------------------
1 | 9154 | 1 | 86513
1 | 14656 | 1 | 59539
2 | 10563 | 1 | 147459
4 | 320 | 1 | 4415
4 | 739 | 1 | 84489
4 | 10688 | 1 | 45037
4 | 10788 | 1 | 50814
4 | 13728 | 1 | 86216
5 | 11682 | 1 | 31634
5 | 11746 | 1 | 180724
5 | 14308 | 1 | 157430
7 | 10402 | 1 | 53661
7 | 13031 | 1 | 112161
7 | 14145 | 1 | 138729
7 | 14404 | 1 | 143034
8 | 102 | 1 | 88914
8 | 164 | 1 | 91309
8 | 13601 | 1 | 40504
10 | 1602 | 1 | 182806
10 | 9862 | 1 | 86241
10 | 11431 | 1 | 62112
10 | 13124 | 1 | 29414
11 | 12800 | 1 | 152806
13 | 994 | 1 | 64486
13 | 1603 | 1 | 38191
13 | 4704 | 1 | 77934
13 | 9927 | 1 | 875
14 | 11011 | 1 | 172485
17 | 896 | 1 | 38675
17 | 5507 | 1 | 9600
19 | 353 | 1 | 119305
19 | 1504 | 1 | 81389
19 | 1669 | 1 | 78373
19 | 5893 | 1 | 133707
19 | 9954 | 1 | 92138
19 | 14885 | 1 | 36154
(36 rows)
-- explain without order by
-- with two DISTINCT ON columns and no ORDER BY, the recorded coordinator plan
-- sorts on both of them (remote_scan.o_custkey, remote_scan.l_orderkey)
SELECT coordinator_plan($Q$
EXPLAIN (COSTS FALSE)
SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
WHERE o_custkey < 20;
$Q$);
coordinator_plan
---------------------------------------------------------------------
Unique
-> Sort
Sort Key: remote_scan.o_custkey, remote_scan.l_orderkey
-> Custom Scan (Citus Adaptive)
Task Count: 4
(5 rows)
-- for every (customer, order) pair, the l_partkey of that order's last line:
-- DESC applies only to the third sort column, so the row kept per pair is the
-- one with the highest l_linenumber
-- (this variant filters on o_custkey < 15, not < 20 as the "first line" query)
SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
WHERE o_custkey < 15
ORDER BY 1,2,3 DESC;
o_custkey | l_orderkey | l_linenumber | l_partkey
---------------------------------------------------------------------
1 | 9154 | 7 | 173448
1 | 14656 | 1 | 59539
2 | 10563 | 4 | 110741
4 | 320 | 2 | 192158
4 | 739 | 5 | 187523
4 | 10688 | 2 | 132574
4 | 10788 | 4 | 196473
4 | 13728 | 3 | 12450
5 | 11682 | 3 | 177152
5 | 11746 | 7 | 193807
5 | 14308 | 3 | 140916
7 | 10402 | 2 | 64514
7 | 13031 | 6 | 7761
7 | 14145 | 6 | 130723
7 | 14404 | 7 | 35349
8 | 102 | 4 | 61158
8 | 164 | 7 | 3037
8 | 13601 | 5 | 12470
10 | 1602 | 1 | 182806
10 | 9862 | 5 | 135675
10 | 11431 | 7 | 8563
10 | 13124 | 3 | 67055
11 | 12800 | 5 | 179110
13 | 994 | 4 | 130471
13 | 1603 | 2 | 65209
13 | 4704 | 3 | 63081
13 | 9927 | 6 | 119356
14 | 11011 | 7 | 95939
(28 rows)
-- subqueries
-- DISTINCT applied on top of a subquery that only projects two columns of
-- lineitem_hash_part, with no filter or aggregate inside it
SELECT DISTINCT l_orderkey, l_partkey
FROM (
SELECT l_orderkey, l_partkey
FROM lineitem_hash_part
) q
ORDER BY 1,2
LIMIT 10;
l_orderkey | l_partkey
---------------------------------------------------------------------
1 | 2132
1 | 15635
1 | 24027
1 | 63700
1 | 67310
1 | 155190
2 | 106170
3 | 4297
3 | 19036
3 | 29380
(10 rows)
-- plan for the DISTINCT-over-projection-subquery query: the recorded task plan
-- scans the shard table directly (no Subquery Scan node) and applies
-- HashAggregate, Sort and Limit; HashAggregate, Sort and Limit are applied
-- again above the Custom Scan.
EXPLAIN (COSTS FALSE)
SELECT DISTINCT l_orderkey, l_partkey
FROM (
SELECT l_orderkey, l_partkey
FROM lineitem_hash_part
) q
ORDER BY 1,2
LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Sort
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
-> HashAggregate
Group Key: remote_scan.l_orderkey, remote_scan.l_partkey
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Limit
-> Sort
Sort Key: l_orderkey, l_partkey
-> HashAggregate
Group Key: l_orderkey, l_partkey
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)
-- DISTINCT on top of an aggregating subquery: cnt is the number of
-- lineitem_hash_part rows per l_orderkey
SELECT DISTINCT l_orderkey, cnt
FROM (
SELECT l_orderkey, count(*) as cnt
FROM lineitem_hash_part
GROUP BY 1
) q
ORDER BY 1,2
LIMIT 10;
l_orderkey | cnt
---------------------------------------------------------------------
1 | 6
2 | 1
3 | 6
4 | 1
5 | 3
6 | 1
7 | 7
32 | 6
33 | 4
34 | 3
(10 rows)
-- plan for the DISTINCT-over-aggregating-subquery query: the recorded task
-- plan contains two HashAggregate nodes, one grouping by l_orderkey (the
-- subquery's GROUP BY) and one grouping by (l_orderkey, count(*)) (the
-- DISTINCT).
EXPLAIN (COSTS FALSE)
SELECT DISTINCT l_orderkey, cnt
FROM (
SELECT l_orderkey, count(*) as cnt
FROM lineitem_hash_part
GROUP BY 1
) q
ORDER BY 1,2
LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Sort
Sort Key: remote_scan.l_orderkey, remote_scan.cnt
-> HashAggregate
Group Key: remote_scan.l_orderkey, remote_scan.cnt
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Limit
-> Sort
Sort Key: lineitem_hash_part.l_orderkey, (count(*))
-> HashAggregate
Group Key: lineitem_hash_part.l_orderkey, count(*)
-> HashAggregate
Group Key: lineitem_hash_part.l_orderkey
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(18 rows)
-- distinct on partition column
-- random() is added to the inner query to prevent flattening of the subquery.
-- r = (random()*10)::int + 2 is always at least 2, so WHERE r > 1 never
-- removes a row and the recorded result stays deterministic.
SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey
FROM (
SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r
FROM lineitem_hash_part
) q
WHERE r > 1
ORDER BY 1,2
LIMIT 10;
l_orderkey | l_partkey
---------------------------------------------------------------------
1 | 2132
2 | 106170
3 | 4297
4 | 88035
5 | 37531
6 | 139636
7 | 79251
32 | 2743
33 | 33918
34 | 88362
(10 rows)
-- plan for the DISTINCT ON (l_orderkey) subquery case: the recorded task plan
-- keeps a "Subquery Scan on q" node with Filter (q.r > 1), i.e. the subquery
-- was not flattened into the outer query.
EXPLAIN (COSTS FALSE)
SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey
FROM (
SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r
FROM lineitem_hash_part
) q
WHERE r > 1
ORDER BY 1,2
LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Limit
-> Unique
-> Sort
Sort Key: q.l_orderkey, q.l_partkey
-> Subquery Scan on q
Filter: (q.r > 1)
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)
-- distinct on non-partition column
-- same subquery shape as the partition-column case, but DISTINCT ON is on
-- l_partkey. The select list is (l_orderkey, l_partkey), so ORDER BY 2,1 sorts
-- by l_partkey first -- matching the DISTINCT ON column -- and then by
-- l_orderkey, which picks the smallest l_orderkey per l_partkey.
SELECT DISTINCT ON (l_partkey) l_orderkey, l_partkey
FROM (
SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r
FROM lineitem_hash_part
) q
WHERE r > 1
ORDER BY 2,1
LIMIT 10;
l_orderkey | l_partkey
---------------------------------------------------------------------
12005 | 18
5121 | 79
2883 | 91
807 | 149
4102 | 175
2117 | 179
548 | 182
2528 | 195
10048 | 204
9413 | 222
(10 rows)
-- plan for the DISTINCT ON (l_partkey) subquery case: the recorded plan sorts
-- on (l_partkey, l_orderkey) both inside each task and above the Custom Scan,
-- and the task plan again keeps the Subquery Scan with Filter (q.r > 1).
EXPLAIN (COSTS FALSE)
SELECT DISTINCT ON (l_partkey) l_orderkey, l_partkey
FROM (
SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r
FROM lineitem_hash_part
) q
WHERE r > 1
ORDER BY 2,1
LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: remote_scan.l_partkey, remote_scan.l_orderkey
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Limit
-> Unique
-> Sort
Sort Key: q.l_partkey, q.l_orderkey
-> Subquery Scan on q
Filter: (q.r > 1)
-> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part
(16 rows)