mirror of https://github.com/citusdata/citus.git
825 lines
28 KiB
Plaintext
825 lines
28 KiB
Plaintext
--
|
|
-- MULTI_SELECT_DISTINCT
|
|
--
|
|
-- Tests select distinct, and select distinct on features.
|
|
--
|
|
-- function calls are supported
|
|
SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0;
|
|
l_orderkey | now
|
|
------------+-----
|
|
(0 rows)
|
|
|
|
SELECT DISTINCT l_partkey, 1 + (random() * 0)::int FROM lineitem_hash_part ORDER BY 1 DESC LIMIT 3;
|
|
l_partkey | ?column?
|
|
-----------+----------
|
|
199973 | 1
|
|
199946 | 1
|
|
199943 | 1
|
|
(3 rows)
|
|
|
|
-- const expressions are supported
|
|
SELECT DISTINCT l_orderkey, 1+1 FROM lineitem_hash_part ORDER BY 1 LIMIT 5;
|
|
l_orderkey | ?column?
|
|
------------+----------
|
|
1 | 2
|
|
2 | 2
|
|
3 | 2
|
|
4 | 2
|
|
5 | 2
|
|
(5 rows)
|
|
|
|
-- non const expressions are also supported
|
|
SELECT DISTINCT l_orderkey, l_partkey + 1 FROM lineitem_hash_part ORDER BY 1, 2 LIMIT 5;
|
|
l_orderkey | ?column?
|
|
------------+----------
|
|
1 | 2133
|
|
1 | 15636
|
|
1 | 24028
|
|
1 | 63701
|
|
1 | 67311
|
|
(5 rows)
|
|
|
|
-- column expressions are supported
|
|
SELECT DISTINCT l_orderkey, l_shipinstruct || l_shipmode FROM lineitem_hash_part ORDER BY 2 , 1 LIMIT 5;
|
|
l_orderkey | ?column?
|
|
------------+----------------
|
|
32 | COLLECT CODAIR
|
|
39 | COLLECT CODAIR
|
|
66 | COLLECT CODAIR
|
|
70 | COLLECT CODAIR
|
|
98 | COLLECT CODAIR
|
|
(5 rows)
|
|
|
|
-- function calls with const input are supported
|
|
SELECT DISTINCT l_orderkey, strpos('AIR', 'A') FROM lineitem_hash_part ORDER BY 1,2 LIMIT 5;
|
|
l_orderkey | strpos
|
|
------------+--------
|
|
1 | 1
|
|
2 | 1
|
|
3 | 1
|
|
4 | 1
|
|
5 | 1
|
|
(5 rows)
|
|
|
|
-- function calls with non-const input are supported
|
|
SELECT DISTINCT l_orderkey, strpos(l_shipmode, 'I')
|
|
FROM lineitem_hash_part
|
|
WHERE strpos(l_shipmode, 'I') > 1
|
|
ORDER BY 2, 1
|
|
LIMIT 5;
|
|
l_orderkey | strpos
|
|
------------+--------
|
|
1 | 2
|
|
3 | 2
|
|
5 | 2
|
|
32 | 2
|
|
33 | 2
|
|
(5 rows)
|
|
|
|
-- distinct on partition column
|
|
-- verify counts match with respect to count(distinct)
|
|
CREATE TEMP TABLE temp_orderkeys AS SELECT DISTINCT l_orderkey FROM lineitem_hash_part;
|
|
SELECT COUNT(*) FROM temp_orderkeys;
|
|
count
|
|
-------
|
|
2985
|
|
(1 row)
|
|
|
|
SELECT COUNT(DISTINCT l_orderkey) FROM lineitem_hash_part;
|
|
count
|
|
-------
|
|
2985
|
|
(1 row)
|
|
|
|
SELECT DISTINCT l_orderkey FROM lineitem_hash_part WHERE l_orderkey < 500 and l_partkey < 5000 order by 1;
|
|
l_orderkey
|
|
------------
|
|
1
|
|
3
|
|
32
|
|
35
|
|
39
|
|
65
|
|
129
|
|
130
|
|
134
|
|
164
|
|
194
|
|
228
|
|
261
|
|
290
|
|
320
|
|
321
|
|
354
|
|
418
|
|
(18 rows)
|
|
|
|
-- distinct on non-partition column
|
|
SELECT DISTINCT l_partkey FROM lineitem_hash_part WHERE l_orderkey > 5 and l_orderkey < 20 order by 1;
|
|
l_partkey
|
|
-----------
|
|
79251
|
|
94780
|
|
139636
|
|
145243
|
|
151894
|
|
157238
|
|
163073
|
|
182052
|
|
(8 rows)
|
|
|
|
SELECT DISTINCT l_shipmode FROM lineitem_hash_part ORDER BY 1 DESC;
|
|
l_shipmode
|
|
------------
|
|
TRUCK
|
|
SHIP
|
|
REG AIR
|
|
RAIL
|
|
MAIL
|
|
FOB
|
|
AIR
|
|
(7 rows)
|
|
|
|
-- distinct with multiple columns
|
|
SELECT DISTINCT l_orderkey, o_orderdate
|
|
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
|
|
WHERE l_orderkey < 10
|
|
ORDER BY l_orderkey;
|
|
l_orderkey | o_orderdate
|
|
------------+-------------
|
|
1 | 01-02-1996
|
|
2 | 12-01-1996
|
|
3 | 10-14-1993
|
|
4 | 10-11-1995
|
|
5 | 07-30-1994
|
|
6 | 02-21-1992
|
|
7 | 01-10-1996
|
|
(7 rows)
|
|
|
|
-- distinct on partition column with aggregate
|
|
-- this is the same as the one without distinct due to group by
|
|
SELECT DISTINCT l_orderkey, count(*)
|
|
FROM lineitem_hash_part
|
|
WHERE l_orderkey < 200
|
|
GROUP BY 1
|
|
HAVING count(*) > 5
|
|
ORDER BY 2 DESC, 1;
|
|
l_orderkey | count
|
|
------------+-------
|
|
7 | 7
|
|
68 | 7
|
|
129 | 7
|
|
164 | 7
|
|
194 | 7
|
|
1 | 6
|
|
3 | 6
|
|
32 | 6
|
|
35 | 6
|
|
39 | 6
|
|
67 | 6
|
|
69 | 6
|
|
70 | 6
|
|
71 | 6
|
|
134 | 6
|
|
135 | 6
|
|
163 | 6
|
|
192 | 6
|
|
197 | 6
|
|
(19 rows)
|
|
|
|
|
|
-- explain the query to see actual plan
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT l_orderkey, count(*)
|
|
FROM lineitem_hash_part
|
|
WHERE l_orderkey < 200
|
|
GROUP BY 1
|
|
HAVING count(*) > 5
|
|
ORDER BY 2 DESC, 1;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------------------------------------------------------------------------
|
|
Sort
|
|
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint) DESC, remote_scan.l_orderkey
|
|
-> HashAggregate
|
|
Group Key: remote_scan.l_orderkey
|
|
Filter: (COALESCE((pg_catalog.sum(remote_scan.worker_column_3))::bigint, '0'::bigint) > 5)
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_orderkey
|
|
Filter: (count(*) > 5)
|
|
-> Seq Scan on lineitem_hash_part_360290 lineitem_hash_part
|
|
Filter: (l_orderkey < 200)
|
|
(15 rows)
|
|
|
|
-- distinct on non-partition column with aggregate
|
|
-- this is the same as non-distinct version due to group by
|
|
SELECT DISTINCT l_partkey, count(*)
|
|
FROM lineitem_hash_part
|
|
GROUP BY 1
|
|
HAVING count(*) > 2
|
|
ORDER BY 1;
|
|
l_partkey | count
|
|
-----------+-------
|
|
1051 | 3
|
|
1927 | 3
|
|
6983 | 3
|
|
15283 | 3
|
|
87761 | 3
|
|
136884 | 3
|
|
149926 | 3
|
|
160895 | 3
|
|
177771 | 3
|
|
188804 | 3
|
|
199146 | 3
|
|
(11 rows)
|
|
|
|
|
|
-- explain the query to see actual plan
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT l_partkey, count(*)
|
|
FROM lineitem_hash_part
|
|
GROUP BY 1
|
|
HAVING count(*) > 2
|
|
ORDER BY 1;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------------
|
|
Sort
|
|
Sort Key: remote_scan.l_partkey
|
|
-> HashAggregate
|
|
Group Key: remote_scan.l_partkey
|
|
Filter: (COALESCE((pg_catalog.sum(remote_scan.worker_column_3))::bigint, '0'::bigint) > 2)
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> HashAggregate
|
|
Group Key: l_partkey
|
|
-> Seq Scan on lineitem_hash_part_360290 lineitem_hash_part
|
|
(13 rows)
|
|
|
|
-- distinct on non-partition column and avg
|
|
SELECT DISTINCT l_partkey, avg(l_linenumber)
|
|
FROM lineitem_hash_part
|
|
WHERE l_partkey < 500
|
|
GROUP BY 1
|
|
HAVING avg(l_linenumber) > 2
|
|
ORDER BY 1;
|
|
l_partkey | avg
|
|
-----------+--------------------
|
|
18 | 7.0000000000000000
|
|
79 | 6.0000000000000000
|
|
149 | 4.5000000000000000
|
|
175 | 5.0000000000000000
|
|
179 | 6.0000000000000000
|
|
182 | 3.0000000000000000
|
|
222 | 4.0000000000000000
|
|
278 | 3.0000000000000000
|
|
299 | 7.0000000000000000
|
|
308 | 7.0000000000000000
|
|
309 | 5.0000000000000000
|
|
321 | 3.0000000000000000
|
|
337 | 6.0000000000000000
|
|
364 | 3.0000000000000000
|
|
403 | 4.0000000000000000
|
|
(15 rows)
|
|
|
|
-- distinct on multiple non-partition columns
|
|
SELECT DISTINCT l_partkey, l_suppkey
|
|
FROM lineitem_hash_part
|
|
WHERE l_shipmode = 'AIR' AND l_orderkey < 100
|
|
ORDER BY 1, 2;
|
|
l_partkey | l_suppkey
|
|
-----------+-----------
|
|
2132 | 4633
|
|
4297 | 1798
|
|
37531 | 35
|
|
44161 | 6666
|
|
44706 | 4707
|
|
67831 | 5350
|
|
85811 | 8320
|
|
94368 | 6878
|
|
108338 | 849
|
|
108570 | 8571
|
|
137267 | 4807
|
|
137469 | 9983
|
|
173489 | 3490
|
|
196156 | 1195
|
|
197921 | 441
|
|
(15 rows)
|
|
|
|
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT l_partkey, l_suppkey
|
|
FROM lineitem_hash_part
|
|
WHERE l_shipmode = 'AIR' AND l_orderkey < 100
|
|
ORDER BY 1, 2;
|
|
QUERY PLAN
|
|
-----------------------------------------------------------------------------------------------------
|
|
Sort
|
|
Sort Key: remote_scan.l_partkey, remote_scan.l_suppkey
|
|
-> HashAggregate
|
|
Group Key: remote_scan.l_partkey, remote_scan.l_suppkey
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: l_partkey, l_suppkey
|
|
-> Seq Scan on lineitem_hash_part_360290 lineitem_hash_part
|
|
Filter: ((l_orderkey < 100) AND (l_shipmode = 'AIR'::bpchar))
|
|
(14 rows)
|
|
|
|
-- distinct on partition column
|
|
SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey, l_suppkey
|
|
FROM lineitem_hash_part
|
|
WHERE l_orderkey < 35
|
|
ORDER BY 1;
|
|
l_orderkey | l_partkey | l_suppkey
|
|
------------+-----------+-----------
|
|
1 | 155190 | 7706
|
|
2 | 106170 | 1191
|
|
3 | 4297 | 1798
|
|
4 | 88035 | 5560
|
|
5 | 108570 | 8571
|
|
6 | 139636 | 2150
|
|
7 | 182052 | 9607
|
|
32 | 82704 | 7721
|
|
33 | 61336 | 8855
|
|
34 | 88362 | 871
|
|
(10 rows)
|
|
|
|
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey, l_suppkey
|
|
FROM lineitem_hash_part
|
|
WHERE l_orderkey < 35
|
|
ORDER BY 1;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------
|
|
Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.l_orderkey
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: l_orderkey
|
|
-> Seq Scan on lineitem_hash_part_360290 lineitem_hash_part
|
|
Filter: (l_orderkey < 35)
|
|
(13 rows)
|
|
|
|
-- distinct on non-partition column
|
|
-- note order by is required here
|
|
-- otherwise query results will be different since
|
|
-- distinct on clause is on non-partition column
|
|
SELECT DISTINCT ON (l_partkey) l_partkey, l_orderkey
|
|
FROM lineitem_hash_part
|
|
ORDER BY 1,2
|
|
LIMIT 20;
|
|
l_partkey | l_orderkey
|
|
-----------+------------
|
|
18 | 12005
|
|
79 | 5121
|
|
91 | 2883
|
|
149 | 807
|
|
175 | 4102
|
|
179 | 2117
|
|
182 | 548
|
|
195 | 2528
|
|
204 | 10048
|
|
222 | 9413
|
|
245 | 9446
|
|
278 | 1287
|
|
299 | 1122
|
|
308 | 11137
|
|
309 | 2374
|
|
318 | 321
|
|
321 | 5984
|
|
337 | 10403
|
|
350 | 13698
|
|
358 | 4323
|
|
(20 rows)
|
|
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ON (l_partkey) l_partkey, l_orderkey
|
|
FROM lineitem_hash_part
|
|
ORDER BY 1,2
|
|
LIMIT 20;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.l_partkey, remote_scan.l_orderkey
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: l_partkey, l_orderkey
|
|
-> Seq Scan on lineitem_hash_part_360290 lineitem_hash_part
|
|
(14 rows)
|
|
|
|
-- distinct on with joins
|
|
-- each customer's first order key
|
|
SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey
|
|
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
|
|
WHERE o_custkey < 15
|
|
ORDER BY 1,2;
|
|
o_custkey | l_orderkey
|
|
-----------+------------
|
|
1 | 9154
|
|
2 | 10563
|
|
4 | 320
|
|
5 | 11682
|
|
7 | 10402
|
|
8 | 102
|
|
10 | 1602
|
|
11 | 12800
|
|
13 | 994
|
|
14 | 11011
|
|
(10 rows)
|
|
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey
|
|
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
|
|
WHERE o_custkey < 15
|
|
ORDER BY 1,2;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------------------------
|
|
Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.o_custkey, remote_scan.l_orderkey
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: orders_hash_part.o_custkey, lineitem_hash_part.l_orderkey
|
|
-> Hash Join
|
|
Hash Cond: (lineitem_hash_part.l_orderkey = orders_hash_part.o_orderkey)
|
|
-> Seq Scan on lineitem_hash_part_360290 lineitem_hash_part
|
|
-> Hash
|
|
-> Seq Scan on orders_hash_part_360294 orders_hash_part
|
|
Filter: (o_custkey < 15)
|
|
(17 rows)
|
|
|
|
-- explain without order by
|
|
-- notice master plan has order by on distinct on column
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey
|
|
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
|
|
WHERE o_custkey < 15;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------------------------
|
|
Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.o_custkey
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: orders_hash_part.o_custkey
|
|
-> Hash Join
|
|
Hash Cond: (lineitem_hash_part.l_orderkey = orders_hash_part.o_orderkey)
|
|
-> Seq Scan on lineitem_hash_part_360290 lineitem_hash_part
|
|
-> Hash
|
|
-> Seq Scan on orders_hash_part_360294 orders_hash_part
|
|
Filter: (o_custkey < 15)
|
|
(17 rows)
|
|
|
|
-- each customer's each order's first l_partkey
|
|
SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey
|
|
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
|
|
WHERE o_custkey < 20
|
|
ORDER BY 1,2,3;
|
|
o_custkey | l_orderkey | l_linenumber | l_partkey
|
|
-----------+------------+--------------+-----------
|
|
1 | 9154 | 1 | 86513
|
|
1 | 14656 | 1 | 59539
|
|
2 | 10563 | 1 | 147459
|
|
4 | 320 | 1 | 4415
|
|
4 | 739 | 1 | 84489
|
|
4 | 10688 | 1 | 45037
|
|
4 | 10788 | 1 | 50814
|
|
4 | 13728 | 1 | 86216
|
|
5 | 11682 | 1 | 31634
|
|
5 | 11746 | 1 | 180724
|
|
5 | 14308 | 1 | 157430
|
|
7 | 10402 | 1 | 53661
|
|
7 | 13031 | 1 | 112161
|
|
7 | 14145 | 1 | 138729
|
|
7 | 14404 | 1 | 143034
|
|
8 | 102 | 1 | 88914
|
|
8 | 164 | 1 | 91309
|
|
8 | 13601 | 1 | 40504
|
|
10 | 1602 | 1 | 182806
|
|
10 | 9862 | 1 | 86241
|
|
10 | 11431 | 1 | 62112
|
|
10 | 13124 | 1 | 29414
|
|
11 | 12800 | 1 | 152806
|
|
13 | 994 | 1 | 64486
|
|
13 | 1603 | 1 | 38191
|
|
13 | 4704 | 1 | 77934
|
|
13 | 9927 | 1 | 875
|
|
14 | 11011 | 1 | 172485
|
|
17 | 896 | 1 | 38675
|
|
17 | 5507 | 1 | 9600
|
|
19 | 353 | 1 | 119305
|
|
19 | 1504 | 1 | 81389
|
|
19 | 1669 | 1 | 78373
|
|
19 | 5893 | 1 | 133707
|
|
19 | 9954 | 1 | 92138
|
|
19 | 14885 | 1 | 36154
|
|
(36 rows)
|
|
|
|
-- explain without order by
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey
|
|
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
|
|
WHERE o_custkey < 20;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------------------------
|
|
Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.o_custkey, remote_scan.l_orderkey
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: orders_hash_part.o_custkey, lineitem_hash_part.l_orderkey
|
|
-> Hash Join
|
|
Hash Cond: (lineitem_hash_part.l_orderkey = orders_hash_part.o_orderkey)
|
|
-> Seq Scan on lineitem_hash_part_360290 lineitem_hash_part
|
|
-> Hash
|
|
-> Seq Scan on orders_hash_part_360294 orders_hash_part
|
|
Filter: (o_custkey < 20)
|
|
(17 rows)
|
|
|
|
-- each customer's each order's last l_partkey
|
|
SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey
|
|
FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey)
|
|
WHERE o_custkey < 15
|
|
ORDER BY 1,2,3 DESC;
|
|
o_custkey | l_orderkey | l_linenumber | l_partkey
|
|
-----------+------------+--------------+-----------
|
|
1 | 9154 | 7 | 173448
|
|
1 | 14656 | 1 | 59539
|
|
2 | 10563 | 4 | 110741
|
|
4 | 320 | 2 | 192158
|
|
4 | 739 | 5 | 187523
|
|
4 | 10688 | 2 | 132574
|
|
4 | 10788 | 4 | 196473
|
|
4 | 13728 | 3 | 12450
|
|
5 | 11682 | 3 | 177152
|
|
5 | 11746 | 7 | 193807
|
|
5 | 14308 | 3 | 140916
|
|
7 | 10402 | 2 | 64514
|
|
7 | 13031 | 6 | 7761
|
|
7 | 14145 | 6 | 130723
|
|
7 | 14404 | 7 | 35349
|
|
8 | 102 | 4 | 61158
|
|
8 | 164 | 7 | 3037
|
|
8 | 13601 | 5 | 12470
|
|
10 | 1602 | 1 | 182806
|
|
10 | 9862 | 5 | 135675
|
|
10 | 11431 | 7 | 8563
|
|
10 | 13124 | 3 | 67055
|
|
11 | 12800 | 5 | 179110
|
|
13 | 994 | 4 | 130471
|
|
13 | 1603 | 2 | 65209
|
|
13 | 4704 | 3 | 63081
|
|
13 | 9927 | 6 | 119356
|
|
14 | 11011 | 7 | 95939
|
|
(28 rows)
|
|
|
|
-- subqueries
|
|
SELECT DISTINCT l_orderkey, l_partkey
|
|
FROM (
|
|
SELECT l_orderkey, l_partkey
|
|
FROM lineitem_hash_part
|
|
) q
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
l_orderkey | l_partkey
|
|
------------+-----------
|
|
1 | 2132
|
|
1 | 15635
|
|
1 | 24027
|
|
1 | 63700
|
|
1 | 67310
|
|
1 | 155190
|
|
2 | 106170
|
|
3 | 4297
|
|
3 | 19036
|
|
3 | 29380
|
|
(10 rows)
|
|
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT l_orderkey, l_partkey
|
|
FROM (
|
|
SELECT l_orderkey, l_partkey
|
|
FROM lineitem_hash_part
|
|
) q
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------------------
|
|
Limit
|
|
-> Sort
|
|
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
|
|
-> HashAggregate
|
|
Group Key: remote_scan.l_orderkey, remote_scan.l_partkey
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Limit
|
|
-> Sort
|
|
Sort Key: l_orderkey, l_partkey
|
|
-> HashAggregate
|
|
Group Key: l_orderkey, l_partkey
|
|
-> Seq Scan on lineitem_hash_part_360290 lineitem_hash_part
|
|
(16 rows)
|
|
|
|
SELECT DISTINCT l_orderkey, cnt
|
|
FROM (
|
|
SELECT l_orderkey, count(*) as cnt
|
|
FROM lineitem_hash_part
|
|
GROUP BY 1
|
|
) q
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
l_orderkey | cnt
|
|
------------+-----
|
|
1 | 6
|
|
2 | 1
|
|
3 | 6
|
|
4 | 1
|
|
5 | 3
|
|
6 | 1
|
|
7 | 7
|
|
32 | 6
|
|
33 | 4
|
|
34 | 3
|
|
(10 rows)
|
|
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT l_orderkey, cnt
|
|
FROM (
|
|
SELECT l_orderkey, count(*) as cnt
|
|
FROM lineitem_hash_part
|
|
GROUP BY 1
|
|
) q
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------------------------
|
|
Limit
|
|
-> Sort
|
|
Sort Key: remote_scan.l_orderkey, remote_scan.cnt
|
|
-> HashAggregate
|
|
Group Key: remote_scan.l_orderkey, remote_scan.cnt
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Limit
|
|
-> Sort
|
|
Sort Key: lineitem_hash_part.l_orderkey, (count(*))
|
|
-> HashAggregate
|
|
Group Key: lineitem_hash_part.l_orderkey, count(*)
|
|
-> HashAggregate
|
|
Group Key: lineitem_hash_part.l_orderkey
|
|
-> Seq Scan on lineitem_hash_part_360290 lineitem_hash_part
|
|
(18 rows)
|
|
|
|
-- distinct on partition column
|
|
-- random() is added to inner query to prevent flattening
|
|
SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey
|
|
FROM (
|
|
SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r
|
|
FROM lineitem_hash_part
|
|
) q
|
|
WHERE r > 1
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
l_orderkey | l_partkey
|
|
------------+-----------
|
|
1 | 2132
|
|
2 | 106170
|
|
3 | 4297
|
|
4 | 88035
|
|
5 | 37531
|
|
6 | 139636
|
|
7 | 79251
|
|
32 | 2743
|
|
33 | 33918
|
|
34 | 88362
|
|
(10 rows)
|
|
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey
|
|
FROM (
|
|
SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r
|
|
FROM lineitem_hash_part
|
|
) q
|
|
WHERE r > 1
|
|
ORDER BY 1,2
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: q.l_orderkey, q.l_partkey
|
|
-> Subquery Scan on q
|
|
Filter: (q.r > 1)
|
|
-> Seq Scan on lineitem_hash_part_360290 lineitem_hash_part
|
|
(16 rows)
|
|
|
|
-- distinct on non-partition column
|
|
SELECT DISTINCT ON (l_partkey) l_orderkey, l_partkey
|
|
FROM (
|
|
SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r
|
|
FROM lineitem_hash_part
|
|
) q
|
|
WHERE r > 1
|
|
ORDER BY 2,1
|
|
LIMIT 10;
|
|
l_orderkey | l_partkey
|
|
------------+-----------
|
|
12005 | 18
|
|
5121 | 79
|
|
2883 | 91
|
|
807 | 149
|
|
4102 | 175
|
|
2117 | 179
|
|
548 | 182
|
|
2528 | 195
|
|
10048 | 204
|
|
9413 | 222
|
|
(10 rows)
|
|
|
|
EXPLAIN (COSTS FALSE)
|
|
SELECT DISTINCT ON (l_partkey) l_orderkey, l_partkey
|
|
FROM (
|
|
SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r
|
|
FROM lineitem_hash_part
|
|
) q
|
|
WHERE r > 1
|
|
ORDER BY 2,1
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------------------------
|
|
Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: remote_scan.l_partkey, remote_scan.l_orderkey
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Limit
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: q.l_partkey, q.l_orderkey
|
|
-> Subquery Scan on q
|
|
Filter: (q.r > 1)
|
|
-> Seq Scan on lineitem_hash_part_360290 lineitem_hash_part
|
|
(16 rows)
|
|
|