mirror of https://github.com/citusdata/citus.git
946 lines
34 KiB
Plaintext
946 lines
34 KiB
Plaintext
--
|
|
-- COMPLEX_COUNT_DISTINCT
|
|
--
|
|
SET citus.next_shard_id TO 240000;
|
|
CREATE TABLE lineitem_hash (
|
|
l_orderkey bigint not null,
|
|
l_partkey integer not null,
|
|
l_suppkey integer not null,
|
|
l_linenumber integer not null,
|
|
l_quantity decimal(15, 2) not null,
|
|
l_extendedprice decimal(15, 2) not null,
|
|
l_discount decimal(15, 2) not null,
|
|
l_tax decimal(15, 2) not null,
|
|
l_returnflag char(1) not null,
|
|
l_linestatus char(1) not null,
|
|
l_shipdate date not null,
|
|
l_commitdate date not null,
|
|
l_receiptdate date not null,
|
|
l_shipinstruct char(25) not null,
|
|
l_shipmode char(10) not null,
|
|
l_comment varchar(44) not null,
|
|
PRIMARY KEY(l_orderkey, l_linenumber) );
|
|
|
|
SELECT master_create_distributed_table('lineitem_hash', 'l_orderkey', 'hash');
|
|
master_create_distributed_table
|
|
---------------------------------
|
|
|
|
(1 row)
|
|
|
|
SELECT master_create_worker_shards('lineitem_hash', 8, 1);
|
|
master_create_worker_shards
|
|
-----------------------------
|
|
|
|
(1 row)
|
|
|
|
\copy lineitem_hash FROM '@abs_srcdir@/data/lineitem.1.data' with delimiter '|'
|
|
\copy lineitem_hash FROM '@abs_srcdir@/data/lineitem.2.data' with delimiter '|'
|
|
ANALYZE lineitem_hash;
|
|
SET citus.task_executor_type to "task-tracker";
|
|
-- count(distinct) is supported on top level query if there
|
|
-- is a grouping on the partition key
|
|
SELECT
|
|
l_orderkey, count(DISTINCT l_partkey)
|
|
FROM lineitem_hash
|
|
GROUP BY l_orderkey
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
l_orderkey | count
|
|
------------+-------
|
|
14885 | 7
|
|
14884 | 7
|
|
14821 | 7
|
|
14790 | 7
|
|
14785 | 7
|
|
14755 | 7
|
|
14725 | 7
|
|
14694 | 7
|
|
14627 | 7
|
|
14624 | 7
|
|
(10 rows)
|
|
|
|
EXPLAIN (COSTS false, VERBOSE true)
|
|
SELECT
|
|
l_orderkey, count(DISTINCT l_partkey)
|
|
FROM lineitem_hash
|
|
GROUP BY l_orderkey
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
|
Limit
|
|
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint)
|
|
-> Sort
|
|
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint)
|
|
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint) DESC, remote_scan.l_orderkey DESC
|
|
-> HashAggregate
|
|
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
|
|
Group Key: remote_scan.l_orderkey
|
|
-> Custom Scan (Citus Task-Tracker)
|
|
Output: remote_scan.l_orderkey, remote_scan.count
|
|
Task Count: 8
|
|
Tasks Shown: One of 8
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Limit
|
|
Output: l_orderkey, (count(DISTINCT l_partkey))
|
|
-> Sort
|
|
Output: l_orderkey, (count(DISTINCT l_partkey))
|
|
Sort Key: (count(DISTINCT lineitem_hash.l_partkey)) DESC, lineitem_hash.l_orderkey DESC
|
|
-> GroupAggregate
|
|
Output: l_orderkey, count(DISTINCT l_partkey)
|
|
Group Key: lineitem_hash.l_orderkey
|
|
-> Index Scan Backward using lineitem_hash_pkey_240000 on public.lineitem_hash_240000 lineitem_hash
|
|
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
|
(24 rows)
|
|
|
|
-- it is also supported if there is no grouping or grouping is on non-partition field
|
|
SELECT
|
|
count(DISTINCT l_partkey)
|
|
FROM lineitem_hash
|
|
ORDER BY 1 DESC
|
|
LIMIT 10;
|
|
count
|
|
-------
|
|
11661
|
|
(1 row)
|
|
|
|
EXPLAIN (COSTS false, VERBOSE true)
|
|
SELECT
|
|
count(DISTINCT l_partkey)
|
|
FROM lineitem_hash
|
|
ORDER BY 1 DESC
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
|
Limit
|
|
Output: count(DISTINCT (count(DISTINCT (count(DISTINCT remote_scan.count)))))
|
|
-> Sort
|
|
Output: count(DISTINCT (count(DISTINCT remote_scan.count)))
|
|
Sort Key: count(DISTINCT (count(DISTINCT remote_scan.count))) DESC
|
|
-> Aggregate
|
|
Output: count(DISTINCT remote_scan.count)
|
|
-> Custom Scan (Citus Task-Tracker)
|
|
Output: remote_scan.count
|
|
Task Count: 8
|
|
Tasks Shown: One of 8
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> HashAggregate
|
|
Output: l_partkey
|
|
Group Key: lineitem_hash.l_partkey
|
|
-> Seq Scan on public.lineitem_hash_240000 lineitem_hash
|
|
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
|
(18 rows)
|
|
|
|
SELECT
|
|
l_shipmode, count(DISTINCT l_partkey)
|
|
FROM lineitem_hash
|
|
GROUP BY l_shipmode
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
l_shipmode | count
|
|
------------+-------
|
|
TRUCK | 1757
|
|
MAIL | 1730
|
|
AIR | 1702
|
|
FOB | 1700
|
|
RAIL | 1696
|
|
SHIP | 1684
|
|
REG AIR | 1676
|
|
(7 rows)
|
|
|
|
EXPLAIN (COSTS false, VERBOSE true)
|
|
SELECT
|
|
l_shipmode, count(DISTINCT l_partkey)
|
|
FROM lineitem_hash
|
|
GROUP BY l_shipmode
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
|
Limit
|
|
Output: remote_scan.l_shipmode, count(DISTINCT (count(DISTINCT (count(DISTINCT remote_scan.count)))))
|
|
-> Sort
|
|
Output: remote_scan.l_shipmode, count(DISTINCT (count(DISTINCT remote_scan.count)))
|
|
Sort Key: count(DISTINCT (count(DISTINCT remote_scan.count))) DESC, remote_scan.l_shipmode DESC
|
|
-> GroupAggregate
|
|
Output: remote_scan.l_shipmode, count(DISTINCT remote_scan.count)
|
|
Group Key: remote_scan.l_shipmode
|
|
-> Sort
|
|
Output: remote_scan.l_shipmode, remote_scan.count
|
|
Sort Key: remote_scan.l_shipmode DESC
|
|
-> Custom Scan (Citus Task-Tracker)
|
|
Output: remote_scan.l_shipmode, remote_scan.count
|
|
Task Count: 8
|
|
Tasks Shown: One of 8
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> HashAggregate
|
|
Output: l_shipmode, l_partkey
|
|
Group Key: lineitem_hash.l_shipmode, lineitem_hash.l_partkey
|
|
-> Seq Scan on public.lineitem_hash_240000 lineitem_hash
|
|
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
|
(22 rows)
|
|
|
|
-- mixed mode count distinct, grouped by partition column
|
|
SELECT
|
|
l_orderkey, count(distinct l_partkey), count(distinct l_shipmode)
|
|
FROM lineitem_hash
|
|
GROUP BY l_orderkey
|
|
ORDER BY 3 DESC, 2 DESC, 1
|
|
LIMIT 10;
|
|
l_orderkey | count | count
|
|
------------+-------+-------
|
|
226 | 7 | 7
|
|
1316 | 7 | 7
|
|
1477 | 7 | 7
|
|
3555 | 7 | 7
|
|
12258 | 7 | 7
|
|
12835 | 7 | 7
|
|
768 | 7 | 6
|
|
1121 | 7 | 6
|
|
1153 | 7 | 6
|
|
1281 | 7 | 6
|
|
(10 rows)
|
|
|
|
EXPLAIN (COSTS false, VERBOSE true)
|
|
SELECT
|
|
l_orderkey, count(distinct l_partkey), count(distinct l_shipmode)
|
|
FROM lineitem_hash
|
|
GROUP BY l_orderkey
|
|
ORDER BY 3 DESC, 2 DESC, 1
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
|
Limit
|
|
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint)
|
|
-> Sort
|
|
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint)
|
|
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint) DESC, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint) DESC, remote_scan.l_orderkey
|
|
-> HashAggregate
|
|
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint), COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)
|
|
Group Key: remote_scan.l_orderkey
|
|
-> Custom Scan (Citus Task-Tracker)
|
|
Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1
|
|
Task Count: 8
|
|
Tasks Shown: One of 8
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Limit
|
|
Output: l_orderkey, (count(DISTINCT l_partkey)), (count(DISTINCT l_shipmode))
|
|
-> Sort
|
|
Output: l_orderkey, (count(DISTINCT l_partkey)), (count(DISTINCT l_shipmode))
|
|
Sort Key: (count(DISTINCT lineitem_hash.l_shipmode)) DESC, (count(DISTINCT lineitem_hash.l_partkey)) DESC, lineitem_hash.l_orderkey
|
|
-> GroupAggregate
|
|
Output: l_orderkey, count(DISTINCT l_partkey), count(DISTINCT l_shipmode)
|
|
Group Key: lineitem_hash.l_orderkey
|
|
-> Index Scan using lineitem_hash_pkey_240000 on public.lineitem_hash_240000 lineitem_hash
|
|
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
|
(24 rows)
|
|
|
|
-- partition/non-partition column count distinct no grouping
|
|
SELECT
|
|
count(distinct l_orderkey), count(distinct l_partkey), count(distinct l_shipmode)
|
|
FROM lineitem_hash;
|
|
count | count | count
|
|
-------+-------+-------
|
|
2985 | 11661 | 7
|
|
(1 row)
|
|
|
|
EXPLAIN (COSTS false, VERBOSE true)
|
|
SELECT
|
|
count(distinct l_orderkey), count(distinct l_partkey), count(distinct l_shipmode)
|
|
FROM lineitem_hash;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
|
Aggregate
|
|
Output: count(DISTINCT remote_scan.count), count(DISTINCT remote_scan.count_1), count(DISTINCT remote_scan.count_2)
|
|
-> Custom Scan (Citus Task-Tracker)
|
|
Output: remote_scan.count, remote_scan.count_1, remote_scan.count_2
|
|
Task Count: 8
|
|
Tasks Shown: One of 8
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> HashAggregate
|
|
Output: l_orderkey, l_partkey, l_shipmode
|
|
Group Key: lineitem_hash.l_orderkey, lineitem_hash.l_partkey, lineitem_hash.l_shipmode
|
|
-> Seq Scan on public.lineitem_hash_240000 lineitem_hash
|
|
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
|
(13 rows)
|
|
|
|
-- distinct/non-distinct on partition and non-partition columns
|
|
SELECT
|
|
count(distinct l_orderkey), count(l_orderkey),
|
|
count(distinct l_partkey), count(l_partkey),
|
|
count(distinct l_shipmode), count(l_shipmode)
|
|
FROM lineitem_hash;
|
|
count | count | count | count | count | count
|
|
-------+-------+-------+-------+-------+-------
|
|
2985 | 12000 | 11661 | 12000 | 7 | 12000
|
|
(1 row)
|
|
|
|
-- mixed mode count distinct, grouped by non-partition column
|
|
SELECT
|
|
l_shipmode, count(distinct l_partkey), count(distinct l_orderkey)
|
|
FROM lineitem_hash
|
|
GROUP BY l_shipmode
|
|
ORDER BY 1, 2 DESC, 3 DESC;
|
|
l_shipmode | count | count
|
|
------------+-------+-------
|
|
AIR | 1702 | 1327
|
|
FOB | 1700 | 1276
|
|
MAIL | 1730 | 1299
|
|
RAIL | 1696 | 1265
|
|
REG AIR | 1676 | 1275
|
|
SHIP | 1684 | 1289
|
|
TRUCK | 1757 | 1333
|
|
(7 rows)
|
|
|
|
-- mixed mode count distinct, grouped by non-partition column
|
|
-- having on partition column
|
|
SELECT
|
|
l_shipmode, count(distinct l_partkey), count(distinct l_orderkey)
|
|
FROM lineitem_hash
|
|
GROUP BY l_shipmode
|
|
HAVING count(distinct l_orderkey) > 1300
|
|
ORDER BY 1, 2 DESC;
|
|
l_shipmode | count | count
|
|
------------+-------+-------
|
|
AIR | 1702 | 1327
|
|
TRUCK | 1757 | 1333
|
|
(2 rows)
|
|
|
|
-- same but having clause is not on target list
|
|
SELECT
|
|
l_shipmode, count(distinct l_partkey)
|
|
FROM lineitem_hash
|
|
GROUP BY l_shipmode
|
|
HAVING count(distinct l_orderkey) > 1300
|
|
ORDER BY 1, 2 DESC;
|
|
l_shipmode | count
|
|
------------+-------
|
|
AIR | 1702
|
|
TRUCK | 1757
|
|
(2 rows)
|
|
|
|
-- mixed mode count distinct, grouped by non-partition column
|
|
-- having on non-partition column
|
|
SELECT
|
|
l_shipmode, count(distinct l_partkey), count(distinct l_suppkey)
|
|
FROM lineitem_hash
|
|
GROUP BY l_shipmode
|
|
HAVING count(distinct l_suppkey) > 1550
|
|
ORDER BY 1, 2 DESC;
|
|
l_shipmode | count | count
|
|
------------+-------+-------
|
|
AIR | 1702 | 1564
|
|
FOB | 1700 | 1571
|
|
MAIL | 1730 | 1573
|
|
RAIL | 1696 | 1581
|
|
REG AIR | 1676 | 1557
|
|
SHIP | 1684 | 1554
|
|
TRUCK | 1757 | 1602
|
|
(7 rows)
|
|
|
|
-- same but having clause is not on target list
|
|
SELECT
|
|
l_shipmode, count(distinct l_partkey)
|
|
FROM lineitem_hash
|
|
GROUP BY l_shipmode
|
|
HAVING count(distinct l_suppkey) > 1550
|
|
ORDER BY 1, 2 DESC;
|
|
l_shipmode | count
|
|
------------+-------
|
|
AIR | 1702
|
|
FOB | 1700
|
|
MAIL | 1730
|
|
RAIL | 1696
|
|
REG AIR | 1676
|
|
SHIP | 1684
|
|
TRUCK | 1757
|
|
(7 rows)
|
|
|
|
EXPLAIN (COSTS false, VERBOSE true)
|
|
SELECT
|
|
l_shipmode, count(distinct l_partkey)
|
|
FROM lineitem_hash
|
|
GROUP BY l_shipmode
|
|
HAVING count(distinct l_suppkey) > 1550
|
|
ORDER BY 1, 2 DESC;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
|
Sort
|
|
Output: remote_scan.l_shipmode, count(DISTINCT (count(DISTINCT remote_scan.count)))
|
|
Sort Key: remote_scan.l_shipmode, count(DISTINCT (count(DISTINCT remote_scan.count))) DESC
|
|
-> GroupAggregate
|
|
Output: remote_scan.l_shipmode, count(DISTINCT remote_scan.count)
|
|
Group Key: remote_scan.l_shipmode
|
|
Filter: (count(DISTINCT remote_scan.worker_column_3) > 1550)
|
|
-> Sort
|
|
Output: remote_scan.l_shipmode, remote_scan.count, remote_scan.worker_column_3
|
|
Sort Key: remote_scan.l_shipmode
|
|
-> Custom Scan (Citus Task-Tracker)
|
|
Output: remote_scan.l_shipmode, remote_scan.count, remote_scan.worker_column_3
|
|
Task Count: 8
|
|
Tasks Shown: One of 8
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> HashAggregate
|
|
Output: l_shipmode, l_partkey, l_suppkey
|
|
Group Key: lineitem_hash.l_shipmode, lineitem_hash.l_partkey, lineitem_hash.l_suppkey
|
|
-> Seq Scan on public.lineitem_hash_240000 lineitem_hash
|
|
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
|
(21 rows)
|
|
|
|
-- count distinct is supported on single table subqueries
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
l_orderkey, count(DISTINCT l_partkey)
|
|
FROM lineitem_hash
|
|
GROUP BY l_orderkey) sub
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
l_orderkey | count
|
|
------------+-------
|
|
14885 | 7
|
|
14884 | 7
|
|
14821 | 7
|
|
14790 | 7
|
|
14785 | 7
|
|
14755 | 7
|
|
14725 | 7
|
|
14694 | 7
|
|
14627 | 7
|
|
14624 | 7
|
|
(10 rows)
|
|
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
l_partkey, count(DISTINCT l_orderkey)
|
|
FROM lineitem_hash
|
|
GROUP BY l_partkey) sub
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
l_partkey | count
|
|
-----------+-------
|
|
199146 | 3
|
|
188804 | 3
|
|
177771 | 3
|
|
160895 | 3
|
|
149926 | 3
|
|
136884 | 3
|
|
87761 | 3
|
|
15283 | 3
|
|
6983 | 3
|
|
1927 | 3
|
|
(10 rows)
|
|
|
|
EXPLAIN (COSTS false, VERBOSE true)
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
l_partkey, count(DISTINCT l_orderkey)
|
|
FROM lineitem_hash
|
|
GROUP BY l_partkey) sub
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
-------------------------------------------------------------------------
|
|
Limit
|
|
Output: remote_scan.l_partkey, remote_scan.count
|
|
-> Sort
|
|
Output: remote_scan.l_partkey, remote_scan.count
|
|
Sort Key: remote_scan.count DESC, remote_scan.l_partkey DESC
|
|
-> Custom Scan (Citus Task-Tracker)
|
|
Output: remote_scan.l_partkey, remote_scan.count
|
|
Task Count: 4
|
|
Tasks Shown: None, not supported for re-partition queries
|
|
-> MapMergeJob
|
|
Map Task Count: 8
|
|
Merge Task Count: 4
|
|
(12 rows)
|
|
|
|
-- count distinct with filters
|
|
SELECT
|
|
l_orderkey,
|
|
count(DISTINCT l_suppkey) FILTER (WHERE l_shipmode = 'AIR'),
|
|
count(DISTINCT l_suppkey)
|
|
FROM lineitem_hash
|
|
GROUP BY l_orderkey
|
|
ORDER BY 2 DESC, 3 DESC, 1
|
|
LIMIT 10;
|
|
l_orderkey | count | count
|
|
------------+-------+-------
|
|
4964 | 4 | 7
|
|
12005 | 4 | 7
|
|
5409 | 4 | 6
|
|
164 | 3 | 7
|
|
322 | 3 | 7
|
|
871 | 3 | 7
|
|
1156 | 3 | 7
|
|
1574 | 3 | 7
|
|
2054 | 3 | 7
|
|
2309 | 3 | 7
|
|
(10 rows)
|
|
|
|
EXPLAIN (COSTS false, VERBOSE true)
|
|
SELECT
|
|
l_orderkey,
|
|
count(DISTINCT l_suppkey) FILTER (WHERE l_shipmode = 'AIR'),
|
|
count(DISTINCT l_suppkey)
|
|
FROM lineitem_hash
|
|
GROUP BY l_orderkey
|
|
ORDER BY 2 DESC, 3 DESC, 1
|
|
LIMIT 10;
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
|
Limit
|
|
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint))))::bigint, '0'::bigint)
|
|
-> Sort
|
|
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint), COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint)
|
|
Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint) DESC, COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint))))::bigint, '0'::bigint) DESC, remote_scan.l_orderkey
|
|
-> HashAggregate
|
|
Output: remote_scan.l_orderkey, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint), COALESCE((pg_catalog.sum(remote_scan.count_1))::bigint, '0'::bigint)
|
|
Group Key: remote_scan.l_orderkey
|
|
-> Custom Scan (Citus Task-Tracker)
|
|
Output: remote_scan.l_orderkey, remote_scan.count, remote_scan.count_1
|
|
Task Count: 8
|
|
Tasks Shown: One of 8
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Limit
|
|
Output: l_orderkey, (count(DISTINCT l_suppkey) FILTER (WHERE (l_shipmode = 'AIR'::bpchar))), (count(DISTINCT l_suppkey))
|
|
-> Sort
|
|
Output: l_orderkey, (count(DISTINCT l_suppkey) FILTER (WHERE (l_shipmode = 'AIR'::bpchar))), (count(DISTINCT l_suppkey))
|
|
Sort Key: (count(DISTINCT lineitem_hash.l_suppkey) FILTER (WHERE (lineitem_hash.l_shipmode = 'AIR'::bpchar))) DESC, (count(DISTINCT lineitem_hash.l_suppkey)) DESC, lineitem_hash.l_orderkey
|
|
-> GroupAggregate
|
|
Output: l_orderkey, count(DISTINCT l_suppkey) FILTER (WHERE (l_shipmode = 'AIR'::bpchar)), count(DISTINCT l_suppkey)
|
|
Group Key: lineitem_hash.l_orderkey
|
|
-> Index Scan using lineitem_hash_pkey_240000 on public.lineitem_hash_240000 lineitem_hash
|
|
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
|
|
(24 rows)
|
|
|
|
-- group by on non-partition column
|
|
SELECT
|
|
l_suppkey, count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR')
|
|
FROM lineitem_hash
|
|
GROUP BY l_suppkey
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
l_suppkey | count
|
|
-----------+-------
|
|
7680 | 4
|
|
7703 | 3
|
|
7542 | 3
|
|
7072 | 3
|
|
6335 | 3
|
|
5873 | 3
|
|
1318 | 3
|
|
1042 | 3
|
|
160 | 3
|
|
9872 | 2
|
|
(10 rows)
|
|
|
|
-- explaining the same query fails
|
|
EXPLAIN (COSTS false, VERBOSE true)
|
|
SELECT
|
|
l_suppkey, count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR')
|
|
FROM lineitem_hash
|
|
GROUP BY l_suppkey
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
ERROR: bogus varattno for OUTER_VAR var: 3
|
|
-- without group by, on partition column
|
|
SELECT
|
|
count(DISTINCT l_orderkey) FILTER (WHERE l_shipmode = 'AIR')
|
|
FROM lineitem_hash;
|
|
count
|
|
-------
|
|
1327
|
|
(1 row)
|
|
|
|
-- without group by, on non-partition column
|
|
SELECT
|
|
count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR')
|
|
FROM lineitem_hash;
|
|
count
|
|
-------
|
|
1702
|
|
(1 row)
|
|
|
|
SELECT
|
|
count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR'),
|
|
count(DISTINCT l_partkey),
|
|
count(DISTINCT l_shipdate)
|
|
FROM lineitem_hash;
|
|
count | count | count
|
|
-------+-------+-------
|
|
1702 | 11661 | 2470
|
|
(1 row)
|
|
|
|
-- filter column already exists in target list
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
l_orderkey, count(DISTINCT l_partkey) FILTER (WHERE l_orderkey > 100)
|
|
FROM lineitem_hash
|
|
GROUP BY l_orderkey) sub
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
l_orderkey | count
|
|
------------+-------
|
|
14885 | 7
|
|
14884 | 7
|
|
14821 | 7
|
|
14790 | 7
|
|
14785 | 7
|
|
14755 | 7
|
|
14725 | 7
|
|
14694 | 7
|
|
14627 | 7
|
|
14624 | 7
|
|
(10 rows)
|
|
|
|
-- filter column does not exist in target list
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
l_orderkey, count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR')
|
|
FROM lineitem_hash
|
|
GROUP BY l_orderkey) sub
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
l_orderkey | count
|
|
------------+-------
|
|
12005 | 4
|
|
5409 | 4
|
|
4964 | 4
|
|
14848 | 3
|
|
14496 | 3
|
|
13473 | 3
|
|
13122 | 3
|
|
12929 | 3
|
|
12645 | 3
|
|
12417 | 3
|
|
(10 rows)
|
|
|
|
-- case expr in count distinct is supported.
|
|
-- count orders partkeys if l_shipmode is air
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
l_orderkey, count(DISTINCT CASE WHEN l_shipmode = 'AIR' THEN l_partkey ELSE NULL END) as count
|
|
FROM lineitem_hash
|
|
GROUP BY l_orderkey) sub
|
|
WHERE count > 0
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
l_orderkey | count
|
|
------------+-------
|
|
12005 | 4
|
|
5409 | 4
|
|
4964 | 4
|
|
14848 | 3
|
|
14496 | 3
|
|
13473 | 3
|
|
13122 | 3
|
|
12929 | 3
|
|
12645 | 3
|
|
12417 | 3
|
|
(10 rows)
|
|
|
|
-- text like operator is also supported
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
l_orderkey, count(DISTINCT CASE WHEN l_shipmode like '%A%' THEN l_partkey ELSE NULL END) as count
|
|
FROM lineitem_hash
|
|
GROUP BY l_orderkey) sub
|
|
WHERE count > 0
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
l_orderkey | count
|
|
------------+-------
|
|
14275 | 7
|
|
14181 | 7
|
|
13605 | 7
|
|
12707 | 7
|
|
12384 | 7
|
|
11746 | 7
|
|
10727 | 7
|
|
10467 | 7
|
|
5636 | 7
|
|
4614 | 7
|
|
(10 rows)
|
|
|
|
-- count distinct is rejected if it does not reference any columns
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
l_linenumber, count(DISTINCT 1)
|
|
FROM lineitem_hash
|
|
GROUP BY l_linenumber) sub
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
ERROR: cannot compute aggregate (distinct)
|
|
DETAIL: aggregate (distinct) with no columns is unsupported
|
|
HINT: You can load the hll extension from contrib packages and enable distinct approximations.
|
|
-- count distinct is rejected if it does not reference any columns
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
l_linenumber, count(DISTINCT (random() * 5)::int)
|
|
FROM lineitem_hash
|
|
GROUP BY l_linenumber) sub
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
ERROR: cannot compute aggregate (distinct)
|
|
DETAIL: aggregate (distinct) with no columns is unsupported
|
|
HINT: You can load the hll extension from contrib packages and enable distinct approximations.
|
|
-- even non-const function calls are supported within count distinct
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
l_orderkey, count(DISTINCT (random() * 5)::int = l_linenumber)
|
|
FROM lineitem_hash
|
|
GROUP BY l_orderkey) sub
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 0;
|
|
l_orderkey | count
|
|
------------+-------
|
|
(0 rows)
|
|
|
|
-- multiple nested subquery
|
|
SELECT
|
|
total,
|
|
avg(avg_count) as total_avg_count
|
|
FROM (
|
|
SELECT
|
|
number_sum,
|
|
count(DISTINCT l_suppkey) as total,
|
|
avg(total_count) avg_count
|
|
FROM (
|
|
SELECT
|
|
l_suppkey,
|
|
sum(l_linenumber) as number_sum,
|
|
count(DISTINCT l_shipmode) as total_count
|
|
FROM
|
|
lineitem_hash
|
|
WHERE
|
|
l_partkey > 100 and
|
|
l_quantity > 2 and
|
|
l_orderkey < 10000
|
|
GROUP BY
|
|
l_suppkey) as distributed_table
|
|
WHERE
|
|
number_sum >= 10
|
|
GROUP BY
|
|
number_sum) as distributed_table_2
|
|
GROUP BY
|
|
total
|
|
ORDER BY
|
|
total_avg_count DESC;
|
|
total | total_avg_count
|
|
-------+--------------------
|
|
1 | 3.6000000000000000
|
|
6 | 2.8333333333333333
|
|
10 | 2.6000000000000000
|
|
27 | 2.5555555555555556
|
|
32 | 2.4687500000000000
|
|
77 | 2.1948051948051948
|
|
57 | 2.1754385964912281
|
|
(7 rows)
|
|
|
|
-- multiple cases query
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
count(DISTINCT
|
|
CASE
|
|
WHEN l_shipmode = 'TRUCK' THEN l_partkey
|
|
WHEN l_shipmode = 'AIR' THEN l_quantity
|
|
WHEN l_shipmode = 'SHIP' THEN l_discount
|
|
ELSE l_suppkey
|
|
END) as count,
|
|
l_shipdate
|
|
FROM
|
|
lineitem_hash
|
|
GROUP BY
|
|
l_shipdate) sub
|
|
WHERE
|
|
count > 0
|
|
ORDER BY
|
|
1 DESC, 2 DESC
|
|
LIMIT 10;
|
|
count | l_shipdate
|
|
-------+------------
|
|
14 | 07-30-1997
|
|
13 | 05-26-1998
|
|
13 | 08-08-1997
|
|
13 | 11-17-1995
|
|
13 | 01-09-1993
|
|
12 | 01-15-1998
|
|
12 | 10-15-1997
|
|
12 | 09-07-1997
|
|
12 | 06-02-1997
|
|
12 | 03-14-1997
|
|
(10 rows)
|
|
|
|
-- count DISTINCT expression
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
l_quantity, count(DISTINCT ((l_orderkey / 1000) * 1000 )) as count
|
|
FROM
|
|
lineitem_hash
|
|
GROUP BY
|
|
l_quantity) sub
|
|
WHERE
|
|
count > 0
|
|
ORDER BY
|
|
2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
l_quantity | count
|
|
------------+-------
|
|
48.00 | 13
|
|
47.00 | 13
|
|
37.00 | 13
|
|
33.00 | 13
|
|
26.00 | 13
|
|
25.00 | 13
|
|
23.00 | 13
|
|
21.00 | 13
|
|
15.00 | 13
|
|
12.00 | 13
|
|
(10 rows)
|
|
|
|
-- count DISTINCT is part of an expression which inclues another aggregate
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
sum(((l_partkey * l_tax) / 100)) /
|
|
count(DISTINCT
|
|
CASE
|
|
WHEN l_shipmode = 'TRUCK' THEN l_partkey
|
|
ELSE l_suppkey
|
|
END) as avg,
|
|
l_shipmode
|
|
FROM
|
|
lineitem_hash
|
|
GROUP BY
|
|
l_shipmode) sub
|
|
ORDER BY
|
|
1 DESC, 2 DESC
|
|
LIMIT 10;
|
|
avg | l_shipmode
|
|
-------------------------+------------
|
|
44.82904609027336300064 | MAIL
|
|
44.80704536679536679537 | SHIP
|
|
44.68891732736572890026 | AIR
|
|
44.34106724470134874759 | REG AIR
|
|
43.12739987269255251432 | FOB
|
|
43.07299253636938646426 | RAIL
|
|
40.50298377916903813318 | TRUCK
|
|
(7 rows)
|
|
|
|
--- count DISTINCT CASE WHEN expression
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
count(DISTINCT
|
|
CASE
|
|
WHEN l_shipmode = 'TRUCK' THEN l_linenumber
|
|
WHEN l_shipmode = 'AIR' THEN l_linenumber + 10
|
|
ELSE 2
|
|
END) as avg
|
|
FROM
|
|
lineitem_hash
|
|
GROUP BY l_shipdate) sub
|
|
ORDER BY 1 DESC
|
|
LIMIT 10;
|
|
avg
|
|
-----
|
|
7
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
5
|
|
5
|
|
(10 rows)
|
|
|
|
-- COUNT DISTINCT (c1, c2)
|
|
SELECT *
|
|
FROM
|
|
(SELECT
|
|
l_shipmode,
|
|
count(DISTINCT (l_shipdate, l_tax))
|
|
FROM
|
|
lineitem_hash
|
|
GROUP BY
|
|
l_shipmode) t
|
|
ORDER BY
|
|
2 DESC,1 DESC
|
|
LIMIT 10;
|
|
l_shipmode | count
|
|
------------+-------
|
|
TRUCK | 1689
|
|
MAIL | 1683
|
|
FOB | 1655
|
|
AIR | 1650
|
|
SHIP | 1644
|
|
RAIL | 1636
|
|
REG AIR | 1607
|
|
(7 rows)
|
|
|
|
-- other distinct aggregate are not supported
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
l_linenumber, sum(DISTINCT l_partkey)
|
|
FROM lineitem_hash
|
|
GROUP BY l_linenumber) sub
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
ERROR: cannot compute aggregate (distinct)
|
|
DETAIL: Only count(distinct) aggregate is supported in subqueries
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
l_linenumber, avg(DISTINCT l_partkey)
|
|
FROM lineitem_hash
|
|
GROUP BY l_linenumber) sub
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
ERROR: cannot compute aggregate (distinct)
|
|
DETAIL: Only count(distinct) aggregate is supported in subqueries
|
|
-- whole row references, oid, and ctid are not supported in count distinct
|
|
-- test table does not have oid or ctid enabled, so tests for them are skipped
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
l_linenumber, count(DISTINCT lineitem_hash)
|
|
FROM lineitem_hash
|
|
GROUP BY l_linenumber) sub
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
ERROR: cannot compute count (distinct)
|
|
DETAIL: Non-column references are not supported yet
|
|
SELECT *
|
|
FROM (
|
|
SELECT
|
|
l_linenumber, count(DISTINCT lineitem_hash.*)
|
|
FROM lineitem_hash
|
|
GROUP BY l_linenumber) sub
|
|
ORDER BY 2 DESC, 1 DESC
|
|
LIMIT 10;
|
|
ERROR: cannot compute count (distinct)
|
|
DETAIL: Non-column references are not supported yet
|
|
DROP TABLE lineitem_hash;
|