From 64db74c051abdfb8ea16ff7ee791efe7cb51066c Mon Sep 17 00:00:00 2001 From: Hanefi Onaldi Date: Tue, 4 Oct 2022 13:02:46 +0300 Subject: [PATCH] Remove references to optimization PG15 reverted PG15 introduced an optimization on GROUP BY keys that is now reverted on RC2. Relevant PG commit: Revert "Optimize order of GROUP BY keys". 443df6e2db932a7cd6d85ddfb67e11a43345130d (cherry picked from commit cbe4298c5b080c84bce7fea8e1f0228e77224894) --- src/test/regress/expected/cte_inline.out | 39 - src/test/regress/expected/cte_inline_0.out | 39 - src/test/regress/expected/multi_explain.out | 23 - .../expected/multi_select_distinct.out | 40 +- .../expected/multi_select_distinct_0.out | 1548 ----------------- src/test/regress/sql/cte_inline.sql | 20 - src/test/regress/sql/multi_explain.sql | 20 - .../regress/sql/multi_select_distinct.sql | 3 - 8 files changed, 15 insertions(+), 1717 deletions(-) delete mode 100644 src/test/regress/expected/multi_select_distinct_0.out diff --git a/src/test/regress/expected/cte_inline.out b/src/test/regress/expected/cte_inline.out index 1e1f410c3..7e46227d2 100644 --- a/src/test/regress/expected/cte_inline.out +++ b/src/test/regress/expected/cte_inline.out @@ -463,29 +463,6 @@ DEBUG: Creating router plan (5 rows) \set VERBOSITY default --- enable_group_by_reordering is a new GUC introduced in PG15 --- it does some optimization of the order of group by keys which results --- in a different explain output plan between PG13/14 and PG15 --- Hence we set that GUC to off. -SHOW server_version \gset -SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 -\gset -\if :server_version_ge_15 -SET enable_group_by_reordering TO off; -\endif -SELECT DISTINCT 1 FROM run_command_on_workers($$ALTER SYSTEM SET enable_group_by_reordering TO off;$$); - ?column? ---------------------------------------------------------------------- - 1 -(1 row) - -SELECT run_command_on_workers($$SELECT pg_reload_conf()$$); - run_command_on_workers ---------------------------------------------------------------------- - (localhost,57637,t,t) - (localhost,57638,t,t) -(2 rows) - EXPLAIN (COSTS OFF) WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table) SELECT count(*) @@ -524,22 +501,6 @@ DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] -> Seq Scan on test_table_1960000 test_table_1 (12 rows) -\if :server_version_ge_15 -RESET enable_group_by_reordering; -\endif -SELECT DISTINCT 1 FROM run_command_on_workers($$ALTER SYSTEM RESET enable_group_by_reordering;$$); - ?column? ---------------------------------------------------------------------- - 1 -(1 row) - -SELECT run_command_on_workers($$SELECT pg_reload_conf()$$); - run_command_on_workers ---------------------------------------------------------------------- - (localhost,57637,t,t) - (localhost,57638,t,t) -(2 rows) - -- ctes with volatile functions are not -- inlined WITH cte_1 AS (SELECT *, random() FROM test_table) diff --git a/src/test/regress/expected/cte_inline_0.out b/src/test/regress/expected/cte_inline_0.out index ebaa172bb..ab2b91791 100644 --- a/src/test/regress/expected/cte_inline_0.out +++ b/src/test/regress/expected/cte_inline_0.out @@ -463,29 +463,6 @@ DEBUG: Creating router plan (5 rows) \set VERBOSITY default --- enable_group_by_reordering is a new GUC introduced in PG15 --- it does some optimization of the order of group by keys which results --- in a different explain output plan between PG13/14 and PG15 --- Hence we set that GUC to off. -SHOW server_version \gset -SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 -\gset -\if :server_version_ge_15 -SET enable_group_by_reordering TO off; -\endif -SELECT DISTINCT 1 FROM run_command_on_workers($$ALTER SYSTEM SET enable_group_by_reordering TO off;$$); - ?column? ---------------------------------------------------------------------- - 1 -(1 row) - -SELECT run_command_on_workers($$SELECT pg_reload_conf()$$); - run_command_on_workers ---------------------------------------------------------------------- - (localhost,57637,t,t) - (localhost,57638,t,t) -(2 rows) - EXPLAIN (COSTS OFF) WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table) SELECT count(*) @@ -524,22 +501,6 @@ DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] -> Seq Scan on test_table_1960000 test_table_1 (12 rows) -\if :server_version_ge_15 -RESET enable_group_by_reordering; -\endif -SELECT DISTINCT 1 FROM run_command_on_workers($$ALTER SYSTEM RESET enable_group_by_reordering;$$); - ?column? ---------------------------------------------------------------------- - 1 -(1 row) - -SELECT run_command_on_workers($$SELECT pg_reload_conf()$$); - run_command_on_workers ---------------------------------------------------------------------- - (localhost,57637,t,t) - (localhost,57638,t,t) -(2 rows) - -- ctes with volatile functions are not -- inlined WITH cte_1 AS (SELECT *, random() FROM test_table) diff --git a/src/test/regress/expected/multi_explain.out b/src/test/regress/expected/multi_explain.out index 1c985ecce..eee7a6236 100644 --- a/src/test/regress/expected/multi_explain.out +++ b/src/test/regress/expected/multi_explain.out @@ -636,21 +636,6 @@ Aggregate -> Seq Scan on events_1400285 events Filter: ((event_type)::text = ANY ('{click,submit,pay}'::text[])) -- Union and left join subquery pushdown --- enable_group_by_reordering is a new GUC introduced in PG15 --- it does some optimization of the order of group by keys which results --- in a different explain output plan between PG13/14 and PG15 --- Hence we set that GUC to off. -SHOW server_version \gset -SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 -\gset -\if :server_version_ge_15 -SET enable_group_by_reordering TO off; -\endif -SELECT DISTINCT 1 FROM run_command_on_workers($$ALTER SYSTEM SET enable_group_by_reordering TO off;$$); -1 -SELECT run_command_on_workers($$SELECT pg_reload_conf()$$); -(localhost,57637,t,t) -(localhost,57638,t,t) EXPLAIN (COSTS OFF) SELECT avg(array_length(events, 1)) AS event_average, @@ -873,14 +858,6 @@ Sort Sort Key: events_2.composite_id -> Seq Scan on events_1400285 events_2 Filter: ((composite_id >= '(1,-9223372036854775808)'::user_composite_type) AND (composite_id <= '(1,9223372036854775807)'::user_composite_type) AND ((event_type)::text = 'pay'::text)) -\if :server_version_ge_15 -RESET enable_group_by_reordering; -\endif -SELECT DISTINCT 1 FROM run_command_on_workers($$ALTER SYSTEM RESET enable_group_by_reordering;$$); -1 -SELECT run_command_on_workers($$SELECT pg_reload_conf()$$); -(localhost,57637,t,t) -(localhost,57638,t,t) -- Lateral join subquery pushdown -- set subquery_pushdown due to limit in the query SET citus.subquery_pushdown to ON; diff --git a/src/test/regress/expected/multi_select_distinct.out b/src/test/regress/expected/multi_select_distinct.out index d281ad4b4..75d47026b 100644 --- a/src/test/regress/expected/multi_select_distinct.out +++ b/src/test/regress/expected/multi_select_distinct.out @@ -3,13 +3,6 @@ -- -- Tests select distinct, and select distinct on features. -- -SHOW server_version \gset -SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15; - server_version_ge_15 ---------------------------------------------------------------------- - t -(1 row) - ANALYZE lineitem_hash_part; -- function calls are supported SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0; @@ -443,13 +436,12 @@ EXPLAIN (COSTS FALSE) GROUP BY l_suppkey, l_linenumber ORDER BY 1,2 LIMIT 10; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Limit - -> Sort - Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) - -> HashAggregate - Group Key: remote_scan.l_suppkey, (pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)) + -> Unique + -> Sort + Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) -> HashAggregate Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4 -> Custom Scan (Citus Adaptive) @@ -460,7 +452,7 @@ EXPLAIN (COSTS FALSE) -> HashAggregate Group Key: l_suppkey, l_linenumber -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(15 rows) +(14 rows) -- check the plan if the hash aggreate is disabled. This explain errors out due -- to a bug right now, expectation must be corrected after fixing it. @@ -595,13 +587,12 @@ EXPLAIN (COSTS FALSE) GROUP BY l_suppkey, l_linenumber ORDER BY 1 LIMIT 10; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Limit - -> Sort - Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision)) - -> HashAggregate - Group Key: (sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision) + -> Unique + -> Sort + Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision)) -> HashAggregate Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 -> Custom Scan (Citus Adaptive) @@ -612,7 +603,7 @@ EXPLAIN (COSTS FALSE) -> HashAggregate Group Key: l_suppkey, l_linenumber -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(15 rows) +(14 rows) -- check the plan if the hash aggreate is disabled. This explain errors out due -- to a bug right now, expectation must be corrected after fixing it. @@ -671,13 +662,12 @@ EXPLAIN (COSTS FALSE) GROUP BY l_suppkey, l_linenumber ORDER BY 1 LIMIT 10; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Limit - -> Sort - Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint))) - -> HashAggregate - Group Key: ((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint)) + -> Unique + -> Sort + Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint))) -> HashAggregate Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 -> Custom Scan (Citus Adaptive) @@ -688,7 +678,7 @@ EXPLAIN (COSTS FALSE) -> HashAggregate Group Key: l_suppkey, l_linenumber -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(15 rows) +(14 rows) -- check the plan if the hash aggreate is disabled. This explain errors out due -- to a bug right now, expectation must be corrected after fixing it. diff --git a/src/test/regress/expected/multi_select_distinct_0.out b/src/test/regress/expected/multi_select_distinct_0.out deleted file mode 100644 index 69e90b7a0..000000000 --- a/src/test/regress/expected/multi_select_distinct_0.out +++ /dev/null @@ -1,1548 +0,0 @@ --- --- MULTI_SELECT_DISTINCT --- --- Tests select distinct, and select distinct on features. --- -SHOW server_version \gset -SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15; - server_version_ge_15 ---------------------------------------------------------------------- - f -(1 row) - -ANALYZE lineitem_hash_part; --- function calls are supported -SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0; - l_orderkey | now ---------------------------------------------------------------------- -(0 rows) - -SELECT DISTINCT l_orderkey, avg(l_linenumber) -FROM lineitem_hash_part -GROUP BY l_orderkey -HAVING avg(l_linenumber) = (select avg(distinct l_linenumber)) -LIMIT 10; -ERROR: Subqueries in HAVING cannot refer to outer query -SELECT DISTINCT l_orderkey -FROM lineitem_hash_part -GROUP BY l_orderkey -HAVING (select avg(distinct l_linenumber) = l_orderkey) -LIMIT 10; -ERROR: Subqueries in HAVING cannot refer to outer query -SELECT DISTINCT l_partkey, 1 + (random() * 0)::int FROM lineitem_hash_part ORDER BY 1 DESC LIMIT 3; - l_partkey | ?column? ---------------------------------------------------------------------- - 199973 | 1 - 199946 | 1 - 199943 | 1 -(3 rows) - --- const expressions are supported -SELECT DISTINCT l_orderkey, 1+1 FROM lineitem_hash_part ORDER BY 1 LIMIT 5; - l_orderkey | ?column? ---------------------------------------------------------------------- - 1 | 2 - 2 | 2 - 3 | 2 - 4 | 2 - 5 | 2 -(5 rows) - --- non const expressions are also supported -SELECT DISTINCT l_orderkey, l_partkey + 1 FROM lineitem_hash_part ORDER BY 1, 2 LIMIT 5; - l_orderkey | ?column? ---------------------------------------------------------------------- - 1 | 2133 - 1 | 15636 - 1 | 24028 - 1 | 63701 - 1 | 67311 -(5 rows) - --- column expressions are supported -SELECT DISTINCT l_orderkey, l_shipinstruct || l_shipmode FROM lineitem_hash_part ORDER BY 2 , 1 LIMIT 5; - l_orderkey | ?column? ---------------------------------------------------------------------- - 32 | COLLECT CODAIR - 39 | COLLECT CODAIR - 66 | COLLECT CODAIR - 70 | COLLECT CODAIR - 98 | COLLECT CODAIR -(5 rows) - --- function calls with const input are supported -SELECT DISTINCT l_orderkey, strpos('AIR', 'A') FROM lineitem_hash_part ORDER BY 1,2 LIMIT 5; - l_orderkey | strpos ---------------------------------------------------------------------- - 1 | 1 - 2 | 1 - 3 | 1 - 4 | 1 - 5 | 1 -(5 rows) - --- function calls with non-const input are supported -SELECT DISTINCT l_orderkey, strpos(l_shipmode, 'I') - FROM lineitem_hash_part - WHERE strpos(l_shipmode, 'I') > 1 - ORDER BY 2, 1 - LIMIT 5; - l_orderkey | strpos ---------------------------------------------------------------------- - 1 | 2 - 3 | 2 - 5 | 2 - 32 | 2 - 33 | 2 -(5 rows) - --- row types are supported -SELECT DISTINCT (l_orderkey, l_partkey) AS pair FROM lineitem_hash_part ORDER BY 1 LIMIT 5; - pair ---------------------------------------------------------------------- - (1,2132) - (1,15635) - (1,24027) - (1,63700) - (1,67310) -(5 rows) - --- distinct on partition column --- verify counts match with respect to count(distinct) -CREATE TEMP TABLE temp_orderkeys AS SELECT DISTINCT l_orderkey FROM lineitem_hash_part; -SELECT COUNT(*) FROM temp_orderkeys; - count ---------------------------------------------------------------------- - 2985 -(1 row) - -SELECT COUNT(DISTINCT l_orderkey) FROM lineitem_hash_part; - count ---------------------------------------------------------------------- - 2985 -(1 row) - -SELECT DISTINCT l_orderkey FROM lineitem_hash_part WHERE l_orderkey < 500 and l_partkey < 5000 order by 1; - l_orderkey ---------------------------------------------------------------------- - 1 - 3 - 32 - 35 - 39 - 65 - 129 - 130 - 134 - 164 - 194 - 228 - 261 - 290 - 320 - 321 - 354 - 418 -(18 rows) - --- distinct on non-partition column -SELECT DISTINCT l_partkey FROM lineitem_hash_part WHERE l_orderkey > 5 and l_orderkey < 20 order by 1; - l_partkey ---------------------------------------------------------------------- - 79251 - 94780 - 139636 - 145243 - 151894 - 157238 - 163073 - 182052 -(8 rows) - -SELECT DISTINCT l_shipmode FROM lineitem_hash_part ORDER BY 1 DESC; - l_shipmode ---------------------------------------------------------------------- - TRUCK - SHIP - REG AIR - RAIL - MAIL - FOB - AIR -(7 rows) - --- distinct with multiple columns -SELECT DISTINCT l_orderkey, o_orderdate - FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) - WHERE l_orderkey < 10 - ORDER BY l_orderkey; - l_orderkey | o_orderdate ---------------------------------------------------------------------- - 1 | 01-02-1996 - 2 | 12-01-1996 - 3 | 10-14-1993 - 4 | 10-11-1995 - 5 | 07-30-1994 - 6 | 02-21-1992 - 7 | 01-10-1996 -(7 rows) - --- distinct on partition column with aggregate --- this is the same as the one without distinct due to group by -SELECT DISTINCT l_orderkey, count(*) - FROM lineitem_hash_part - WHERE l_orderkey < 200 - GROUP BY 1 - HAVING count(*) > 5 - ORDER BY 2 DESC, 1; - l_orderkey | count ---------------------------------------------------------------------- - 7 | 7 - 68 | 7 - 129 | 7 - 164 | 7 - 194 | 7 - 1 | 6 - 3 | 6 - 32 | 6 - 35 | 6 - 39 | 6 - 67 | 6 - 69 | 6 - 70 | 6 - 71 | 6 - 134 | 6 - 135 | 6 - 163 | 6 - 192 | 6 - 197 | 6 -(19 rows) - --- explain the query to see actual plan -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_orderkey, count(*) - FROM lineitem_hash_part - WHERE l_orderkey < 200 - GROUP BY 1 - HAVING count(*) > 5 - ORDER BY 2 DESC, 1; - QUERY PLAN ---------------------------------------------------------------------- - Sort - Sort Key: remote_scan.count DESC, remote_scan.l_orderkey - -> HashAggregate - Group Key: remote_scan.count, remote_scan.l_orderkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_orderkey - Filter: (count(*) > 5) - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part - Filter: (l_orderkey < 200) -(14 rows) - --- check the plan if the hash aggreate is disabled -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_orderkey, count(*) - FROM lineitem_hash_part - WHERE l_orderkey < 200 - GROUP BY 1 - HAVING count(*) > 5 - ORDER BY 2 DESC, 1; - QUERY PLAN ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: remote_scan.count DESC, remote_scan.l_orderkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_orderkey - Filter: (count(*) > 5) - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part - Filter: (l_orderkey < 200) -(13 rows) - -SET enable_hashagg TO on; --- distinct on aggregate of group by columns, we try to check whether we handle --- queries which does not have any group by column in distinct columns properly. -SELECT DISTINCT count(*) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1; - count ---------------------------------------------------------------------- - 1 - 2 - 3 - 4 -(4 rows) - --- explain the query to see actual plan. We expect to see Aggregate node having --- group by key on count(*) column, since columns in the Group By doesn't guarantee --- the uniqueness of the result. -EXPLAIN (COSTS FALSE) - SELECT DISTINCT count(*) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1; - QUERY PLAN ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) - -> HashAggregate - Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(13 rows) - --- check the plan if the hash aggreate is disabled. We expect to see sort+unique --- instead of aggregate plan node to handle distinct. -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT count(*) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1; - QUERY PLAN ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) - -> GroupAggregate - Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3 - -> Sort - Sort Key: remote_scan.worker_column_2, remote_scan.worker_column_3 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(15 rows) - -SET enable_hashagg TO on; --- Now we have only part of group clause columns in distinct, yet it is still not --- enough to use Group By columns to guarantee uniqueness of result list. -SELECT DISTINCT l_suppkey, count(*) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - l_suppkey | count ---------------------------------------------------------------------- - 1 | 1 - 2 | 1 - 3 | 1 - 4 | 1 - 5 | 1 - 7 | 1 - 10 | 1 - 12 | 1 - 13 | 1 - 14 | 1 -(10 rows) - --- explain the query to see actual plan. Similar to the explain of the query above. -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_suppkey, count(*) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.l_suppkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) - -> HashAggregate - Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) - --- check the plan if the hash aggreate is disabled. Similar to the explain of --- the query above. -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_suppkey, count(*) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.l_suppkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) - -> GroupAggregate - Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3 - -> Sort - Sort Key: remote_scan.l_suppkey, remote_scan.worker_column_3 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(16 rows) - -SET enable_hashagg TO on; --- Similar to the above query, not with count but avg. Only difference with the --- above query is that, we create run two aggregate functions in workers. -SELECT DISTINCT l_suppkey, avg(l_partkey) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1,2 - LIMIT 10; - l_suppkey | avg ---------------------------------------------------------------------- - 1 | 190000.000000000000 - 2 | 172450.000000000000 - 3 | 112469.000000000000 - 3 | 134976.000000000000 - 4 | 112470.000000000000 - 4 | 142461.000000000000 - 5 | 182450.000000000000 - 7 | 137493.000000000000 - 10 | 150009.000000000000 - 12 | 17510.0000000000000000 -(10 rows) - --- explain the query to see actual plan. Similar to the explain of the query above. --- Only aggregate functions will be changed. -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_suppkey, avg(l_partkey) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1,2 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) - -> HashAggregate - Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) - --- check the plan if the hash aggreate is disabled. This explain errors out due --- to a bug right now, expectation must be corrected after fixing it. -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_suppkey, avg(l_partkey) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1,2 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) - -> GroupAggregate - Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4 - -> Sort - Sort Key: remote_scan.l_suppkey, remote_scan.worker_column_4 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(16 rows) - -SET enable_hashagg TO on; --- Similar to the above query but with distinct on -SELECT DISTINCT ON (l_suppkey) avg(l_partkey) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY l_suppkey,1 - LIMIT 10; - avg ---------------------------------------------------------------------- - 190000.000000000000 - 172450.000000000000 - 112469.000000000000 - 112470.000000000000 - 182450.000000000000 - 137493.000000000000 - 150009.000000000000 - 17510.0000000000000000 - 87504.000000000000 - 77506.000000000000 -(10 rows) - --- explain the query to see actual plan. We expect to see sort+unique to handle --- distinct on. -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (l_suppkey) avg(l_partkey) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY l_suppkey,1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.worker_column_3, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) - -> HashAggregate - Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) - --- check the plan if the hash aggreate is disabled. We expect to see sort+unique to --- handle distinct on. -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (l_suppkey) avg(l_partkey) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY l_suppkey,1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.worker_column_3, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) - -> GroupAggregate - Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Sort - Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(16 rows) - -SET enable_hashagg TO on; --- distinct with expression and aggregation -SELECT DISTINCT avg(ceil(l_partkey / 2)) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - avg ---------------------------------------------------------------------- - 9 - 39 - 74 - 87 - 89 - 91 - 97 - 102 - 111 - 122 -(10 rows) - --- explain the query to see actual plan -EXPLAIN (COSTS FALSE) - SELECT DISTINCT avg(ceil(l_partkey / 2)) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision)) - -> HashAggregate - Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) - --- check the plan if the hash aggreate is disabled. This explain errors out due --- to a bug right now, expectation must be corrected after fixing it. -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT avg(ceil(l_partkey / 2)) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision)) - -> GroupAggregate - Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Sort - Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(16 rows) - -SET enable_hashagg TO on; --- expression among aggregations. -SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - dis ---------------------------------------------------------------------- - 2 - 3 - 4 - 5 - 6 - 8 - 11 - 13 - 14 - 15 -(10 rows) - --- explain the query to see actual plan -EXPLAIN (COSTS FALSE) - SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint))) - -> HashAggregate - Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) - --- check the plan if the hash aggreate is disabled. This explain errors out due --- to a bug right now, expectation must be corrected after fixing it. -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint))) - -> GroupAggregate - Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Sort - Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(16 rows) - -SET enable_hashagg TO on; --- distinct on all columns, note Group By columns guarantees uniqueness of the --- result list. -SELECT DISTINCT * - FROM lineitem_hash_part - GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 - ORDER BY 1,2 - LIMIT 10; - l_orderkey | l_partkey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate | l_shipinstruct | l_shipmode | l_comment ---------------------------------------------------------------------- - 1 | 2132 | 4633 | 4 | 28.00 | 28955.64 | 0.09 | 0.06 | N | O | 04-21-1996 | 03-30-1996 | 05-16-1996 | NONE | AIR | lites. fluffily even de - 1 | 15635 | 638 | 6 | 32.00 | 49620.16 | 0.07 | 0.02 | N | O | 01-30-1996 | 02-07-1996 | 02-03-1996 | DELIVER IN PERSON | MAIL | arefully slyly ex - 1 | 24027 | 1534 | 5 | 24.00 | 22824.48 | 0.10 | 0.04 | N | O | 03-30-1996 | 03-14-1996 | 04-01-1996 | NONE | FOB | pending foxes. slyly re - 1 | 63700 | 3701 | 3 | 8.00 | 13309.60 | 0.10 | 0.02 | N | O | 01-29-1996 | 03-05-1996 | 01-31-1996 | TAKE BACK RETURN | REG AIR | riously. regular, express dep - 1 | 67310 | 7311 | 2 | 36.00 | 45983.16 | 0.09 | 0.06 | N | O | 04-12-1996 | 02-28-1996 | 04-20-1996 | TAKE BACK RETURN | MAIL | ly final dependencies: slyly bold - 1 | 155190 | 7706 | 1 | 17.00 | 21168.23 | 0.04 | 0.02 | N | O | 03-13-1996 | 02-12-1996 | 03-22-1996 | DELIVER IN PERSON | TRUCK | egular courts above the - 2 | 106170 | 1191 | 1 | 38.00 | 44694.46 | 0.00 | 0.05 | N | O | 01-28-1997 | 01-14-1997 | 02-02-1997 | TAKE BACK RETURN | RAIL | ven requests. deposits breach a - 3 | 4297 | 1798 | 1 | 45.00 | 54058.05 | 0.06 | 0.00 | R | F | 02-02-1994 | 01-04-1994 | 02-23-1994 | NONE | AIR | ongside of the furiously brave acco - 3 | 19036 | 6540 | 2 | 49.00 | 46796.47 | 0.10 | 0.00 | R | F | 11-09-1993 | 12-20-1993 | 11-24-1993 | TAKE BACK RETURN | RAIL | unusual accounts. eve - 3 | 29380 | 1883 | 4 | 2.00 | 2618.76 | 0.01 | 0.06 | A | F | 12-04-1993 | 01-07-1994 | 01-01-1994 | NONE | TRUCK | y. fluffily pending d -(10 rows) - --- explain the query to see actual plan. We expect to see only one aggregation --- node since group by columns guarantees the uniqueness. -SELECT coordinator_plan($Q$ -EXPLAIN (COSTS FALSE) - SELECT DISTINCT * - FROM lineitem_hash_part - GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 - ORDER BY 1,2 - LIMIT 10; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Limit - -> Sort - Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey - -> HashAggregate - Group Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(7 rows) - --- check the plan if the hash aggreate is disabled. We expect to see only one --- aggregation node since group by columns guarantees the uniqueness. -SET enable_hashagg TO off; -SELECT coordinator_plan($Q$ -EXPLAIN (COSTS FALSE) - SELECT DISTINCT * - FROM lineitem_hash_part - GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 - ORDER BY 1,2 - LIMIT 10; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(6 rows) - -SET enable_hashagg TO on; --- distinct on count distinct -SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode) - FROM lineitem_hash_part - GROUP BY l_orderkey - ORDER BY 1,2; - count | count ---------------------------------------------------------------------- - 1 | 1 - 2 | 1 - 2 | 2 - 3 | 1 - 3 | 2 - 3 | 3 - 4 | 1 - 4 | 2 - 4 | 3 - 4 | 4 - 5 | 2 - 5 | 3 - 5 | 4 - 5 | 5 - 6 | 2 - 6 | 3 - 6 | 4 - 6 | 5 - 6 | 6 - 7 | 2 - 7 | 3 - 7 | 4 - 7 | 5 - 7 | 6 - 7 | 7 -(25 rows) - --- explain the query to see actual plan. We expect to see aggregation plan for --- the outer distinct. -EXPLAIN (COSTS FALSE) - SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode) - FROM lineitem_hash_part - GROUP BY l_orderkey - ORDER BY 1,2; - QUERY PLAN ---------------------------------------------------------------------- - Sort - Sort Key: remote_scan.count, remote_scan.count_1 - -> HashAggregate - Group Key: remote_scan.count, remote_scan.count_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> GroupAggregate - Group Key: l_orderkey - -> Sort - Sort Key: l_orderkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) - --- check the plan if the hash aggreate is disabled. We expect to see sort + unique --- plans for the outer distinct. -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode) - FROM lineitem_hash_part - GROUP BY l_orderkey - ORDER BY 1,2; - QUERY PLAN ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: remote_scan.count, remote_scan.count_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> GroupAggregate - Group Key: l_orderkey - -> Sort - Sort Key: l_orderkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(13 rows) - -SET enable_hashagg TO on; --- distinct on aggregation with filter and expression -SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count - FROM lineitem_hash_part - GROUP BY l_suppkey - ORDER BY 1; - count ---------------------------------------------------------------------- - 0 - 1 - 2 - 3 - 4 -(5 rows) - --- explain the query to see actual plan -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count - FROM lineitem_hash_part - GROUP BY l_suppkey - ORDER BY 1; - QUERY PLAN ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision)) - -> HashAggregate - Group Key: remote_scan.worker_column_2 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(13 rows) - --- check the plan if the hash aggreate is disabled -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count - FROM lineitem_hash_part - GROUP BY l_suppkey - ORDER BY 1; - QUERY PLAN ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision)) - -> GroupAggregate - Group Key: remote_scan.worker_column_2 - -> Sort - Sort Key: remote_scan.worker_column_2 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(15 rows) - -SET enable_hashagg TO on; --- explain the query to see actual plan with array_agg aggregation. -SELECT coordinator_plan($Q$ -EXPLAIN (COSTS FALSE) - SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1) - FROM lineitem_hash_part - GROUP BY l_orderkey - ORDER BY 2 - LIMIT 15; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Limit - -> Sort - Sort Key: remote_scan.array_length - -> HashAggregate - Group Key: remote_scan.array_length, remote_scan.array_agg - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(7 rows) - --- check the plan if the hash aggreate is disabled. -SET enable_hashagg TO off; -SELECT coordinator_plan($Q$ -EXPLAIN (COSTS FALSE) - SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1) - FROM lineitem_hash_part - GROUP BY l_orderkey - ORDER BY 2 - LIMIT 15; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.array_length, remote_scan.array_agg - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(6 rows) - -SET enable_hashagg TO on; --- distinct on non-partition column with aggregate --- this is the same as non-distinct version due to group by -SELECT DISTINCT l_partkey, count(*) - FROM lineitem_hash_part - GROUP BY 1 - HAVING count(*) > 2 - ORDER BY 1; - l_partkey | count ---------------------------------------------------------------------- - 1051 | 3 - 1927 | 3 - 6983 | 3 - 15283 | 3 - 87761 | 3 - 136884 | 3 - 149926 | 3 - 160895 | 3 - 177771 | 3 - 188804 | 3 - 199146 | 3 -(11 rows) - --- explain the query to see actual plan -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_partkey, count(*) - FROM lineitem_hash_part - GROUP BY 1 - HAVING count(*) > 2 - ORDER BY 1; - QUERY PLAN ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: remote_scan.l_partkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) - -> HashAggregate - Group Key: remote_scan.l_partkey - Filter: (COALESCE((pg_catalog.sum(remote_scan.worker_column_3))::bigint, '0'::bigint) > 2) - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_partkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) - --- distinct on non-partition column and avg -SELECT DISTINCT l_partkey, avg(l_linenumber) - FROM lineitem_hash_part - WHERE l_partkey < 500 - GROUP BY 1 - HAVING avg(l_linenumber) > 2 - ORDER BY 1; - l_partkey | avg ---------------------------------------------------------------------- - 18 | 7.0000000000000000 - 79 | 6.0000000000000000 - 149 | 4.5000000000000000 - 175 | 5.0000000000000000 - 179 | 6.0000000000000000 - 182 | 3.0000000000000000 - 222 | 4.0000000000000000 - 278 | 3.0000000000000000 - 299 | 7.0000000000000000 - 308 | 7.0000000000000000 - 309 | 5.0000000000000000 - 321 | 3.0000000000000000 - 337 | 6.0000000000000000 - 364 | 3.0000000000000000 - 403 | 4.0000000000000000 -(15 rows) - --- distinct on multiple non-partition columns -SELECT DISTINCT l_partkey, l_suppkey - FROM lineitem_hash_part - WHERE l_shipmode = 'AIR' AND l_orderkey < 100 - ORDER BY 1, 2; - l_partkey | l_suppkey ---------------------------------------------------------------------- - 2132 | 4633 - 4297 | 1798 - 37531 | 35 - 44161 | 6666 - 44706 | 4707 - 67831 | 5350 - 85811 | 8320 - 94368 | 6878 - 108338 | 849 - 108570 | 8571 - 137267 | 4807 - 137469 | 9983 - 173489 | 3490 - 196156 | 1195 - 197921 | 441 -(15 rows) - -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_partkey, l_suppkey - FROM lineitem_hash_part - WHERE l_shipmode = 'AIR' AND l_orderkey < 100 - ORDER BY 1, 2; - QUERY PLAN ---------------------------------------------------------------------- - Sort - Sort Key: remote_scan.l_partkey, remote_scan.l_suppkey - -> HashAggregate - Group Key: remote_scan.l_partkey, remote_scan.l_suppkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Unique - -> Sort - Sort Key: l_partkey, l_suppkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part - Filter: ((l_orderkey < 100) AND (l_shipmode = 'AIR'::bpchar)) -(14 rows) - --- distinct on partition column -SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey, l_suppkey - FROM lineitem_hash_part - WHERE l_orderkey < 35 - ORDER BY 1, 2, 3; - l_orderkey | l_partkey | l_suppkey ---------------------------------------------------------------------- - 1 | 2132 | 4633 - 2 | 106170 | 1191 - 3 | 4297 | 1798 - 4 | 88035 | 5560 - 5 | 37531 | 35 - 6 | 139636 | 2150 - 7 | 79251 | 1759 - 32 | 2743 | 7744 - 33 | 33918 | 3919 - 34 | 88362 | 871 -(10 rows) - -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey, l_suppkey - FROM lineitem_hash_part - WHERE l_orderkey < 35 - ORDER BY 1, 2, 3; - QUERY PLAN ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Unique - -> Sort - Sort Key: l_orderkey, l_partkey, l_suppkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part - Filter: (l_orderkey < 35) -(13 rows) - --- distinct on non-partition column --- note order by is required here --- otherwise query results will be different since --- distinct on clause is on non-partition column -SELECT DISTINCT ON (l_partkey) l_partkey, l_orderkey - FROM lineitem_hash_part - ORDER BY 1,2 - LIMIT 20; - l_partkey | l_orderkey ---------------------------------------------------------------------- - 18 | 12005 - 79 | 5121 - 91 | 2883 - 149 | 807 - 175 | 4102 - 179 | 2117 - 182 | 548 - 195 | 2528 - 204 | 10048 - 222 | 9413 - 245 | 9446 - 278 | 1287 - 299 | 1122 - 308 | 11137 - 309 | 2374 - 318 | 321 - 321 | 5984 - 337 | 10403 - 350 | 13698 - 358 | 4323 -(20 rows) - -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (l_partkey) l_partkey, l_orderkey - FROM lineitem_hash_part - ORDER BY 1,2 - LIMIT 20; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.l_partkey, remote_scan.l_orderkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Limit - -> Unique - -> Sort - Sort Key: l_partkey, l_orderkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) - --- distinct on with joins --- each customer's first order key -SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey - FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) - WHERE o_custkey < 15 - ORDER BY 1,2; - o_custkey | l_orderkey ---------------------------------------------------------------------- - 1 | 9154 - 2 | 10563 - 4 | 320 - 5 | 11682 - 7 | 10402 - 8 | 102 - 10 | 1602 - 11 | 12800 - 13 | 994 - 14 | 11011 -(10 rows) - -SELECT coordinator_plan($Q$ -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey - FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) - WHERE o_custkey < 15 - ORDER BY 1,2; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: remote_scan.o_custkey, remote_scan.l_orderkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(5 rows) - --- explain without order by --- notice master plan has order by on distinct on column -SELECT coordinator_plan($Q$ -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey - FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) - WHERE o_custkey < 15; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: remote_scan.o_custkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(5 rows) - --- each customer's each order's first l_partkey -SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey - FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) - WHERE o_custkey < 20 - ORDER BY 1,2,3; - o_custkey | l_orderkey | l_linenumber | l_partkey ---------------------------------------------------------------------- - 1 | 9154 | 1 | 86513 - 1 | 14656 | 1 | 59539 - 2 | 10563 | 1 | 147459 - 4 | 320 | 1 | 4415 - 4 | 739 | 1 | 84489 - 4 | 10688 | 1 | 45037 - 4 | 10788 | 1 | 50814 - 4 | 13728 | 1 | 86216 - 5 | 11682 | 1 | 31634 - 5 | 11746 | 1 | 180724 - 5 | 14308 | 1 | 157430 - 7 | 10402 | 1 | 53661 - 7 | 13031 | 1 | 112161 - 7 | 14145 | 1 | 138729 - 7 | 14404 | 1 | 143034 - 8 | 102 | 1 | 88914 - 8 | 164 | 1 | 91309 - 8 | 13601 | 1 | 40504 - 10 | 1602 | 1 | 182806 - 10 | 9862 | 1 | 86241 - 10 | 11431 | 1 | 62112 - 10 | 13124 | 1 | 29414 - 11 | 12800 | 1 | 152806 - 13 | 994 | 1 | 64486 - 13 | 1603 | 1 | 38191 - 13 | 4704 | 1 | 77934 - 13 | 9927 | 1 | 875 - 14 | 11011 | 1 | 172485 - 17 | 896 | 1 | 38675 - 17 | 5507 | 1 | 9600 - 19 | 353 | 1 | 119305 - 19 | 1504 | 1 | 81389 - 19 | 1669 | 1 | 78373 - 19 | 5893 | 1 | 133707 - 19 | 9954 | 1 | 92138 - 19 | 14885 | 1 | 36154 -(36 rows) - --- explain without order by -SELECT coordinator_plan($Q$ -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey - FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) - WHERE o_custkey < 20; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: remote_scan.o_custkey, remote_scan.l_orderkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(5 rows) - --- each customer's each order's last l_partkey -SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey - FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) - WHERE o_custkey < 15 - ORDER BY 1,2,3 DESC; - o_custkey | l_orderkey | l_linenumber | l_partkey ---------------------------------------------------------------------- - 1 | 9154 | 7 | 173448 - 1 | 14656 | 1 | 59539 - 2 | 10563 | 4 | 110741 - 4 | 320 | 2 | 192158 - 4 | 739 | 5 | 187523 - 4 | 10688 | 2 | 132574 - 4 | 10788 | 4 | 196473 - 4 | 13728 | 3 | 12450 - 5 | 11682 | 3 | 177152 - 5 | 11746 | 7 | 193807 - 5 | 14308 | 3 | 140916 - 7 | 10402 | 2 | 64514 - 7 | 13031 | 6 | 7761 - 7 | 14145 | 6 | 130723 - 7 | 14404 | 7 | 35349 - 8 | 102 | 4 | 61158 - 8 | 164 | 7 | 3037 - 8 | 13601 | 5 | 12470 - 10 | 1602 | 1 | 182806 - 10 | 9862 | 5 | 135675 - 10 | 11431 | 7 | 8563 - 10 | 13124 | 3 | 67055 - 11 | 12800 | 5 | 179110 - 13 | 994 | 4 | 130471 - 13 | 1603 | 2 | 65209 - 13 | 4704 | 3 | 63081 - 13 | 9927 | 6 | 119356 - 14 | 11011 | 7 | 95939 -(28 rows) - --- subqueries -SELECT DISTINCT l_orderkey, l_partkey - FROM ( - SELECT l_orderkey, l_partkey - FROM lineitem_hash_part - ) q - ORDER BY 1,2 - LIMIT 10; - l_orderkey | l_partkey ---------------------------------------------------------------------- - 1 | 2132 - 1 | 15635 - 1 | 24027 - 1 | 63700 - 1 | 67310 - 1 | 155190 - 2 | 106170 - 3 | 4297 - 3 | 19036 - 3 | 29380 -(10 rows) - -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_orderkey, l_partkey - FROM ( - SELECT l_orderkey, l_partkey - FROM lineitem_hash_part - ) q - ORDER BY 1,2 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Sort - Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey - -> HashAggregate - Group Key: remote_scan.l_orderkey, remote_scan.l_partkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Limit - -> Sort - Sort Key: l_orderkey, l_partkey - -> HashAggregate - Group Key: l_orderkey, l_partkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(16 rows) - -SELECT DISTINCT l_orderkey, cnt - FROM ( - SELECT l_orderkey, count(*) as cnt - FROM lineitem_hash_part - GROUP BY 1 - ) q - ORDER BY 1,2 - LIMIT 10; - l_orderkey | cnt ---------------------------------------------------------------------- - 1 | 6 - 2 | 1 - 3 | 6 - 4 | 1 - 5 | 3 - 6 | 1 - 7 | 7 - 32 | 6 - 33 | 4 - 34 | 3 -(10 rows) - -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_orderkey, cnt - FROM ( - SELECT l_orderkey, count(*) as cnt - FROM lineitem_hash_part - GROUP BY 1 - ) q - ORDER BY 1,2 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Sort - Sort Key: remote_scan.l_orderkey, remote_scan.cnt - -> HashAggregate - Group Key: remote_scan.l_orderkey, remote_scan.cnt - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Limit - -> Sort - Sort Key: lineitem_hash_part.l_orderkey, (count(*)) - -> HashAggregate - Group Key: lineitem_hash_part.l_orderkey, count(*) - -> HashAggregate - Group Key: lineitem_hash_part.l_orderkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(18 rows) - --- distinct on partition column --- random() is added to inner query to prevent flattening -SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey - FROM ( - SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r - FROM lineitem_hash_part - ) q - WHERE r > 1 - ORDER BY 1,2 - LIMIT 10; - l_orderkey | l_partkey ---------------------------------------------------------------------- - 1 | 2132 - 2 | 106170 - 3 | 4297 - 4 | 88035 - 5 | 37531 - 6 | 139636 - 7 | 79251 - 32 | 2743 - 33 | 33918 - 34 | 88362 -(10 rows) - -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey - FROM ( - SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r - FROM lineitem_hash_part - ) q - WHERE r > 1 - ORDER BY 1,2 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Limit - -> Unique - -> Sort - Sort Key: q.l_orderkey, q.l_partkey - -> Subquery Scan on q - Filter: (q.r > 1) - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(16 rows) - --- distinct on non-partition column -SELECT DISTINCT ON (l_partkey) l_orderkey, l_partkey - FROM ( - SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r - FROM lineitem_hash_part - ) q - WHERE r > 1 - ORDER BY 2,1 - LIMIT 10; - l_orderkey | l_partkey ---------------------------------------------------------------------- - 12005 | 18 - 5121 | 79 - 2883 | 91 - 807 | 149 - 4102 | 175 - 2117 | 179 - 548 | 182 - 2528 | 195 - 10048 | 204 - 9413 | 222 -(10 rows) - -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (l_partkey) l_orderkey, l_partkey - FROM ( - SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r - FROM lineitem_hash_part - ) q - WHERE r > 1 - ORDER BY 2,1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.l_partkey, remote_scan.l_orderkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Limit - -> Unique - -> Sort - Sort Key: q.l_partkey, q.l_orderkey - -> Subquery Scan on q - Filter: (q.r > 1) - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(16 rows) - diff --git a/src/test/regress/sql/cte_inline.sql b/src/test/regress/sql/cte_inline.sql index 0e446d7f0..ab11c3749 100644 --- a/src/test/regress/sql/cte_inline.sql +++ b/src/test/regress/sql/cte_inline.sql @@ -246,19 +246,6 @@ $Q$); \set VERBOSITY default --- enable_group_by_reordering is a new GUC introduced in PG15 --- it does some optimization of the order of group by keys which results --- in a different explain output plan between PG13/14 and PG15 --- Hence we set that GUC to off. -SHOW server_version \gset -SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 -\gset -\if :server_version_ge_15 -SET enable_group_by_reordering TO off; -\endif -SELECT DISTINCT 1 FROM run_command_on_workers($$ALTER SYSTEM SET enable_group_by_reordering TO off;$$); -SELECT run_command_on_workers($$SELECT pg_reload_conf()$$); - EXPLAIN (COSTS OFF) WITH cte_1 AS NOT MATERIALIZED (SELECT * FROM test_table) SELECT count(*) @@ -268,13 +255,6 @@ FROM cte_1 as second_entry USING (key); -\if :server_version_ge_15 -RESET enable_group_by_reordering; -\endif -SELECT DISTINCT 1 FROM run_command_on_workers($$ALTER SYSTEM RESET enable_group_by_reordering;$$); -SELECT run_command_on_workers($$SELECT pg_reload_conf()$$); - - -- ctes with volatile functions are not -- inlined WITH cte_1 AS (SELECT *, random() FROM test_table) diff --git a/src/test/regress/sql/multi_explain.sql b/src/test/regress/sql/multi_explain.sql index 5086f7cf1..4429e46e7 100644 --- a/src/test/regress/sql/multi_explain.sql +++ b/src/test/regress/sql/multi_explain.sql @@ -254,20 +254,6 @@ FROM user_id) AS subquery; -- Union and left join subquery pushdown - --- enable_group_by_reordering is a new GUC introduced in PG15 --- it does some optimization of the order of group by keys which results --- in a different explain output plan between PG13/14 and PG15 --- Hence we set that GUC to off. -SHOW server_version \gset -SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15 -\gset -\if :server_version_ge_15 -SET enable_group_by_reordering TO off; -\endif -SELECT DISTINCT 1 FROM run_command_on_workers($$ALTER SYSTEM SET enable_group_by_reordering TO off;$$); -SELECT run_command_on_workers($$SELECT pg_reload_conf()$$); - EXPLAIN (COSTS OFF) SELECT avg(array_length(events, 1)) AS event_average, @@ -403,12 +389,6 @@ GROUP BY ORDER BY count_pay; -\if :server_version_ge_15 -RESET enable_group_by_reordering; -\endif -SELECT DISTINCT 1 FROM run_command_on_workers($$ALTER SYSTEM RESET enable_group_by_reordering;$$); -SELECT run_command_on_workers($$SELECT pg_reload_conf()$$); - -- Lateral join subquery pushdown -- set subquery_pushdown due to limit in the query SET citus.subquery_pushdown to ON; diff --git a/src/test/regress/sql/multi_select_distinct.sql b/src/test/regress/sql/multi_select_distinct.sql index a2ee189b0..c3ba20cf1 100644 --- a/src/test/regress/sql/multi_select_distinct.sql +++ b/src/test/regress/sql/multi_select_distinct.sql @@ -3,9 +3,6 @@ -- -- Tests select distinct, and select distinct on features. -- -SHOW server_version \gset -SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15; - ANALYZE lineitem_hash_part; -- function calls are supported