From 98dcbeb3042e902896c796be3b9414a3f98416d7 Mon Sep 17 00:00:00 2001 From: Naisila Puka <37271756+naisila@users.noreply.github.com> Date: Wed, 31 Aug 2022 10:48:01 +0300 Subject: [PATCH] Specifies that our CustomScan providers support projections (#6244) Before, this was the default mode for CustomScan providers. Now, the default is to assume that they can't project. This causes performance penalties due to adding unnecessary Result nodes. Hence we use the newly added flag, CUSTOMPATH_SUPPORT_PROJECTION to get it back to how it was. In PG15 support branch we created explain functions to ignore the new Result nodes, so we undo that in this commit. Relevant PG commit: 955b3e0f9269639fb916cee3dea37aee50b82df0 --- src/backend/columnar/columnar_customscan.c | 12 + .../planner/combine_query_planner.c | 6 + .../distributed/planner/distributed_planner.c | 7 + .../expected/columnar_chunk_filtering.out | 8 +- .../expected/columnar_citus_integration.out | 4 +- .../expected/insert_select_repartition.out | 8 +- .../expected/insert_select_repartition_0.out | 8 +- .../expected/multi_select_distinct.out | 68 +- .../expected/multi_select_distinct_0.out | 1548 +++++++++++++++++ .../regress/expected/multi_test_helpers.out | 34 - .../regress/expected/window_functions.out | 4 +- .../regress/expected/window_functions_0.out | 4 +- .../regress/sql/columnar_chunk_filtering.sql | 4 - .../sql/columnar_citus_integration.sql | 2 - .../regress/sql/insert_select_repartition.sql | 4 - .../regress/sql/multi_select_distinct.sql | 16 +- src/test/regress/sql/multi_test_helpers.sql | 36 - src/test/regress/sql/window_functions.sql | 2 - 18 files changed, 1616 insertions(+), 159 deletions(-) create mode 100644 src/test/regress/expected/multi_select_distinct_0.out diff --git a/src/backend/columnar/columnar_customscan.c b/src/backend/columnar/columnar_customscan.c index 98c13e2a7..74c50e4f6 100644 --- a/src/backend/columnar/columnar_customscan.c +++ b/src/backend/columnar/columnar_customscan.c @@ 
-1303,6 +1303,12 @@ AddColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte, cpath->methods = &ColumnarScanPathMethods; +#if (PG_VERSION_NUM >= PG_VERSION_15) + + /* necessary to avoid extra Result node in PG15 */ + cpath->flags = CUSTOMPATH_SUPPORT_PROJECTION; +#endif + /* * populate generic path information */ @@ -1545,6 +1551,12 @@ ColumnarScanPath_PlanCustomPath(PlannerInfo *root, cscan->scan.plan.targetlist = list_copy(tlist); cscan->scan.scanrelid = best_path->path.parent->relid; +#if (PG_VERSION_NUM >= PG_VERSION_15) + + /* necessary to avoid extra Result node in PG15 */ + cscan->flags = CUSTOMPATH_SUPPORT_PROJECTION; +#endif + return (Plan *) cscan; } diff --git a/src/backend/distributed/planner/combine_query_planner.c b/src/backend/distributed/planner/combine_query_planner.c index f67f71b53..0a871f3e6 100644 --- a/src/backend/distributed/planner/combine_query_planner.c +++ b/src/backend/distributed/planner/combine_query_planner.c @@ -136,6 +136,12 @@ CreateCitusCustomScanPath(PlannerInfo *root, RelOptInfo *relOptInfo, path->custom_path.path.pathtarget = relOptInfo->reltarget; path->custom_path.path.parent = relOptInfo; +#if (PG_VERSION_NUM >= PG_VERSION_15) + + /* necessary to avoid extra Result node in PG15 */ + path->custom_path.flags = CUSTOMPATH_SUPPORT_PROJECTION; +#endif + /* * The 100k rows we put on the cost of the path is kind of arbitrary and could be * improved in accuracy to produce better plans. 
diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 35664d7c7..17816a3b4 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -1369,7 +1369,14 @@ FinalizePlan(PlannedStmt *localPlan, DistributedPlan *distributedPlan) Node *distributedPlanData = (Node *) distributedPlan; customScan->custom_private = list_make1(distributedPlanData); + +#if (PG_VERSION_NUM >= PG_VERSION_15) + + /* necessary to avoid extra Result node in PG15 */ + customScan->flags = CUSTOMPATH_SUPPORT_BACKWARD_SCAN | CUSTOMPATH_SUPPORT_PROJECTION; +#else customScan->flags = CUSTOMPATH_SUPPORT_BACKWARD_SCAN; +#endif /* * Fast path queries cannot have any subplans by definition, so skip diff --git a/src/test/regress/expected/columnar_chunk_filtering.out b/src/test/regress/expected/columnar_chunk_filtering.out index 292d5fb1a..0d0534ccc 100644 --- a/src/test/regress/expected/columnar_chunk_filtering.out +++ b/src/test/regress/expected/columnar_chunk_filtering.out @@ -264,21 +264,17 @@ EXPLAIN (analyze on, costs off, timing off, summary off) Columnar Projected Columns: a (9 rows) -SELECT plan_without_arrows($Q$ EXPLAIN (costs off, timing off, summary off) SELECT y, * FROM another_columnar_table; -$Q$); - plan_without_arrows + QUERY PLAN --------------------------------------------------------------------- Custom Scan (ColumnarScan) on another_columnar_table Columnar Projected Columns: x, y (2 rows) -SELECT plan_without_arrows($Q$ EXPLAIN (costs off, timing off, summary off) SELECT *, x FROM another_columnar_table; -$Q$); - plan_without_arrows + QUERY PLAN --------------------------------------------------------------------- Custom Scan (ColumnarScan) on another_columnar_table Columnar Projected Columns: x, y diff --git a/src/test/regress/expected/columnar_citus_integration.out b/src/test/regress/expected/columnar_citus_integration.out index 
8beb09edf..fb7d9201e 100644 --- a/src/test/regress/expected/columnar_citus_integration.out +++ b/src/test/regress/expected/columnar_citus_integration.out @@ -958,15 +958,13 @@ SELECT * FROM weird_col_explain; (7 rows) \set VERBOSITY terse -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS OFF, SUMMARY OFF) SELECT *, "bbbbbbbbbbbbbbbbbbbbbbbbb\!bbbb'bbbbbbbbbbbbbbbbbbbbb''bbbbbbbb" FROM weird_col_explain WHERE "bbbbbbbbbbbbbbbbbbbbbbbbb\!bbbb'bbbbbbbbbbbbbbbbbbbbb''bbbbbbbb" * 2 > "aaaaaaaaaaaa$aaaaaa$$aaaaaaaaaaaaaaaaaaaaaaaaaaaaa'aaaaaaaa'$a'!"; -$Q$); NOTICE: identifier "aaaaaaaaaaaa$aaaaaa$$aaaaaaaaaaaaaaaaaaaaaaaaaaaaa'aaaaaaaa'$a'!" will be truncated to "aaaaaaaaaaaa$aaaaaa$$aaaaaaaaaaaaaaaaaaaaaaaaaaaaa'aaaaaaaa'$a'" - plan_without_result_lines + QUERY PLAN --------------------------------------------------------------------- Custom Scan (Citus Adaptive) Task Count: 4 diff --git a/src/test/regress/expected/insert_select_repartition.out b/src/test/regress/expected/insert_select_repartition.out index 913419072..5d9ddca6a 100644 --- a/src/test/regress/expected/insert_select_repartition.out +++ b/src/test/regress/expected/insert_select_repartition.out @@ -1261,10 +1261,8 @@ NOTICE: copying the data has completed (1 row) -SELECT public.plan_without_result_lines($Q$ explain (costs off) insert into table_with_sequences select y, x from table_with_sequences; -$Q$); - plan_without_result_lines + QUERY PLAN --------------------------------------------------------------------- Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator @@ -1289,10 +1287,8 @@ NOTICE: copying the data has completed (1 row) -SELECT public.plan_without_result_lines($Q$ explain (costs off) insert into table_with_user_sequences select y, x from table_with_user_sequences; -$Q$); - plan_without_result_lines + QUERY PLAN --------------------------------------------------------------------- Custom Scan (Citus INSERT ... 
SELECT) INSERT/SELECT method: pull to coordinator diff --git a/src/test/regress/expected/insert_select_repartition_0.out b/src/test/regress/expected/insert_select_repartition_0.out index 31377ef16..2eea30bdf 100644 --- a/src/test/regress/expected/insert_select_repartition_0.out +++ b/src/test/regress/expected/insert_select_repartition_0.out @@ -1261,10 +1261,8 @@ NOTICE: copying the data has completed (1 row) -SELECT public.plan_without_result_lines($Q$ explain (costs off) insert into table_with_sequences select y, x from table_with_sequences; -$Q$); - plan_without_result_lines + QUERY PLAN --------------------------------------------------------------------- Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator @@ -1289,10 +1287,8 @@ NOTICE: copying the data has completed (1 row) -SELECT public.plan_without_result_lines($Q$ explain (costs off) insert into table_with_user_sequences select y, x from table_with_user_sequences; -$Q$); - plan_without_result_lines + QUERY PLAN --------------------------------------------------------------------- Custom Scan (Citus INSERT ... SELECT) INSERT/SELECT method: pull to coordinator diff --git a/src/test/regress/expected/multi_select_distinct.out b/src/test/regress/expected/multi_select_distinct.out index 1112124ae..d281ad4b4 100644 --- a/src/test/regress/expected/multi_select_distinct.out +++ b/src/test/regress/expected/multi_select_distinct.out @@ -3,6 +3,13 @@ -- -- Tests select distinct, and select distinct on features. -- +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15; + server_version_ge_15 +--------------------------------------------------------------------- + t +(1 row) + ANALYZE lineitem_hash_part; -- function calls are supported SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0; @@ -306,14 +313,12 @@ EXPLAIN (COSTS FALSE) -- check the plan if the hash aggreate is disabled. 
We expect to see sort+unique -- instead of aggregate plan node to handle distinct. SET enable_hashagg TO off; -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS FALSE) SELECT DISTINCT count(*) FROM lineitem_hash_part GROUP BY l_suppkey, l_linenumber ORDER BY 1; -$Q$); - plan_without_result_lines + QUERY PLAN --------------------------------------------------------------------- Unique -> Sort @@ -382,15 +387,13 @@ EXPLAIN (COSTS FALSE) -- check the plan if the hash aggreate is disabled. Similar to the explain of -- the query above. SET enable_hashagg TO off; -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS FALSE) SELECT DISTINCT l_suppkey, count(*) FROM lineitem_hash_part GROUP BY l_suppkey, l_linenumber ORDER BY 1 LIMIT 10; -$Q$); - plan_without_result_lines + QUERY PLAN --------------------------------------------------------------------- Limit -> Unique @@ -440,12 +443,13 @@ EXPLAIN (COSTS FALSE) GROUP BY l_suppkey, l_linenumber ORDER BY 1,2 LIMIT 10; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Limit - -> Unique - -> Sort - Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) + -> Sort + Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) + -> HashAggregate + Group Key: remote_scan.l_suppkey, (pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1)) -> HashAggregate Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4 -> Custom Scan (Citus Adaptive) @@ -456,20 +460,18 @@ EXPLAIN (COSTS FALSE) -> HashAggregate Group Key: l_suppkey, l_linenumber -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) +(15 rows) -- check the plan if the hash aggreate is disabled. This explain errors out due -- to a bug right now, expectation must be corrected after fixing it. 
SET enable_hashagg TO off; -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS FALSE) SELECT DISTINCT l_suppkey, avg(l_partkey) FROM lineitem_hash_part GROUP BY l_suppkey, l_linenumber ORDER BY 1,2 LIMIT 10; -$Q$); - plan_without_result_lines + QUERY PLAN --------------------------------------------------------------------- Limit -> Unique @@ -539,15 +541,13 @@ EXPLAIN (COSTS FALSE) -- check the plan if the hash aggreate is disabled. We expect to see sort+unique to -- handle distinct on. SET enable_hashagg TO off; -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS FALSE) SELECT DISTINCT ON (l_suppkey) avg(l_partkey) FROM lineitem_hash_part GROUP BY l_suppkey, l_linenumber ORDER BY l_suppkey,1 LIMIT 10; -$Q$); - plan_without_result_lines + QUERY PLAN --------------------------------------------------------------------- Limit -> Unique @@ -595,12 +595,13 @@ EXPLAIN (COSTS FALSE) GROUP BY l_suppkey, l_linenumber ORDER BY 1 LIMIT 10; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Limit - -> Unique - -> Sort - Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision)) + -> Sort + Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision)) + -> HashAggregate + Group Key: (sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision) -> HashAggregate Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 -> Custom Scan (Citus Adaptive) @@ -611,20 +612,18 @@ EXPLAIN (COSTS FALSE) -> HashAggregate Group Key: l_suppkey, l_linenumber -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) +(15 rows) -- check the plan if the hash aggreate is disabled. This explain errors out due -- to a bug right now, expectation must be corrected after fixing it. 
SET enable_hashagg TO off; -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS FALSE) SELECT DISTINCT avg(ceil(l_partkey / 2)) FROM lineitem_hash_part GROUP BY l_suppkey, l_linenumber ORDER BY 1 LIMIT 10; -$Q$); - plan_without_result_lines + QUERY PLAN --------------------------------------------------------------------- Limit -> Unique @@ -672,12 +671,13 @@ EXPLAIN (COSTS FALSE) GROUP BY l_suppkey, l_linenumber ORDER BY 1 LIMIT 10; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Limit - -> Unique - -> Sort - Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint))) + -> Sort + Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint))) + -> HashAggregate + Group Key: ((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint)) -> HashAggregate Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 -> Custom Scan (Citus Adaptive) @@ -688,20 +688,18 @@ EXPLAIN (COSTS FALSE) -> HashAggregate Group Key: l_suppkey, l_linenumber -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) +(15 rows) -- check the plan if the hash aggreate is disabled. This explain errors out due -- to a bug right now, expectation must be corrected after fixing it. 
SET enable_hashagg TO off; -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS FALSE) SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis FROM lineitem_hash_part GROUP BY l_suppkey, l_linenumber ORDER BY 1 LIMIT 10; -$Q$); - plan_without_result_lines + QUERY PLAN --------------------------------------------------------------------- Limit -> Unique @@ -910,14 +908,12 @@ EXPLAIN (COSTS FALSE) -- check the plan if the hash aggreate is disabled SET enable_hashagg TO off; -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS FALSE) SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count FROM lineitem_hash_part GROUP BY l_suppkey ORDER BY 1; -$Q$); - plan_without_result_lines + QUERY PLAN --------------------------------------------------------------------- Unique -> Sort diff --git a/src/test/regress/expected/multi_select_distinct_0.out b/src/test/regress/expected/multi_select_distinct_0.out new file mode 100644 index 000000000..69e90b7a0 --- /dev/null +++ b/src/test/regress/expected/multi_select_distinct_0.out @@ -0,0 +1,1548 @@ +-- +-- MULTI_SELECT_DISTINCT +-- +-- Tests select distinct, and select distinct on features. 
+-- +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15; + server_version_ge_15 +--------------------------------------------------------------------- + f +(1 row) + +ANALYZE lineitem_hash_part; +-- function calls are supported +SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0; + l_orderkey | now +--------------------------------------------------------------------- +(0 rows) + +SELECT DISTINCT l_orderkey, avg(l_linenumber) +FROM lineitem_hash_part +GROUP BY l_orderkey +HAVING avg(l_linenumber) = (select avg(distinct l_linenumber)) +LIMIT 10; +ERROR: Subqueries in HAVING cannot refer to outer query +SELECT DISTINCT l_orderkey +FROM lineitem_hash_part +GROUP BY l_orderkey +HAVING (select avg(distinct l_linenumber) = l_orderkey) +LIMIT 10; +ERROR: Subqueries in HAVING cannot refer to outer query +SELECT DISTINCT l_partkey, 1 + (random() * 0)::int FROM lineitem_hash_part ORDER BY 1 DESC LIMIT 3; + l_partkey | ?column? +--------------------------------------------------------------------- + 199973 | 1 + 199946 | 1 + 199943 | 1 +(3 rows) + +-- const expressions are supported +SELECT DISTINCT l_orderkey, 1+1 FROM lineitem_hash_part ORDER BY 1 LIMIT 5; + l_orderkey | ?column? +--------------------------------------------------------------------- + 1 | 2 + 2 | 2 + 3 | 2 + 4 | 2 + 5 | 2 +(5 rows) + +-- non const expressions are also supported +SELECT DISTINCT l_orderkey, l_partkey + 1 FROM lineitem_hash_part ORDER BY 1, 2 LIMIT 5; + l_orderkey | ?column? +--------------------------------------------------------------------- + 1 | 2133 + 1 | 15636 + 1 | 24028 + 1 | 63701 + 1 | 67311 +(5 rows) + +-- column expressions are supported +SELECT DISTINCT l_orderkey, l_shipinstruct || l_shipmode FROM lineitem_hash_part ORDER BY 2 , 1 LIMIT 5; + l_orderkey | ?column? 
+--------------------------------------------------------------------- + 32 | COLLECT CODAIR + 39 | COLLECT CODAIR + 66 | COLLECT CODAIR + 70 | COLLECT CODAIR + 98 | COLLECT CODAIR +(5 rows) + +-- function calls with const input are supported +SELECT DISTINCT l_orderkey, strpos('AIR', 'A') FROM lineitem_hash_part ORDER BY 1,2 LIMIT 5; + l_orderkey | strpos +--------------------------------------------------------------------- + 1 | 1 + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 1 +(5 rows) + +-- function calls with non-const input are supported +SELECT DISTINCT l_orderkey, strpos(l_shipmode, 'I') + FROM lineitem_hash_part + WHERE strpos(l_shipmode, 'I') > 1 + ORDER BY 2, 1 + LIMIT 5; + l_orderkey | strpos +--------------------------------------------------------------------- + 1 | 2 + 3 | 2 + 5 | 2 + 32 | 2 + 33 | 2 +(5 rows) + +-- row types are supported +SELECT DISTINCT (l_orderkey, l_partkey) AS pair FROM lineitem_hash_part ORDER BY 1 LIMIT 5; + pair +--------------------------------------------------------------------- + (1,2132) + (1,15635) + (1,24027) + (1,63700) + (1,67310) +(5 rows) + +-- distinct on partition column +-- verify counts match with respect to count(distinct) +CREATE TEMP TABLE temp_orderkeys AS SELECT DISTINCT l_orderkey FROM lineitem_hash_part; +SELECT COUNT(*) FROM temp_orderkeys; + count +--------------------------------------------------------------------- + 2985 +(1 row) + +SELECT COUNT(DISTINCT l_orderkey) FROM lineitem_hash_part; + count +--------------------------------------------------------------------- + 2985 +(1 row) + +SELECT DISTINCT l_orderkey FROM lineitem_hash_part WHERE l_orderkey < 500 and l_partkey < 5000 order by 1; + l_orderkey +--------------------------------------------------------------------- + 1 + 3 + 32 + 35 + 39 + 65 + 129 + 130 + 134 + 164 + 194 + 228 + 261 + 290 + 320 + 321 + 354 + 418 +(18 rows) + +-- distinct on non-partition column +SELECT DISTINCT l_partkey FROM lineitem_hash_part WHERE l_orderkey > 5 and l_orderkey < 20 
order by 1; + l_partkey +--------------------------------------------------------------------- + 79251 + 94780 + 139636 + 145243 + 151894 + 157238 + 163073 + 182052 +(8 rows) + +SELECT DISTINCT l_shipmode FROM lineitem_hash_part ORDER BY 1 DESC; + l_shipmode +--------------------------------------------------------------------- + TRUCK + SHIP + REG AIR + RAIL + MAIL + FOB + AIR +(7 rows) + +-- distinct with multiple columns +SELECT DISTINCT l_orderkey, o_orderdate + FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) + WHERE l_orderkey < 10 + ORDER BY l_orderkey; + l_orderkey | o_orderdate +--------------------------------------------------------------------- + 1 | 01-02-1996 + 2 | 12-01-1996 + 3 | 10-14-1993 + 4 | 10-11-1995 + 5 | 07-30-1994 + 6 | 02-21-1992 + 7 | 01-10-1996 +(7 rows) + +-- distinct on partition column with aggregate +-- this is the same as the one without distinct due to group by +SELECT DISTINCT l_orderkey, count(*) + FROM lineitem_hash_part + WHERE l_orderkey < 200 + GROUP BY 1 + HAVING count(*) > 5 + ORDER BY 2 DESC, 1; + l_orderkey | count +--------------------------------------------------------------------- + 7 | 7 + 68 | 7 + 129 | 7 + 164 | 7 + 194 | 7 + 1 | 6 + 3 | 6 + 32 | 6 + 35 | 6 + 39 | 6 + 67 | 6 + 69 | 6 + 70 | 6 + 71 | 6 + 134 | 6 + 135 | 6 + 163 | 6 + 192 | 6 + 197 | 6 +(19 rows) + +-- explain the query to see actual plan +EXPLAIN (COSTS FALSE) + SELECT DISTINCT l_orderkey, count(*) + FROM lineitem_hash_part + WHERE l_orderkey < 200 + GROUP BY 1 + HAVING count(*) > 5 + ORDER BY 2 DESC, 1; + QUERY PLAN +--------------------------------------------------------------------- + Sort + Sort Key: remote_scan.count DESC, remote_scan.l_orderkey + -> HashAggregate + Group Key: remote_scan.count, remote_scan.l_orderkey + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_orderkey + Filter: (count(*) > 
5) + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part + Filter: (l_orderkey < 200) +(14 rows) + +-- check the plan if the hash aggreate is disabled +SET enable_hashagg TO off; +EXPLAIN (COSTS FALSE) + SELECT DISTINCT l_orderkey, count(*) + FROM lineitem_hash_part + WHERE l_orderkey < 200 + GROUP BY 1 + HAVING count(*) > 5 + ORDER BY 2 DESC, 1; + QUERY PLAN +--------------------------------------------------------------------- + Unique + -> Sort + Sort Key: remote_scan.count DESC, remote_scan.l_orderkey + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_orderkey + Filter: (count(*) > 5) + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part + Filter: (l_orderkey < 200) +(13 rows) + +SET enable_hashagg TO on; +-- distinct on aggregate of group by columns, we try to check whether we handle +-- queries which does not have any group by column in distinct columns properly. +SELECT DISTINCT count(*) + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY 1; + count +--------------------------------------------------------------------- + 1 + 2 + 3 + 4 +(4 rows) + +-- explain the query to see actual plan. We expect to see Aggregate node having +-- group by key on count(*) column, since columns in the Group By doesn't guarantee +-- the uniqueness of the result. 
+EXPLAIN (COSTS FALSE) + SELECT DISTINCT count(*) + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY 1; + QUERY PLAN +--------------------------------------------------------------------- + Unique + -> Sort + Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) + -> HashAggregate + Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_suppkey, l_linenumber + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(13 rows) + +-- check the plan if the hash aggreate is disabled. We expect to see sort+unique +-- instead of aggregate plan node to handle distinct. +SET enable_hashagg TO off; +EXPLAIN (COSTS FALSE) + SELECT DISTINCT count(*) + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY 1; + QUERY PLAN +--------------------------------------------------------------------- + Unique + -> Sort + Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) + -> GroupAggregate + Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3 + -> Sort + Sort Key: remote_scan.worker_column_2, remote_scan.worker_column_3 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_suppkey, l_linenumber + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(15 rows) + +SET enable_hashagg TO on; +-- Now we have only part of group clause columns in distinct, yet it is still not +-- enough to use Group By columns to guarantee uniqueness of result list. 
+SELECT DISTINCT l_suppkey, count(*) + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY 1 + LIMIT 10; + l_suppkey | count +--------------------------------------------------------------------- + 1 | 1 + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 1 + 7 | 1 + 10 | 1 + 12 | 1 + 13 | 1 + 14 | 1 +(10 rows) + +-- explain the query to see actual plan. Similar to the explain of the query above. +EXPLAIN (COSTS FALSE) + SELECT DISTINCT l_suppkey, count(*) + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY 1 + LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------- + Limit + -> Unique + -> Sort + Sort Key: remote_scan.l_suppkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) + -> HashAggregate + Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_suppkey, l_linenumber + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(14 rows) + +-- check the plan if the hash aggreate is disabled. Similar to the explain of +-- the query above. 
+SET enable_hashagg TO off; +EXPLAIN (COSTS FALSE) + SELECT DISTINCT l_suppkey, count(*) + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY 1 + LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------- + Limit + -> Unique + -> Sort + Sort Key: remote_scan.l_suppkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) + -> GroupAggregate + Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3 + -> Sort + Sort Key: remote_scan.l_suppkey, remote_scan.worker_column_3 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_suppkey, l_linenumber + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(16 rows) + +SET enable_hashagg TO on; +-- Similar to the above query, not with count but avg. Only difference with the +-- above query is that, we create run two aggregate functions in workers. +SELECT DISTINCT l_suppkey, avg(l_partkey) + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY 1,2 + LIMIT 10; + l_suppkey | avg +--------------------------------------------------------------------- + 1 | 190000.000000000000 + 2 | 172450.000000000000 + 3 | 112469.000000000000 + 3 | 134976.000000000000 + 4 | 112470.000000000000 + 4 | 142461.000000000000 + 5 | 182450.000000000000 + 7 | 137493.000000000000 + 10 | 150009.000000000000 + 12 | 17510.0000000000000000 +(10 rows) + +-- explain the query to see actual plan. Similar to the explain of the query above. +-- Only aggregate functions will be changed. 
+EXPLAIN (COSTS FALSE) + SELECT DISTINCT l_suppkey, avg(l_partkey) + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY 1,2 + LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------- + Limit + -> Unique + -> Sort + Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) + -> HashAggregate + Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_suppkey, l_linenumber + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(14 rows) + +-- check the plan if the hash aggreate is disabled. This explain errors out due +-- to a bug right now, expectation must be corrected after fixing it. +SET enable_hashagg TO off; +EXPLAIN (COSTS FALSE) + SELECT DISTINCT l_suppkey, avg(l_partkey) + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY 1,2 + LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------- + Limit + -> Unique + -> Sort + Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) + -> GroupAggregate + Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4 + -> Sort + Sort Key: remote_scan.l_suppkey, remote_scan.worker_column_4 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_suppkey, l_linenumber + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(16 rows) + +SET enable_hashagg TO on; +-- Similar to the above query but with distinct on +SELECT DISTINCT ON (l_suppkey) avg(l_partkey) + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY l_suppkey,1 + LIMIT 10; + avg 
+--------------------------------------------------------------------- + 190000.000000000000 + 172450.000000000000 + 112469.000000000000 + 112470.000000000000 + 182450.000000000000 + 137493.000000000000 + 150009.000000000000 + 17510.0000000000000000 + 87504.000000000000 + 77506.000000000000 +(10 rows) + +-- explain the query to see actual plan. We expect to see sort+unique to handle +-- distinct on. +EXPLAIN (COSTS FALSE) + SELECT DISTINCT ON (l_suppkey) avg(l_partkey) + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY l_suppkey,1 + LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------- + Limit + -> Unique + -> Sort + Sort Key: remote_scan.worker_column_3, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) + -> HashAggregate + Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_suppkey, l_linenumber + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(14 rows) + +-- check the plan if the hash aggreate is disabled. We expect to see sort+unique to +-- handle distinct on. 
+SET enable_hashagg TO off; +EXPLAIN (COSTS FALSE) + SELECT DISTINCT ON (l_suppkey) avg(l_partkey) + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY l_suppkey,1 + LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------- + Limit + -> Unique + -> Sort + Sort Key: remote_scan.worker_column_3, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) + -> GroupAggregate + Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 + -> Sort + Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_suppkey, l_linenumber + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(16 rows) + +SET enable_hashagg TO on; +-- distinct with expression and aggregation +SELECT DISTINCT avg(ceil(l_partkey / 2)) + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY 1 + LIMIT 10; + avg +--------------------------------------------------------------------- + 9 + 39 + 74 + 87 + 89 + 91 + 97 + 102 + 111 + 122 +(10 rows) + +-- explain the query to see actual plan +EXPLAIN (COSTS FALSE) + SELECT DISTINCT avg(ceil(l_partkey / 2)) + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY 1 + LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------- + Limit + -> Unique + -> Sort + Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision)) + -> HashAggregate + Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_suppkey, l_linenumber + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(14 rows) + +-- check the plan if the hash aggreate is 
disabled. This explain errors out due +-- to a bug right now, expectation must be corrected after fixing it. +SET enable_hashagg TO off; +EXPLAIN (COSTS FALSE) + SELECT DISTINCT avg(ceil(l_partkey / 2)) + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY 1 + LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------- + Limit + -> Unique + -> Sort + Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision)) + -> GroupAggregate + Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 + -> Sort + Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_suppkey, l_linenumber + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(16 rows) + +SET enable_hashagg TO on; +-- expression among aggregations. +SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY 1 + LIMIT 10; + dis +--------------------------------------------------------------------- + 2 + 3 + 4 + 5 + 6 + 8 + 11 + 13 + 14 + 15 +(10 rows) + +-- explain the query to see actual plan +EXPLAIN (COSTS FALSE) + SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY 1 + LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------- + Limit + -> Unique + -> Sort + Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint))) + -> HashAggregate + Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_suppkey, 
l_linenumber + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(14 rows) + +-- check the plan if the hash aggreate is disabled. This explain errors out due +-- to a bug right now, expectation must be corrected after fixing it. +SET enable_hashagg TO off; +EXPLAIN (COSTS FALSE) + SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis + FROM lineitem_hash_part + GROUP BY l_suppkey, l_linenumber + ORDER BY 1 + LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------- + Limit + -> Unique + -> Sort + Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint))) + -> GroupAggregate + Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 + -> Sort + Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_suppkey, l_linenumber + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(16 rows) + +SET enable_hashagg TO on; +-- distinct on all columns, note Group By columns guarantees uniqueness of the +-- result list. +SELECT DISTINCT * + FROM lineitem_hash_part + GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 + ORDER BY 1,2 + LIMIT 10; + l_orderkey | l_partkey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate | l_shipinstruct | l_shipmode | l_comment +--------------------------------------------------------------------- + 1 | 2132 | 4633 | 4 | 28.00 | 28955.64 | 0.09 | 0.06 | N | O | 04-21-1996 | 03-30-1996 | 05-16-1996 | NONE | AIR | lites. 
fluffily even de + 1 | 15635 | 638 | 6 | 32.00 | 49620.16 | 0.07 | 0.02 | N | O | 01-30-1996 | 02-07-1996 | 02-03-1996 | DELIVER IN PERSON | MAIL | arefully slyly ex + 1 | 24027 | 1534 | 5 | 24.00 | 22824.48 | 0.10 | 0.04 | N | O | 03-30-1996 | 03-14-1996 | 04-01-1996 | NONE | FOB | pending foxes. slyly re + 1 | 63700 | 3701 | 3 | 8.00 | 13309.60 | 0.10 | 0.02 | N | O | 01-29-1996 | 03-05-1996 | 01-31-1996 | TAKE BACK RETURN | REG AIR | riously. regular, express dep + 1 | 67310 | 7311 | 2 | 36.00 | 45983.16 | 0.09 | 0.06 | N | O | 04-12-1996 | 02-28-1996 | 04-20-1996 | TAKE BACK RETURN | MAIL | ly final dependencies: slyly bold + 1 | 155190 | 7706 | 1 | 17.00 | 21168.23 | 0.04 | 0.02 | N | O | 03-13-1996 | 02-12-1996 | 03-22-1996 | DELIVER IN PERSON | TRUCK | egular courts above the + 2 | 106170 | 1191 | 1 | 38.00 | 44694.46 | 0.00 | 0.05 | N | O | 01-28-1997 | 01-14-1997 | 02-02-1997 | TAKE BACK RETURN | RAIL | ven requests. deposits breach a + 3 | 4297 | 1798 | 1 | 45.00 | 54058.05 | 0.06 | 0.00 | R | F | 02-02-1994 | 01-04-1994 | 02-23-1994 | NONE | AIR | ongside of the furiously brave acco + 3 | 19036 | 6540 | 2 | 49.00 | 46796.47 | 0.10 | 0.00 | R | F | 11-09-1993 | 12-20-1993 | 11-24-1993 | TAKE BACK RETURN | RAIL | unusual accounts. eve + 3 | 29380 | 1883 | 4 | 2.00 | 2618.76 | 0.01 | 0.06 | A | F | 12-04-1993 | 01-07-1994 | 01-01-1994 | NONE | TRUCK | y. fluffily pending d +(10 rows) + +-- explain the query to see actual plan. We expect to see only one aggregation +-- node since group by columns guarantees the uniqueness. 
+SELECT coordinator_plan($Q$ +EXPLAIN (COSTS FALSE) + SELECT DISTINCT * + FROM lineitem_hash_part + GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 + ORDER BY 1,2 + LIMIT 10; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Limit + -> Sort + Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey + -> HashAggregate + Group Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(7 rows) + +-- check the plan if the hash aggreate is disabled. We expect to see only one +-- aggregation node since group by columns guarantees the uniqueness. +SET enable_hashagg TO off; +SELECT coordinator_plan($Q$ +EXPLAIN (COSTS FALSE) + SELECT DISTINCT * + FROM lineitem_hash_part + GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 + ORDER BY 1,2 + LIMIT 10; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Limit + -> Unique + -> Sort + Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(6 rows) + +SET enable_hashagg TO on; +-- distinct on count distinct +SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode) + FROM lineitem_hash_part + GROUP BY l_orderkey + ORDER BY 1,2; + count | count 
+--------------------------------------------------------------------- + 1 | 1 + 2 | 1 + 2 | 2 + 3 | 1 + 3 | 2 + 3 | 3 + 4 | 1 + 4 | 2 + 4 | 3 + 4 | 4 + 5 | 2 + 5 | 3 + 5 | 4 + 5 | 5 + 6 | 2 + 6 | 3 + 6 | 4 + 6 | 5 + 6 | 6 + 7 | 2 + 7 | 3 + 7 | 4 + 7 | 5 + 7 | 6 + 7 | 7 +(25 rows) + +-- explain the query to see actual plan. We expect to see aggregation plan for +-- the outer distinct. +EXPLAIN (COSTS FALSE) + SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode) + FROM lineitem_hash_part + GROUP BY l_orderkey + ORDER BY 1,2; + QUERY PLAN +--------------------------------------------------------------------- + Sort + Sort Key: remote_scan.count, remote_scan.count_1 + -> HashAggregate + Group Key: remote_scan.count, remote_scan.count_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> GroupAggregate + Group Key: l_orderkey + -> Sort + Sort Key: l_orderkey + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(14 rows) + +-- check the plan if the hash aggreate is disabled. We expect to see sort + unique +-- plans for the outer distinct. 
+SET enable_hashagg TO off; +EXPLAIN (COSTS FALSE) + SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode) + FROM lineitem_hash_part + GROUP BY l_orderkey + ORDER BY 1,2; + QUERY PLAN +--------------------------------------------------------------------- + Unique + -> Sort + Sort Key: remote_scan.count, remote_scan.count_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> GroupAggregate + Group Key: l_orderkey + -> Sort + Sort Key: l_orderkey + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(13 rows) + +SET enable_hashagg TO on; +-- distinct on aggregation with filter and expression +SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count + FROM lineitem_hash_part + GROUP BY l_suppkey + ORDER BY 1; + count +--------------------------------------------------------------------- + 0 + 1 + 2 + 3 + 4 +(5 rows) + +-- explain the query to see actual plan +EXPLAIN (COSTS FALSE) + SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count + FROM lineitem_hash_part + GROUP BY l_suppkey + ORDER BY 1; + QUERY PLAN +--------------------------------------------------------------------- + Unique + -> Sort + Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision)) + -> HashAggregate + Group Key: remote_scan.worker_column_2 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_suppkey + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(13 rows) + +-- check the plan if the hash aggreate is disabled +SET enable_hashagg TO off; +EXPLAIN (COSTS FALSE) + SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count + FROM lineitem_hash_part + GROUP BY l_suppkey + ORDER BY 1; + QUERY PLAN 
+--------------------------------------------------------------------- + Unique + -> Sort + Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision)) + -> GroupAggregate + Group Key: remote_scan.worker_column_2 + -> Sort + Sort Key: remote_scan.worker_column_2 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_suppkey + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(15 rows) + +SET enable_hashagg TO on; +-- explain the query to see actual plan with array_agg aggregation. +SELECT coordinator_plan($Q$ +EXPLAIN (COSTS FALSE) + SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1) + FROM lineitem_hash_part + GROUP BY l_orderkey + ORDER BY 2 + LIMIT 15; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Limit + -> Sort + Sort Key: remote_scan.array_length + -> HashAggregate + Group Key: remote_scan.array_length, remote_scan.array_agg + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(7 rows) + +-- check the plan if the hash aggreate is disabled. 
+SET enable_hashagg TO off; +SELECT coordinator_plan($Q$ +EXPLAIN (COSTS FALSE) + SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1) + FROM lineitem_hash_part + GROUP BY l_orderkey + ORDER BY 2 + LIMIT 15; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Limit + -> Unique + -> Sort + Sort Key: remote_scan.array_length, remote_scan.array_agg + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(6 rows) + +SET enable_hashagg TO on; +-- distinct on non-partition column with aggregate +-- this is the same as non-distinct version due to group by +SELECT DISTINCT l_partkey, count(*) + FROM lineitem_hash_part + GROUP BY 1 + HAVING count(*) > 2 + ORDER BY 1; + l_partkey | count +--------------------------------------------------------------------- + 1051 | 3 + 1927 | 3 + 6983 | 3 + 15283 | 3 + 87761 | 3 + 136884 | 3 + 149926 | 3 + 160895 | 3 + 177771 | 3 + 188804 | 3 + 199146 | 3 +(11 rows) + +-- explain the query to see actual plan +EXPLAIN (COSTS FALSE) + SELECT DISTINCT l_partkey, count(*) + FROM lineitem_hash_part + GROUP BY 1 + HAVING count(*) > 2 + ORDER BY 1; + QUERY PLAN +--------------------------------------------------------------------- + Unique + -> Sort + Sort Key: remote_scan.l_partkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) + -> HashAggregate + Group Key: remote_scan.l_partkey + Filter: (COALESCE((pg_catalog.sum(remote_scan.worker_column_3))::bigint, '0'::bigint) > 2) + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> HashAggregate + Group Key: l_partkey + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(14 rows) + +-- distinct on non-partition column and avg +SELECT DISTINCT l_partkey, avg(l_linenumber) + FROM lineitem_hash_part + WHERE l_partkey < 500 + GROUP BY 1 + HAVING avg(l_linenumber) > 2 + ORDER BY 1; + l_partkey | avg 
+--------------------------------------------------------------------- + 18 | 7.0000000000000000 + 79 | 6.0000000000000000 + 149 | 4.5000000000000000 + 175 | 5.0000000000000000 + 179 | 6.0000000000000000 + 182 | 3.0000000000000000 + 222 | 4.0000000000000000 + 278 | 3.0000000000000000 + 299 | 7.0000000000000000 + 308 | 7.0000000000000000 + 309 | 5.0000000000000000 + 321 | 3.0000000000000000 + 337 | 6.0000000000000000 + 364 | 3.0000000000000000 + 403 | 4.0000000000000000 +(15 rows) + +-- distinct on multiple non-partition columns +SELECT DISTINCT l_partkey, l_suppkey + FROM lineitem_hash_part + WHERE l_shipmode = 'AIR' AND l_orderkey < 100 + ORDER BY 1, 2; + l_partkey | l_suppkey +--------------------------------------------------------------------- + 2132 | 4633 + 4297 | 1798 + 37531 | 35 + 44161 | 6666 + 44706 | 4707 + 67831 | 5350 + 85811 | 8320 + 94368 | 6878 + 108338 | 849 + 108570 | 8571 + 137267 | 4807 + 137469 | 9983 + 173489 | 3490 + 196156 | 1195 + 197921 | 441 +(15 rows) + +EXPLAIN (COSTS FALSE) + SELECT DISTINCT l_partkey, l_suppkey + FROM lineitem_hash_part + WHERE l_shipmode = 'AIR' AND l_orderkey < 100 + ORDER BY 1, 2; + QUERY PLAN +--------------------------------------------------------------------- + Sort + Sort Key: remote_scan.l_partkey, remote_scan.l_suppkey + -> HashAggregate + Group Key: remote_scan.l_partkey, remote_scan.l_suppkey + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Unique + -> Sort + Sort Key: l_partkey, l_suppkey + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part + Filter: ((l_orderkey < 100) AND (l_shipmode = 'AIR'::bpchar)) +(14 rows) + +-- distinct on partition column +SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey, l_suppkey + FROM lineitem_hash_part + WHERE l_orderkey < 35 + ORDER BY 1, 2, 3; + l_orderkey | l_partkey | l_suppkey +--------------------------------------------------------------------- + 1 | 2132 | 4633 
+ 2 | 106170 | 1191 + 3 | 4297 | 1798 + 4 | 88035 | 5560 + 5 | 37531 | 35 + 6 | 139636 | 2150 + 7 | 79251 | 1759 + 32 | 2743 | 7744 + 33 | 33918 | 3919 + 34 | 88362 | 871 +(10 rows) + +EXPLAIN (COSTS FALSE) + SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey, l_suppkey + FROM lineitem_hash_part + WHERE l_orderkey < 35 + ORDER BY 1, 2, 3; + QUERY PLAN +--------------------------------------------------------------------- + Unique + -> Sort + Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Unique + -> Sort + Sort Key: l_orderkey, l_partkey, l_suppkey + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part + Filter: (l_orderkey < 35) +(13 rows) + +-- distinct on non-partition column +-- note order by is required here +-- otherwise query results will be different since +-- distinct on clause is on non-partition column +SELECT DISTINCT ON (l_partkey) l_partkey, l_orderkey + FROM lineitem_hash_part + ORDER BY 1,2 + LIMIT 20; + l_partkey | l_orderkey +--------------------------------------------------------------------- + 18 | 12005 + 79 | 5121 + 91 | 2883 + 149 | 807 + 175 | 4102 + 179 | 2117 + 182 | 548 + 195 | 2528 + 204 | 10048 + 222 | 9413 + 245 | 9446 + 278 | 1287 + 299 | 1122 + 308 | 11137 + 309 | 2374 + 318 | 321 + 321 | 5984 + 337 | 10403 + 350 | 13698 + 358 | 4323 +(20 rows) + +EXPLAIN (COSTS FALSE) + SELECT DISTINCT ON (l_partkey) l_partkey, l_orderkey + FROM lineitem_hash_part + ORDER BY 1,2 + LIMIT 20; + QUERY PLAN +--------------------------------------------------------------------- + Limit + -> Unique + -> Sort + Sort Key: remote_scan.l_partkey, remote_scan.l_orderkey + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Limit + -> Unique + -> Sort + Sort Key: l_partkey, l_orderkey + -> Seq 
Scan on lineitem_hash_part_360041 lineitem_hash_part +(14 rows) + +-- distinct on with joins +-- each customer's first order key +SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey + FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) + WHERE o_custkey < 15 + ORDER BY 1,2; + o_custkey | l_orderkey +--------------------------------------------------------------------- + 1 | 9154 + 2 | 10563 + 4 | 320 + 5 | 11682 + 7 | 10402 + 8 | 102 + 10 | 1602 + 11 | 12800 + 13 | 994 + 14 | 11011 +(10 rows) + +SELECT coordinator_plan($Q$ +EXPLAIN (COSTS FALSE) + SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey + FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) + WHERE o_custkey < 15 + ORDER BY 1,2; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Unique + -> Sort + Sort Key: remote_scan.o_custkey, remote_scan.l_orderkey + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(5 rows) + +-- explain without order by +-- notice master plan has order by on distinct on column +SELECT coordinator_plan($Q$ +EXPLAIN (COSTS FALSE) + SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey + FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) + WHERE o_custkey < 15; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Unique + -> Sort + Sort Key: remote_scan.o_custkey + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(5 rows) + +-- each customer's each order's first l_partkey +SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey + FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) + WHERE o_custkey < 20 + ORDER BY 1,2,3; + o_custkey | l_orderkey | l_linenumber | l_partkey +--------------------------------------------------------------------- + 1 | 9154 | 1 | 86513 + 1 | 14656 | 1 | 59539 + 2 | 10563 | 1 | 147459 + 4 | 320 | 1 | 4415 + 4 | 739 | 1 | 84489 + 4 | 10688 | 
1 | 45037 + 4 | 10788 | 1 | 50814 + 4 | 13728 | 1 | 86216 + 5 | 11682 | 1 | 31634 + 5 | 11746 | 1 | 180724 + 5 | 14308 | 1 | 157430 + 7 | 10402 | 1 | 53661 + 7 | 13031 | 1 | 112161 + 7 | 14145 | 1 | 138729 + 7 | 14404 | 1 | 143034 + 8 | 102 | 1 | 88914 + 8 | 164 | 1 | 91309 + 8 | 13601 | 1 | 40504 + 10 | 1602 | 1 | 182806 + 10 | 9862 | 1 | 86241 + 10 | 11431 | 1 | 62112 + 10 | 13124 | 1 | 29414 + 11 | 12800 | 1 | 152806 + 13 | 994 | 1 | 64486 + 13 | 1603 | 1 | 38191 + 13 | 4704 | 1 | 77934 + 13 | 9927 | 1 | 875 + 14 | 11011 | 1 | 172485 + 17 | 896 | 1 | 38675 + 17 | 5507 | 1 | 9600 + 19 | 353 | 1 | 119305 + 19 | 1504 | 1 | 81389 + 19 | 1669 | 1 | 78373 + 19 | 5893 | 1 | 133707 + 19 | 9954 | 1 | 92138 + 19 | 14885 | 1 | 36154 +(36 rows) + +-- explain without order by +SELECT coordinator_plan($Q$ +EXPLAIN (COSTS FALSE) + SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey + FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) + WHERE o_custkey < 20; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Unique + -> Sort + Sort Key: remote_scan.o_custkey, remote_scan.l_orderkey + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(5 rows) + +-- each customer's each order's last l_partkey +SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey + FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) + WHERE o_custkey < 15 + ORDER BY 1,2,3 DESC; + o_custkey | l_orderkey | l_linenumber | l_partkey +--------------------------------------------------------------------- + 1 | 9154 | 7 | 173448 + 1 | 14656 | 1 | 59539 + 2 | 10563 | 4 | 110741 + 4 | 320 | 2 | 192158 + 4 | 739 | 5 | 187523 + 4 | 10688 | 2 | 132574 + 4 | 10788 | 4 | 196473 + 4 | 13728 | 3 | 12450 + 5 | 11682 | 3 | 177152 + 5 | 11746 | 7 | 193807 + 5 | 14308 | 3 | 140916 + 7 | 10402 | 2 | 64514 + 7 | 13031 | 6 | 7761 + 7 | 14145 | 6 | 130723 + 7 | 14404 | 7 
| 35349 + 8 | 102 | 4 | 61158 + 8 | 164 | 7 | 3037 + 8 | 13601 | 5 | 12470 + 10 | 1602 | 1 | 182806 + 10 | 9862 | 5 | 135675 + 10 | 11431 | 7 | 8563 + 10 | 13124 | 3 | 67055 + 11 | 12800 | 5 | 179110 + 13 | 994 | 4 | 130471 + 13 | 1603 | 2 | 65209 + 13 | 4704 | 3 | 63081 + 13 | 9927 | 6 | 119356 + 14 | 11011 | 7 | 95939 +(28 rows) + +-- subqueries +SELECT DISTINCT l_orderkey, l_partkey + FROM ( + SELECT l_orderkey, l_partkey + FROM lineitem_hash_part + ) q + ORDER BY 1,2 + LIMIT 10; + l_orderkey | l_partkey +--------------------------------------------------------------------- + 1 | 2132 + 1 | 15635 + 1 | 24027 + 1 | 63700 + 1 | 67310 + 1 | 155190 + 2 | 106170 + 3 | 4297 + 3 | 19036 + 3 | 29380 +(10 rows) + +EXPLAIN (COSTS FALSE) + SELECT DISTINCT l_orderkey, l_partkey + FROM ( + SELECT l_orderkey, l_partkey + FROM lineitem_hash_part + ) q + ORDER BY 1,2 + LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------- + Limit + -> Sort + Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey + -> HashAggregate + Group Key: remote_scan.l_orderkey, remote_scan.l_partkey + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Limit + -> Sort + Sort Key: l_orderkey, l_partkey + -> HashAggregate + Group Key: l_orderkey, l_partkey + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(16 rows) + +SELECT DISTINCT l_orderkey, cnt + FROM ( + SELECT l_orderkey, count(*) as cnt + FROM lineitem_hash_part + GROUP BY 1 + ) q + ORDER BY 1,2 + LIMIT 10; + l_orderkey | cnt +--------------------------------------------------------------------- + 1 | 6 + 2 | 1 + 3 | 6 + 4 | 1 + 5 | 3 + 6 | 1 + 7 | 7 + 32 | 6 + 33 | 4 + 34 | 3 +(10 rows) + +EXPLAIN (COSTS FALSE) + SELECT DISTINCT l_orderkey, cnt + FROM ( + SELECT l_orderkey, count(*) as cnt + FROM lineitem_hash_part + GROUP BY 1 + ) q + ORDER BY 1,2 + LIMIT 10; + QUERY PLAN 
+--------------------------------------------------------------------- + Limit + -> Sort + Sort Key: remote_scan.l_orderkey, remote_scan.cnt + -> HashAggregate + Group Key: remote_scan.l_orderkey, remote_scan.cnt + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Limit + -> Sort + Sort Key: lineitem_hash_part.l_orderkey, (count(*)) + -> HashAggregate + Group Key: lineitem_hash_part.l_orderkey, count(*) + -> HashAggregate + Group Key: lineitem_hash_part.l_orderkey + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(18 rows) + +-- distinct on partition column +-- random() is added to inner query to prevent flattening +SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey + FROM ( + SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r + FROM lineitem_hash_part + ) q + WHERE r > 1 + ORDER BY 1,2 + LIMIT 10; + l_orderkey | l_partkey +--------------------------------------------------------------------- + 1 | 2132 + 2 | 106170 + 3 | 4297 + 4 | 88035 + 5 | 37531 + 6 | 139636 + 7 | 79251 + 32 | 2743 + 33 | 33918 + 34 | 88362 +(10 rows) + +EXPLAIN (COSTS FALSE) + SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey + FROM ( + SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r + FROM lineitem_hash_part + ) q + WHERE r > 1 + ORDER BY 1,2 + LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------- + Limit + -> Unique + -> Sort + Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Limit + -> Unique + -> Sort + Sort Key: q.l_orderkey, q.l_partkey + -> Subquery Scan on q + Filter: (q.r > 1) + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(16 rows) + +-- distinct on non-partition column +SELECT DISTINCT ON (l_partkey) l_orderkey, l_partkey + FROM ( + SELECT l_orderkey, 
l_partkey, (random()*10)::int + 2 as r + FROM lineitem_hash_part + ) q + WHERE r > 1 + ORDER BY 2,1 + LIMIT 10; + l_orderkey | l_partkey +--------------------------------------------------------------------- + 12005 | 18 + 5121 | 79 + 2883 | 91 + 807 | 149 + 4102 | 175 + 2117 | 179 + 548 | 182 + 2528 | 195 + 10048 | 204 + 9413 | 222 +(10 rows) + +EXPLAIN (COSTS FALSE) + SELECT DISTINCT ON (l_partkey) l_orderkey, l_partkey + FROM ( + SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r + FROM lineitem_hash_part + ) q + WHERE r > 1 + ORDER BY 2,1 + LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------- + Limit + -> Unique + -> Sort + Sort Key: remote_scan.l_partkey, remote_scan.l_orderkey + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Limit + -> Unique + -> Sort + Sort Key: q.l_partkey, q.l_orderkey + -> Subquery Scan on q + Filter: (q.r > 1) + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(16 rows) + diff --git a/src/test/regress/expected/multi_test_helpers.out b/src/test/regress/expected/multi_test_helpers.out index b02cd6cd4..640a8d9ee 100644 --- a/src/test/regress/expected/multi_test_helpers.out +++ b/src/test/regress/expected/multi_test_helpers.out @@ -17,15 +17,10 @@ BEGIN END; $$LANGUAGE plpgsql; -- Create a function to ignore worker plans in explain output --- Also remove extra "-> Result" lines for PG15 support CREATE OR REPLACE FUNCTION coordinator_plan(explain_command text, out query_plan text) RETURNS SETOF TEXT AS $$ BEGIN FOR query_plan IN execute explain_command LOOP - IF (query_plan LIKE '%-> Result%' OR query_plan = 'Result') - THEN - CONTINUE; - END IF; RETURN next; IF query_plan LIKE '%Task Count:%' THEN @@ -36,16 +31,12 @@ BEGIN END; $$ language plpgsql; -- Create a function to ignore worker plans in explain output -- It also shows task count for plan and subplans --- Also remove extra "-> Result" 
lines for PG15 support CREATE OR REPLACE FUNCTION coordinator_plan_with_subplans(explain_command text, out query_plan text) RETURNS SETOF TEXT AS $$ DECLARE task_count_line_reached boolean := false; BEGIN FOR query_plan IN execute explain_command LOOP - IF (query_plan LIKE '%-> Result%' OR query_plan = 'Result') THEN - CONTINUE; - END IF; IF NOT task_count_line_reached THEN RETURN next; END IF; @@ -59,19 +50,6 @@ BEGIN END LOOP; RETURN; END; $$ language plpgsql; --- Create a function to ignore "-> Result" lines for PG15 support --- In PG15 there are some extra "-> Result" lines -CREATE OR REPLACE FUNCTION plan_without_result_lines(explain_command text, out query_plan text) -RETURNS SETOF TEXT AS $$ -BEGIN - FOR query_plan IN execute explain_command LOOP - IF (query_plan LIKE '%-> Result%' OR query_plan = 'Result') THEN - CONTINUE; - END IF; - RETURN next; - END LOOP; - RETURN; -END; $$ language plpgsql; -- Create a function to normalize Memory Usage, Buckets, Batches CREATE OR REPLACE FUNCTION plan_normalize_memory(explain_command text, out query_plan text) RETURNS SETOF TEXT AS $$ @@ -81,18 +59,6 @@ BEGIN RETURN NEXT; END LOOP; END; $$ language plpgsql; --- Create a function to remove arrows from the explain plan -CREATE OR REPLACE FUNCTION plan_without_arrows(explain_command text, out query_plan text) -RETURNS SETOF TEXT AS $$ -BEGIN - FOR query_plan IN execute explain_command LOOP - IF (query_plan LIKE '%-> Result%' OR query_plan = 'Result') THEN - CONTINUE; - END IF; - query_plan := regexp_replace(query_plan, '( )*-> (.*)', '\2', 'g'); - RETURN NEXT; - END LOOP; -END; $$ language plpgsql; -- helper function that returns true if output of given explain has "is not null" (case in-sensitive) CREATE OR REPLACE FUNCTION explain_has_is_not_null(explain_command text) RETURNS BOOLEAN AS $$ diff --git a/src/test/regress/expected/window_functions.out b/src/test/regress/expected/window_functions.out index 2e88f5b51..6f30a49e3 100644 --- 
a/src/test/regress/expected/window_functions.out +++ b/src/test/regress/expected/window_functions.out @@ -1491,12 +1491,10 @@ LIMIT 5; (17 rows) -- Grouping can be pushed down with aggregates even when window function can't -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS FALSE) SELECT user_id, count(value_1), stddev(value_1), count(user_id) OVER (PARTITION BY random()) FROM users_table GROUP BY user_id HAVING avg(value_1) > 2 LIMIT 1; -$Q$); - plan_without_result_lines + QUERY PLAN --------------------------------------------------------------------- Limit -> WindowAgg diff --git a/src/test/regress/expected/window_functions_0.out b/src/test/regress/expected/window_functions_0.out index c9442c7b5..c5a132301 100644 --- a/src/test/regress/expected/window_functions_0.out +++ b/src/test/regress/expected/window_functions_0.out @@ -1495,12 +1495,10 @@ LIMIT 5; (18 rows) -- Grouping can be pushed down with aggregates even when window function can't -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS FALSE) SELECT user_id, count(value_1), stddev(value_1), count(user_id) OVER (PARTITION BY random()) FROM users_table GROUP BY user_id HAVING avg(value_1) > 2 LIMIT 1; -$Q$); - plan_without_result_lines + QUERY PLAN --------------------------------------------------------------------- Limit -> WindowAgg diff --git a/src/test/regress/sql/columnar_chunk_filtering.sql b/src/test/regress/sql/columnar_chunk_filtering.sql index 335401a20..b8b2b411d 100644 --- a/src/test/regress/sql/columnar_chunk_filtering.sql +++ b/src/test/regress/sql/columnar_chunk_filtering.sql @@ -130,15 +130,11 @@ INSERT INTO another_columnar_table SELECT generate_series(0,5); EXPLAIN (analyze on, costs off, timing off, summary off) SELECT a, y FROM multi_column_chunk_filtering, another_columnar_table WHERE x > 1; -SELECT plan_without_arrows($Q$ EXPLAIN (costs off, timing off, summary off) SELECT y, * FROM another_columnar_table; -$Q$); -SELECT plan_without_arrows($Q$ EXPLAIN (costs off, 
timing off, summary off) SELECT *, x FROM another_columnar_table; -$Q$); EXPLAIN (costs off, timing off, summary off) SELECT y, another_columnar_table FROM another_columnar_table; diff --git a/src/test/regress/sql/columnar_citus_integration.sql b/src/test/regress/sql/columnar_citus_integration.sql index 566c3a9f6..514508795 100644 --- a/src/test/regress/sql/columnar_citus_integration.sql +++ b/src/test/regress/sql/columnar_citus_integration.sql @@ -429,13 +429,11 @@ EXPLAIN (COSTS OFF, SUMMARY OFF) SELECT * FROM weird_col_explain; \set VERBOSITY terse -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS OFF, SUMMARY OFF) SELECT *, "bbbbbbbbbbbbbbbbbbbbbbbbb\!bbbb'bbbbbbbbbbbbbbbbbbbbb''bbbbbbbb" FROM weird_col_explain WHERE "bbbbbbbbbbbbbbbbbbbbbbbbb\!bbbb'bbbbbbbbbbbbbbbbbbbbb''bbbbbbbb" * 2 > "aaaaaaaaaaaa$aaaaaa$$aaaaaaaaaaaaaaaaaaaaaaaaaaaaa'aaaaaaaa'$a'!"; -$Q$); \set VERBOSITY default -- should not project any columns diff --git a/src/test/regress/sql/insert_select_repartition.sql b/src/test/regress/sql/insert_select_repartition.sql index ee6065b88..94a16fed0 100644 --- a/src/test/regress/sql/insert_select_repartition.sql +++ b/src/test/regress/sql/insert_select_repartition.sql @@ -635,9 +635,7 @@ DO UPDATE SET create table table_with_sequences (x int, y int, z bigserial); insert into table_with_sequences values (1,1); select create_distributed_table('table_with_sequences','x'); -SELECT public.plan_without_result_lines($Q$ explain (costs off) insert into table_with_sequences select y, x from table_with_sequences; -$Q$); -- verify that we don't report repartitioned insert/select for tables -- with user-defined sequences. 
@@ -645,9 +643,7 @@ CREATE SEQUENCE user_defined_sequence; create table table_with_user_sequences (x int, y int, z bigint default nextval('user_defined_sequence')); insert into table_with_user_sequences values (1,1); select create_distributed_table('table_with_user_sequences','x'); -SELECT public.plan_without_result_lines($Q$ explain (costs off) insert into table_with_user_sequences select y, x from table_with_user_sequences; -$Q$); -- clean-up SET client_min_messages TO WARNING; diff --git a/src/test/regress/sql/multi_select_distinct.sql b/src/test/regress/sql/multi_select_distinct.sql index 75dd99da0..a2ee189b0 100644 --- a/src/test/regress/sql/multi_select_distinct.sql +++ b/src/test/regress/sql/multi_select_distinct.sql @@ -3,6 +3,8 @@ -- -- Tests select distinct, and select distinct on features. -- +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15; ANALYZE lineitem_hash_part; @@ -113,13 +115,11 @@ EXPLAIN (COSTS FALSE) -- check the plan if the hash aggreate is disabled. We expect to see sort+unique -- instead of aggregate plan node to handle distinct. SET enable_hashagg TO off; -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS FALSE) SELECT DISTINCT count(*) FROM lineitem_hash_part GROUP BY l_suppkey, l_linenumber ORDER BY 1; -$Q$); SET enable_hashagg TO on; @@ -142,14 +142,12 @@ EXPLAIN (COSTS FALSE) -- check the plan if the hash aggreate is disabled. Similar to the explain of -- the query above. SET enable_hashagg TO off; -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS FALSE) SELECT DISTINCT l_suppkey, count(*) FROM lineitem_hash_part GROUP BY l_suppkey, l_linenumber ORDER BY 1 LIMIT 10; -$Q$); SET enable_hashagg TO on; @@ -173,14 +171,12 @@ EXPLAIN (COSTS FALSE) -- check the plan if the hash aggreate is disabled. This explain errors out due -- to a bug right now, expectation must be corrected after fixing it. 
SET enable_hashagg TO off; -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS FALSE) SELECT DISTINCT l_suppkey, avg(l_partkey) FROM lineitem_hash_part GROUP BY l_suppkey, l_linenumber ORDER BY 1,2 LIMIT 10; -$Q$); SET enable_hashagg TO on; @@ -203,14 +199,12 @@ EXPLAIN (COSTS FALSE) -- check the plan if the hash aggreate is disabled. We expect to see sort+unique to -- handle distinct on. SET enable_hashagg TO off; -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS FALSE) SELECT DISTINCT ON (l_suppkey) avg(l_partkey) FROM lineitem_hash_part GROUP BY l_suppkey, l_linenumber ORDER BY l_suppkey,1 LIMIT 10; -$Q$); SET enable_hashagg TO on; @@ -232,14 +226,12 @@ EXPLAIN (COSTS FALSE) -- check the plan if the hash aggreate is disabled. This explain errors out due -- to a bug right now, expectation must be corrected after fixing it. SET enable_hashagg TO off; -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS FALSE) SELECT DISTINCT avg(ceil(l_partkey / 2)) FROM lineitem_hash_part GROUP BY l_suppkey, l_linenumber ORDER BY 1 LIMIT 10; -$Q$); SET enable_hashagg TO on; @@ -261,14 +253,12 @@ EXPLAIN (COSTS FALSE) -- check the plan if the hash aggreate is disabled. This explain errors out due -- to a bug right now, expectation must be corrected after fixing it. 
SET enable_hashagg TO off; -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS FALSE) SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis FROM lineitem_hash_part GROUP BY l_suppkey, l_linenumber ORDER BY 1 LIMIT 10; -$Q$); SET enable_hashagg TO on; @@ -345,13 +335,11 @@ EXPLAIN (COSTS FALSE) -- check the plan if the hash aggreate is disabled SET enable_hashagg TO off; -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS FALSE) SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count FROM lineitem_hash_part GROUP BY l_suppkey ORDER BY 1; -$Q$); SET enable_hashagg TO on; diff --git a/src/test/regress/sql/multi_test_helpers.sql b/src/test/regress/sql/multi_test_helpers.sql index b5d4b9cd9..51cb2b129 100644 --- a/src/test/regress/sql/multi_test_helpers.sql +++ b/src/test/regress/sql/multi_test_helpers.sql @@ -20,15 +20,10 @@ END; $$LANGUAGE plpgsql; -- Create a function to ignore worker plans in explain output --- Also remove extra "-> Result" lines for PG15 support CREATE OR REPLACE FUNCTION coordinator_plan(explain_command text, out query_plan text) RETURNS SETOF TEXT AS $$ BEGIN FOR query_plan IN execute explain_command LOOP - IF (query_plan LIKE '%-> Result%' OR query_plan = 'Result') - THEN - CONTINUE; - END IF; RETURN next; IF query_plan LIKE '%Task Count:%' THEN @@ -40,16 +35,12 @@ END; $$ language plpgsql; -- Create a function to ignore worker plans in explain output -- It also shows task count for plan and subplans --- Also remove extra "-> Result" lines for PG15 support CREATE OR REPLACE FUNCTION coordinator_plan_with_subplans(explain_command text, out query_plan text) RETURNS SETOF TEXT AS $$ DECLARE task_count_line_reached boolean := false; BEGIN FOR query_plan IN execute explain_command LOOP - IF (query_plan LIKE '%-> Result%' OR query_plan = 'Result') THEN - CONTINUE; - END IF; IF NOT task_count_line_reached THEN RETURN next; END IF; @@ -64,20 +55,6 @@ BEGIN RETURN; END; $$ language plpgsql; --- Create a 
function to ignore "-> Result" lines for PG15 support --- In PG15 there are some extra "-> Result" lines -CREATE OR REPLACE FUNCTION plan_without_result_lines(explain_command text, out query_plan text) -RETURNS SETOF TEXT AS $$ -BEGIN - FOR query_plan IN execute explain_command LOOP - IF (query_plan LIKE '%-> Result%' OR query_plan = 'Result') THEN - CONTINUE; - END IF; - RETURN next; - END LOOP; - RETURN; -END; $$ language plpgsql; - -- Create a function to normalize Memory Usage, Buckets, Batches CREATE OR REPLACE FUNCTION plan_normalize_memory(explain_command text, out query_plan text) RETURNS SETOF TEXT AS $$ @@ -88,19 +65,6 @@ BEGIN END LOOP; END; $$ language plpgsql; --- Create a function to remove arrows from the explain plan -CREATE OR REPLACE FUNCTION plan_without_arrows(explain_command text, out query_plan text) -RETURNS SETOF TEXT AS $$ -BEGIN - FOR query_plan IN execute explain_command LOOP - IF (query_plan LIKE '%-> Result%' OR query_plan = 'Result') THEN - CONTINUE; - END IF; - query_plan := regexp_replace(query_plan, '( )*-> (.*)', '\2', 'g'); - RETURN NEXT; - END LOOP; -END; $$ language plpgsql; - -- helper function that returns true if output of given explain has "is not null" (case in-sensitive) CREATE OR REPLACE FUNCTION explain_has_is_not_null(explain_command text) RETURNS BOOLEAN AS $$ diff --git a/src/test/regress/sql/window_functions.sql b/src/test/regress/sql/window_functions.sql index 77f353efb..de936c95c 100644 --- a/src/test/regress/sql/window_functions.sql +++ b/src/test/regress/sql/window_functions.sql @@ -576,11 +576,9 @@ ORDER BY user_id, avg(value_1) DESC LIMIT 5; -- Grouping can be pushed down with aggregates even when window function can't -SELECT public.plan_without_result_lines($Q$ EXPLAIN (COSTS FALSE) SELECT user_id, count(value_1), stddev(value_1), count(user_id) OVER (PARTITION BY random()) FROM users_table GROUP BY user_id HAVING avg(value_1) > 2 LIMIT 1; -$Q$); -- Window function with inlined CTE WITH cte as (