diff --git a/src/backend/distributed/planner/multi_master_planner.c b/src/backend/distributed/planner/multi_master_planner.c index 79c8016de..0d63a44ba 100644 --- a/src/backend/distributed/planner/multi_master_planner.c +++ b/src/backend/distributed/planner/multi_master_planner.c @@ -21,6 +21,7 @@ #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" +#include "optimizer/cost.h" #include "optimizer/planmain.h" #include "optimizer/tlist.h" #include "optimizer/var.h" @@ -289,7 +290,7 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan) if (groupColumnCount > 0) { bool groupingIsHashable = grouping_is_hashable(groupColumnList); - bool groupingIsSortable = grouping_is_hashable(groupColumnList); + bool groupingIsSortable = grouping_is_sortable(groupColumnList); bool hasDistinctAggregate = HasDistinctAggregate(masterQuery); if (!groupingIsHashable && !groupingIsSortable) @@ -303,13 +304,20 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan) * see nodeAgg.c:build_pertrans_for_aggref(). In that case we use * sorted agg strategy, otherwise we use hash strategy. */ - if (!groupingIsHashable || hasDistinctAggregate) + if (!enable_hashagg || !groupingIsHashable || hasDistinctAggregate) { + char *messageHint = NULL; + if (!enable_hashagg && groupingIsHashable) + { + messageHint = "Consider setting enable_hashagg to on."; + } + if (!groupingIsSortable) { ereport(ERROR, (errmsg("grouped column list must cannot be sorted"), errdetail("Having a distinct aggregate requires " - "grouped column list to be sortable."))); + "grouped column list to be sortable."), + messageHint ? errhint("%s", messageHint) : 0)); } aggregateStrategy = AGG_SORTED; @@ -418,7 +426,8 @@ BuildDistinctPlan(Query *masterQuery, Plan *subPlan) */ distinctClausesHashable = grouping_is_hashable(distinctClauseList); hasDistinctAggregate = HasDistinctAggregate(masterQuery); - if (distinctClausesHashable && !hasDistinctAggregate) + + if (enable_hashagg && distinctClausesHashable && !hasDistinctAggregate) { const long rowEstimate = 10; /* using the same value as BuildAggregatePlan() */ AttrNumber *distinctColumnIdArray = extract_grouping_cols(distinctClauseList, diff --git a/src/test/regress/expected/multi_explain.out b/src/test/regress/expected/multi_explain.out index 887b3ed19..f603b1c88 100644 --- a/src/test/regress/expected/multi_explain.out +++ b/src/test/regress/expected/multi_explain.out @@ -54,6 +54,26 @@ Sort -> HashAggregate Group Key: l_quantity -> Seq Scan on lineitem_290001 lineitem +-- Test disable hash aggregate +SET enable_hashagg TO off; +EXPLAIN (COSTS FALSE, FORMAT TEXT) + SELECT l_quantity, count(*) count_quantity FROM lineitem + GROUP BY l_quantity ORDER BY count_quantity, l_quantity; +Sort + Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity + -> GroupAggregate + Group Key: remote_scan.l_quantity + -> Sort + Sort Key: remote_scan.l_quantity + -> Custom Scan (Citus Real-Time) + Task Count: 8 + Tasks Shown: One of 8 + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: l_quantity + -> Seq Scan on lineitem_290001 lineitem +SET enable_hashagg TO on; -- Test JSON format EXPLAIN (COSTS FALSE, FORMAT JSON) SELECT l_quantity, count(*) count_quantity FROM lineitem @@ -1128,7 +1148,7 @@ SELECT l_orderkey FROM series JOIN keys ON (s = l_orderkey) ORDER BY s; Custom Scan (Citus Router) Output: remote_scan.l_orderkey - -> Distributed Subplan 54_1 + -> Distributed Subplan 55_1 -> HashAggregate Output: remote_scan.l_orderkey Group Key: remote_scan.l_orderkey @@ -1143,7 +1163,7 @@ Custom Scan (Citus Router) Group Key: lineitem_hash_part.l_orderkey -> Seq Scan on public.lineitem_hash_part_360038 lineitem_hash_part Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment - -> Distributed Subplan 54_2 + -> Distributed Subplan 55_2 -> Function Scan on pg_catalog.generate_series s Output: s Function Call: generate_series(1, 10) @@ -1159,13 +1179,13 @@ Custom Scan (Citus Router) Sort Key: intermediate_result.s -> Function Scan on pg_catalog.read_intermediate_result intermediate_result Output: intermediate_result.s - Function Call: read_intermediate_result('54_2'::text, 'binary'::citus_copy_format) + Function Call: read_intermediate_result('55_2'::text, 'binary'::citus_copy_format) -> Sort Output: intermediate_result_1.l_orderkey Sort Key: intermediate_result_1.l_orderkey -> Function Scan on pg_catalog.read_intermediate_result intermediate_result_1 Output: intermediate_result_1.l_orderkey - Function Call: read_intermediate_result('54_1'::text, 'binary'::citus_copy_format) + Function Call: read_intermediate_result('55_1'::text, 'binary'::citus_copy_format) SELECT true AS valid FROM explain_json($$ WITH result AS ( SELECT l_quantity, count(*) count_quantity FROM lineitem diff --git a/src/test/regress/expected/multi_explain_0.out b/src/test/regress/expected/multi_explain_0.out index 18061e8fd..1cab9ff0f 100644 --- a/src/test/regress/expected/multi_explain_0.out +++ b/src/test/regress/expected/multi_explain_0.out @@ -54,6 +54,26 @@ Sort -> HashAggregate Group Key: l_quantity -> Seq Scan on lineitem_290001 lineitem +-- Test disable hash aggregate +SET enable_hashagg TO off; +EXPLAIN (COSTS FALSE, FORMAT TEXT) + SELECT l_quantity, count(*) count_quantity FROM lineitem + GROUP BY l_quantity ORDER BY count_quantity, l_quantity; +Sort + Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity + -> GroupAggregate + Group Key: remote_scan.l_quantity + -> Sort + Sort Key: remote_scan.l_quantity + -> Custom Scan (Citus Real-Time) + Task Count: 8 + Tasks Shown: One of 8 + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: l_quantity + -> Seq Scan on lineitem_290001 lineitem +SET enable_hashagg TO on; -- Test JSON format EXPLAIN (COSTS FALSE, FORMAT JSON) SELECT l_quantity, count(*) count_quantity FROM lineitem @@ -1128,7 +1148,7 @@ SELECT l_orderkey FROM series JOIN keys ON (s = l_orderkey) ORDER BY s; Custom Scan (Citus Router) Output: remote_scan.l_orderkey - -> Distributed Subplan 54_1 + -> Distributed Subplan 55_1 -> HashAggregate Output: remote_scan.l_orderkey Group Key: remote_scan.l_orderkey @@ -1143,7 +1163,7 @@ Custom Scan (Citus Router) Group Key: lineitem_hash_part.l_orderkey -> Seq Scan on public.lineitem_hash_part_360038 lineitem_hash_part Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment - -> Distributed Subplan 54_2 + -> Distributed Subplan 55_2 -> Function Scan on pg_catalog.generate_series s Output: s Function Call: generate_series(1, 10) @@ -1159,13 +1179,13 @@ Custom Scan (Citus Router) Sort Key: intermediate_result.s -> Function Scan on pg_catalog.read_intermediate_result intermediate_result Output: intermediate_result.s - Function Call: read_intermediate_result('54_2'::text, 'binary'::citus_copy_format) + Function Call: read_intermediate_result('55_2'::text, 'binary'::citus_copy_format) -> Sort Output: intermediate_result_1.l_orderkey Sort Key: intermediate_result_1.l_orderkey -> Function Scan on pg_catalog.read_intermediate_result intermediate_result_1 Output: intermediate_result_1.l_orderkey - Function Call: read_intermediate_result('54_1'::text, 'binary'::citus_copy_format) + Function Call: read_intermediate_result('55_1'::text, 'binary'::citus_copy_format) SELECT true AS valid FROM explain_json($$ WITH result AS ( SELECT l_quantity, count(*) count_quantity FROM lineitem diff --git a/src/test/regress/expected/multi_select_distinct.out b/src/test/regress/expected/multi_select_distinct.out index ddb4ae56e..51dda9108 100644 --- a/src/test/regress/expected/multi_select_distinct.out +++ b/src/test/regress/expected/multi_select_distinct.out @@ -215,6 +215,37 @@ EXPLAIN (COSTS FALSE) Filter: (l_orderkey < 200) (15 rows) +-- check the plan if the hash aggreate is disabled +SET enable_hashagg TO off; +EXPLAIN (COSTS FALSE) + SELECT DISTINCT l_orderkey, count(*) + FROM lineitem_hash_part + WHERE l_orderkey < 200 + GROUP BY 1 + HAVING count(*) > 5 + ORDER BY 2 DESC, 1; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))))::bigint, '0'::bigint) DESC, remote_scan.l_orderkey + -> GroupAggregate + Group Key: remote_scan.l_orderkey + Filter: (COALESCE((pg_catalog.sum(remote_scan.worker_column_3))::bigint, '0'::bigint) > 5) + -> Sort + Sort Key: remote_scan.l_orderkey + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: l_orderkey + Filter: (count(*) > 5) + -> Seq Scan on lineitem_hash_part_360038 lineitem_hash_part + Filter: (l_orderkey < 200) +(17 rows) + +SET enable_hashagg TO on; -- distinct on non-partition column with aggregate -- this is the same as non-distinct version due to group by SELECT DISTINCT l_partkey, count(*) diff --git a/src/test/regress/sql/multi_explain.sql b/src/test/regress/sql/multi_explain.sql index 43693adb4..b0b103e1c 100644 --- a/src/test/regress/sql/multi_explain.sql +++ b/src/test/regress/sql/multi_explain.sql @@ -46,6 +46,14 @@ EXPLAIN (COSTS FALSE, FORMAT TEXT) SELECT l_quantity, count(*) count_quantity FROM lineitem GROUP BY l_quantity ORDER BY count_quantity, l_quantity; +-- Test disable hash aggregate +SET enable_hashagg TO off; +EXPLAIN (COSTS FALSE, FORMAT TEXT) + SELECT l_quantity, count(*) count_quantity FROM lineitem + GROUP BY l_quantity ORDER BY count_quantity, l_quantity; + +SET enable_hashagg TO on; + -- Test JSON format EXPLAIN (COSTS FALSE, FORMAT JSON) SELECT l_quantity, count(*) count_quantity FROM lineitem diff --git a/src/test/regress/sql/multi_select_distinct.sql b/src/test/regress/sql/multi_select_distinct.sql index eb01eceac..65b88c0ec 100644 --- a/src/test/regress/sql/multi_select_distinct.sql +++ b/src/test/regress/sql/multi_select_distinct.sql @@ -66,6 +66,18 @@ EXPLAIN (COSTS FALSE) HAVING count(*) > 5 ORDER BY 2 DESC, 1; +-- check the plan if the hash aggreate is disabled +SET enable_hashagg TO off; +EXPLAIN (COSTS FALSE) + SELECT DISTINCT l_orderkey, count(*) + FROM lineitem_hash_part + WHERE l_orderkey < 200 + GROUP BY 1 + HAVING count(*) > 5 + ORDER BY 2 DESC, 1; + +SET enable_hashagg TO on; + -- distinct on non-partition column with aggregate -- this is the same as non-distinct version due to group by SELECT DISTINCT l_partkey, count(*)