diff --git a/src/backend/distributed/planner/multi_master_planner.c b/src/backend/distributed/planner/multi_master_planner.c
index 6b4be984f..b1cbbc631 100644
--- a/src/backend/distributed/planner/multi_master_planner.c
+++ b/src/backend/distributed/planner/multi_master_planner.c
@@ -21,6 +21,7 @@
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
+#include "optimizer/cost.h"
 #include "optimizer/planmain.h"
 #include "optimizer/tlist.h"
 #include "optimizer/var.h"
@@ -128,13 +129,47 @@ BuildAggregatePlan(Query *masterQuery, Plan *subPlan)
 	/* if we have grouping, then initialize appropriate information */
 	if (groupColumnCount > 0)
 	{
-		if (!grouping_is_hashable(groupColumnList))
+		bool groupingIsHashable = grouping_is_hashable(groupColumnList);
+		bool groupingIsSortable = grouping_is_sortable(groupColumnList);
+
+		if (!groupingIsHashable && !groupingIsSortable)
 		{
-			ereport(ERROR, (errmsg("grouped column list cannot be hashed")));
+			ereport(ERROR, (errmsg("grouped column list cannot be hashed or sorted")));
 		}
 
-		/* switch to hashed aggregate strategy to allow grouping */
-		aggregateStrategy = AGG_HASHED;
+		/*
+		 * Postgres hash aggregate strategy does not support distinct aggregates
+		 * in group and order by with aggregate operations, see
+		 * nodeAgg.c:build_pertrans_for_aggref(). We use the sorted agg strategy
+		 * when hashing is disabled or not possible, otherwise the hash strategy.
+		 */
+		if (!enable_hashagg || !groupingIsHashable)
+		{
+			char *messageHint = NULL;
+			if (!enable_hashagg && groupingIsHashable)
+			{
+				messageHint = "Consider setting enable_hashagg to on.";
+			}
+
+			if (!groupingIsSortable)
+			{
+				ereport(ERROR, (errmsg("grouped column list cannot be sorted"),
+								errdetail("Having a distinct aggregate requires "
+										  "grouped column list to be sortable."),
+								messageHint ? errhint("%s", messageHint) : 0));
+			}
+
+			aggregateStrategy = AGG_SORTED;
+#if (PG_VERSION_NUM >= 90600)
+			subPlan = (Plan *) make_sort_from_sortclauses(groupColumnList, subPlan);
+#else
+			subPlan = (Plan *) make_sort_from_sortclauses(NULL, groupColumnList, subPlan);
+#endif
+		}
+		else
+		{
+			aggregateStrategy = AGG_HASHED;
+		}
 
 		/* get column indexes that are being grouped */
 		groupColumnIdArray = extract_grouping_cols(groupColumnList, subPlan->targetlist);
diff --git a/src/test/regress/expected/multi_explain.out b/src/test/regress/expected/multi_explain.out
index 2d8193b6a..dcb0564ff 100644
--- a/src/test/regress/expected/multi_explain.out
+++ b/src/test/regress/expected/multi_explain.out
@@ -686,3 +686,23 @@ Custom Scan (Citus Router)
         ->  Seq Scan on explain_table_570001 explain_table
               Filter: (id = 1)
 ROLLBACK;
+-- Test disable hash aggregate
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE, FORMAT TEXT)
+	SELECT l_quantity, count(*) count_quantity FROM lineitem
+	GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
+Sort
+  Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity
+  ->  GroupAggregate
+        Group Key: remote_scan.l_quantity
+        ->  Sort
+              Sort Key: remote_scan.l_quantity
+              ->  Custom Scan (Citus Real-Time)
+                    Task Count: 8
+                    Tasks Shown: One of 8
+                    ->  Task
+                          Node: host=localhost port=57637 dbname=regression
+                          ->  HashAggregate
+                                Group Key: l_quantity
+                                ->  Seq Scan on lineitem_290001 lineitem
+SET enable_hashagg TO on;
diff --git a/src/test/regress/expected/multi_explain_0.out b/src/test/regress/expected/multi_explain_0.out
index 10798f526..110bf77a3 100644
--- a/src/test/regress/expected/multi_explain_0.out
+++ b/src/test/regress/expected/multi_explain_0.out
@@ -657,3 +657,23 @@ Custom Scan (Citus Router)
         ->  Seq Scan on explain_table_570001 explain_table
               Filter: (id = 1)
 ROLLBACK;
+-- Test disable hash aggregate
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE, FORMAT TEXT)
+	SELECT l_quantity, count(*) count_quantity FROM lineitem
+	GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
+Sort
+  Sort Key: COALESCE((pg_catalog.sum((COALESCE((pg_catalog.sum(remote_scan.count_quantity))::bigint, '0'::bigint))))::bigint, '0'::bigint), remote_scan.l_quantity
+  ->  GroupAggregate
+        Group Key: remote_scan.l_quantity
+        ->  Sort
+              Sort Key: remote_scan.l_quantity
+              ->  Custom Scan (Citus Real-Time)
+                    Task Count: 8
+                    Tasks Shown: One of 8
+                    ->  Task
+                          Node: host=localhost port=57637 dbname=regression
+                          ->  HashAggregate
+                                Group Key: l_quantity
+                                ->  Seq Scan on lineitem_290001 lineitem
+SET enable_hashagg TO on;
diff --git a/src/test/regress/sql/multi_explain.sql b/src/test/regress/sql/multi_explain.sql
index e744b1f31..ccbdcc49c 100644
--- a/src/test/regress/sql/multi_explain.sql
+++ b/src/test/regress/sql/multi_explain.sql
@@ -247,3 +247,11 @@ ALTER TABLE explain_table ADD COLUMN value int;
 EXPLAIN (COSTS FALSE)
 	SELECT value FROM explain_table WHERE id = 1;
 ROLLBACK;
+
+-- Test disable hash aggregate
+SET enable_hashagg TO off;
+EXPLAIN (COSTS FALSE, FORMAT TEXT)
+	SELECT l_quantity, count(*) count_quantity FROM lineitem
+	GROUP BY l_quantity ORDER BY count_quantity, l_quantity;
+
+SET enable_hashagg TO on;