From 4860e11561dc142bc39ca48a08bcdcc6d993a788 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Mon, 6 Apr 2020 18:51:30 +0000 Subject: [PATCH] Duplicate grouping on worker whenever possible This is possible whenever we aren't pulling up intermediate rows. We want to do this because it was done in 9.2, and some queries rely on the performance of grouping producing distinct values. The change away from the 9.2 behavior was introduced when implementing window functions on coordinator. --- .../planner/multi_logical_optimizer.c | 45 +------------------ src/test/regress/expected/multi_explain.out | 5 ++- src/test/regress/expected/multi_view.out | 26 ++++++----- 3 files changed, 19 insertions(+), 57 deletions(-) diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index d1c3addaf..8264a7b8b 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -328,9 +328,6 @@ static bool HasOrderByAggregate(List *sortClauseList, List *targetList); static bool HasOrderByNonCommutativeAggregate(List *sortClauseList, List *targetList); static bool HasOrderByComplexExpression(List *sortClauseList, List *targetList); static bool HasOrderByHllType(List *sortClauseList, List *targetList); -static bool ShouldPushDownGroupingToWorker(MultiExtendedOp *opNode, - ExtendedOpNodeProperties * - extendedOpNodeProperties); static bool ShouldProcessDistinctOrderAndLimitForWorker( ExtendedOpNodeProperties *extendedOpNodeProperties, bool pushingDownOriginalGrouping, @@ -2223,7 +2220,7 @@ WorkerExtendedOpNode(MultiExtendedOp *originalOpNode, /* targetProjectionNumber starts from 1 */ queryTargetList.targetProjectionNumber = 1; - if (ShouldPushDownGroupingToWorker(originalOpNode, extendedOpNodeProperties)) + if (!extendedOpNodeProperties->pullUpIntermediateRows) { queryGroupClause.groupClauseList = copyObject(originalGroupClauseList); } @@ -4717,46 +4714,6 @@ 
HasOrderByHllType(List *sortClauseList, List *targetList) } -/* - * ShouldPushDownGroupingToWorker returns whether we push down GROUP BY. - * This may return true even when GROUP BY is necessary on master. - */ -static bool -ShouldPushDownGroupingToWorker(MultiExtendedOp *opNode, - ExtendedOpNodeProperties *extendedOpNodeProperties) -{ - if (extendedOpNodeProperties->pushDownGroupingAndHaving) - { - return true; - } - - if (extendedOpNodeProperties->pullUpIntermediateRows) - { - return false; - } - - /* - * Duplicate grouping if we have LIMIT without HAVING, as this can - * often result in LIMIT being pushed down. - */ - if (opNode->havingQual == NULL && opNode->limitCount != NULL) - { - return true; - } - - /* - * If aggregates are being split across worker & master, so must grouping. - */ - if (contain_aggs_of_level(opNode->havingQual, 0) || - contain_aggs_of_level((Node *) opNode->targetList, 0)) - { - return true; - } - - return false; -} - - /* * ShouldProcessDistinctOrderAndLimitForWorker returns whether * ProcessDistinctClauseForWorkerQuery should be called. 
If not, diff --git a/src/test/regress/expected/multi_explain.out b/src/test/regress/expected/multi_explain.out index 48ef0d0f9..7ca2f509f 100644 --- a/src/test/regress/expected/multi_explain.out +++ b/src/test/regress/expected/multi_explain.out @@ -453,8 +453,11 @@ HashAggregate Tasks Shown: One of 2 -> Task Node: host=localhost port=xxxxx dbname=regression - -> Seq Scan on public.lineitem_290000 lineitem + -> HashAggregate Output: l_quantity, l_quantity + Group Key: lineitem.l_quantity + -> Seq Scan on public.lineitem_290000 lineitem + Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment -- Subquery pushdown tests with explain EXPLAIN (COSTS OFF) SELECT diff --git a/src/test/regress/expected/multi_view.out b/src/test/regress/expected/multi_view.out index 5edd3a257..82e20b8f3 100644 --- a/src/test/regress/expected/multi_view.out +++ b/src/test/regress/expected/multi_view.out @@ -747,7 +747,7 @@ RESET citus.subquery_pushdown; VACUUM ANALYZE users_table; -- explain tests EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Sort Sort Key: remote_scan.user_id @@ -758,17 +758,19 @@ EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER Tasks Shown: One of 4 -> Task Node: host=localhost port=xxxxx dbname=regression - -> Nested Loop - Join Filter: (users_table.user_id = users_table_1.user_id) - -> Sort - Sort Key: (max(users_table_1."time")) DESC - -> HashAggregate - Group Key: users_table_1.user_id - Filter: (max(users_table_1."time") > '2017-11-23 16:20:33.264457'::timestamp without time zone) - -> Seq Scan on users_table_1400256 users_table_1 - -> Seq Scan on users_table_1400256 users_table - Filter: ((value_1 >= 1) AND (value_1 < 3)) -(19 rows) + -> 
HashAggregate + Group Key: users_table.user_id + -> Nested Loop + Join Filter: (users_table.user_id = users_table_1.user_id) + -> Sort + Sort Key: (max(users_table_1."time")) DESC + -> HashAggregate + Group Key: users_table_1.user_id + Filter: (max(users_table_1."time") > '2017-11-23 16:20:33.264457'::timestamp without time zone) + -> Seq Scan on users_table_1400256 users_table_1 + -> Seq Scan on users_table_1400256 users_table + Filter: ((value_1 >= 1) AND (value_1 < 3)) +(21 rows) EXPLAIN (COSTS FALSE) SELECT * FROM (