Duplicate grouping on worker whenever possible

This is possible whenever we aren't pulling up intermediate rows

We want to do this because it was the behavior in 9.2, and
some queries rely on the performance benefit of grouping reducing rows to distinct values

This change was introduced when implementing window functions on coordinator
pull/3722/head
Philip Dubé 2020-04-06 18:51:30 +00:00
parent 6a6d5af8a3
commit 4860e11561
3 changed files with 19 additions and 57 deletions

View File

@ -328,9 +328,6 @@ static bool HasOrderByAggregate(List *sortClauseList, List *targetList);
static bool HasOrderByNonCommutativeAggregate(List *sortClauseList, List *targetList); static bool HasOrderByNonCommutativeAggregate(List *sortClauseList, List *targetList);
static bool HasOrderByComplexExpression(List *sortClauseList, List *targetList); static bool HasOrderByComplexExpression(List *sortClauseList, List *targetList);
static bool HasOrderByHllType(List *sortClauseList, List *targetList); static bool HasOrderByHllType(List *sortClauseList, List *targetList);
static bool ShouldPushDownGroupingToWorker(MultiExtendedOp *opNode,
ExtendedOpNodeProperties *
extendedOpNodeProperties);
static bool ShouldProcessDistinctOrderAndLimitForWorker( static bool ShouldProcessDistinctOrderAndLimitForWorker(
ExtendedOpNodeProperties *extendedOpNodeProperties, ExtendedOpNodeProperties *extendedOpNodeProperties,
bool pushingDownOriginalGrouping, bool pushingDownOriginalGrouping,
@ -2223,7 +2220,7 @@ WorkerExtendedOpNode(MultiExtendedOp *originalOpNode,
/* targetProjectionNumber starts from 1 */ /* targetProjectionNumber starts from 1 */
queryTargetList.targetProjectionNumber = 1; queryTargetList.targetProjectionNumber = 1;
if (ShouldPushDownGroupingToWorker(originalOpNode, extendedOpNodeProperties)) if (!extendedOpNodeProperties->pullUpIntermediateRows)
{ {
queryGroupClause.groupClauseList = copyObject(originalGroupClauseList); queryGroupClause.groupClauseList = copyObject(originalGroupClauseList);
} }
@ -4717,46 +4714,6 @@ HasOrderByHllType(List *sortClauseList, List *targetList)
} }
/*
* ShouldPushDownGroupingToWorker returns whether we push down GROUP BY.
* This may return true even when GROUP BY is necessary on master.
*/
static bool
ShouldPushDownGroupingToWorker(MultiExtendedOp *opNode,
ExtendedOpNodeProperties *extendedOpNodeProperties)
{
if (extendedOpNodeProperties->pushDownGroupingAndHaving)
{
return true;
}
if (extendedOpNodeProperties->pullUpIntermediateRows)
{
return false;
}
/*
* Duplicate grouping if we have LIMIT without HAVING, as this can
* often result in LIMIT being pushed down.
*/
if (opNode->havingQual == NULL && opNode->limitCount != NULL)
{
return true;
}
/*
* If aggregates are being split across worker & master, so must grouping.
*/
if (contain_aggs_of_level(opNode->havingQual, 0) ||
contain_aggs_of_level((Node *) opNode->targetList, 0))
{
return true;
}
return false;
}
/* /*
* ShouldProcessDistinctOrderAndLimitForWorker returns whether * ShouldProcessDistinctOrderAndLimitForWorker returns whether
* ProcessDistinctClauseForWorkerQuery should be called. If not, * ProcessDistinctClauseForWorkerQuery should be called. If not,

View File

@ -453,8 +453,11 @@ HashAggregate
Tasks Shown: One of 2 Tasks Shown: One of 2
-> Task -> Task
Node: host=localhost port=xxxxx dbname=regression Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on public.lineitem_290000 lineitem -> HashAggregate
Output: l_quantity, l_quantity Output: l_quantity, l_quantity
Group Key: lineitem.l_quantity
-> Seq Scan on public.lineitem_290000 lineitem
Output: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment
-- Subquery pushdown tests with explain -- Subquery pushdown tests with explain
EXPLAIN (COSTS OFF) EXPLAIN (COSTS OFF)
SELECT SELECT

View File

@ -758,6 +758,8 @@ EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER
Tasks Shown: One of 4 Tasks Shown: One of 4
-> Task -> Task
Node: host=localhost port=xxxxx dbname=regression Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Group Key: users_table.user_id
-> Nested Loop -> Nested Loop
Join Filter: (users_table.user_id = users_table_1.user_id) Join Filter: (users_table.user_id = users_table_1.user_id)
-> Sort -> Sort
@ -768,7 +770,7 @@ EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER
-> Seq Scan on users_table_1400256 users_table_1 -> Seq Scan on users_table_1400256 users_table_1
-> Seq Scan on users_table_1400256 users_table -> Seq Scan on users_table_1400256 users_table
Filter: ((value_1 >= 1) AND (value_1 < 3)) Filter: ((value_1 >= 1) AND (value_1 < 3))
(19 rows) (21 rows)
EXPLAIN (COSTS FALSE) SELECT * EXPLAIN (COSTS FALSE) SELECT *
FROM ( FROM (