From 0db413491c88f2c22b930a5d9890bdc9c68aba4c Mon Sep 17 00:00:00 2001 From: Murat Tuncer Date: Tue, 7 Jun 2016 12:24:00 +0300 Subject: [PATCH] Fix crash in count distinct with filters in repartition subqueries now copies all column references in count distinct aggreagete to worker target list and group by. Master target list is also updated to reflect changes in attribute order. Fixes 569 --- .../planner/multi_logical_optimizer.c | 18 ++----- .../input/multi_complex_count_distinct.source | 28 ++++++++++ .../multi_complex_count_distinct.source | 54 +++++++++++++++++++ 3 files changed, 85 insertions(+), 15 deletions(-) diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index 070d8dda9..a4191184a 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -1360,9 +1360,7 @@ MasterAggregateExpression(Aggref *originalAggregate, walkerContext->repartitionSubquery) { Aggref *aggregate = (Aggref *) copyObject(originalAggregate); - List *aggTargetEntryList = aggregate->args; - TargetEntry *distinctTargetEntry = linitial(aggTargetEntryList); - List *varList = pull_var_clause_default((Node *) distinctTargetEntry->expr); + List *varList = pull_var_clause_default((Node *) aggregate); ListCell *varCell = NULL; List *uniqueVarList = NIL; int startColumnCount = walkerContext->columnId; @@ -1878,22 +1876,12 @@ WorkerAggregateExpressionList(Aggref *originalAggregate, walkerContext->repartitionSubquery) { Aggref *aggregate = (Aggref *) copyObject(originalAggregate); - List *aggTargetEntryList = aggregate->args; - TargetEntry *distinctTargetEntry = (TargetEntry *) linitial(aggTargetEntryList); - List *columnList = pull_var_clause_default((Node *) distinctTargetEntry); + List *columnList = pull_var_clause_default((Node *) aggregate); ListCell *columnCell = NULL; - List *processedColumnList = NIL; - foreach(columnCell, columnList) { Var *column = (Var *) lfirst(columnCell); - if (list_member(processedColumnList, column)) - { - continue; - } - - processedColumnList = lappend(processedColumnList, column); - workerAggregateList = lappend(workerAggregateList, copyObject(column)); + workerAggregateList = list_append_unique(workerAggregateList, column); } walkerContext->createGroupByClause = true; diff --git a/src/test/regress/input/multi_complex_count_distinct.source b/src/test/regress/input/multi_complex_count_distinct.source index 7620e45a2..edb6c68e4 100644 --- a/src/test/regress/input/multi_complex_count_distinct.source +++ b/src/test/regress/input/multi_complex_count_distinct.source @@ -76,6 +76,34 @@ SELECT * ORDER BY 2 DESC, 1 DESC LIMIT 10; +-- there is a known issue with aggregates with filters in non-repartition queries (#395) +SELECT + l_orderkey, count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR') + FROM lineitem_hash + GROUP BY l_orderkey + ORDER BY 2 DESC, 1 DESC + LIMIT 10; + +-- filter column already exists in target list +SELECT * + FROM ( + SELECT + l_orderkey, count(DISTINCT l_partkey) FILTER (WHERE l_orderkey > 100) + FROM lineitem_hash + GROUP BY l_orderkey) sub + ORDER BY 2 DESC, 1 DESC + LIMIT 10; + +-- filter column does not exist in target list +SELECT * + FROM ( + SELECT + l_orderkey, count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR') + FROM lineitem_hash + GROUP BY l_orderkey) sub + ORDER BY 2 DESC, 1 DESC + LIMIT 10; + -- case expr in count distinct is supported. -- count orders partkeys if l_shipmode is air SELECT * diff --git a/src/test/regress/output/multi_complex_count_distinct.source b/src/test/regress/output/multi_complex_count_distinct.source index b4c6b3a04..08ab725da 100644 --- a/src/test/regress/output/multi_complex_count_distinct.source +++ b/src/test/regress/output/multi_complex_count_distinct.source @@ -122,6 +122,60 @@ SELECT * 1927 | 3 (10 rows) +-- there is a known issue with aggregates with filters in non-repartition queries (#395) +SELECT + l_orderkey, count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR') + FROM lineitem_hash + GROUP BY l_orderkey + ORDER BY 2 DESC, 1 DESC + LIMIT 10; +ERROR: attribute number 15 exceeds number of columns 2 +-- filter column already exists in target list +SELECT * + FROM ( + SELECT + l_orderkey, count(DISTINCT l_partkey) FILTER (WHERE l_orderkey > 100) + FROM lineitem_hash + GROUP BY l_orderkey) sub + ORDER BY 2 DESC, 1 DESC + LIMIT 10; + l_orderkey | count +------------+------- + 14885 | 7 + 14884 | 7 + 14821 | 7 + 14790 | 7 + 14785 | 7 + 14755 | 7 + 14725 | 7 + 14694 | 7 + 14627 | 7 + 14624 | 7 +(10 rows) + +-- filter column does not exist in target list +SELECT * + FROM ( + SELECT + l_orderkey, count(DISTINCT l_partkey) FILTER (WHERE l_shipmode = 'AIR') + FROM lineitem_hash + GROUP BY l_orderkey) sub + ORDER BY 2 DESC, 1 DESC + LIMIT 10; + l_orderkey | count +------------+------- + 12005 | 4 + 5409 | 4 + 4964 | 4 + 14848 | 3 + 14496 | 3 + 13473 | 3 + 13122 | 3 + 12929 | 3 + 12645 | 3 + 12417 | 3 +(10 rows) + -- case expr in count distinct is supported. -- count orders partkeys if l_shipmode is air SELECT *