From 86b2d9420c9116c21d021b249397bc008c97f031 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Mon, 4 Dec 2017 17:09:57 +0200 Subject: [PATCH] Treat recurring tuples as reference table for GROUP BY checks read_intermediate_results() and immutable functions are implemented. Empty join trees seems not applicable here. --- .../planner/multi_logical_planner.c | 72 ++++--- ...ulti_subquery_complex_reference_clause.out | 180 ++++++++++++++++++ ...ulti_subquery_complex_reference_clause.sql | 136 +++++++++++++ 3 files changed, 358 insertions(+), 30 deletions(-) diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index faf6e7a68..853c03930 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -109,7 +109,8 @@ static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationLis static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree); static bool WindowPartitionOnDistributionColumn(Query *query); static bool AllTargetExpressionsAreColumnReferences(List *targetEntryList); -static bool RangeTableListContainsOnlyReferenceTables(List *rangeTableList); +static bool FindNodeCheckInRangeTableList(List *rtable, bool (*check)(Node *)); +static bool IsDistributedTableRTE(Node *node); static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query); static bool FullCompositeFieldList(List *compositeFieldList); static MultiNode * MultiNodeTree(Query *queryTree); @@ -1510,12 +1511,12 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList) /* * We could still behave as if the target list is on partition column if - * all range table entries are reference tables and all target expressions - * are column references to the given query level. + * all range table entries are reference tables or intermediate results, + * and all target expressions are column references to the given query level. */ if (!targetListOnPartitionColumn) { - if (RangeTableListContainsOnlyReferenceTables(query->rtable) && + if (!FindNodeCheckInRangeTableList(query->rtable, IsDistributedTableRTE) && AllTargetExpressionsAreColumnReferences(targetEntryList)) { targetListOnPartitionColumn = true; @@ -1580,40 +1581,51 @@ AllTargetExpressionsAreColumnReferences(List *targetEntryList) /* - * RangeTableListContainsOnlyReferenceTables returns true if all range table - * entries are reference tables. + * FindNodeCheckInRangeTableList finds a node for which the check + * function returns true. * - * The function returns false for range table entries that are not relations. - * - * Note that the function doesn't recurse into subqueries, returns false when - * a subquery is found. + * FindNodeCheckInRangeTableList relies on FindNodeCheck() but only + * considers the range table entries. */ static bool -RangeTableListContainsOnlyReferenceTables(List *rangeTableList) +FindNodeCheckInRangeTableList(List *rtable, bool (*check)(Node *)) { - ListCell *rangeTableCell = NULL; - foreach(rangeTableCell, rangeTableList) + return range_table_walker(rtable, FindNodeCheck, check, QTW_EXAMINE_RTES); +} + + +/* + * IsDistributedTableRTE gets a node and returns true if the node + * is a range table relation entry that points to a distributed + * relation (i.e., excluding reference tables). + */ +static bool +IsDistributedTableRTE(Node *node) +{ + RangeTblEntry *rangeTableEntry = NULL; + Oid relationId = InvalidOid; + + if (node == NULL) { - RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell); + return false; + } - if (rangeTableEntry->rtekind == RTE_RELATION) - { - Oid relationId = rangeTableEntry->relid; + if (!IsA(node, RangeTblEntry)) + { + return false; + } - if (!IsDistributedTable(relationId)) - { - return false; - } + rangeTableEntry = (RangeTblEntry *) node; + if (rangeTableEntry->rtekind != RTE_RELATION) + { + return false; + } - if (PartitionMethod(relationId) != DISTRIBUTE_BY_NONE) - { - return false; - } - } - else - { - return false; - } + relationId = rangeTableEntry->relid; + if (!IsDistributedTable(relationId) || + PartitionMethod(relationId) == DISTRIBUTE_BY_NONE) + { + return false; } return true; diff --git a/src/test/regress/expected/multi_subquery_complex_reference_clause.out b/src/test/regress/expected/multi_subquery_complex_reference_clause.out index e849083af..97dd96cba 100644 --- a/src/test/regress/expected/multi_subquery_complex_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_complex_reference_clause.out @@ -1361,6 +1361,186 @@ ORDER BY 1 DESC LIMIT 4; ERROR: cannot push down this subquery DETAIL: Distinct on columns without partition column is currently unsupported +-- test the read_intermediate_result() for GROUP BYs +BEGIN; + +SELECT broadcast_intermediate_result('squares', 'SELECT s, s*s FROM generate_series(1,200) s'); + broadcast_intermediate_result +------------------------------- + 200 +(1 row) + +-- single appereance of read_intermediate_result +SELECT + DISTINCT user_id +FROM + users_table +JOIN +(SELECT + max(res.val) as mx +FROM + read_intermediate_result('squares', 'binary') AS res (val int, val_square int) +GROUP BY res.val_square) squares + ON (mx = user_id) +ORDER BY 1 +LIMIT 5; + user_id +--------- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +-- similar to the above, with DISTINCT on intermediate result +SELECT DISTINCT user_id +FROM users_table +JOIN + (SELECT DISTINCT res.val AS mx + FROM read_intermediate_result('squares', 'binary') AS res (val int, val_square int)) squares ON (mx = user_id) +ORDER BY 1 +LIMIT 5; + user_id +--------- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +-- single appereance of read_intermediate_result but inside a subquery +SELECT + DISTINCT user_id +FROM + users_table +JOIN ( + SELECT *,random() FROM (SELECT + max(res.val) as mx + FROM + (SELECT val, val_square FROM read_intermediate_result('squares', 'binary') AS res (val int, val_square int)) res + GROUP BY res.val_square) foo) +squares + ON (mx = user_id) +ORDER BY 1 +LIMIT 5; + user_id +--------- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +-- multiple read_intermediate_results in the same subquery is OK +SELECT + DISTINCT user_id +FROM + users_table +JOIN +(SELECT + max(res.val) as mx +FROM + read_intermediate_result('squares', 'binary') AS res (val int, val_square int), + read_intermediate_result('squares', 'binary') AS res2 (val int, val_square int) +WHERE res.val = res2.val_square +GROUP BY res2.val_square) squares + ON (mx = user_id) +ORDER BY 1 +LIMIT 5; + user_id +--------- + 1 + 4 +(2 rows) + +-- mixed recurring tuples should be supported +SELECT + DISTINCT user_id +FROM + users_table +JOIN +(SELECT + max(res.val) as mx +FROM + read_intermediate_result('squares', 'binary') AS res (val int, val_square int), + generate_series(0, 10) i + WHERE + res.val = i + GROUP BY + i) squares + ON (mx = user_id) +ORDER BY 1 +LIMIT 5; + user_id +--------- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +-- should error out since there is a distributed table and +-- there are no columns on the GROUP BY from the distributed table +SELECT + DISTINCT user_id +FROM + users_reference_table +JOIN + (SELECT + max(val_square) as mx + FROM + read_intermediate_result('squares', 'binary') AS res (val int, val_square int), events_table + WHERE + events_table.user_id = res.val GROUP BY res.val) squares + ON (mx = user_id) +ORDER BY 1 +LIMIT 5; +ERROR: cannot push down this subquery +DETAIL: Group by list without partition column is currently unsupported +ROLLBACK; +-- should work since we're using an immutable function as recurring tuple +SELECT + DISTINCT user_id +FROM + users_table +JOIN +(SELECT + max(i+5)as mx +FROM + generate_series(0, 10) as i GROUP BY i) squares + ON (mx = user_id) +ORDER BY 1 +LIMIT 5; + user_id +--------- + 5 + 6 +(2 rows) + +-- should not work since we're +-- using an immutable function as recurring tuple +-- along with a distributed table, where GROUP BY is +-- on the recurring tuple +SELECT + DISTINCT user_id +FROM + users_reference_table +JOIN + (SELECT + max(i+5)as mx + FROM + generate_series(0, 10) as i, events_table + WHERE + events_table.user_id = i GROUP BY i) squares + ON (mx = user_id) +ORDER BY 1 +LIMIT 5; +ERROR: cannot push down this subquery +DETAIL: Group by list without partition column is currently unsupported DROP TABLE user_buy_test_table; DROP TABLE users_ref_test_table; DROP TABLE users_return_test_table; diff --git a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql index 9ddcce691..c7949d742 100644 --- a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql +++ b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql @@ -1089,6 +1089,142 @@ SELECT * FROM ORDER BY 1 DESC LIMIT 4; + + +-- test the read_intermediate_result() for GROUP BYs +BEGIN; + +SELECT broadcast_intermediate_result('squares', 'SELECT s, s*s FROM generate_series(1,200) s'); + +-- single appereance of read_intermediate_result +SELECT + DISTINCT user_id +FROM + users_table +JOIN +(SELECT + max(res.val) as mx +FROM + read_intermediate_result('squares', 'binary') AS res (val int, val_square int) +GROUP BY res.val_square) squares + ON (mx = user_id) +ORDER BY 1 +LIMIT 5; + +-- similar to the above, with DISTINCT on intermediate result +SELECT DISTINCT user_id +FROM users_table +JOIN + (SELECT DISTINCT res.val AS mx + FROM read_intermediate_result('squares', 'binary') AS res (val int, val_square int)) squares ON (mx = user_id) +ORDER BY 1 +LIMIT 5; + +-- single appereance of read_intermediate_result but inside a subquery +SELECT + DISTINCT user_id +FROM + users_table +JOIN ( + SELECT *,random() FROM (SELECT + max(res.val) as mx + FROM + (SELECT val, val_square FROM read_intermediate_result('squares', 'binary') AS res (val int, val_square int)) res + GROUP BY res.val_square) foo) +squares + ON (mx = user_id) +ORDER BY 1 +LIMIT 5; + +-- multiple read_intermediate_results in the same subquery is OK +SELECT + DISTINCT user_id +FROM + users_table +JOIN +(SELECT + max(res.val) as mx +FROM + read_intermediate_result('squares', 'binary') AS res (val int, val_square int), + read_intermediate_result('squares', 'binary') AS res2 (val int, val_square int) +WHERE res.val = res2.val_square +GROUP BY res2.val_square) squares + ON (mx = user_id) +ORDER BY 1 +LIMIT 5; + +-- mixed recurring tuples should be supported +SELECT + DISTINCT user_id +FROM + users_table +JOIN +(SELECT + max(res.val) as mx +FROM + read_intermediate_result('squares', 'binary') AS res (val int, val_square int), + generate_series(0, 10) i + WHERE + res.val = i + GROUP BY + i) squares + ON (mx = user_id) +ORDER BY 1 +LIMIT 5; + +-- should error out since there is a distributed table and +-- there are no columns on the GROUP BY from the distributed table +SELECT + DISTINCT user_id +FROM + users_reference_table +JOIN + (SELECT + max(val_square) as mx + FROM + read_intermediate_result('squares', 'binary') AS res (val int, val_square int), events_table + WHERE + events_table.user_id = res.val GROUP BY res.val) squares + ON (mx = user_id) +ORDER BY 1 +LIMIT 5; + +ROLLBACK; + +-- should work since we're using an immutable function as recurring tuple +SELECT + DISTINCT user_id +FROM + users_table +JOIN +(SELECT + max(i+5)as mx +FROM + generate_series(0, 10) as i GROUP BY i) squares + ON (mx = user_id) +ORDER BY 1 +LIMIT 5; + + +-- should not work since we're +-- using an immutable function as recurring tuple +-- along with a distributed table, where GROUP BY is +-- on the recurring tuple +SELECT + DISTINCT user_id +FROM + users_reference_table +JOIN + (SELECT + max(i+5)as mx + FROM + generate_series(0, 10) as i, events_table + WHERE + events_table.user_id = i GROUP BY i) squares + ON (mx = user_id) +ORDER BY 1 +LIMIT 5; + DROP TABLE user_buy_test_table; DROP TABLE users_ref_test_table; DROP TABLE users_return_test_table;