From 3f03cb6a6a666720de8f121fde4e765239f00d80 Mon Sep 17 00:00:00 2001 From: Marco Slot Date: Wed, 29 Nov 2017 12:35:01 +0100 Subject: [PATCH] Support UNION with joins in the subqueries --- .../planner/insert_select_planner.c | 4 +- .../planner/multi_logical_planner.c | 17 +- .../relation_restriction_equivalence.c | 49 ++++-- .../relation_restriction_equivalence.h | 3 +- src/test/regress/expected/multi_subquery.out | 4 +- .../regress/expected/multi_subquery_union.out | 151 +++++++++++++++--- src/test/regress/expected/multi_view.out | 8 +- src/test/regress/sql/multi_subquery_union.sql | 71 +++++++- 8 files changed, 256 insertions(+), 51 deletions(-) diff --git a/src/backend/distributed/planner/insert_select_planner.c b/src/backend/distributed/planner/insert_select_planner.c index 21e4d4f01..acbdcebd8 100644 --- a/src/backend/distributed/planner/insert_select_planner.c +++ b/src/backend/distributed/planner/insert_select_planner.c @@ -403,8 +403,6 @@ static bool SafeToPushDownSubquery(PlannerRestrictionContext *plannerRestrictionContext, Query *originalQuery) { - RelationRestrictionContext *relationRestrictionContext = - plannerRestrictionContext->relationRestrictionContext; bool restrictionEquivalenceForPartitionKeys = RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext); @@ -415,7 +413,7 @@ SafeToPushDownSubquery(PlannerRestrictionContext *plannerRestrictionContext, if (ContainsUnionSubquery(originalQuery)) { - return SafeToPushdownUnionSubquery(relationRestrictionContext); + return SafeToPushdownUnionSubquery(plannerRestrictionContext); } return false; diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index ab7169095..6a720d665 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -633,8 +633,6 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, ListCell *subqueryCell = NULL; List *subqueryList = NIL; DeferredErrorMessage *error = NULL; - RelationRestrictionContext *relationRestrictionContext = - plannerRestrictionContext->relationRestrictionContext; if (originalQuery->limitCount != NULL) { @@ -649,15 +647,16 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, */ if (ContainsUnionSubquery(originalQuery)) { - if (!SafeToPushdownUnionSubquery(relationRestrictionContext)) + if (!SafeToPushdownUnionSubquery(plannerRestrictionContext)) { return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "cannot pushdown the subquery since all leaves of " - "the UNION does not include partition key at the " - "same position", - "Each leaf query of the UNION should return " - "partition key at the same position on its " - "target list.", NULL); + "cannot pushdown the subquery since not all subqueries " + "in the UNION have the partition column in the same " + "position", + "Each leaf query of the UNION should return the " + "partition column in the same position and all joins " + "must be on the partition column", + NULL); } } else if (!RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext)) diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 245fe82f9..68e0c7351 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -142,15 +142,27 @@ static Index RelationRestrictionPartitionKeyIndex(RelationRestriction * * safe to push down, the function would fail to return true. */ bool -SafeToPushdownUnionSubquery(RelationRestrictionContext *restrictionContext) +SafeToPushdownUnionSubquery(PlannerRestrictionContext *plannerRestrictionContext) { + RelationRestrictionContext *restrictionContext = + plannerRestrictionContext->relationRestrictionContext; + JoinRestrictionContext *joinRestrictionContext = + plannerRestrictionContext->joinRestrictionContext; Index unionQueryPartitionKeyIndex = 0; AttributeEquivalenceClass *attributeEquivalance = palloc0(sizeof(AttributeEquivalenceClass)); ListCell *relationRestrictionCell = NULL; + List *relationRestrictionAttributeEquivalenceList = NIL; + List *joinRestrictionAttributeEquivalenceList = NIL; + List *allAttributeEquivalenceList = NIL; attributeEquivalance->equivalenceId = attributeEquivalenceId++; + /* + * Ensure that the partition column is in the same place across all + * leaf queries in the UNION and construct an equivalence class for + * these columns. + */ foreach(relationRestrictionCell, restrictionContext->relationRestrictionList) { RelationRestriction *relationRestriction = lfirst(relationRestrictionCell); @@ -192,7 +204,7 @@ SafeToPushdownUnionSubquery(RelationRestrictionContext *restrictionContext) /* union does not have partition key in the target list */ if (partitionKeyIndex == 0) { - return false; + continue; } } else @@ -203,26 +215,24 @@ SafeToPushdownUnionSubquery(RelationRestrictionContext *restrictionContext) /* union does not have partition key in the target list */ if (partitionKeyIndex == 0) { - return false; + continue; } targetEntryToAdd = list_nth(targetList, partitionKeyIndex - 1); if (!IsA(targetEntryToAdd->expr, Var)) { - return false; + continue; } varToBeAdded = (Var *) targetEntryToAdd->expr; } /* - * If the first relation doesn't have partition key on the target - * list of the query that the relation in, simply not allow to push down - * the query. + * The current relation does not have its partition key in the target list. */ if (partitionKeyIndex == InvalidAttrNumber) { - return false; + continue; } /* @@ -236,14 +246,33 @@ SafeToPushdownUnionSubquery(RelationRestrictionContext *restrictionContext) } else if (unionQueryPartitionKeyIndex != partitionKeyIndex) { - return false; + continue; } AddToAttributeEquivalenceClass(&attributeEquivalance, relationPlannerRoot, varToBeAdded); } - return EquivalenceListContainsRelationsEquality(list_make1(attributeEquivalance), + /* + * For queries of the form: + * (SELECT ... FROM a JOIN b ...) UNION (SELECT .. FROM c JOIN d ... ) + * + * we determine whether all relations are joined on the partition column + * by adding the equivalence classes that can be inferred from joins. + */ + relationRestrictionAttributeEquivalenceList = + GenerateAttributeEquivalencesForRelationRestrictions(restrictionContext); + joinRestrictionAttributeEquivalenceList = + GenerateAttributeEquivalencesForJoinRestrictions(joinRestrictionContext); + + allAttributeEquivalenceList = + list_concat(relationRestrictionAttributeEquivalenceList, + joinRestrictionAttributeEquivalenceList); + + allAttributeEquivalenceList = lappend(allAttributeEquivalenceList, + attributeEquivalance); + + return EquivalenceListContainsRelationsEquality(allAttributeEquivalenceList, restrictionContext); } diff --git a/src/include/distributed/relation_restriction_equivalence.h b/src/include/distributed/relation_restriction_equivalence.h index ad515b423..051fde2f7 100644 --- a/src/include/distributed/relation_restriction_equivalence.h +++ b/src/include/distributed/relation_restriction_equivalence.h @@ -19,7 +19,8 @@ extern bool ContainsUnionSubquery(Query *queryTree); extern bool RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext * plannerRestrictionContext); extern uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionContext); -extern bool SafeToPushdownUnionSubquery(RelationRestrictionContext *restrictionContext); +extern bool SafeToPushdownUnionSubquery( + PlannerRestrictionContext *plannerRestrictionContext); extern List * RelationIdList(Query *query); diff --git a/src/test/regress/expected/multi_subquery.out b/src/test/regress/expected/multi_subquery.out index 981ce47af..3ca5893d1 100644 --- a/src/test/regress/expected/multi_subquery.out +++ b/src/test/regress/expected/multi_subquery.out @@ -91,8 +91,8 @@ SELECT count(*) FROM (SELECT l_orderkey FROM lineitem_subquery) UNION (SELECT l_partkey FROM lineitem_subquery) ) b; -ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position -DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. +ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position +DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column -- Check that we run union queries if partition column is selected. SELECT count(*) FROM ( diff --git a/src/test/regress/expected/multi_subquery_union.out b/src/test/regress/expected/multi_subquery_union.out index c87aae5b1..f035d93b1 100644 --- a/src/test/regress/expected/multi_subquery_union.out +++ b/src/test/regress/expected/multi_subquery_union.out @@ -731,8 +731,8 @@ FROM ( SELECT value_1 as user_id, sum(value_2) AS counter FROM users_table GROUP BY value_1 ) user_id GROUP BY user_id; -ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position -DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. +ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position +DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column -- partition key is not selected SELECT sum(counter) FROM ( @@ -747,8 +747,8 @@ FROM ( SELECT 2 * user_id, sum(value_2) AS counter FROM users_table where value_1 < 5 and value_1 < 6 GROUP BY user_id HAVING sum(value_2) > 25 ) user_id GROUP BY user_id ORDER BY 1 DESC LIMIT 5; -ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position -DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. +ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position +DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column -- excepts within unions are not supported SELECT * FROM ( @@ -773,7 +773,7 @@ UNION ) as ftop; ERROR: cannot push down this subquery DETAIL: Intersect and Except are currently unsupported --- joins inside unions are not supported +-- non-equi join are not supported since there is no equivalence between the partition column SELECT user_id, sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id @@ -781,9 +781,19 @@ FROM ( SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1 ) user_id GROUP BY user_id; -ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position -DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. --- joins inside unions are not supported -- slightly more comlex than the above +ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position +DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column +-- non-equi join also not supported for UNION ALL +SELECT user_id, sum(counter) +FROM ( + SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id + UNION ALL + SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1 +) user_id +GROUP BY user_id; +ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position +DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column +-- joins inside unions are supported -- slightly more comlex than the above SELECT * FROM ( ( @@ -804,9 +814,112 @@ UNION SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE (events_table.user_id = users_table.user_id) GROUP BY events_table.user_id ) user_id_2 GROUP BY user_id) -) as ftop; -ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position -DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. +) as ftop +ORDER BY 2, 1 +LIMIT 10; + user_id | sum +---------+----- + 6 | 43 + 1 | 62 + 4 | 91 + 5 | 94 + 3 | 101 + 2 | 107 + 6 | 241 + 1 | 314 + 3 | 837 + 5 | 869 +(10 rows) + +-- mix up the joins a bit +SELECT * FROM +( +( + SELECT sum(users_table.value_2), events_table.user_id + FROM users_table, events_table + WHERE users_table.user_id = events_Table.user_id + GROUP BY events_table.user_id +) +UNION +( + SELECT sum(users_table.value_2), user_id + FROM users_table LEFT JOIN events_table USING (user_id) + GROUP BY user_id +) +) ftop +ORDER BY 2, 1 +LIMIT 10; + sum | user_id +------+--------- + 300 | 1 + 1200 | 2 + 1155 | 3 + 850 | 4 + 882 | 5 + 210 | 6 +(6 rows) + +SELECT * FROM +( +( + SELECT value_2, user_id + FROM users_table +) +UNION +( + SELECT sum(users_table.value_2), user_id + FROM users_table RIGHT JOIN events_table USING (user_id) + GROUP BY user_id +) +) ftop +ORDER BY 2, 1 +LIMIT 10; + value_2 | user_id +---------+--------- + 0 | 1 + 2 | 1 + 3 | 1 + 4 | 1 + 300 | 1 + 0 | 2 + 1 | 2 + 2 | 2 + 3 | 2 + 4 | 2 +(10 rows) + +-- UNION ALL with joins is supported +SELECT * FROM +( +( + SELECT sum(users_table.value_2), events_table.user_id + FROM users_table, events_table + WHERE users_table.user_id = events_Table.user_id + GROUP BY events_table.user_id +) +UNION ALL +( + SELECT sum(users_table.value_2), user_id + FROM users_table JOIN events_table USING (user_id) + GROUP BY user_id +) +) ftop +ORDER BY 2, 1 +LIMIT 10; + sum | user_id +------+--------- + 300 | 1 + 300 | 1 + 1200 | 2 + 1200 | 2 + 1155 | 3 + 1155 | 3 + 850 | 4 + 850 | 4 + 882 | 5 + 882 | 5 +(10 rows) + -- offset inside the union SELECT user_id, sum(counter) FROM ( @@ -857,8 +970,8 @@ FROM ( user_id) user_id_2 GROUP BY user_id)) AS ftop; -ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position -DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. +ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position +DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column -- some UNION all queries that are going to be pulled up SELECT count(*) @@ -868,8 +981,8 @@ FROM UNION ALL (SELECT 2 * user_id FROM events_table) ) b; -ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position -DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. +ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position +DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column -- last query does not have partition key SELECT user_id, value_3 @@ -889,8 +1002,8 @@ FROM ) b ORDER BY 1 DESC, 2 DESC LIMIT 5; -ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position -DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. +ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position +DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column -- we don't allow joins within unions SELECT count(*) @@ -900,8 +1013,8 @@ FROM UNION ALL (SELECT users_table.user_id FROM events_table, users_table WHERE events_table.user_id = users_table.user_id) ) b; -ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position -DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. +ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position +DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column -- we don't support subqueries without relations SELECT count(*) diff --git a/src/test/regress/expected/multi_view.out b/src/test/regress/expected/multi_view.out index 8bfeed8aa..3ad9550c4 100644 --- a/src/test/regress/expected/multi_view.out +++ b/src/test/regress/expected/multi_view.out @@ -558,8 +558,8 @@ SELECT count(*) UNION ALL (SELECT user_id FROM selected_users) ) u WHERE user_id < 2 AND user_id > 0; -ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position -DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. +ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position +DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column -- expand view definitions and re-run last 2 queries SELECT count(*) FROM ( @@ -584,8 +584,8 @@ SELECT count(*) UNION ALL (SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 1 and value_1 < 3) bb) ) u WHERE user_id < 2 AND user_id > 0; -ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position -DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. +ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position +DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column -- test distinct -- distinct is supported if it is on a partition key CREATE VIEW distinct_user_with_value_1_3 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 3; diff --git a/src/test/regress/sql/multi_subquery_union.sql b/src/test/regress/sql/multi_subquery_union.sql index df40e9d3b..867877e49 100644 --- a/src/test/regress/sql/multi_subquery_union.sql +++ b/src/test/regress/sql/multi_subquery_union.sql @@ -605,7 +605,7 @@ UNION GROUP BY user_id) ) as ftop; --- joins inside unions are not supported +-- non-equi join are not supported since there is no equivalence between the partition column SELECT user_id, sum(counter) FROM ( SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id @@ -614,7 +614,16 @@ FROM ( ) user_id GROUP BY user_id; --- joins inside unions are not supported -- slightly more comlex than the above +-- non-equi join also not supported for UNION ALL +SELECT user_id, sum(counter) +FROM ( + SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id + UNION ALL + SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1 +) user_id +GROUP BY user_id; + +-- joins inside unions are supported -- slightly more comlex than the above SELECT * FROM ( ( @@ -635,7 +644,63 @@ UNION SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE (events_table.user_id = users_table.user_id) GROUP BY events_table.user_id ) user_id_2 GROUP BY user_id) -) as ftop; +) as ftop +ORDER BY 2, 1 +LIMIT 10; + +-- mix up the joins a bit +SELECT * FROM +( +( + SELECT sum(users_table.value_2), events_table.user_id + FROM users_table, events_table + WHERE users_table.user_id = events_Table.user_id + GROUP BY events_table.user_id +) +UNION +( + SELECT sum(users_table.value_2), user_id + FROM users_table LEFT JOIN events_table USING (user_id) + GROUP BY user_id +) +) ftop +ORDER BY 2, 1 +LIMIT 10; + +SELECT * FROM +( +( + SELECT value_2, user_id + FROM users_table +) +UNION +( + SELECT sum(users_table.value_2), user_id + FROM users_table RIGHT JOIN events_table USING (user_id) + GROUP BY user_id +) +) ftop +ORDER BY 2, 1 +LIMIT 10; + +-- UNION ALL with joins is supported +SELECT * FROM +( +( + SELECT sum(users_table.value_2), events_table.user_id + FROM users_table, events_table + WHERE users_table.user_id = events_Table.user_id + GROUP BY events_table.user_id +) +UNION ALL +( + SELECT sum(users_table.value_2), user_id + FROM users_table JOIN events_table USING (user_id) + GROUP BY user_id +) +) ftop +ORDER BY 2, 1 +LIMIT 10; -- offset inside the union SELECT user_id, sum(counter)