Support UNION with joins in the subqueries

pull/1839/head
Marco Slot 2017-11-29 12:35:01 +01:00
parent 906dadddb7
commit 3f03cb6a6a
8 changed files with 256 additions and 51 deletions

View File

@ -403,8 +403,6 @@ static bool
SafeToPushDownSubquery(PlannerRestrictionContext *plannerRestrictionContext, SafeToPushDownSubquery(PlannerRestrictionContext *plannerRestrictionContext,
Query *originalQuery) Query *originalQuery)
{ {
RelationRestrictionContext *relationRestrictionContext =
plannerRestrictionContext->relationRestrictionContext;
bool restrictionEquivalenceForPartitionKeys = bool restrictionEquivalenceForPartitionKeys =
RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext); RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext);
@ -415,7 +413,7 @@ SafeToPushDownSubquery(PlannerRestrictionContext *plannerRestrictionContext,
if (ContainsUnionSubquery(originalQuery)) if (ContainsUnionSubquery(originalQuery))
{ {
return SafeToPushdownUnionSubquery(relationRestrictionContext); return SafeToPushdownUnionSubquery(plannerRestrictionContext);
} }
return false; return false;

View File

@ -633,8 +633,6 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
ListCell *subqueryCell = NULL; ListCell *subqueryCell = NULL;
List *subqueryList = NIL; List *subqueryList = NIL;
DeferredErrorMessage *error = NULL; DeferredErrorMessage *error = NULL;
RelationRestrictionContext *relationRestrictionContext =
plannerRestrictionContext->relationRestrictionContext;
if (originalQuery->limitCount != NULL) if (originalQuery->limitCount != NULL)
{ {
@ -649,15 +647,16 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
*/ */
if (ContainsUnionSubquery(originalQuery)) if (ContainsUnionSubquery(originalQuery))
{ {
if (!SafeToPushdownUnionSubquery(relationRestrictionContext)) if (!SafeToPushdownUnionSubquery(plannerRestrictionContext))
{ {
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot pushdown the subquery since all leaves of " "cannot pushdown the subquery since not all subqueries "
"the UNION does not include partition key at the " "in the UNION have the partition column in the same "
"same position", "position",
"Each leaf query of the UNION should return " "Each leaf query of the UNION should return the "
"partition key at the same position on its " "partition column in the same position and all joins "
"target list.", NULL); "must be on the partition column",
NULL);
} }
} }
else if (!RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext)) else if (!RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext))

View File

@ -142,15 +142,27 @@ static Index RelationRestrictionPartitionKeyIndex(RelationRestriction *
* safe to push down, the function would fail to return true. * safe to push down, the function would fail to return true.
*/ */
bool bool
SafeToPushdownUnionSubquery(RelationRestrictionContext *restrictionContext) SafeToPushdownUnionSubquery(PlannerRestrictionContext *plannerRestrictionContext)
{ {
RelationRestrictionContext *restrictionContext =
plannerRestrictionContext->relationRestrictionContext;
JoinRestrictionContext *joinRestrictionContext =
plannerRestrictionContext->joinRestrictionContext;
Index unionQueryPartitionKeyIndex = 0; Index unionQueryPartitionKeyIndex = 0;
AttributeEquivalenceClass *attributeEquivalance = AttributeEquivalenceClass *attributeEquivalance =
palloc0(sizeof(AttributeEquivalenceClass)); palloc0(sizeof(AttributeEquivalenceClass));
ListCell *relationRestrictionCell = NULL; ListCell *relationRestrictionCell = NULL;
List *relationRestrictionAttributeEquivalenceList = NIL;
List *joinRestrictionAttributeEquivalenceList = NIL;
List *allAttributeEquivalenceList = NIL;
attributeEquivalance->equivalenceId = attributeEquivalenceId++; attributeEquivalance->equivalenceId = attributeEquivalenceId++;
/*
* Ensure that the partition column is in the same place across all
* leaf queries in the UNION and construct an equivalence class for
* these columns.
*/
foreach(relationRestrictionCell, restrictionContext->relationRestrictionList) foreach(relationRestrictionCell, restrictionContext->relationRestrictionList)
{ {
RelationRestriction *relationRestriction = lfirst(relationRestrictionCell); RelationRestriction *relationRestriction = lfirst(relationRestrictionCell);
@ -192,7 +204,7 @@ SafeToPushdownUnionSubquery(RelationRestrictionContext *restrictionContext)
/* union does not have partition key in the target list */ /* union does not have partition key in the target list */
if (partitionKeyIndex == 0) if (partitionKeyIndex == 0)
{ {
return false; continue;
} }
} }
else else
@ -203,26 +215,24 @@ SafeToPushdownUnionSubquery(RelationRestrictionContext *restrictionContext)
/* union does not have partition key in the target list */ /* union does not have partition key in the target list */
if (partitionKeyIndex == 0) if (partitionKeyIndex == 0)
{ {
return false; continue;
} }
targetEntryToAdd = list_nth(targetList, partitionKeyIndex - 1); targetEntryToAdd = list_nth(targetList, partitionKeyIndex - 1);
if (!IsA(targetEntryToAdd->expr, Var)) if (!IsA(targetEntryToAdd->expr, Var))
{ {
return false; continue;
} }
varToBeAdded = (Var *) targetEntryToAdd->expr; varToBeAdded = (Var *) targetEntryToAdd->expr;
} }
/* /*
* If the first relation doesn't have partition key on the target * The current relation does not have its partition key in the target list.
* list of the query that the relation in, simply not allow to push down
* the query.
*/ */
if (partitionKeyIndex == InvalidAttrNumber) if (partitionKeyIndex == InvalidAttrNumber)
{ {
return false; continue;
} }
/* /*
@ -236,14 +246,33 @@ SafeToPushdownUnionSubquery(RelationRestrictionContext *restrictionContext)
} }
else if (unionQueryPartitionKeyIndex != partitionKeyIndex) else if (unionQueryPartitionKeyIndex != partitionKeyIndex)
{ {
return false; continue;
} }
AddToAttributeEquivalenceClass(&attributeEquivalance, relationPlannerRoot, AddToAttributeEquivalenceClass(&attributeEquivalance, relationPlannerRoot,
varToBeAdded); varToBeAdded);
} }
return EquivalenceListContainsRelationsEquality(list_make1(attributeEquivalance), /*
* For queries of the form:
* (SELECT ... FROM a JOIN b ...) UNION (SELECT .. FROM c JOIN d ... )
*
* we determine whether all relations are joined on the partition column
* by adding the equivalence classes that can be inferred from joins.
*/
relationRestrictionAttributeEquivalenceList =
GenerateAttributeEquivalencesForRelationRestrictions(restrictionContext);
joinRestrictionAttributeEquivalenceList =
GenerateAttributeEquivalencesForJoinRestrictions(joinRestrictionContext);
allAttributeEquivalenceList =
list_concat(relationRestrictionAttributeEquivalenceList,
joinRestrictionAttributeEquivalenceList);
allAttributeEquivalenceList = lappend(allAttributeEquivalenceList,
attributeEquivalance);
return EquivalenceListContainsRelationsEquality(allAttributeEquivalenceList,
restrictionContext); restrictionContext);
} }

View File

@ -19,7 +19,8 @@ extern bool ContainsUnionSubquery(Query *queryTree);
extern bool RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext * extern bool RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext *
plannerRestrictionContext); plannerRestrictionContext);
extern uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionContext); extern uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionContext);
extern bool SafeToPushdownUnionSubquery(RelationRestrictionContext *restrictionContext); extern bool SafeToPushdownUnionSubquery(
PlannerRestrictionContext *plannerRestrictionContext);
extern List * RelationIdList(Query *query); extern List * RelationIdList(Query *query);

View File

@ -91,8 +91,8 @@ SELECT count(*) FROM
(SELECT l_orderkey FROM lineitem_subquery) UNION (SELECT l_orderkey FROM lineitem_subquery) UNION
(SELECT l_partkey FROM lineitem_subquery) (SELECT l_partkey FROM lineitem_subquery)
) b; ) b;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- Check that we run union queries if partition column is selected. -- Check that we run union queries if partition column is selected.
SELECT count(*) FROM SELECT count(*) FROM
( (

View File

@ -731,8 +731,8 @@ FROM (
SELECT value_1 as user_id, sum(value_2) AS counter FROM users_table GROUP BY value_1 SELECT value_1 as user_id, sum(value_2) AS counter FROM users_table GROUP BY value_1
) user_id ) user_id
GROUP BY user_id; GROUP BY user_id;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- partition key is not selected -- partition key is not selected
SELECT sum(counter) SELECT sum(counter)
FROM ( FROM (
@ -747,8 +747,8 @@ FROM (
SELECT 2 * user_id, sum(value_2) AS counter FROM users_table where value_1 < 5 and value_1 < 6 GROUP BY user_id HAVING sum(value_2) > 25 SELECT 2 * user_id, sum(value_2) AS counter FROM users_table where value_1 < 5 and value_1 < 6 GROUP BY user_id HAVING sum(value_2) > 25
) user_id ) user_id
GROUP BY user_id ORDER BY 1 DESC LIMIT 5; GROUP BY user_id ORDER BY 1 DESC LIMIT 5;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- excepts within unions are not supported -- excepts within unions are not supported
SELECT * FROM SELECT * FROM
( (
@ -773,7 +773,7 @@ UNION
) as ftop; ) as ftop;
ERROR: cannot push down this subquery ERROR: cannot push down this subquery
DETAIL: Intersect and Except are currently unsupported DETAIL: Intersect and Except are currently unsupported
-- joins inside unions are not supported -- non-equi joins are not supported since there is no equivalence between the partition columns
SELECT user_id, sum(counter) SELECT user_id, sum(counter)
FROM ( FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id
@ -781,9 +781,19 @@ FROM (
SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1 SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1
) user_id ) user_id
GROUP BY user_id; GROUP BY user_id;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- joins inside unions are not supported -- slightly more comlex than the above -- non-equi join also not supported for UNION ALL
SELECT user_id, sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id
UNION ALL
SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1
) user_id
GROUP BY user_id;
ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- joins inside unions are supported -- slightly more complex than the above
SELECT * FROM SELECT * FROM
( (
( (
@ -804,9 +814,112 @@ UNION
SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE (events_table.user_id = users_table.user_id) GROUP BY events_table.user_id SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE (events_table.user_id = users_table.user_id) GROUP BY events_table.user_id
) user_id_2 ) user_id_2
GROUP BY user_id) GROUP BY user_id)
) as ftop; ) as ftop
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position ORDER BY 2, 1
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. LIMIT 10;
user_id | sum
---------+-----
6 | 43
1 | 62
4 | 91
5 | 94
3 | 101
2 | 107
6 | 241
1 | 314
3 | 837
5 | 869
(10 rows)
-- mix up the joins a bit
SELECT * FROM
(
(
SELECT sum(users_table.value_2), events_table.user_id
FROM users_table, events_table
WHERE users_table.user_id = events_Table.user_id
GROUP BY events_table.user_id
)
UNION
(
SELECT sum(users_table.value_2), user_id
FROM users_table LEFT JOIN events_table USING (user_id)
GROUP BY user_id
)
) ftop
ORDER BY 2, 1
LIMIT 10;
sum | user_id
------+---------
300 | 1
1200 | 2
1155 | 3
850 | 4
882 | 5
210 | 6
(6 rows)
SELECT * FROM
(
(
SELECT value_2, user_id
FROM users_table
)
UNION
(
SELECT sum(users_table.value_2), user_id
FROM users_table RIGHT JOIN events_table USING (user_id)
GROUP BY user_id
)
) ftop
ORDER BY 2, 1
LIMIT 10;
value_2 | user_id
---------+---------
0 | 1
2 | 1
3 | 1
4 | 1
300 | 1
0 | 2
1 | 2
2 | 2
3 | 2
4 | 2
(10 rows)
-- UNION ALL with joins is supported
SELECT * FROM
(
(
SELECT sum(users_table.value_2), events_table.user_id
FROM users_table, events_table
WHERE users_table.user_id = events_Table.user_id
GROUP BY events_table.user_id
)
UNION ALL
(
SELECT sum(users_table.value_2), user_id
FROM users_table JOIN events_table USING (user_id)
GROUP BY user_id
)
) ftop
ORDER BY 2, 1
LIMIT 10;
sum | user_id
------+---------
300 | 1
300 | 1
1200 | 2
1200 | 2
1155 | 3
1155 | 3
850 | 4
850 | 4
882 | 5
882 | 5
(10 rows)
-- offset inside the union -- offset inside the union
SELECT user_id, sum(counter) SELECT user_id, sum(counter)
FROM ( FROM (
@ -857,8 +970,8 @@ FROM (
user_id) user_id_2 user_id) user_id_2
GROUP BY GROUP BY
user_id)) AS ftop; user_id)) AS ftop;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- some UNION all queries that are going to be pulled up -- some UNION all queries that are going to be pulled up
SELECT SELECT
count(*) count(*)
@ -868,8 +981,8 @@ FROM
UNION ALL UNION ALL
(SELECT 2 * user_id FROM events_table) (SELECT 2 * user_id FROM events_table)
) b; ) b;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- last query does not have partition key -- last query does not have partition key
SELECT SELECT
user_id, value_3 user_id, value_3
@ -889,8 +1002,8 @@ FROM
) b ) b
ORDER BY 1 DESC, 2 DESC ORDER BY 1 DESC, 2 DESC
LIMIT 5; LIMIT 5;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- we don't allow joins within unions -- we don't allow joins within unions
SELECT SELECT
count(*) count(*)
@ -900,8 +1013,8 @@ FROM
UNION ALL UNION ALL
(SELECT users_table.user_id FROM events_table, users_table WHERE events_table.user_id = users_table.user_id) (SELECT users_table.user_id FROM events_table, users_table WHERE events_table.user_id = users_table.user_id)
) b; ) b;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- we don't support subqueries without relations -- we don't support subqueries without relations
SELECT SELECT
count(*) count(*)

View File

@ -558,8 +558,8 @@ SELECT count(*)
UNION ALL UNION ALL
(SELECT user_id FROM selected_users) ) u (SELECT user_id FROM selected_users) ) u
WHERE user_id < 2 AND user_id > 0; WHERE user_id < 2 AND user_id > 0;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- expand view definitions and re-run last 2 queries -- expand view definitions and re-run last 2 queries
SELECT count(*) SELECT count(*)
FROM ( FROM (
@ -584,8 +584,8 @@ SELECT count(*)
UNION ALL UNION ALL
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 1 and value_1 < 3) bb) ) u (SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 1 and value_1 < 3) bb) ) u
WHERE user_id < 2 AND user_id > 0; WHERE user_id < 2 AND user_id > 0;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- test distinct -- test distinct
-- distinct is supported if it is on a partition key -- distinct is supported if it is on a partition key
CREATE VIEW distinct_user_with_value_1_3 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 3; CREATE VIEW distinct_user_with_value_1_3 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 3;

View File

@ -605,7 +605,7 @@ UNION
GROUP BY user_id) GROUP BY user_id)
) as ftop; ) as ftop;
-- joins inside unions are not supported -- non-equi joins are not supported since there is no equivalence between the partition columns
SELECT user_id, sum(counter) SELECT user_id, sum(counter)
FROM ( FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id
@ -614,7 +614,16 @@ FROM (
) user_id ) user_id
GROUP BY user_id; GROUP BY user_id;
-- joins inside unions are not supported -- slightly more comlex than the above -- non-equi join also not supported for UNION ALL
SELECT user_id, sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id
UNION ALL
SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1
) user_id
GROUP BY user_id;
-- joins inside unions are supported -- slightly more complex than the above
SELECT * FROM SELECT * FROM
( (
( (
@ -635,7 +644,63 @@ UNION
SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE (events_table.user_id = users_table.user_id) GROUP BY events_table.user_id SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE (events_table.user_id = users_table.user_id) GROUP BY events_table.user_id
) user_id_2 ) user_id_2
GROUP BY user_id) GROUP BY user_id)
) as ftop; ) as ftop
ORDER BY 2, 1
LIMIT 10;
-- mix up the joins a bit
SELECT * FROM
(
(
SELECT sum(users_table.value_2), events_table.user_id
FROM users_table, events_table
WHERE users_table.user_id = events_Table.user_id
GROUP BY events_table.user_id
)
UNION
(
SELECT sum(users_table.value_2), user_id
FROM users_table LEFT JOIN events_table USING (user_id)
GROUP BY user_id
)
) ftop
ORDER BY 2, 1
LIMIT 10;
SELECT * FROM
(
(
SELECT value_2, user_id
FROM users_table
)
UNION
(
SELECT sum(users_table.value_2), user_id
FROM users_table RIGHT JOIN events_table USING (user_id)
GROUP BY user_id
)
) ftop
ORDER BY 2, 1
LIMIT 10;
-- UNION ALL with joins is supported
SELECT * FROM
(
(
SELECT sum(users_table.value_2), events_table.user_id
FROM users_table, events_table
WHERE users_table.user_id = events_Table.user_id
GROUP BY events_table.user_id
)
UNION ALL
(
SELECT sum(users_table.value_2), user_id
FROM users_table JOIN events_table USING (user_id)
GROUP BY user_id
)
) ftop
ORDER BY 2, 1
LIMIT 10;
-- offset inside the union -- offset inside the union
SELECT user_id, sum(counter) SELECT user_id, sum(counter)