Merge pull request #1839 from citusdata/union_joins

Support UNION with joins in the subqueries
pull/1840/head
Marco Slot 2017-11-30 10:53:54 +01:00 committed by GitHub
commit 7b8f13cf35
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 256 additions and 51 deletions

View File

@ -403,8 +403,6 @@ static bool
SafeToPushDownSubquery(PlannerRestrictionContext *plannerRestrictionContext,
Query *originalQuery)
{
RelationRestrictionContext *relationRestrictionContext =
plannerRestrictionContext->relationRestrictionContext;
bool restrictionEquivalenceForPartitionKeys =
RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext);
@ -415,7 +413,7 @@ SafeToPushDownSubquery(PlannerRestrictionContext *plannerRestrictionContext,
if (ContainsUnionSubquery(originalQuery))
{
return SafeToPushdownUnionSubquery(relationRestrictionContext);
return SafeToPushdownUnionSubquery(plannerRestrictionContext);
}
return false;

View File

@ -633,8 +633,6 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
ListCell *subqueryCell = NULL;
List *subqueryList = NIL;
DeferredErrorMessage *error = NULL;
RelationRestrictionContext *relationRestrictionContext =
plannerRestrictionContext->relationRestrictionContext;
if (originalQuery->limitCount != NULL)
{
@ -649,15 +647,16 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
*/
if (ContainsUnionSubquery(originalQuery))
{
if (!SafeToPushdownUnionSubquery(relationRestrictionContext))
if (!SafeToPushdownUnionSubquery(plannerRestrictionContext))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot pushdown the subquery since all leaves of "
"the UNION does not include partition key at the "
"same position",
"Each leaf query of the UNION should return "
"partition key at the same position on its "
"target list.", NULL);
"cannot pushdown the subquery since not all subqueries "
"in the UNION have the partition column in the same "
"position",
"Each leaf query of the UNION should return the "
"partition column in the same position and all joins "
"must be on the partition column",
NULL);
}
}
else if (!RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext))

View File

@ -142,15 +142,27 @@ static Index RelationRestrictionPartitionKeyIndex(RelationRestriction *
* safe to push down, the function would fail to return true.
*/
bool
SafeToPushdownUnionSubquery(RelationRestrictionContext *restrictionContext)
SafeToPushdownUnionSubquery(PlannerRestrictionContext *plannerRestrictionContext)
{
RelationRestrictionContext *restrictionContext =
plannerRestrictionContext->relationRestrictionContext;
JoinRestrictionContext *joinRestrictionContext =
plannerRestrictionContext->joinRestrictionContext;
Index unionQueryPartitionKeyIndex = 0;
AttributeEquivalenceClass *attributeEquivalance =
palloc0(sizeof(AttributeEquivalenceClass));
ListCell *relationRestrictionCell = NULL;
List *relationRestrictionAttributeEquivalenceList = NIL;
List *joinRestrictionAttributeEquivalenceList = NIL;
List *allAttributeEquivalenceList = NIL;
attributeEquivalance->equivalenceId = attributeEquivalenceId++;
/*
* Ensure that the partition column is in the same place across all
* leaf queries in the UNION and construct an equivalence class for
* these columns.
*/
foreach(relationRestrictionCell, restrictionContext->relationRestrictionList)
{
RelationRestriction *relationRestriction = lfirst(relationRestrictionCell);
@ -192,7 +204,7 @@ SafeToPushdownUnionSubquery(RelationRestrictionContext *restrictionContext)
/* union does not have partition key in the target list */
if (partitionKeyIndex == 0)
{
return false;
continue;
}
}
else
@ -203,26 +215,24 @@ SafeToPushdownUnionSubquery(RelationRestrictionContext *restrictionContext)
/* union does not have partition key in the target list */
if (partitionKeyIndex == 0)
{
return false;
continue;
}
targetEntryToAdd = list_nth(targetList, partitionKeyIndex - 1);
if (!IsA(targetEntryToAdd->expr, Var))
{
return false;
continue;
}
varToBeAdded = (Var *) targetEntryToAdd->expr;
}
/*
* If the first relation doesn't have partition key on the target
* list of the query that the relation in, simply not allow to push down
* the query.
* The current relation does not have its partition key in the target list.
*/
if (partitionKeyIndex == InvalidAttrNumber)
{
return false;
continue;
}
/*
@ -236,14 +246,33 @@ SafeToPushdownUnionSubquery(RelationRestrictionContext *restrictionContext)
}
else if (unionQueryPartitionKeyIndex != partitionKeyIndex)
{
return false;
continue;
}
AddToAttributeEquivalenceClass(&attributeEquivalance, relationPlannerRoot,
varToBeAdded);
}
return EquivalenceListContainsRelationsEquality(list_make1(attributeEquivalance),
/*
* For queries of the form:
* (SELECT ... FROM a JOIN b ...) UNION (SELECT .. FROM c JOIN d ... )
*
* we determine whether all relations are joined on the partition column
* by adding the equivalence classes that can be inferred from joins.
*/
relationRestrictionAttributeEquivalenceList =
GenerateAttributeEquivalencesForRelationRestrictions(restrictionContext);
joinRestrictionAttributeEquivalenceList =
GenerateAttributeEquivalencesForJoinRestrictions(joinRestrictionContext);
allAttributeEquivalenceList =
list_concat(relationRestrictionAttributeEquivalenceList,
joinRestrictionAttributeEquivalenceList);
allAttributeEquivalenceList = lappend(allAttributeEquivalenceList,
attributeEquivalance);
return EquivalenceListContainsRelationsEquality(allAttributeEquivalenceList,
restrictionContext);
}

View File

@ -19,7 +19,8 @@ extern bool ContainsUnionSubquery(Query *queryTree);
extern bool RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext *
plannerRestrictionContext);
extern uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionContext);
extern bool SafeToPushdownUnionSubquery(RelationRestrictionContext *restrictionContext);
extern bool SafeToPushdownUnionSubquery(
PlannerRestrictionContext *plannerRestrictionContext);
extern List * RelationIdList(Query *query);

View File

@ -91,8 +91,8 @@ SELECT count(*) FROM
(SELECT l_orderkey FROM lineitem_subquery) UNION
(SELECT l_partkey FROM lineitem_subquery)
) b;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- Check that we run union queries if partition column is selected.
SELECT count(*) FROM
(

View File

@ -731,8 +731,8 @@ FROM (
SELECT value_1 as user_id, sum(value_2) AS counter FROM users_table GROUP BY value_1
) user_id
GROUP BY user_id;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- partition key is not selected
SELECT sum(counter)
FROM (
@ -747,8 +747,8 @@ FROM (
SELECT 2 * user_id, sum(value_2) AS counter FROM users_table where value_1 < 5 and value_1 < 6 GROUP BY user_id HAVING sum(value_2) > 25
) user_id
GROUP BY user_id ORDER BY 1 DESC LIMIT 5;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- excepts within unions are not supported
SELECT * FROM
(
@ -773,7 +773,7 @@ UNION
) as ftop;
ERROR: cannot push down this subquery
DETAIL: Intersect and Except are currently unsupported
-- joins inside unions are not supported
-- non-equi join are not supported since there is no equivalence between the partition column
SELECT user_id, sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id
@ -781,9 +781,19 @@ FROM (
SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1
) user_id
GROUP BY user_id;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
-- joins inside unions are not supported -- slightly more comlex than the above
ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- non-equi join also not supported for UNION ALL
SELECT user_id, sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id
UNION ALL
SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1
) user_id
GROUP BY user_id;
ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- joins inside unions are supported -- slightly more comlex than the above
SELECT * FROM
(
(
@ -804,9 +814,112 @@ UNION
SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE (events_table.user_id = users_table.user_id) GROUP BY events_table.user_id
) user_id_2
GROUP BY user_id)
) as ftop;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
) as ftop
ORDER BY 2, 1
LIMIT 10;
user_id | sum
---------+-----
6 | 43
1 | 62
4 | 91
5 | 94
3 | 101
2 | 107
6 | 241
1 | 314
3 | 837
5 | 869
(10 rows)
-- mix up the joins a bit
SELECT * FROM
(
(
SELECT sum(users_table.value_2), events_table.user_id
FROM users_table, events_table
WHERE users_table.user_id = events_Table.user_id
GROUP BY events_table.user_id
)
UNION
(
SELECT sum(users_table.value_2), user_id
FROM users_table LEFT JOIN events_table USING (user_id)
GROUP BY user_id
)
) ftop
ORDER BY 2, 1
LIMIT 10;
sum | user_id
------+---------
300 | 1
1200 | 2
1155 | 3
850 | 4
882 | 5
210 | 6
(6 rows)
SELECT * FROM
(
(
SELECT value_2, user_id
FROM users_table
)
UNION
(
SELECT sum(users_table.value_2), user_id
FROM users_table RIGHT JOIN events_table USING (user_id)
GROUP BY user_id
)
) ftop
ORDER BY 2, 1
LIMIT 10;
value_2 | user_id
---------+---------
0 | 1
2 | 1
3 | 1
4 | 1
300 | 1
0 | 2
1 | 2
2 | 2
3 | 2
4 | 2
(10 rows)
-- UNION ALL with joins is supported
SELECT * FROM
(
(
SELECT sum(users_table.value_2), events_table.user_id
FROM users_table, events_table
WHERE users_table.user_id = events_Table.user_id
GROUP BY events_table.user_id
)
UNION ALL
(
SELECT sum(users_table.value_2), user_id
FROM users_table JOIN events_table USING (user_id)
GROUP BY user_id
)
) ftop
ORDER BY 2, 1
LIMIT 10;
sum | user_id
------+---------
300 | 1
300 | 1
1200 | 2
1200 | 2
1155 | 3
1155 | 3
850 | 4
850 | 4
882 | 5
882 | 5
(10 rows)
-- offset inside the union
SELECT user_id, sum(counter)
FROM (
@ -857,8 +970,8 @@ FROM (
user_id) user_id_2
GROUP BY
user_id)) AS ftop;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- some UNION all queries that are going to be pulled up
SELECT
count(*)
@ -868,8 +981,8 @@ FROM
UNION ALL
(SELECT 2 * user_id FROM events_table)
) b;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- last query does not have partition key
SELECT
user_id, value_3
@ -889,8 +1002,8 @@ FROM
) b
ORDER BY 1 DESC, 2 DESC
LIMIT 5;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- we don't allow joins within unions
SELECT
count(*)
@ -900,8 +1013,8 @@ FROM
UNION ALL
(SELECT users_table.user_id FROM events_table, users_table WHERE events_table.user_id = users_table.user_id)
) b;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- we don't support subqueries without relations
SELECT
count(*)

View File

@ -558,8 +558,8 @@ SELECT count(*)
UNION ALL
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 2 AND user_id > 0;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- expand view definitions and re-run last 2 queries
SELECT count(*)
FROM (
@ -584,8 +584,8 @@ SELECT count(*)
UNION ALL
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 1 and value_1 < 3) bb) ) u
WHERE user_id < 2 AND user_id > 0;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position
DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column
-- test distinct
-- distinct is supported if it is on a partition key
CREATE VIEW distinct_user_with_value_1_3 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 3;

View File

@ -605,7 +605,7 @@ UNION
GROUP BY user_id)
) as ftop;
-- joins inside unions are not supported
-- non-equi join are not supported since there is no equivalence between the partition column
SELECT user_id, sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id
@ -614,7 +614,16 @@ FROM (
) user_id
GROUP BY user_id;
-- joins inside unions are not supported -- slightly more comlex than the above
-- non-equi join also not supported for UNION ALL
SELECT user_id, sum(counter)
FROM (
SELECT user_id, sum(value_2) AS counter FROM users_table GROUP BY user_id
UNION ALL
SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1
) user_id
GROUP BY user_id;
-- joins inside unions are supported -- slightly more comlex than the above
SELECT * FROM
(
(
@ -635,7 +644,63 @@ UNION
SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE (events_table.user_id = users_table.user_id) GROUP BY events_table.user_id
) user_id_2
GROUP BY user_id)
) as ftop;
) as ftop
ORDER BY 2, 1
LIMIT 10;
-- mix up the joins a bit
SELECT * FROM
(
(
SELECT sum(users_table.value_2), events_table.user_id
FROM users_table, events_table
WHERE users_table.user_id = events_Table.user_id
GROUP BY events_table.user_id
)
UNION
(
SELECT sum(users_table.value_2), user_id
FROM users_table LEFT JOIN events_table USING (user_id)
GROUP BY user_id
)
) ftop
ORDER BY 2, 1
LIMIT 10;
SELECT * FROM
(
(
SELECT value_2, user_id
FROM users_table
)
UNION
(
SELECT sum(users_table.value_2), user_id
FROM users_table RIGHT JOIN events_table USING (user_id)
GROUP BY user_id
)
) ftop
ORDER BY 2, 1
LIMIT 10;
-- UNION ALL with joins is supported
SELECT * FROM
(
(
SELECT sum(users_table.value_2), events_table.user_id
FROM users_table, events_table
WHERE users_table.user_id = events_Table.user_id
GROUP BY events_table.user_id
)
UNION ALL
(
SELECT sum(users_table.value_2), user_id
FROM users_table JOIN events_table USING (user_id)
GROUP BY user_id
)
) ftop
ORDER BY 2, 1
LIMIT 10;
-- offset inside the union
SELECT user_id, sum(counter)