Merge pull request #1871 from citusdata/relax_from_sublink_checks

Relax checks on recurring tuples in FROM with sublinks
pull/1873/head
Marco Slot 2017-12-15 12:13:19 +01:00 committed by GitHub
commit cf7dda3892
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 234 additions and 48 deletions

View File

@ -97,8 +97,7 @@ static bool RangeTableArrayContainsAnyRTEIdentities(RangeTblEntry **rangeTableEn
rangeTableArrayLength, Relids rangeTableArrayLength, Relids
queryRteIdentities); queryRteIdentities);
static Relids QueryRteIdentities(Query *queryTree); static Relids QueryRteIdentities(Query *queryTree);
static DeferredErrorMessage * DeferErrorIfUnsupportedSublinkAndReferenceTable( static DeferredErrorMessage * DeferErrorIfFromClauseRecurs(Query *queryTree);
Query *queryTree);
static DeferredErrorMessage * DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, static DeferredErrorMessage * DeferErrorIfCannotPushdownSubquery(Query *subqueryTree,
bool bool
outerMostQueryHasLimit); outerMostQueryHasLimit);
@ -130,6 +129,9 @@ static bool HasComplexRangeTableType(Query *queryTree);
static bool RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, static bool RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo,
RelOptInfo *relationInfo, RelOptInfo *relationInfo,
RecurringTuplesType *recurType); RecurringTuplesType *recurType);
static bool IsRecurringRTE(RangeTblEntry *rangeTableEntry,
RecurringTuplesType *recurType);
static bool IsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType);
static bool HasRecurringTuples(Node *node, RecurringTuplesType *recurType); static bool HasRecurringTuples(Node *node, RecurringTuplesType *recurType);
static bool IsReadIntermediateResultFunction(Node *node); static bool IsReadIntermediateResultFunction(Node *node);
static void ValidateClauseList(List *clauseList); static void ValidateClauseList(List *clauseList);
@ -567,7 +569,7 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
} }
/* we shouldn't allow reference tables in the FROM clause when the query has sublinks */ /* we shouldn't allow reference tables in the FROM clause when the query has sublinks */
error = DeferErrorIfUnsupportedSublinkAndReferenceTable(originalQuery); error = DeferErrorIfFromClauseRecurs(originalQuery);
if (error) if (error)
{ {
return error; return error;
@ -810,7 +812,7 @@ QueryRteIdentities(Query *queryTree)
/* /*
* DeferErrorIfUnsupportedSublinkAndReferenceTable returns a deferred error if the * DeferErrorIfFromClauseRecurs returns a deferred error if the
* given query is not suitable for subquery pushdown. * given query is not suitable for subquery pushdown.
* *
* While planning sublinks, we rely on Postgres in the sense that it converts some of * While planning sublinks, we rely on Postgres in the sense that it converts some of
@ -819,13 +821,13 @@ QueryRteIdentities(Query *queryTree)
* In some cases, sublinks are pulled up and converted into outer joins. Those cases * In some cases, sublinks are pulled up and converted into outer joins. Those cases
* are already handled with DeferredErrorIfUnsupportedRecurringTuplesJoin(). * are already handled with DeferredErrorIfUnsupportedRecurringTuplesJoin().
* *
* If the sublinks are not pulled up, we should still error out if any reference table * If the sublinks are not pulled up, we should still error out if the expression
* appears in the FROM clause of a subquery. * in the FROM clause would recur for every shard in a subquery in the WHERE clause.
* *
* Otherwise, the result would include duplicate rows. * Otherwise, the result would include duplicate rows.
*/ */
static DeferredErrorMessage * static DeferredErrorMessage *
DeferErrorIfUnsupportedSublinkAndReferenceTable(Query *queryTree) DeferErrorIfFromClauseRecurs(Query *queryTree)
{ {
RecurringTuplesType recurType = RECURRING_TUPLES_INVALID; RecurringTuplesType recurType = RECURRING_TUPLES_INVALID;
@ -834,8 +836,30 @@ DeferErrorIfUnsupportedSublinkAndReferenceTable(Query *queryTree)
return NULL; return NULL;
} }
if (HasRecurringTuples((Node *) queryTree->rtable, &recurType)) if (FindNodeCheckInRangeTableList(queryTree->rtable, IsDistributedTableRTE))
{ {
/*
* There is a distributed table somewhere in the FROM clause.
*
* In the typical case this means that the query does not recur,
* but there are two exceptions:
*
* - outer joins such as reference_table LEFT JOIN distributed_table
* - FROM reference_table WHERE .. (SELECT .. FROM distributed_table) ..
*
* However, we check all subqueries and joins separately, so we would
* find such conditions in other calls.
*/
return NULL;
}
/*
* Try to figure out which type of recurring tuples we have to produce a
* relevant error message. If there are several we'll pick the first one.
*/
IsRecurringRangeTable(queryTree->rtable, &recurType);
if (recurType == RECURRING_TUPLES_REFERENCE_TABLE) if (recurType == RECURRING_TUPLES_REFERENCE_TABLE)
{ {
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
@ -860,7 +884,7 @@ DeferErrorIfUnsupportedSublinkAndReferenceTable(Query *queryTree)
"the FROM clause when the query has subqueries in the " "the FROM clause when the query has subqueries in the "
"WHERE clause", NULL); "WHERE clause", NULL);
} }
else else if (recurType == RECURRING_TUPLES_EMPTY_JOIN_TREE)
{ {
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot pushdown the subquery", "cannot pushdown the subquery",
@ -868,7 +892,13 @@ DeferErrorIfUnsupportedSublinkAndReferenceTable(Query *queryTree)
"clause when the outer query has subqueries in " "clause when the outer query has subqueries in "
"WHERE clause", NULL); "WHERE clause", NULL);
} }
}
/*
* We get here when there is neither a distributed table nor recurring tuples.
* That usually means that there isn't a FROM at all (only sublinks). This
* implies that queryTree is recurring, but whether this is a problem depends
* on outer queries, not on queryTree itself.
*/
return NULL; return NULL;
} }
@ -1030,7 +1060,7 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
} }
} }
deferredError = DeferErrorIfUnsupportedSublinkAndReferenceTable(subqueryTree); deferredError = DeferErrorIfFromClauseRecurs(subqueryTree);
if (deferredError) if (deferredError)
{ {
preconditionsSatisfied = false; preconditionsSatisfied = false;
@ -2040,7 +2070,7 @@ RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relati
RangeTblEntry *rangeTableEntry = plannerInfo->simple_rte_array[relationId]; RangeTblEntry *rangeTableEntry = plannerInfo->simple_rte_array[relationId];
/* relationInfo has this range table entry */ /* relationInfo has this range table entry */
if (HasRecurringTuples((Node *) rangeTableEntry, recurType)) if (IsRecurringRTE(rangeTableEntry, recurType))
{ {
return true; return true;
} }
@ -2050,6 +2080,31 @@ RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relati
} }
/*
 * IsRecurringRTE reports whether a single range table entry would produce
 * the same set of tuples each time it is repeated in a query on different
 * shards.
 *
 * The entry is wrapped in a one-element list and the decision is delegated
 * to IsRecurringRangeTable. recurType is forwarded unchanged; presumably the
 * walker stores the kind of recurring tuples it found there (confirm against
 * HasRecurringTuples).
 */
static bool
IsRecurringRTE(RangeTblEntry *rangeTableEntry, RecurringTuplesType *recurType)
{
	List *singleEntryRangeTable = list_make1(rangeTableEntry);
	bool entryRecurs = IsRecurringRangeTable(singleEntryRangeTable, recurType);

	return entryRecurs;
}
/*
 * IsRecurringRangeTable reports whether the given range table would produce
 * the same set of tuples each time it is repeated in a query on different
 * shards.
 *
 * The range table is traversed with range_table_walker, using
 * HasRecurringTuples as the walker callback and recurType as its context.
 * QTW_EXAMINE_RTES is passed so that the walker is also invoked on the range
 * table entries themselves.
 */
static bool
IsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType)
{
	int walkerFlags = QTW_EXAMINE_RTES;
	bool rangeTableRecurs = range_table_walker(rangeTable, HasRecurringTuples,
											   recurType, walkerFlags);

	return rangeTableRecurs;
}
/* /*
* HasRecurringTuples returns whether any part of the expression will generate * HasRecurringTuples returns whether any part of the expression will generate
* the same set of tuples in every query on shards when executing a distributed * the same set of tuples in every query on shards when executing a distributed
@ -2103,8 +2158,7 @@ HasRecurringTuples(Node *node, RecurringTuplesType *recurType)
return true; return true;
} }
return range_table_walker(list_make1(rangeTableEntry), HasRecurringTuples, return false;
recurType, 0);
} }
else if (IsA(node, Query)) else if (IsA(node, Query))
{ {

View File

@ -98,6 +98,73 @@ WHERE
LIMIT 3; LIMIT 3;
ERROR: cannot pushdown the subquery ERROR: cannot pushdown the subquery
DETAIL: Subqueries without FROM are not allowed in FROM clause when the outer query has subqueries in WHERE clause DETAIL: Subqueries without FROM are not allowed in FROM clause when the outer query has subqueries in WHERE clause
-- join with distributed table prevents FROM from recurring
SELECT
DISTINCT user_id
FROM
(SELECT s FROM generate_series(1,10) s) series,
(SELECT DISTINCT user_id FROM users_table) users_table,
(SELECT 1 AS one) one
WHERE
s = user_id AND user_id > one AND
user_id IN
(SELECT
value_2
FROM
events_table
WHERE
users_table.user_id = events_table.user_id
)
ORDER BY user_id
LIMIT 3;
user_id
---------
2
3
4
(3 rows)
-- inner join between distributed prevents FROM from recurring
SELECT
DISTINCT user_id
FROM
users_table JOIN users_reference_table USING (user_id)
WHERE
users_table.value_2 IN
(SELECT
value_2
FROM
events_table
WHERE
users_table.user_id = events_table.user_id
)
ORDER BY user_id
LIMIT 3;
user_id
---------
1
2
3
(3 rows)
-- outer join could still recur
SELECT
DISTINCT user_id
FROM
users_table RIGHT JOIN users_reference_table USING (user_id)
WHERE
users_table.value_2 IN
(SELECT
value_2
FROM
events_table
WHERE
users_table.user_id = events_table.user_id
)
ORDER BY user_id
LIMIT 3;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- subqueries in WHERE with IN operator without equality -- subqueries in WHERE with IN operator without equality
SELECT SELECT
users_table.user_id, count(*) users_table.user_id, count(*)
@ -393,9 +460,18 @@ WHERE user_id IN
users_reference_table users_reference_table
WHERE users_reference_table.user_id NOT IN WHERE users_reference_table.user_id NOT IN
(SELECT value_2 (SELECT value_2
FROM users_reference_table AS u2)); FROM users_reference_table AS u2))
ERROR: cannot push down this subquery ORDER BY 1,2,3
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause LIMIT 5;
user_id | time | value_1 | value_2 | value_3 | value_4
---------+---------------------------------+---------+---------+---------+---------
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 |
1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 |
1 | Thu Nov 23 11:11:24.40789 2017 | 3 | 4 | 0 |
1 | Thu Nov 23 11:44:57.515981 2017 | 4 | 3 | 4 |
(5 rows)
-- not supported since GROUP BY references to an upper level query -- not supported since GROUP BY references to an upper level query
SELECT SELECT
user_id user_id

View File

@ -87,6 +87,60 @@ WHERE
) )
LIMIT 3; LIMIT 3;
-- join with distributed table prevents FROM from recurring
SELECT
DISTINCT user_id
FROM
(SELECT s FROM generate_series(1,10) s) series,
(SELECT DISTINCT user_id FROM users_table) users_table,
(SELECT 1 AS one) one
WHERE
s = user_id AND user_id > one AND
user_id IN
(SELECT
value_2
FROM
events_table
WHERE
users_table.user_id = events_table.user_id
)
ORDER BY user_id
LIMIT 3;
-- inner join between distributed prevents FROM from recurring
SELECT
DISTINCT user_id
FROM
users_table JOIN users_reference_table USING (user_id)
WHERE
users_table.value_2 IN
(SELECT
value_2
FROM
events_table
WHERE
users_table.user_id = events_table.user_id
)
ORDER BY user_id
LIMIT 3;
-- outer join could still recur
SELECT
DISTINCT user_id
FROM
users_table RIGHT JOIN users_reference_table USING (user_id)
WHERE
users_table.value_2 IN
(SELECT
value_2
FROM
events_table
WHERE
users_table.user_id = events_table.user_id
)
ORDER BY user_id
LIMIT 3;
-- subqueries in WHERE with IN operator without equality -- subqueries in WHERE with IN operator without equality
SELECT SELECT
users_table.user_id, count(*) users_table.user_id, count(*)
@ -344,7 +398,9 @@ WHERE user_id IN
users_reference_table users_reference_table
WHERE users_reference_table.user_id NOT IN WHERE users_reference_table.user_id NOT IN
(SELECT value_2 (SELECT value_2
FROM users_reference_table AS u2)); FROM users_reference_table AS u2))
ORDER BY 1,2,3
LIMIT 5;
-- not supported since GROUP BY references to an upper level query -- not supported since GROUP BY references to an upper level query
SELECT SELECT