mirror of https://github.com/citusdata/citus.git
Merge pull request #1871 from citusdata/relax_from_sublink_checks
Relax checks on recurring tuples in FROM with sublinks
pull/1873/head
commit
cf7dda3892
|
@ -97,8 +97,7 @@ static bool RangeTableArrayContainsAnyRTEIdentities(RangeTblEntry **rangeTableEn
|
|||
rangeTableArrayLength, Relids
|
||||
queryRteIdentities);
|
||||
static Relids QueryRteIdentities(Query *queryTree);
|
||||
static DeferredErrorMessage * DeferErrorIfUnsupportedSublinkAndReferenceTable(
|
||||
Query *queryTree);
|
||||
static DeferredErrorMessage * DeferErrorIfFromClauseRecurs(Query *queryTree);
|
||||
static DeferredErrorMessage * DeferErrorIfCannotPushdownSubquery(Query *subqueryTree,
|
||||
bool
|
||||
outerMostQueryHasLimit);
|
||||
|
@ -130,6 +129,9 @@ static bool HasComplexRangeTableType(Query *queryTree);
|
|||
static bool RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo,
|
||||
RelOptInfo *relationInfo,
|
||||
RecurringTuplesType *recurType);
|
||||
static bool IsRecurringRTE(RangeTblEntry *rangeTableEntry,
|
||||
RecurringTuplesType *recurType);
|
||||
static bool IsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType);
|
||||
static bool HasRecurringTuples(Node *node, RecurringTuplesType *recurType);
|
||||
static bool IsReadIntermediateResultFunction(Node *node);
|
||||
static void ValidateClauseList(List *clauseList);
|
||||
|
@ -567,7 +569,7 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
|
|||
}
|
||||
|
||||
/* we shouldn't allow reference tables in the FROM clause when the query has sublinks */
|
||||
error = DeferErrorIfUnsupportedSublinkAndReferenceTable(originalQuery);
|
||||
error = DeferErrorIfFromClauseRecurs(originalQuery);
|
||||
if (error)
|
||||
{
|
||||
return error;
|
||||
|
@ -810,7 +812,7 @@ QueryRteIdentities(Query *queryTree)
|
|||
|
||||
|
||||
/*
|
||||
* DeferErrorIfUnsupportedSublinkAndReferenceTable returns a deferred error if the
|
||||
* DeferErrorIfFromClauseRecurs returns a deferred error if the
|
||||
* given query is not suitable for subquery pushdown.
|
||||
*
|
||||
* While planning sublinks, we rely on Postgres in the sense that it converts some of
|
||||
|
@ -819,13 +821,13 @@ QueryRteIdentities(Query *queryTree)
|
|||
* In some cases, sublinks are pulled up and converted into outer joins. Those cases
|
||||
* are already handled with DeferredErrorIfUnsupportedRecurringTuplesJoin().
|
||||
*
|
||||
* If the sublinks are not pulled up, we should still error out in if any reference table
|
||||
* appears in the FROM clause of a subquery.
|
||||
* If the sublinks are not pulled up, we should still error out if the expression
|
||||
* in the FROM clause would recur for every shard in a subquery on the WHERE clause.
|
||||
*
|
||||
* Otherwise, the result would include duplicate rows.
|
||||
*/
|
||||
static DeferredErrorMessage *
|
||||
DeferErrorIfUnsupportedSublinkAndReferenceTable(Query *queryTree)
|
||||
DeferErrorIfFromClauseRecurs(Query *queryTree)
|
||||
{
|
||||
RecurringTuplesType recurType = RECURRING_TUPLES_INVALID;
|
||||
|
||||
|
@ -834,8 +836,30 @@ DeferErrorIfUnsupportedSublinkAndReferenceTable(Query *queryTree)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (HasRecurringTuples((Node *) queryTree->rtable, &recurType))
|
||||
if (FindNodeCheckInRangeTableList(queryTree->rtable, IsDistributedTableRTE))
|
||||
{
|
||||
/*
|
||||
* There is a distributed table somewhere in the FROM clause.
|
||||
*
|
||||
* In the typical case this means that the query does not recur,
|
||||
* but there are two exceptions:
|
||||
*
|
||||
* - outer joins such as reference_table LEFT JOIN distributed_table
|
||||
* - FROM reference_table WHERE .. (SELECT .. FROM distributed_table) ..
|
||||
*
|
||||
* However, we check all subqueries and joins separately, so we would
|
||||
* find such conditions in other calls.
|
||||
*/
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Try to figure out which type of recurring tuples we have to produce a
|
||||
* relevant error message. If there are several we'll pick the first one.
|
||||
*/
|
||||
IsRecurringRangeTable(queryTree->rtable, &recurType);
|
||||
|
||||
if (recurType == RECURRING_TUPLES_REFERENCE_TABLE)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
|
@ -860,7 +884,7 @@ DeferErrorIfUnsupportedSublinkAndReferenceTable(Query *queryTree)
|
|||
"the FROM clause when the query has subqueries in the "
|
||||
"WHERE clause", NULL);
|
||||
}
|
||||
else
|
||||
else if (recurType == RECURRING_TUPLES_EMPTY_JOIN_TREE)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"cannot pushdown the subquery",
|
||||
|
@ -868,7 +892,13 @@ DeferErrorIfUnsupportedSublinkAndReferenceTable(Query *queryTree)
|
|||
"clause when the outer query has subqueries in "
|
||||
"WHERE clause", NULL);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We get here when there is neither a distributed table, nor recurring tuples.
|
||||
* That usually means that there isn't a FROM at all (only sublinks), this
|
||||
* implies that queryTree is recurring, but whether this is a problem depends
|
||||
* on outer queries, not on queryTree itself.
|
||||
*/
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1030,7 +1060,7 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
|
|||
}
|
||||
}
|
||||
|
||||
deferredError = DeferErrorIfUnsupportedSublinkAndReferenceTable(subqueryTree);
|
||||
deferredError = DeferErrorIfFromClauseRecurs(subqueryTree);
|
||||
if (deferredError)
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
|
@ -2040,7 +2070,7 @@ RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relati
|
|||
RangeTblEntry *rangeTableEntry = plannerInfo->simple_rte_array[relationId];
|
||||
|
||||
/* relationInfo has this range table entry */
|
||||
if (HasRecurringTuples((Node *) rangeTableEntry, recurType))
|
||||
if (IsRecurringRTE(rangeTableEntry, recurType))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
@ -2050,6 +2080,31 @@ RelationInfoContainsRecurringTuples(PlannerInfo *plannerInfo, RelOptInfo *relati
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* IsRecurringRTE returns whether the range table entry will generate
|
||||
* the same set of tuples when repeating it in a query on different
|
||||
* shards.
|
||||
*/
|
||||
static bool
|
||||
IsRecurringRTE(RangeTblEntry *rangeTableEntry, RecurringTuplesType *recurType)
|
||||
{
|
||||
return IsRecurringRangeTable(list_make1(rangeTableEntry), recurType);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* IsRecurringRangeTable returns whether the range table will generate
|
||||
* the same set of tuples when repeating it in a query on different
|
||||
* shards.
|
||||
*/
|
||||
static bool
|
||||
IsRecurringRangeTable(List *rangeTable, RecurringTuplesType *recurType)
|
||||
{
|
||||
return range_table_walker(rangeTable, HasRecurringTuples, recurType,
|
||||
QTW_EXAMINE_RTES);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* HasRecurringTuples returns whether any part of the expression will generate
|
||||
* the same set of tuples in every query on shards when executing a distributed
|
||||
|
@ -2103,8 +2158,7 @@ HasRecurringTuples(Node *node, RecurringTuplesType *recurType)
|
|||
return true;
|
||||
}
|
||||
|
||||
return range_table_walker(list_make1(rangeTableEntry), HasRecurringTuples,
|
||||
recurType, 0);
|
||||
return false;
|
||||
}
|
||||
else if (IsA(node, Query))
|
||||
{
|
||||
|
|
|
@ -98,6 +98,73 @@ WHERE
|
|||
LIMIT 3;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: Subqueries without FROM are not allowed in FROM clause when the outer query has subqueries in WHERE clause
|
||||
-- join with distributed table prevents FROM from recurring
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
(SELECT s FROM generate_series(1,10) s) series,
|
||||
(SELECT DISTINCT user_id FROM users_table) users_table,
|
||||
(SELECT 1 AS one) one
|
||||
WHERE
|
||||
s = user_id AND user_id > one AND
|
||||
user_id IN
|
||||
(SELECT
|
||||
value_2
|
||||
FROM
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id
|
||||
)
|
||||
ORDER BY user_id
|
||||
LIMIT 3;
|
||||
user_id
|
||||
---------
|
||||
2
|
||||
3
|
||||
4
|
||||
(3 rows)
|
||||
|
||||
-- inner join between distributed prevents FROM from recurring
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_table JOIN users_reference_table USING (user_id)
|
||||
WHERE
|
||||
users_table.value_2 IN
|
||||
(SELECT
|
||||
value_2
|
||||
FROM
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id
|
||||
)
|
||||
ORDER BY user_id
|
||||
LIMIT 3;
|
||||
user_id
|
||||
---------
|
||||
1
|
||||
2
|
||||
3
|
||||
(3 rows)
|
||||
|
||||
-- outer join could still recur
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_table RIGHT JOIN users_reference_table USING (user_id)
|
||||
WHERE
|
||||
users_table.value_2 IN
|
||||
(SELECT
|
||||
value_2
|
||||
FROM
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id
|
||||
)
|
||||
ORDER BY user_id
|
||||
LIMIT 3;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- subqueries in WHERE with IN operator without equality
|
||||
SELECT
|
||||
users_table.user_id, count(*)
|
||||
|
@ -393,9 +460,18 @@ WHERE user_id IN
|
|||
users_reference_table
|
||||
WHERE users_reference_table.user_id NOT IN
|
||||
(SELECT value_2
|
||||
FROM users_reference_table AS u2));
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Reference tables are not allowed in FROM clause when the query has subqueries in WHERE clause
|
||||
FROM users_reference_table AS u2))
|
||||
ORDER BY 1,2,3
|
||||
LIMIT 5;
|
||||
user_id | time | value_1 | value_2 | value_3 | value_4
|
||||
---------+---------------------------------+---------+---------+---------+---------
|
||||
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
|
||||
1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 |
|
||||
1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 |
|
||||
1 | Thu Nov 23 11:11:24.40789 2017 | 3 | 4 | 0 |
|
||||
1 | Thu Nov 23 11:44:57.515981 2017 | 4 | 3 | 4 |
|
||||
(5 rows)
|
||||
|
||||
-- not supported since GROUP BY references to an upper level query
|
||||
SELECT
|
||||
user_id
|
||||
|
|
|
@ -87,6 +87,60 @@ WHERE
|
|||
)
|
||||
LIMIT 3;
|
||||
|
||||
-- join with distributed table prevents FROM from recurring
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
(SELECT s FROM generate_series(1,10) s) series,
|
||||
(SELECT DISTINCT user_id FROM users_table) users_table,
|
||||
(SELECT 1 AS one) one
|
||||
WHERE
|
||||
s = user_id AND user_id > one AND
|
||||
user_id IN
|
||||
(SELECT
|
||||
value_2
|
||||
FROM
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id
|
||||
)
|
||||
ORDER BY user_id
|
||||
LIMIT 3;
|
||||
|
||||
-- inner join between distributed prevents FROM from recurring
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_table JOIN users_reference_table USING (user_id)
|
||||
WHERE
|
||||
users_table.value_2 IN
|
||||
(SELECT
|
||||
value_2
|
||||
FROM
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id
|
||||
)
|
||||
ORDER BY user_id
|
||||
LIMIT 3;
|
||||
|
||||
-- outer join could still recur
|
||||
SELECT
|
||||
DISTINCT user_id
|
||||
FROM
|
||||
users_table RIGHT JOIN users_reference_table USING (user_id)
|
||||
WHERE
|
||||
users_table.value_2 IN
|
||||
(SELECT
|
||||
value_2
|
||||
FROM
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id
|
||||
)
|
||||
ORDER BY user_id
|
||||
LIMIT 3;
|
||||
|
||||
-- subqueries in WHERE with IN operator without equality
|
||||
SELECT
|
||||
users_table.user_id, count(*)
|
||||
|
@ -344,7 +398,9 @@ WHERE user_id IN
|
|||
users_reference_table
|
||||
WHERE users_reference_table.user_id NOT IN
|
||||
(SELECT value_2
|
||||
FROM users_reference_table AS u2));
|
||||
FROM users_reference_table AS u2))
|
||||
ORDER BY 1,2,3
|
||||
LIMIT 5;
|
||||
|
||||
-- not supported since GROUP BY references to an upper level query
|
||||
SELECT
|
||||
|
|
Loading…
Reference in New Issue