mirror of https://github.com/citusdata/citus.git
Address review feedback
parent
23dccd8941
commit
100e5d3196
|
@ -34,6 +34,7 @@
|
|||
#include "distributed/pg_dist_partition.h"
|
||||
#include "distributed/query_utils.h"
|
||||
#include "distributed/query_pushdown_planning.h"
|
||||
#include "distributed/recursive_planning.h"
|
||||
#include "distributed/relation_restriction_equivalence.h"
|
||||
#include "distributed/version_compat.h"
|
||||
#include "nodes/nodeFuncs.h"
|
||||
|
@ -78,6 +79,7 @@ static RecurringTuplesType FromClauseRecurringTupleType(Query *queryTree);
|
|||
static DeferredErrorMessage * DeferredErrorIfUnsupportedRecurringTuplesJoin(
|
||||
PlannerRestrictionContext *plannerRestrictionContext);
|
||||
static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree);
|
||||
static DeferredErrorMessage * DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree);
|
||||
static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList);
|
||||
static RecurringTuplesType FetchFirstRecurType(PlannerInfo *plannerInfo,
|
||||
Relids relids);
|
||||
|
@ -911,7 +913,6 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
|
|||
{
|
||||
bool preconditionsSatisfied = true;
|
||||
char *errorDetail = NULL;
|
||||
StringInfo errorInfo = NULL;
|
||||
|
||||
DeferredErrorMessage *deferredError = DeferErrorIfUnsupportedTableCombination(
|
||||
subqueryTree);
|
||||
|
@ -934,84 +935,12 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
|
|||
* push down SQL features within such a function, as long as co-located join
|
||||
* checks are applied.
|
||||
*/
|
||||
if (!contain_vars_of_level((Node *) subqueryTree, 1))
|
||||
if (!ContainsReferencesToOuterQuery(subqueryTree))
|
||||
{
|
||||
if (subqueryTree->limitOffset)
|
||||
deferredError = DeferErrorIfSubqueryRequiresMerge(subqueryTree);
|
||||
if (deferredError)
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
errorDetail = "Offset clause is currently unsupported when a subquery "
|
||||
"references a column from another query";
|
||||
}
|
||||
|
||||
/* limit is not supported when SubqueryPushdown is not set */
|
||||
if (subqueryTree->limitCount && !SubqueryPushdown)
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
errorDetail = "Limit in subquery is currently unsupported when a "
|
||||
"subquery references a column from another query";
|
||||
}
|
||||
|
||||
/* group clause list must include partition column */
|
||||
if (subqueryTree->groupClause)
|
||||
{
|
||||
List *groupClauseList = subqueryTree->groupClause;
|
||||
List *targetEntryList = subqueryTree->targetList;
|
||||
List *groupTargetEntryList = GroupTargetEntryList(groupClauseList,
|
||||
targetEntryList);
|
||||
bool groupOnPartitionColumn =
|
||||
TargetListOnPartitionColumn(subqueryTree, groupTargetEntryList);
|
||||
if (!groupOnPartitionColumn)
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
errorDetail = "Group by list without partition column is currently "
|
||||
"unsupported when a subquery references a column "
|
||||
"from another query";
|
||||
}
|
||||
}
|
||||
|
||||
/* we don't support aggregates without group by */
|
||||
if (subqueryTree->hasAggs && (subqueryTree->groupClause == NULL))
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
errorDetail = "Aggregates without group by are currently unsupported "
|
||||
"when a subquery references a column from another query";
|
||||
}
|
||||
|
||||
/* having clause without group by on partition column is not supported */
|
||||
if (subqueryTree->havingQual && (subqueryTree->groupClause == NULL))
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
errorDetail = "Having qual without group by on partition column is "
|
||||
"currently unsupported when a subquery references "
|
||||
"a column from another query";
|
||||
}
|
||||
|
||||
/*
|
||||
* We support window functions when the window function
|
||||
* is partitioned on distribution column.
|
||||
*/
|
||||
if (subqueryTree->hasWindowFuncs && !SafeToPushdownWindowFunction(subqueryTree,
|
||||
&errorInfo))
|
||||
{
|
||||
errorDetail = (char *) errorInfo->data;
|
||||
preconditionsSatisfied = false;
|
||||
}
|
||||
|
||||
/* distinct clause list must include partition column */
|
||||
if (subqueryTree->distinctClause)
|
||||
{
|
||||
List *distinctClauseList = subqueryTree->distinctClause;
|
||||
List *targetEntryList = subqueryTree->targetList;
|
||||
List *distinctTargetEntryList = GroupTargetEntryList(distinctClauseList,
|
||||
targetEntryList);
|
||||
bool distinctOnPartitionColumn =
|
||||
TargetListOnPartitionColumn(subqueryTree, distinctTargetEntryList);
|
||||
if (!distinctOnPartitionColumn)
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
errorDetail = "Distinct on columns without partition column is "
|
||||
"currently unsupported";
|
||||
}
|
||||
return deferredError;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1080,6 +1009,108 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* DeferErrorIfSubqueryRequiresMerge returns a deferred error if the subquery
|
||||
* requires a merge step on the coordinator (e.g. limit, group by non-distribution
|
||||
* column, etc.).
|
||||
*/
|
||||
static DeferredErrorMessage *
|
||||
DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree)
|
||||
{
|
||||
bool preconditionsSatisfied = true;
|
||||
char *errorDetail = NULL;
|
||||
|
||||
if (subqueryTree->limitOffset)
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
errorDetail = "Offset clause is currently unsupported when a subquery "
|
||||
"references a column from another query";
|
||||
}
|
||||
|
||||
/* limit is not supported when SubqueryPushdown is not set */
|
||||
if (subqueryTree->limitCount && !SubqueryPushdown)
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
errorDetail = "Limit in subquery is currently unsupported when a "
|
||||
"subquery references a column from another query";
|
||||
}
|
||||
|
||||
/* group clause list must include partition column */
|
||||
if (subqueryTree->groupClause)
|
||||
{
|
||||
List *groupClauseList = subqueryTree->groupClause;
|
||||
List *targetEntryList = subqueryTree->targetList;
|
||||
List *groupTargetEntryList = GroupTargetEntryList(groupClauseList,
|
||||
targetEntryList);
|
||||
bool groupOnPartitionColumn =
|
||||
TargetListOnPartitionColumn(subqueryTree, groupTargetEntryList);
|
||||
if (!groupOnPartitionColumn)
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
errorDetail = "Group by list without partition column is currently "
|
||||
"unsupported when a subquery references a column "
|
||||
"from another query";
|
||||
}
|
||||
}
|
||||
|
||||
/* we don't support aggregates without group by */
|
||||
if (subqueryTree->hasAggs && (subqueryTree->groupClause == NULL))
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
errorDetail = "Aggregates without group by are currently unsupported "
|
||||
"when a subquery references a column from another query";
|
||||
}
|
||||
|
||||
/* having clause without group by on partition column is not supported */
|
||||
if (subqueryTree->havingQual && (subqueryTree->groupClause == NULL))
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
errorDetail = "Having qual without group by on partition column is "
|
||||
"currently unsupported when a subquery references "
|
||||
"a column from another query";
|
||||
}
|
||||
|
||||
/*
|
||||
* We support window functions when the window function
|
||||
* is partitioned on distribution column.
|
||||
*/
|
||||
StringInfo errorInfo = NULL;
|
||||
if (subqueryTree->hasWindowFuncs && !SafeToPushdownWindowFunction(subqueryTree,
|
||||
&errorInfo))
|
||||
{
|
||||
errorDetail = (char *) errorInfo->data;
|
||||
preconditionsSatisfied = false;
|
||||
}
|
||||
|
||||
/* distinct clause list must include partition column */
|
||||
if (subqueryTree->distinctClause)
|
||||
{
|
||||
List *distinctClauseList = subqueryTree->distinctClause;
|
||||
List *targetEntryList = subqueryTree->targetList;
|
||||
List *distinctTargetEntryList = GroupTargetEntryList(distinctClauseList,
|
||||
targetEntryList);
|
||||
bool distinctOnPartitionColumn =
|
||||
TargetListOnPartitionColumn(subqueryTree, distinctTargetEntryList);
|
||||
if (!distinctOnPartitionColumn)
|
||||
{
|
||||
preconditionsSatisfied = false;
|
||||
errorDetail = "Distinct on columns without partition column is "
|
||||
"currently unsupported";
|
||||
}
|
||||
}
|
||||
|
||||
/* finally check and return deferred if not satisfied */
|
||||
if (!preconditionsSatisfied)
|
||||
{
|
||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||
"cannot push down this subquery",
|
||||
errorDetail, NULL);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* DeferErrorIfUnsupportedTableCombination checks if the given query tree contains any
|
||||
* unsupported range table combinations. For this, the function walks over all
|
||||
|
|
|
@ -180,7 +180,6 @@ static bool IsLocalTableRteOrMatView(Node *node);
|
|||
static DistributedSubPlan * CreateDistributedSubPlan(uint32 subPlanId,
|
||||
Query *subPlanQuery);
|
||||
static bool CteReferenceListWalker(Node *node, CteReferenceWalkerContext *context);
|
||||
static bool ContainsReferencesToOuterQuery(Query *query);
|
||||
static bool ContainsReferencesToOuterQueryWalker(Node *node,
|
||||
VarLevelsUpWalkerContext *context);
|
||||
static bool NodeContainsSubqueryReferencingOuterQuery(Node *node);
|
||||
|
@ -1288,7 +1287,7 @@ CteReferenceListWalker(Node *node, CteReferenceWalkerContext *context)
|
|||
* anything that points outside of the query itself. Such queries cannot be
|
||||
* planned recursively.
|
||||
*/
|
||||
static bool
|
||||
bool
|
||||
ContainsReferencesToOuterQuery(Query *query)
|
||||
{
|
||||
VarLevelsUpWalkerContext context = { 0 };
|
||||
|
|
|
@ -49,4 +49,7 @@ extern void ReplaceRTERelationWithRteSubquery(RangeTblEntry *rangeTableEntry,
|
|||
RecursivePlanningContext *context);
|
||||
extern bool IsRecursivelyPlannableRelation(RangeTblEntry *rangeTableEntry);
|
||||
extern bool IsRelationLocalTableOrMatView(Oid relationId);
|
||||
extern bool ContainsReferencesToOuterQuery(Query *query);
|
||||
|
||||
|
||||
#endif /* RECURSIVE_PLANNING_H */
|
||||
|
|
|
@ -484,7 +484,7 @@ DEBUG: query has a single distribution column value: 1
|
|||
41 | 11814
|
||||
(5 rows)
|
||||
|
||||
-- subqueries are not supported in SELECT clause
|
||||
-- subqueries in SELECT clause
|
||||
SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash_mx a2 WHERE a.id = a2.id LIMIT 1)
|
||||
AS special_price FROM articles_hash_mx a;
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
|
|
|
@ -1227,7 +1227,7 @@ FROM
|
|||
WHERE
|
||||
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||
users.value_2 > 1 and users.value_2 < 3
|
||||
LIMIT 1) "some_users_data"
|
||||
ORDER BY 1 LIMIT 1) "some_users_data"
|
||||
ON TRUE
|
||||
ORDER BY
|
||||
user_id
|
||||
|
@ -1240,9 +1240,7 @@ limit 50;
|
|||
|
||||
-- reset subquery_pushdown
|
||||
SET citus.subquery_pushdown to OFF;
|
||||
-- we recursively plan recent_events_1
|
||||
-- but not some_users_data since it has a reference
|
||||
-- from an outer query which is not recursively planned
|
||||
-- mixture of recursively planned subqueries and correlated subqueries
|
||||
SELECT "some_users_data".user_id, lastseen
|
||||
FROM
|
||||
(SELECT user_id, max(time) AS lastseen
|
||||
|
@ -1270,24 +1268,21 @@ FROM
|
|||
WHERE
|
||||
"users"."value_1" = "some_recent_users"."user_id" AND
|
||||
users.value_2 > 1 and users.value_2 < 3
|
||||
LIMIT 1) "some_users_data"
|
||||
ORDER BY 1 LIMIT 1) "some_users_data"
|
||||
ON TRUE
|
||||
ORDER BY
|
||||
user_id
|
||||
limit 50;
|
||||
user_id | lastseen
|
||||
---------------------------------------------------------------------
|
||||
1 | Thu Nov 23 18:08:26.550729 2017
|
||||
2 | Thu Nov 23 17:26:14.563216 2017
|
||||
3 | Thu Nov 23 18:08:26.550729 2017
|
||||
3 | Thu Nov 23 17:26:14.563216 2017
|
||||
5 | Thu Nov 23 18:08:26.550729 2017
|
||||
5 | Thu Nov 23 17:26:14.563216 2017
|
||||
6 | Thu Nov 23 18:08:26.550729 2017
|
||||
(6 rows)
|
||||
|
||||
-- we recursively plan some queries but fail in the end
|
||||
-- since some_users_data since it has a reference
|
||||
-- from an outer query which is not recursively planned
|
||||
SELECT "some_users_data".user_id, lastseen
|
||||
FROM
|
||||
(SELECT 2 * user_id as user_id, max(time) AS lastseen
|
||||
|
@ -1315,7 +1310,7 @@ FROM
|
|||
WHERE
|
||||
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||
users.value_2 > 1 and users.value_2 < 3
|
||||
LIMIT 1) "some_users_data"
|
||||
ORDER BY 1 LIMIT 1) "some_users_data"
|
||||
ON TRUE
|
||||
ORDER BY
|
||||
user_id
|
||||
|
@ -1379,7 +1374,7 @@ FROM
|
|||
WHERE
|
||||
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||
"users"."value_2" > 4
|
||||
LIMIT 1) "some_users_data"
|
||||
ORDER BY 1 LIMIT 1) "some_users_data"
|
||||
ON TRUE
|
||||
ORDER BY
|
||||
lastseen DESC
|
||||
|
@ -1447,7 +1442,7 @@ SELECT "some_users_data".user_id, MAX(lastseen), count(*)
|
|||
WHERE
|
||||
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||
"users"."value_2" > 4
|
||||
LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY 1 LIMIT 1) "some_users_data" ON true
|
||||
GROUP BY 1
|
||||
ORDER BY 2, 1 DESC
|
||||
LIMIT 10;
|
||||
|
@ -1504,7 +1499,7 @@ FROM
|
|||
WHERE
|
||||
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||
"users"."value_2" > 4
|
||||
LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY 1 LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY
|
||||
lastseen DESC
|
||||
LIMIT 10) "some_users"
|
||||
|
@ -1519,7 +1514,7 @@ DEBUG: push down of limit count: 10
|
|||
DEBUG: generating subplan XXX_2 for subquery SELECT filter_users_1.user_id, last_events_1."time" AS lastseen FROM ((SELECT user_where_1_1.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_1 OPERATOR(pg_catalog.>) 2))) user_where_1_1 JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) user_where_1_join_1 ON ((user_where_1_1.user_id OPERATOR(pg_catalog.<>) user_where_1_join_1.user_id)))) filter_users_1 JOIN LATERAL (SELECT events.user_id, events."time" FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.user_id OPERATOR(pg_catalog.=) filter_users_1.user_id)) ORDER BY events."time" DESC LIMIT 1) last_events_1 ON (true)) ORDER BY last_events_1."time" DESC LIMIT 10
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: push down of limit count: 10
|
||||
DEBUG: generating subplan XXX_3 for subquery SELECT some_users_data.user_id, some_recent_users.lastseen FROM ((SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_recent_users JOIN LATERAL (SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.=) some_recent_users.user_id) AND (users.value_2 OPERATOR(pg_catalog.>) 4)) LIMIT 1) some_users_data ON (true)) ORDER BY some_recent_users.lastseen DESC LIMIT 10
|
||||
DEBUG: generating subplan XXX_3 for subquery SELECT some_users_data.user_id, some_recent_users.lastseen FROM ((SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_recent_users JOIN LATERAL (SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.=) some_recent_users.user_id) AND (users.value_2 OPERATOR(pg_catalog.>) 4)) ORDER BY users.user_id LIMIT 1) some_users_data ON (true)) ORDER BY some_recent_users.lastseen DESC LIMIT 10
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, lastseen FROM (SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_users ORDER BY user_id DESC, lastseen DESC LIMIT 10
|
||||
DEBUG: Creating router plan
|
||||
user_id | lastseen
|
||||
|
@ -1584,7 +1579,7 @@ FROM
|
|||
WHERE
|
||||
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||
"users"."value_2" > 4
|
||||
LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY 1 LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY
|
||||
lastseen DESC
|
||||
LIMIT 10) "some_users"
|
||||
|
@ -1595,7 +1590,7 @@ DEBUG: generating subplan XXX_1 for subquery SELECT user_id, value_1 FROM publi
|
|||
DEBUG: push down of limit count: 10
|
||||
DEBUG: generating subplan XXX_2 for subquery SELECT filter_users_1.user_id, last_events_1."time" AS lastseen FROM ((SELECT user_where_1_1.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_1 OPERATOR(pg_catalog.>) 2))) user_where_1_1 JOIN (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) user_where_1_join_1 ON ((user_where_1_1.user_id OPERATOR(pg_catalog.=) user_where_1_join_1.value_1)))) filter_users_1 JOIN LATERAL (SELECT events.user_id, events."time" FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.user_id OPERATOR(pg_catalog.=) filter_users_1.user_id)) ORDER BY events."time" DESC LIMIT 1) last_events_1 ON (true)) ORDER BY last_events_1."time" DESC LIMIT 10
|
||||
DEBUG: push down of limit count: 10
|
||||
DEBUG: generating subplan XXX_3 for subquery SELECT some_users_data.user_id, some_recent_users.lastseen FROM ((SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_recent_users JOIN LATERAL (SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.=) some_recent_users.user_id) AND (users.value_2 OPERATOR(pg_catalog.>) 4)) LIMIT 1) some_users_data ON (true)) ORDER BY some_recent_users.lastseen DESC LIMIT 10
|
||||
DEBUG: generating subplan XXX_3 for subquery SELECT some_users_data.user_id, some_recent_users.lastseen FROM ((SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_recent_users JOIN LATERAL (SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.=) some_recent_users.user_id) AND (users.value_2 OPERATOR(pg_catalog.>) 4)) ORDER BY users.user_id LIMIT 1) some_users_data ON (true)) ORDER BY some_recent_users.lastseen DESC LIMIT 10
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, lastseen FROM (SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_users ORDER BY user_id DESC, lastseen DESC LIMIT 10
|
||||
user_id | lastseen
|
||||
---------------------------------------------------------------------
|
||||
|
@ -1667,9 +1662,7 @@ ORDER BY
|
|||
user_id DESC, lastseen DESC
|
||||
LIMIT 10;
|
||||
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
||||
-- not pushdownable since lower LATERAL JOIN is not on the partition key
|
||||
-- not recursively plannable due to LATERAL join where there is a reference
|
||||
-- from an outer query
|
||||
-- complex lateral join between inner join and correlated subquery
|
||||
SELECT user_id, lastseen
|
||||
FROM
|
||||
(SELECT
|
||||
|
@ -1715,7 +1708,7 @@ FROM
|
|||
WHERE
|
||||
"users"."value_1" = "some_recent_users"."user_id" AND
|
||||
"users"."value_2" > 4
|
||||
LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY 1 LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY
|
||||
lastseen DESC
|
||||
LIMIT 10) "some_users"
|
||||
|
@ -2339,9 +2332,7 @@ LIMIT 10;
|
|||
(1 row)
|
||||
|
||||
SET citus.subquery_pushdown to OFF;
|
||||
-- not pushdownable since lower LATERAL JOIN is not on the partition key
|
||||
-- not recursively plannable due to LATERAL join where there is a reference
|
||||
-- from an outer query
|
||||
-- on side of the lateral join can be recursively plannen, then pushed down
|
||||
SELECT *
|
||||
FROM
|
||||
(SELECT
|
||||
|
@ -2372,7 +2363,7 @@ FROM
|
|||
WHERE
|
||||
"users"."value_2" = "some_recent_users"."user_id" AND
|
||||
value_2 > 4
|
||||
LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY 1 LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY
|
||||
value_2 DESC
|
||||
LIMIT 10) "some_users"
|
||||
|
|
|
@ -967,6 +967,186 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c
|
|||
24
|
||||
(1 row)
|
||||
|
||||
-- two levels of correlation should also allow
|
||||
-- merge step in the subquery
|
||||
SELECT sum(value_1)
|
||||
FROM users_table u
|
||||
WHERE EXISTS
|
||||
(SELECT 1
|
||||
FROM events_table e
|
||||
WHERE u.user_id = e.user_id AND
|
||||
EXISTS
|
||||
(SELECT 1
|
||||
FROM users_table u2
|
||||
WHERE u2.user_id = u.user_id AND u2.value_1 = 5
|
||||
LIMIT 1));
|
||||
sum
|
||||
---------------------------------------------------------------------
|
||||
216
|
||||
(1 row)
|
||||
|
||||
-- correlated subquery in WHERE, with a slightly
|
||||
-- different syntax that the result of the subquery
|
||||
-- is compared with a constant
|
||||
SELECT sum(value_1)
|
||||
FROM users_table u1
|
||||
WHERE (SELECT COUNT(DISTINCT e1.value_2)
|
||||
FROM events_table e1
|
||||
WHERE e1.user_id = u1.user_id
|
||||
) > 115;
|
||||
sum
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- a correlated subquery which requires merge step
|
||||
-- can be pushed down on UPDATE/DELETE queries as well
|
||||
-- rollback to keep the rest of the tests unchanged
|
||||
BEGIN;
|
||||
UPDATE users_table u1
|
||||
SET value_1 = (SELECT count(DISTINCT value_2)
|
||||
FROM events_table e1
|
||||
WHERE e1.user_id = u1.user_id);
|
||||
DELETE FROM users_table u1 WHERE (SELECT count(DISTINCT value_2)
|
||||
FROM events_table e1
|
||||
WHERE e1.user_id = u1.user_id) > 10;
|
||||
ROLLBACK;
|
||||
-- a correlated anti-join can also be pushed down even if the subquery
|
||||
-- has a LIMIT
|
||||
SELECT avg(value_1)
|
||||
FROM users_table u
|
||||
WHERE NOT EXISTS
|
||||
(SELECT 'XXX'
|
||||
FROM events_table e
|
||||
WHERE u.user_id = e.user_id and e.value_2 > 10000 LIMIT 1);
|
||||
avg
|
||||
---------------------------------------------------------------------
|
||||
2.5544554455445545
|
||||
(1 row)
|
||||
|
||||
-- a [correlated] lateral join can also be pushed down even if the subquery
|
||||
-- has an aggregate wout a GROUP BY
|
||||
SELECT
|
||||
max(min_of_val_2), max(u1.value_1)
|
||||
FROM
|
||||
users_table u1
|
||||
LEFT JOIN LATERAL
|
||||
(SELECT min(e1.value_2) as min_of_val_2 FROM events_table e1 WHERE e1.user_id = u1.user_id) as foo ON (true);
|
||||
max | max
|
||||
---------------------------------------------------------------------
|
||||
1 | 5
|
||||
(1 row)
|
||||
|
||||
-- a self join is followed by a correlated subquery
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
users_table u1 JOIN users_table u2 USING (user_id)
|
||||
WHERE
|
||||
u1.value_1 < u2.value_1 AND
|
||||
(SELECT
|
||||
count(*)
|
||||
FROM
|
||||
events_table e1
|
||||
WHERE
|
||||
e1.user_id = u2.user_id) > 10;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Custom Scan (Citus Adaptive)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=xxxxx dbname=regression
|
||||
-> Hash Join
|
||||
Hash Cond: (u2.user_id = u1.user_id)
|
||||
Join Filter: (u1.value_1 < u2.value_1)
|
||||
-> Seq Scan on users_table_1400256 u2
|
||||
Filter: ((SubPlan 1) > 10)
|
||||
SubPlan 1
|
||||
-> Aggregate
|
||||
-> Seq Scan on events_table_1400260 e1
|
||||
Filter: (user_id = u2.user_id)
|
||||
-> Hash
|
||||
-> Seq Scan on users_table_1400256 u1
|
||||
(16 rows)
|
||||
|
||||
-- when the colocated join of the FROM clause
|
||||
-- entries happen on WHERE clause, Citus cannot
|
||||
-- pushdown
|
||||
-- Likely that the colocation checks should be
|
||||
-- improved
|
||||
SELECT
|
||||
u1.user_id, u2.user_id
|
||||
FROM
|
||||
users_table u1, users_table u2
|
||||
WHERE
|
||||
u1.value_1 < u2.value_1 AND
|
||||
(SELECT
|
||||
count(*)
|
||||
FROM
|
||||
events_table e1
|
||||
WHERE
|
||||
e1.user_id = u2.user_id AND
|
||||
u1.user_id = u2.user_id) > 10
|
||||
ORDER BY 1,2;
|
||||
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
||||
-- create a view that contains correlated subquery
|
||||
CREATE TEMPORARY VIEW correlated_subquery_view AS
|
||||
SELECT u1.user_id
|
||||
FROM users_table u1
|
||||
WHERE (SELECT COUNT(DISTINCT e1.value_2)
|
||||
FROM events_table e1
|
||||
WHERE e1.user_id = u1.user_id
|
||||
) > 0;
|
||||
SELECT sum(user_id) FROM correlated_subquery_view;
|
||||
sum
|
||||
---------------------------------------------------------------------
|
||||
376
|
||||
(1 row)
|
||||
|
||||
-- now, join the view with another correlated subquery
|
||||
SELECT
|
||||
sum(mx)
|
||||
FROM
|
||||
correlated_subquery_view
|
||||
LEFT JOIN LATERAL
|
||||
(SELECT max(value_2) as mx FROM events_table WHERE correlated_subquery_view.user_id = events_table.user_id) as foo ON (true);
|
||||
sum
|
||||
---------------------------------------------------------------------
|
||||
459
|
||||
(1 row)
|
||||
|
||||
-- as an edge case, JOIN is on false
|
||||
SELECT
|
||||
sum(mx)
|
||||
FROM
|
||||
correlated_subquery_view
|
||||
LEFT JOIN LATERAL
|
||||
(SELECT max(value_2) as mx FROM events_table WHERE correlated_subquery_view.user_id = events_table.user_id) as foo ON (false);
|
||||
sum
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT sum(value_1)
|
||||
FROM users_table u1
|
||||
WHERE (SELECT COUNT(DISTINCT e1.value_2)
|
||||
FROM events_table e1
|
||||
WHERE e1.user_id = u1.user_id AND false
|
||||
) > 115;
|
||||
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
||||
SELECT sum(value_1)
|
||||
FROM users_table u1
|
||||
WHERE (SELECT COUNT(DISTINCT e1.value_2)
|
||||
FROM events_table e1
|
||||
WHERE e1.user_id = u1.user_id
|
||||
) > 115 AND false;
|
||||
sum
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SET client_min_messages TO DEFAULT;
|
||||
DROP TABLE local_table;
|
||||
DROP SCHEMA subquery_in_where CASCADE;
|
||||
|
|
|
@ -235,7 +235,7 @@ FROM articles_hash_mx, (SELECT id, word_count FROM articles_hash_mx) AS test
|
|||
WHERE test.id = articles_hash_mx.id and articles_hash_mx.author_id = 1
|
||||
ORDER BY articles_hash_mx.id;
|
||||
|
||||
-- subqueries are not supported in SELECT clause
|
||||
-- subqueries in SELECT clause
|
||||
SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash_mx a2 WHERE a.id = a2.id LIMIT 1)
|
||||
AS special_price FROM articles_hash_mx a;
|
||||
|
||||
|
|
|
@ -1119,7 +1119,7 @@ FROM
|
|||
WHERE
|
||||
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||
users.value_2 > 1 and users.value_2 < 3
|
||||
LIMIT 1) "some_users_data"
|
||||
ORDER BY 1 LIMIT 1) "some_users_data"
|
||||
ON TRUE
|
||||
ORDER BY
|
||||
user_id
|
||||
|
@ -1128,9 +1128,7 @@ limit 50;
|
|||
-- reset subquery_pushdown
|
||||
SET citus.subquery_pushdown to OFF;
|
||||
|
||||
-- we recursively plan recent_events_1
|
||||
-- but not some_users_data since it has a reference
|
||||
-- from an outer query which is not recursively planned
|
||||
-- mixture of recursively planned subqueries and correlated subqueries
|
||||
SELECT "some_users_data".user_id, lastseen
|
||||
FROM
|
||||
(SELECT user_id, max(time) AS lastseen
|
||||
|
@ -1158,15 +1156,12 @@ FROM
|
|||
WHERE
|
||||
"users"."value_1" = "some_recent_users"."user_id" AND
|
||||
users.value_2 > 1 and users.value_2 < 3
|
||||
LIMIT 1) "some_users_data"
|
||||
ORDER BY 1 LIMIT 1) "some_users_data"
|
||||
ON TRUE
|
||||
ORDER BY
|
||||
user_id
|
||||
limit 50;
|
||||
|
||||
-- we recursively plan some queries but fail in the end
|
||||
-- since some_users_data since it has a reference
|
||||
-- from an outer query which is not recursively planned
|
||||
SELECT "some_users_data".user_id, lastseen
|
||||
FROM
|
||||
(SELECT 2 * user_id as user_id, max(time) AS lastseen
|
||||
|
@ -1194,7 +1189,7 @@ FROM
|
|||
WHERE
|
||||
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||
users.value_2 > 1 and users.value_2 < 3
|
||||
LIMIT 1) "some_users_data"
|
||||
ORDER BY 1 LIMIT 1) "some_users_data"
|
||||
ON TRUE
|
||||
ORDER BY
|
||||
user_id
|
||||
|
@ -1251,7 +1246,7 @@ FROM
|
|||
WHERE
|
||||
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||
"users"."value_2" > 4
|
||||
LIMIT 1) "some_users_data"
|
||||
ORDER BY 1 LIMIT 1) "some_users_data"
|
||||
ON TRUE
|
||||
ORDER BY
|
||||
lastseen DESC
|
||||
|
@ -1306,7 +1301,7 @@ SELECT "some_users_data".user_id, MAX(lastseen), count(*)
|
|||
WHERE
|
||||
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||
"users"."value_2" > 4
|
||||
LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY 1 LIMIT 1) "some_users_data" ON true
|
||||
GROUP BY 1
|
||||
ORDER BY 2, 1 DESC
|
||||
LIMIT 10;
|
||||
|
@ -1360,7 +1355,7 @@ FROM
|
|||
WHERE
|
||||
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||
"users"."value_2" > 4
|
||||
LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY 1 LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY
|
||||
lastseen DESC
|
||||
LIMIT 10) "some_users"
|
||||
|
@ -1418,7 +1413,7 @@ FROM
|
|||
WHERE
|
||||
"users"."user_id" = "some_recent_users"."user_id" AND
|
||||
"users"."value_2" > 4
|
||||
LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY 1 LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY
|
||||
lastseen DESC
|
||||
LIMIT 10) "some_users"
|
||||
|
@ -1484,9 +1479,7 @@ ORDER BY
|
|||
user_id DESC, lastseen DESC
|
||||
LIMIT 10;
|
||||
|
||||
-- not pushdownable since lower LATERAL JOIN is not on the partition key
|
||||
-- not recursively plannable due to LATERAL join where there is a reference
|
||||
-- from an outer query
|
||||
-- complex lateral join between inner join and correlated subquery
|
||||
SELECT user_id, lastseen
|
||||
FROM
|
||||
(SELECT
|
||||
|
@ -1532,7 +1525,7 @@ FROM
|
|||
WHERE
|
||||
"users"."value_1" = "some_recent_users"."user_id" AND
|
||||
"users"."value_2" > 4
|
||||
LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY 1 LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY
|
||||
lastseen DESC
|
||||
LIMIT 10) "some_users"
|
||||
|
@ -2033,9 +2026,7 @@ ORDER BY
|
|||
LIMIT 10;
|
||||
SET citus.subquery_pushdown to OFF;
|
||||
|
||||
-- not pushdownable since lower LATERAL JOIN is not on the partition key
|
||||
-- not recursively plannable due to LATERAL join where there is a reference
|
||||
-- from an outer query
|
||||
-- on side of the lateral join can be recursively plannen, then pushed down
|
||||
SELECT *
|
||||
FROM
|
||||
(SELECT
|
||||
|
@ -2066,7 +2057,7 @@ FROM
|
|||
WHERE
|
||||
"users"."value_2" = "some_recent_users"."user_id" AND
|
||||
value_2 > 4
|
||||
LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY 1 LIMIT 1) "some_users_data" ON true
|
||||
ORDER BY
|
||||
value_2 DESC
|
||||
LIMIT 10) "some_users"
|
||||
|
|
|
@ -713,6 +713,143 @@ WHERE
|
|||
WHERE u.value_2 > 3
|
||||
GROUP BY r.value_2 HAVING min(r.value_3) > 0);
|
||||
|
||||
-- two levels of correlation should also allow
|
||||
-- merge step in the subquery
|
||||
SELECT sum(value_1)
|
||||
FROM users_table u
|
||||
WHERE EXISTS
|
||||
(SELECT 1
|
||||
FROM events_table e
|
||||
WHERE u.user_id = e.user_id AND
|
||||
EXISTS
|
||||
(SELECT 1
|
||||
FROM users_table u2
|
||||
WHERE u2.user_id = u.user_id AND u2.value_1 = 5
|
||||
LIMIT 1));
|
||||
|
||||
-- correlated subquery in WHERE, with a slightly
|
||||
-- different syntax that the result of the subquery
|
||||
-- is compared with a constant
|
||||
SELECT sum(value_1)
|
||||
FROM users_table u1
|
||||
WHERE (SELECT COUNT(DISTINCT e1.value_2)
|
||||
FROM events_table e1
|
||||
WHERE e1.user_id = u1.user_id
|
||||
) > 115;
|
||||
|
||||
|
||||
-- a correlated subquery which requires merge step
|
||||
-- can be pushed down on UPDATE/DELETE queries as well
|
||||
-- rollback to keep the rest of the tests unchanged
|
||||
BEGIN;
|
||||
UPDATE users_table u1
|
||||
SET value_1 = (SELECT count(DISTINCT value_2)
|
||||
FROM events_table e1
|
||||
WHERE e1.user_id = u1.user_id);
|
||||
|
||||
DELETE FROM users_table u1 WHERE (SELECT count(DISTINCT value_2)
|
||||
FROM events_table e1
|
||||
WHERE e1.user_id = u1.user_id) > 10;
|
||||
|
||||
ROLLBACK;
|
||||
|
||||
-- a correlated anti-join can also be pushed down even if the subquery
|
||||
-- has a LIMIT
|
||||
SELECT avg(value_1)
|
||||
FROM users_table u
|
||||
WHERE NOT EXISTS
|
||||
(SELECT 'XXX'
|
||||
FROM events_table e
|
||||
WHERE u.user_id = e.user_id and e.value_2 > 10000 LIMIT 1);
|
||||
|
||||
-- a [correlated] lateral join can also be pushed down even if the subquery
|
||||
-- has an aggregate wout a GROUP BY
|
||||
SELECT
|
||||
max(min_of_val_2), max(u1.value_1)
|
||||
FROM
|
||||
users_table u1
|
||||
LEFT JOIN LATERAL
|
||||
(SELECT min(e1.value_2) as min_of_val_2 FROM events_table e1 WHERE e1.user_id = u1.user_id) as foo ON (true);
|
||||
|
||||
|
||||
-- a self join is followed by a correlated subquery
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
users_table u1 JOIN users_table u2 USING (user_id)
|
||||
WHERE
|
||||
u1.value_1 < u2.value_1 AND
|
||||
(SELECT
|
||||
count(*)
|
||||
FROM
|
||||
events_table e1
|
||||
WHERE
|
||||
e1.user_id = u2.user_id) > 10;
|
||||
|
||||
-- when the colocated join of the FROM clause
|
||||
-- entries happen on WHERE clause, Citus cannot
|
||||
-- pushdown
|
||||
-- Likely that the colocation checks should be
|
||||
-- improved
|
||||
SELECT
|
||||
u1.user_id, u2.user_id
|
||||
FROM
|
||||
users_table u1, users_table u2
|
||||
WHERE
|
||||
u1.value_1 < u2.value_1 AND
|
||||
(SELECT
|
||||
count(*)
|
||||
FROM
|
||||
events_table e1
|
||||
WHERE
|
||||
e1.user_id = u2.user_id AND
|
||||
u1.user_id = u2.user_id) > 10
|
||||
ORDER BY 1,2;
|
||||
|
||||
|
||||
-- create a view that contains correlated subquery
|
||||
CREATE TEMPORARY VIEW correlated_subquery_view AS
|
||||
SELECT u1.user_id
|
||||
FROM users_table u1
|
||||
WHERE (SELECT COUNT(DISTINCT e1.value_2)
|
||||
FROM events_table e1
|
||||
WHERE e1.user_id = u1.user_id
|
||||
) > 0;
|
||||
|
||||
SELECT sum(user_id) FROM correlated_subquery_view;
|
||||
|
||||
-- now, join the view with another correlated subquery
|
||||
SELECT
|
||||
sum(mx)
|
||||
FROM
|
||||
correlated_subquery_view
|
||||
LEFT JOIN LATERAL
|
||||
(SELECT max(value_2) as mx FROM events_table WHERE correlated_subquery_view.user_id = events_table.user_id) as foo ON (true);
|
||||
|
||||
-- as an edge case, JOIN is on false
|
||||
SELECT
|
||||
sum(mx)
|
||||
FROM
|
||||
correlated_subquery_view
|
||||
LEFT JOIN LATERAL
|
||||
(SELECT max(value_2) as mx FROM events_table WHERE correlated_subquery_view.user_id = events_table.user_id) as foo ON (false);
|
||||
|
||||
|
||||
SELECT sum(value_1)
|
||||
FROM users_table u1
|
||||
WHERE (SELECT COUNT(DISTINCT e1.value_2)
|
||||
FROM events_table e1
|
||||
WHERE e1.user_id = u1.user_id AND false
|
||||
) > 115;
|
||||
|
||||
SELECT sum(value_1)
|
||||
FROM users_table u1
|
||||
WHERE (SELECT COUNT(DISTINCT e1.value_2)
|
||||
FROM events_table e1
|
||||
WHERE e1.user_id = u1.user_id
|
||||
) > 115 AND false;
|
||||
|
||||
SET client_min_messages TO DEFAULT;
|
||||
|
||||
DROP TABLE local_table;
|
||||
|
|
Loading…
Reference in New Issue