Merge pull request #4385 from citusdata/marcocitus/correlated-subqueries

pull/4422/head
Marco Slot 2020-12-16 11:55:43 +01:00 committed by GitHub
commit 61bf2fb477
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 937 additions and 204 deletions

View File

@ -34,6 +34,7 @@
#include "distributed/pg_dist_partition.h" #include "distributed/pg_dist_partition.h"
#include "distributed/query_utils.h" #include "distributed/query_utils.h"
#include "distributed/query_pushdown_planning.h" #include "distributed/query_pushdown_planning.h"
#include "distributed/recursive_planning.h"
#include "distributed/relation_restriction_equivalence.h" #include "distributed/relation_restriction_equivalence.h"
#include "distributed/version_compat.h" #include "distributed/version_compat.h"
#include "nodes/nodeFuncs.h" #include "nodes/nodeFuncs.h"
@ -78,6 +79,7 @@ static RecurringTuplesType FromClauseRecurringTupleType(Query *queryTree);
static DeferredErrorMessage * DeferredErrorIfUnsupportedRecurringTuplesJoin( static DeferredErrorMessage * DeferredErrorIfUnsupportedRecurringTuplesJoin(
PlannerRestrictionContext *plannerRestrictionContext); PlannerRestrictionContext *plannerRestrictionContext);
static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree); static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree);
static DeferredErrorMessage * DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree);
static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList); static bool ExtractSetOperationStatmentWalker(Node *node, List **setOperationList);
static RecurringTuplesType FetchFirstRecurType(PlannerInfo *plannerInfo, static RecurringTuplesType FetchFirstRecurType(PlannerInfo *plannerInfo,
Relids relids); Relids relids);
@ -911,7 +913,6 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
{ {
bool preconditionsSatisfied = true; bool preconditionsSatisfied = true;
char *errorDetail = NULL; char *errorDetail = NULL;
StringInfo errorInfo = NULL;
DeferredErrorMessage *deferredError = DeferErrorIfUnsupportedTableCombination( DeferredErrorMessage *deferredError = DeferErrorIfUnsupportedTableCombination(
subqueryTree); subqueryTree);
@ -928,19 +929,19 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
"functions"; "functions";
} }
if (subqueryTree->limitOffset) /*
* Correlated subqueries are effectively functions that are repeatedly called
* for the values of the vars that point to the outer query. We can liberally
* push down SQL features within such a function, as long as co-located join
* checks are applied.
*/
if (!ContainsReferencesToOuterQuery(subqueryTree))
{ {
preconditionsSatisfied = false; deferredError = DeferErrorIfSubqueryRequiresMerge(subqueryTree);
errorDetail = "Offset clause is currently unsupported when a subquery " if (deferredError)
"references a column from another query"; {
} return deferredError;
}
/* limit is not supported when SubqueryPushdown is not set */
if (subqueryTree->limitCount && !SubqueryPushdown)
{
preconditionsSatisfied = false;
errorDetail = "Limit in subquery is currently unsupported when a "
"subquery references a column from another query";
} }
/* /*
@ -981,24 +982,6 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
errorDetail = "For Update/Share commands are currently unsupported"; errorDetail = "For Update/Share commands are currently unsupported";
} }
/* group clause list must include partition column */
if (subqueryTree->groupClause)
{
List *groupClauseList = subqueryTree->groupClause;
List *targetEntryList = subqueryTree->targetList;
List *groupTargetEntryList = GroupTargetEntryList(groupClauseList,
targetEntryList);
bool groupOnPartitionColumn = TargetListOnPartitionColumn(subqueryTree,
groupTargetEntryList);
if (!groupOnPartitionColumn)
{
preconditionsSatisfied = false;
errorDetail = "Group by list without partition column is currently "
"unsupported when a subquery references a column "
"from another query";
}
}
/* grouping sets are not allowed in subqueries*/ /* grouping sets are not allowed in subqueries*/
if (subqueryTree->groupingSets) if (subqueryTree->groupingSets)
{ {
@ -1007,15 +990,67 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
"or ROLLUP"; "or ROLLUP";
} }
/* deferredError = DeferErrorIfFromClauseRecurs(subqueryTree);
* We support window functions when the window function if (deferredError)
* is partitioned on distribution column. {
*/ return deferredError;
if (subqueryTree->hasWindowFuncs && !SafeToPushdownWindowFunction(subqueryTree, }
&errorInfo))
/* finally check and return deferred if not satisfied */
if (!preconditionsSatisfied)
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"cannot push down this subquery",
errorDetail, NULL);
}
return NULL;
}
/*
* DeferErrorIfSubqueryRequiresMerge returns a deferred error if the subquery
* requires a merge step on the coordinator (e.g. limit, group by non-distribution
* column, etc.).
*/
static DeferredErrorMessage *
DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree)
{
bool preconditionsSatisfied = true;
char *errorDetail = NULL;
if (subqueryTree->limitOffset)
{ {
errorDetail = (char *) errorInfo->data;
preconditionsSatisfied = false; preconditionsSatisfied = false;
errorDetail = "Offset clause is currently unsupported when a subquery "
"references a column from another query";
}
/* limit is not supported when SubqueryPushdown is not set */
if (subqueryTree->limitCount && !SubqueryPushdown)
{
preconditionsSatisfied = false;
errorDetail = "Limit in subquery is currently unsupported when a "
"subquery references a column from another query";
}
/* group clause list must include partition column */
if (subqueryTree->groupClause)
{
List *groupClauseList = subqueryTree->groupClause;
List *targetEntryList = subqueryTree->targetList;
List *groupTargetEntryList = GroupTargetEntryList(groupClauseList,
targetEntryList);
bool groupOnPartitionColumn =
TargetListOnPartitionColumn(subqueryTree, groupTargetEntryList);
if (!groupOnPartitionColumn)
{
preconditionsSatisfied = false;
errorDetail = "Group by list without partition column is currently "
"unsupported when a subquery references a column "
"from another query";
}
} }
/* we don't support aggregates without group by */ /* we don't support aggregates without group by */
@ -1035,6 +1070,18 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
"a column from another query"; "a column from another query";
} }
/*
* We support window functions when the window function
* is partitioned on distribution column.
*/
StringInfo errorInfo = NULL;
if (subqueryTree->hasWindowFuncs && !SafeToPushdownWindowFunction(subqueryTree,
&errorInfo))
{
errorDetail = (char *) errorInfo->data;
preconditionsSatisfied = false;
}
/* distinct clause list must include partition column */ /* distinct clause list must include partition column */
if (subqueryTree->distinctClause) if (subqueryTree->distinctClause)
{ {
@ -1052,13 +1099,6 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
} }
} }
deferredError = DeferErrorIfFromClauseRecurs(subqueryTree);
if (deferredError)
{
return deferredError;
}
/* finally check and return deferred if not satisfied */ /* finally check and return deferred if not satisfied */
if (!preconditionsSatisfied) if (!preconditionsSatisfied)
{ {

View File

@ -180,7 +180,6 @@ static bool IsLocalTableRteOrMatView(Node *node);
static DistributedSubPlan * CreateDistributedSubPlan(uint32 subPlanId, static DistributedSubPlan * CreateDistributedSubPlan(uint32 subPlanId,
Query *subPlanQuery); Query *subPlanQuery);
static bool CteReferenceListWalker(Node *node, CteReferenceWalkerContext *context); static bool CteReferenceListWalker(Node *node, CteReferenceWalkerContext *context);
static bool ContainsReferencesToOuterQuery(Query *query);
static bool ContainsReferencesToOuterQueryWalker(Node *node, static bool ContainsReferencesToOuterQueryWalker(Node *node,
VarLevelsUpWalkerContext *context); VarLevelsUpWalkerContext *context);
static bool NodeContainsSubqueryReferencingOuterQuery(Node *node); static bool NodeContainsSubqueryReferencingOuterQuery(Node *node);
@ -1288,7 +1287,7 @@ CteReferenceListWalker(Node *node, CteReferenceWalkerContext *context)
* anything that points outside of the query itself. Such queries cannot be * anything that points outside of the query itself. Such queries cannot be
* planned recursively. * planned recursively.
*/ */
static bool bool
ContainsReferencesToOuterQuery(Query *query) ContainsReferencesToOuterQuery(Query *query)
{ {
VarLevelsUpWalkerContext context = { 0 }; VarLevelsUpWalkerContext context = { 0 };

View File

@ -49,4 +49,7 @@ extern void ReplaceRTERelationWithRteSubquery(RangeTblEntry *rangeTableEntry,
RecursivePlanningContext *context); RecursivePlanningContext *context);
extern bool IsRecursivelyPlannableRelation(RangeTblEntry *rangeTableEntry); extern bool IsRecursivelyPlannableRelation(RangeTblEntry *rangeTableEntry);
extern bool IsRelationLocalTableOrMatView(Oid relationId); extern bool IsRelationLocalTableOrMatView(Oid relationId);
extern bool ContainsReferencesToOuterQuery(Query *query);
#endif /* RECURSIVE_PLANNING_H */ #endif /* RECURSIVE_PLANNING_H */

View File

@ -773,8 +773,6 @@ WHERE
) )
GROUP BY GROUP BY
user_id; user_id;
ERROR: cannot push down this subquery
DETAIL: Window functions without PARTITION BY on distribution column is currently unsupported
INSERT INTO agg_results_window(user_id, value_2_agg) INSERT INTO agg_results_window(user_id, value_2_agg)
SELECT * FROM ( SELECT * FROM (
SELECT SELECT

View File

@ -484,11 +484,10 @@ DEBUG: query has a single distribution column value: 1
41 | 11814 41 | 11814
(5 rows) (5 rows)
-- subqueries are not supported in SELECT clause -- subqueries in SELECT clause
SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash_mx a2 WHERE a.id = a2.id LIMIT 1) SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash_mx a2 WHERE a.id = a2.id LIMIT 1)
AS special_price FROM articles_hash_mx a; AS special_price FROM articles_hash_mx a;
DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- simple lookup query -- simple lookup query
SELECT * SELECT *

View File

@ -677,7 +677,6 @@ DEBUG: query has a single distribution column value: 1
SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash a2 WHERE a.id = a2.id LIMIT 1) SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash a2 WHERE a.id = a2.id LIMIT 1)
AS special_price FROM articles_hash a; AS special_price FROM articles_hash a;
DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- simple lookup query -- simple lookup query
SELECT * SELECT *

View File

@ -423,7 +423,6 @@ DEBUG: query has a single distribution column value: 1
SELECT a.title AS name, (SELECT a2.id FROM articles_hash a2 WHERE a.id = a2.id LIMIT 1) SELECT a.title AS name, (SELECT a2.id FROM articles_hash a2 WHERE a.id = a2.id LIMIT 1)
AS special_price FROM articles_hash a; AS special_price FROM articles_hash a;
DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- simple lookup query just works -- simple lookup query just works
SELECT * SELECT *

View File

@ -1227,7 +1227,7 @@ FROM
WHERE WHERE
"users"."user_id" = "some_recent_users"."user_id" AND "users"."user_id" = "some_recent_users"."user_id" AND
users.value_2 > 1 and users.value_2 < 3 users.value_2 > 1 and users.value_2 < 3
LIMIT 1) "some_users_data" ORDER BY 1 LIMIT 1) "some_users_data"
ON TRUE ON TRUE
ORDER BY ORDER BY
user_id user_id
@ -1240,9 +1240,7 @@ limit 50;
-- reset subquery_pushdown -- reset subquery_pushdown
SET citus.subquery_pushdown to OFF; SET citus.subquery_pushdown to OFF;
-- we recursively plan recent_events_1 -- mixture of recursively planned subqueries and correlated subqueries
-- but not some_users_data since it has a reference
-- from an outer query which is not recursively planned
SELECT "some_users_data".user_id, lastseen SELECT "some_users_data".user_id, lastseen
FROM FROM
(SELECT user_id, max(time) AS lastseen (SELECT user_id, max(time) AS lastseen
@ -1270,16 +1268,21 @@ FROM
WHERE WHERE
"users"."value_1" = "some_recent_users"."user_id" AND "users"."value_1" = "some_recent_users"."user_id" AND
users.value_2 > 1 and users.value_2 < 3 users.value_2 > 1 and users.value_2 < 3
LIMIT 1) "some_users_data" ORDER BY 1 LIMIT 1) "some_users_data"
ON TRUE ON TRUE
ORDER BY ORDER BY
user_id user_id
limit 50; limit 50;
ERROR: cannot push down this subquery user_id | lastseen
DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query ---------------------------------------------------------------------
-- we recursively plan some queries but fail in the end 1 | Thu Nov 23 18:08:26.550729 2017
-- since some_users_data since it has a reference 2 | Thu Nov 23 17:26:14.563216 2017
-- from an outer query which is not recursively planned 3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 17:26:14.563216 2017
5 | Thu Nov 23 17:26:14.563216 2017
6 | Thu Nov 23 18:08:26.550729 2017
(6 rows)
SELECT "some_users_data".user_id, lastseen SELECT "some_users_data".user_id, lastseen
FROM FROM
(SELECT 2 * user_id as user_id, max(time) AS lastseen (SELECT 2 * user_id as user_id, max(time) AS lastseen
@ -1307,13 +1310,17 @@ FROM
WHERE WHERE
"users"."user_id" = "some_recent_users"."user_id" AND "users"."user_id" = "some_recent_users"."user_id" AND
users.value_2 > 1 and users.value_2 < 3 users.value_2 > 1 and users.value_2 < 3
LIMIT 1) "some_users_data" ORDER BY 1 LIMIT 1) "some_users_data"
ON TRUE ON TRUE
ORDER BY ORDER BY
user_id user_id
limit 50; limit 50;
ERROR: cannot push down this subquery user_id | lastseen
DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query ---------------------------------------------------------------------
4 | Thu Nov 23 17:26:14.563216 2017
6 | Thu Nov 23 18:08:26.550729 2017
(2 rows)
-- LATERAL JOINs used with INNER JOINs -- LATERAL JOINs used with INNER JOINs
SET citus.subquery_pushdown to ON; SET citus.subquery_pushdown to ON;
NOTICE: Setting citus.subquery_pushdown flag is discouraged becuase it forces the planner to pushdown certain queries, skipping relevant correctness checks. NOTICE: Setting citus.subquery_pushdown flag is discouraged becuase it forces the planner to pushdown certain queries, skipping relevant correctness checks.
@ -1367,7 +1374,7 @@ FROM
WHERE WHERE
"users"."user_id" = "some_recent_users"."user_id" AND "users"."user_id" = "some_recent_users"."user_id" AND
"users"."value_2" > 4 "users"."value_2" > 4
LIMIT 1) "some_users_data" ORDER BY 1 LIMIT 1) "some_users_data"
ON TRUE ON TRUE
ORDER BY ORDER BY
lastseen DESC lastseen DESC
@ -1435,7 +1442,7 @@ SELECT "some_users_data".user_id, MAX(lastseen), count(*)
WHERE WHERE
"users"."user_id" = "some_recent_users"."user_id" AND "users"."user_id" = "some_recent_users"."user_id" AND
"users"."value_2" > 4 "users"."value_2" > 4
LIMIT 1) "some_users_data" ON true ORDER BY 1 LIMIT 1) "some_users_data" ON true
GROUP BY 1 GROUP BY 1
ORDER BY 2, 1 DESC ORDER BY 2, 1 DESC
LIMIT 10; LIMIT 10;
@ -1492,7 +1499,7 @@ FROM
WHERE WHERE
"users"."user_id" = "some_recent_users"."user_id" AND "users"."user_id" = "some_recent_users"."user_id" AND
"users"."value_2" > 4 "users"."value_2" > 4
LIMIT 1) "some_users_data" ON true ORDER BY 1 LIMIT 1) "some_users_data" ON true
ORDER BY ORDER BY
lastseen DESC lastseen DESC
LIMIT 10) "some_users" LIMIT 10) "some_users"
@ -1502,11 +1509,28 @@ LIMIT 10;
DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_2 OPERATOR(pg_catalog.>) 3)) DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_2 OPERATOR(pg_catalog.>) 3))
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries DEBUG: push down of limit count: 10
ERROR: cannot push down this subquery DEBUG: generating subplan XXX_2 for subquery SELECT filter_users_1.user_id, last_events_1."time" AS lastseen FROM ((SELECT user_where_1_1.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_1 OPERATOR(pg_catalog.>) 2))) user_where_1_1 JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) user_where_1_join_1 ON ((user_where_1_1.user_id OPERATOR(pg_catalog.<>) user_where_1_join_1.user_id)))) filter_users_1 JOIN LATERAL (SELECT events.user_id, events."time" FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.user_id OPERATOR(pg_catalog.=) filter_users_1.user_id)) ORDER BY events."time" DESC LIMIT 1) last_events_1 ON (true)) ORDER BY last_events_1."time" DESC LIMIT 10
DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: push down of limit count: 10
DEBUG: generating subplan XXX_3 for subquery SELECT some_users_data.user_id, some_recent_users.lastseen FROM ((SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_recent_users JOIN LATERAL (SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.=) some_recent_users.user_id) AND (users.value_2 OPERATOR(pg_catalog.>) 4)) ORDER BY users.user_id LIMIT 1) some_users_data ON (true)) ORDER BY some_recent_users.lastseen DESC LIMIT 10
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, lastseen FROM (SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_users ORDER BY user_id DESC, lastseen DESC LIMIT 10
DEBUG: Creating router plan
user_id | lastseen
---------------------------------------------------------------------
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
(10 rows)
SET citus.enable_repartition_joins to ON; SET citus.enable_repartition_joins to ON;
SET client_min_messages TO DEBUG1; SET client_min_messages TO DEBUG1;
-- recursively planner since the inner JOIN is not on the partition key -- recursively planner since the inner JOIN is not on the partition key
@ -1555,7 +1579,7 @@ FROM
WHERE WHERE
"users"."user_id" = "some_recent_users"."user_id" AND "users"."user_id" = "some_recent_users"."user_id" AND
"users"."value_2" > 4 "users"."value_2" > 4
LIMIT 1) "some_users_data" ON true ORDER BY 1 LIMIT 1) "some_users_data" ON true
ORDER BY ORDER BY
lastseen DESC lastseen DESC
LIMIT 10) "some_users" LIMIT 10) "some_users"
@ -1563,8 +1587,25 @@ ORDER BY
user_id DESC, lastseen DESC user_id DESC, lastseen DESC
LIMIT 10; LIMIT 10;
DEBUG: generating subplan XXX_1 for subquery SELECT user_id, value_1 FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_2 OPERATOR(pg_catalog.>) 3)) DEBUG: generating subplan XXX_1 for subquery SELECT user_id, value_1 FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_2 OPERATOR(pg_catalog.>) 3))
ERROR: cannot push down this subquery DEBUG: push down of limit count: 10
DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query DEBUG: generating subplan XXX_2 for subquery SELECT filter_users_1.user_id, last_events_1."time" AS lastseen FROM ((SELECT user_where_1_1.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_1 OPERATOR(pg_catalog.>) 2))) user_where_1_1 JOIN (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) user_where_1_join_1 ON ((user_where_1_1.user_id OPERATOR(pg_catalog.=) user_where_1_join_1.value_1)))) filter_users_1 JOIN LATERAL (SELECT events.user_id, events."time" FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.user_id OPERATOR(pg_catalog.=) filter_users_1.user_id)) ORDER BY events."time" DESC LIMIT 1) last_events_1 ON (true)) ORDER BY last_events_1."time" DESC LIMIT 10
DEBUG: push down of limit count: 10
DEBUG: generating subplan XXX_3 for subquery SELECT some_users_data.user_id, some_recent_users.lastseen FROM ((SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_recent_users JOIN LATERAL (SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.=) some_recent_users.user_id) AND (users.value_2 OPERATOR(pg_catalog.>) 4)) ORDER BY users.user_id LIMIT 1) some_users_data ON (true)) ORDER BY some_recent_users.lastseen DESC LIMIT 10
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, lastseen FROM (SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_users ORDER BY user_id DESC, lastseen DESC LIMIT 10
user_id | lastseen
---------------------------------------------------------------------
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
3 | Thu Nov 23 18:08:26.550729 2017
(10 rows)
SET citus.enable_repartition_joins to OFF; SET citus.enable_repartition_joins to OFF;
RESET client_min_messages; RESET client_min_messages;
-- not supported since upper LATERAL JOIN is not equi join -- not supported since upper LATERAL JOIN is not equi join
@ -1621,9 +1662,7 @@ ORDER BY
user_id DESC, lastseen DESC user_id DESC, lastseen DESC
LIMIT 10; LIMIT 10;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- not pushdownable since lower LATERAL JOIN is not on the partition key -- complex lateral join between inner join and correlated subquery
-- not recursively plannable due to LATERAL join where there is a reference
-- from an outer query
SELECT user_id, lastseen SELECT user_id, lastseen
FROM FROM
(SELECT (SELECT
@ -1669,15 +1708,27 @@ FROM
WHERE WHERE
"users"."value_1" = "some_recent_users"."user_id" AND "users"."value_1" = "some_recent_users"."user_id" AND
"users"."value_2" > 4 "users"."value_2" > 4
LIMIT 1) "some_users_data" ON true ORDER BY 1 LIMIT 1) "some_users_data" ON true
ORDER BY ORDER BY
lastseen DESC lastseen DESC
LIMIT 10) "some_users" LIMIT 10) "some_users"
ORDER BY ORDER BY
user_id DESC, lastseen DESC user_id DESC, lastseen DESC
LIMIT 10; LIMIT 10;
ERROR: cannot push down this subquery user_id | lastseen
DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query ---------------------------------------------------------------------
5 | Thu Nov 23 17:26:14.563216 2017
5 | Thu Nov 23 17:26:14.563216 2017
5 | Thu Nov 23 17:26:14.563216 2017
5 | Thu Nov 23 17:26:14.563216 2017
5 | Thu Nov 23 17:26:14.563216 2017
5 | Thu Nov 23 17:26:14.563216 2017
5 | Thu Nov 23 17:26:14.563216 2017
5 | Thu Nov 23 17:26:14.563216 2017
5 | Thu Nov 23 17:26:14.563216 2017
5 | Thu Nov 23 17:26:14.563216 2017
(10 rows)
-- NESTED INNER JOINs -- NESTED INNER JOINs
SELECT SELECT
count(*) AS value, "generated_group_field" count(*) AS value, "generated_group_field"
@ -2281,9 +2332,7 @@ LIMIT 10;
(1 row) (1 row)
SET citus.subquery_pushdown to OFF; SET citus.subquery_pushdown to OFF;
-- not pushdownable since lower LATERAL JOIN is not on the partition key -- on side of the lateral join can be recursively plannen, then pushed down
-- not recursively plannable due to LATERAL join where there is a reference
-- from an outer query
SELECT * SELECT *
FROM FROM
(SELECT (SELECT
@ -2314,15 +2363,17 @@ FROM
WHERE WHERE
"users"."value_2" = "some_recent_users"."user_id" AND "users"."value_2" = "some_recent_users"."user_id" AND
value_2 > 4 value_2 > 4
LIMIT 1) "some_users_data" ON true ORDER BY 1 LIMIT 1) "some_users_data" ON true
ORDER BY ORDER BY
value_2 DESC value_2 DESC
LIMIT 10) "some_users" LIMIT 10) "some_users"
ORDER BY ORDER BY
value_2 DESC, user_id DESC value_2 DESC, user_id DESC
LIMIT 10; LIMIT 10;
ERROR: cannot push down this subquery user_id | value_2
DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query ---------------------------------------------------------------------
(0 rows)
-- lets test some unsupported set operations -- lets test some unsupported set operations
-- not supported since we use INTERSECT -- not supported since we use INTERSECT
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType

View File

@ -610,8 +610,10 @@ WHERE
user_id user_id
OFFSET 3 OFFSET 3
); );
ERROR: cannot push down this subquery user_id
DETAIL: Offset clause is currently unsupported when a subquery references a column from another query ---------------------------------------------------------------------
(0 rows)
-- we can detect unsupported subqueries even if they appear -- we can detect unsupported subqueries even if they appear
-- in WHERE subquery -> FROM subquery -> WHERE subquery -- in WHERE subquery -> FROM subquery -> WHERE subquery
-- but we can recursively plan that anyway -- but we can recursively plan that anyway

View File

@ -83,46 +83,6 @@ DEBUG: generating subplan XXX_1 for subquery SELECT users_table.value_2 FROM pu
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.value_2 FROM ((SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo LEFT JOIN (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar ON ((foo.value_2 OPERATOR(pg_catalog.=) bar.value_2))) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.value_2 FROM ((SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo LEFT JOIN (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar ON ((foo.value_2 OPERATOR(pg_catalog.=) bar.value_2)))
ERROR: cannot pushdown the subquery ERROR: cannot pushdown the subquery
DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join
-- Aggregates in subquery without partition column can be planned recursively
-- unless there is a reference to an outer query
SELECT
*
FROM
users_table
WHERE
user_id IN
(
SELECT
SUM(events_table.user_id)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id
)
;
ERROR: cannot push down this subquery
DETAIL: Aggregates without group by are currently unsupported when a subquery references a column from another query
-- Having qual without group by on partition column can be planned recursively
-- unless there is a reference to an outer query
SELECT
*
FROM
users_table
WHERE
user_id IN
(
SELECT
SUM(events_table.user_id)
FROM
events_table
WHERE
events_table.user_id = users_table.user_id
HAVING
MIN(value_2) > 2
)
;
ERROR: cannot push down this subquery
DETAIL: Having qual without group by on partition column is currently unsupported when a subquery references a column from another query
-- We do not support GROUPING SETS in subqueries -- We do not support GROUPING SETS in subqueries
-- This also includes ROLLUP or CUBE clauses -- This also includes ROLLUP or CUBE clauses
SELECT * FROM (SELECT user_id, value_1 FROM users_table GROUP BY GROUPING SETS ((user_id), (value_1))) s; SELECT * FROM (SELECT user_id, value_1 FROM users_table GROUP BY GROUPING SETS ((user_id), (value_1))) s;

View File

@ -31,14 +31,20 @@ ORDER BY 1,2 LIMIT 1;
SELECT event_type, (SELECT max(time) FROM users_table WHERE user_id = e.user_id) SELECT event_type, (SELECT max(time) FROM users_table WHERE user_id = e.user_id)
FROM events_table e FROM events_table e
ORDER BY 1,2 LIMIT 1; ORDER BY 1,2 LIMIT 1;
ERROR: cannot push down this subquery event_type | max
DETAIL: Aggregates without group by are currently unsupported when a subquery references a column from another query ---------------------------------------------------------------------
0 | Thu Nov 23 13:52:54.83829 2017
(1 row)
-- correlated subquery wtth limit -- correlated subquery wtth limit
SELECT event_type, (SELECT time FROM users_table WHERE user_id = e.user_id ORDER BY time LIMIT 1) SELECT event_type, (SELECT time FROM users_table WHERE user_id = e.user_id ORDER BY time LIMIT 1)
FROM events_table e FROM events_table e
ORDER BY 1,2 LIMIT 1; ORDER BY 1,2 LIMIT 1;
ERROR: cannot push down this subquery event_type | time
DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query ---------------------------------------------------------------------
0 | Wed Nov 22 18:19:49.944985 2017
(1 row)
-- correlated subquery with group by distribution column -- correlated subquery with group by distribution column
SELECT event_type, (SELECT max(time) FROM users_table WHERE user_id = e.user_id GROUP BY user_id) SELECT event_type, (SELECT max(time) FROM users_table WHERE user_id = e.user_id GROUP BY user_id)
FROM events_table e FROM events_table e
@ -52,8 +58,11 @@ ORDER BY 1,2 LIMIT 1;
SELECT event_type, (SELECT max(time) FROM users_table WHERE user_id = e.user_id GROUP BY e.user_id) SELECT event_type, (SELECT max(time) FROM users_table WHERE user_id = e.user_id GROUP BY e.user_id)
FROM events_table e FROM events_table e
ORDER BY 1,2 LIMIT 1; ORDER BY 1,2 LIMIT 1;
ERROR: cannot push down this subquery event_type | max
DETAIL: Group by list without partition column is currently unsupported when a subquery references a column from another query ---------------------------------------------------------------------
0 | Thu Nov 23 13:52:54.83829 2017
(1 row)
-- correlated subquery co-located join in outer query -- correlated subquery co-located join in outer query
SELECT event_type, (SELECT max(time) FROM users_table WHERE user_id = e.user_id GROUP BY user_id) SELECT event_type, (SELECT max(time) FROM users_table WHERE user_id = e.user_id GROUP BY user_id)
FROM users_table u JOIN events_table e USING (user_id) FROM users_table u JOIN events_table e USING (user_id)
@ -81,8 +90,11 @@ ERROR: complex joins are only supported when all distributed tables are co-loca
SELECT event_type, (SELECT max(time) FROM users_reference_table WHERE user_id = e.value_2) SELECT event_type, (SELECT max(time) FROM users_reference_table WHERE user_id = e.value_2)
FROM events_table e FROM events_table e
ORDER BY 1,2 LIMIT 1; ORDER BY 1,2 LIMIT 1;
ERROR: cannot push down this subquery event_type | max
DETAIL: Aggregates without group by are currently unsupported when a subquery references a column from another query ---------------------------------------------------------------------
0 | Thu Nov 23 13:52:54.83829 2017
(1 row)
-- correlated subquery with reference table and group by -- correlated subquery with reference table and group by
SELECT event_type, (SELECT max(time) FROM users_reference_table WHERE user_id = e.value_2 GROUP BY user_id) SELECT event_type, (SELECT max(time) FROM users_reference_table WHERE user_id = e.value_2 GROUP BY user_id)
FROM events_table e FROM events_table e
@ -270,8 +282,12 @@ SELECT (SELECT value_2 FROM view_1 WHERE user_id = e.user_id GROUP BY value_2)
FROM events_table e FROM events_table e
GROUP BY 1 GROUP BY 1
ORDER BY 1 LIMIT 3; ORDER BY 1 LIMIT 3;
ERROR: cannot push down this subquery value_2
DETAIL: Group by list without partition column is currently unsupported when a subquery references a column from another query ---------------------------------------------------------------------
3
(2 rows)
-- without view in the outer query FROM -- without view in the outer query FROM
SELECT (SELECT value_2 FROM view_1 WHERE user_id = e.user_id GROUP BY user_id, value_2) SELECT (SELECT value_2 FROM view_1 WHERE user_id = e.user_id GROUP BY user_id, value_2)
FROM view_1 e FROM view_1 e

View File

@ -691,6 +691,48 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c
101 101
(1 row) (1 row)
-- correlated subquery with aggregate in WHERE
SELECT
*
FROM
users_table
WHERE
user_id IN
(
SELECT
SUM(events_table.user_id)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id
)
;
user_id | time | value_1 | value_2 | value_3 | value_4
---------------------------------------------------------------------
(0 rows)
-- correlated subquery with aggregate in HAVING
SELECT
*
FROM
users_table
WHERE
user_id IN
(
SELECT
SUM(events_table.user_id)
FROM
events_table
WHERE
events_table.user_id = users_table.user_id
HAVING
MIN(value_2) > 2
)
;
user_id | time | value_1 | value_2 | value_3 | value_4
---------------------------------------------------------------------
(0 rows)
-- Local tables also planned recursively, so using it as part of the FROM clause -- Local tables also planned recursively, so using it as part of the FROM clause
-- make the clause recurring -- make the clause recurring
CREATE TABLE local_table(id int, value_1 int); CREATE TABLE local_table(id int, value_1 int);
@ -746,6 +788,365 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c
10 10
(1 row) (1 row)
-- basic NOT IN correlated subquery
SELECT
count(*)
FROM
events_table e
WHERE
value_2 NOT IN (SELECT value_2 FROM users_table WHERE user_id = e.user_id);
count
---------------------------------------------------------------------
7
(1 row)
-- correlated subquery with limit
SELECT
count(*)
FROM
events_table e
WHERE
value_2 IN (SELECT value_2 FROM users_table WHERE user_id = e.user_id ORDER BY value_2 LIMIT 1);
count
---------------------------------------------------------------------
10
(1 row)
-- correlated subquery with distinct
SELECT
count(*)
FROM
events_table e
WHERE
value_2 IN (SELECT DISTINCT (value_3) FROM users_table WHERE user_id = e.user_id);
count
---------------------------------------------------------------------
90
(1 row)
-- correlated subquery with aggregate
SELECT
count(*)
FROM
events_table e
WHERE
value_2 = (SELECT max(value_2) FROM users_table WHERE user_id = e.user_id);
count
---------------------------------------------------------------------
11
(1 row)
-- correlated subquery with window function
SELECT
count(*)
FROM
events_table e
WHERE
value_2 IN (SELECT row_number() OVER () FROM users_table WHERE user_id = e.user_id);
count
---------------------------------------------------------------------
94
(1 row)
-- correlated subquery with group by
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) FROM users_table WHERE user_id = e.user_id GROUP BY value_2);
count
---------------------------------------------------------------------
72
(1 row)
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) FROM users_table WHERE user_id = e.user_id GROUP BY value_2);
count
---------------------------------------------------------------------
72
(1 row)
-- correlated subquery with group by
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2);
count
---------------------------------------------------------------------
10
(1 row)
-- correlated subquery with having
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2 HAVING min(value_3) > (SELECT 1));
count
---------------------------------------------------------------------
0
(1 row)
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2 HAVING min(value_3) > (SELECT e.value_3));
ERROR: Subqueries in HAVING cannot refer to outer query
-- nested correlated subquery
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN (SELECT * FROM users_table WHERE user_id = e.user_id) u USING (user_id)
WHERE u.value_2 > 3
GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3);
count
---------------------------------------------------------------------
0
(1 row)
-- not co-located correlated subquery
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN (SELECT * FROM users_table WHERE value_2 = e.user_id) u USING (user_id)
WHERE u.value_2 > 3
GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- cartesian correlated subquery
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id)
WHERE u.value_2 > 3
GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- even more subtle cartesian correlated subquery
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id)
WHERE u.value_2 > 3
GROUP BY u.value_2 HAVING min(r.value_3) > e.value_3);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- not a correlated subquery, uses recursive planning
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id)
WHERE u.value_2 > 3
GROUP BY r.value_2 HAVING min(r.value_3) > 0);
DEBUG: generating subplan XXX_1 for subquery SELECT min(r.value_3) AS v FROM (public.users_reference_table r JOIN public.users_table u USING (user_id)) WHERE (u.value_2 OPERATOR(pg_catalog.>) 3) GROUP BY r.value_2 HAVING (min(r.value_3) OPERATOR(pg_catalog.>) (0)::double precision)
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.events_table e WHERE (value_3 OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.v FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v double precision)))
count
---------------------------------------------------------------------
24
(1 row)
-- two levels of correlation should also allow
-- merge step in the subquery
SELECT sum(value_1)
FROM users_table u
WHERE EXISTS
(SELECT 1
FROM events_table e
WHERE u.user_id = e.user_id AND
EXISTS
(SELECT 1
FROM users_table u2
WHERE u2.user_id = u.user_id AND u2.value_1 = 5
LIMIT 1));
sum
---------------------------------------------------------------------
216
(1 row)
-- correlated subquery in WHERE, with a slightly
-- different syntax that the result of the subquery
-- is compared with a constant
SELECT sum(value_1)
FROM users_table u1
WHERE (SELECT COUNT(DISTINCT e1.value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id
) > 115;
sum
---------------------------------------------------------------------
(1 row)
-- a correlated subquery which requires merge step
-- can be pushed down on UPDATE/DELETE queries as well
-- rollback to keep the rest of the tests unchanged
BEGIN;
UPDATE users_table u1
SET value_1 = (SELECT count(DISTINCT value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id);
DELETE FROM users_table u1 WHERE (SELECT count(DISTINCT value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id) > 10;
ROLLBACK;
-- a correlated anti-join can also be pushed down even if the subquery
-- has a LIMIT
SELECT avg(value_1)
FROM users_table u
WHERE NOT EXISTS
(SELECT 'XXX'
FROM events_table e
WHERE u.user_id = e.user_id and e.value_2 > 10000 LIMIT 1);
avg
---------------------------------------------------------------------
2.5544554455445545
(1 row)
-- a [correlated] lateral join can also be pushed down even if the subquery
-- has an aggregate wout a GROUP BY
SELECT
max(min_of_val_2), max(u1.value_1)
FROM
users_table u1
LEFT JOIN LATERAL
(SELECT min(e1.value_2) as min_of_val_2 FROM events_table e1 WHERE e1.user_id = u1.user_id) as foo ON (true);
max | max
---------------------------------------------------------------------
1 | 5
(1 row)
-- a self join is followed by a correlated subquery
EXPLAIN (COSTS OFF)
SELECT
*
FROM
users_table u1 JOIN users_table u2 USING (user_id)
WHERE
u1.value_1 < u2.value_1 AND
(SELECT
count(*)
FROM
events_table e1
WHERE
e1.user_id = u2.user_id) > 10;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Hash Join
Hash Cond: (u2.user_id = u1.user_id)
Join Filter: (u1.value_1 < u2.value_1)
-> Seq Scan on users_table_1400256 u2
Filter: ((SubPlan 1) > 10)
SubPlan 1
-> Aggregate
-> Seq Scan on events_table_1400260 e1
Filter: (user_id = u2.user_id)
-> Hash
-> Seq Scan on users_table_1400256 u1
(16 rows)
-- when the colocated join of the FROM clause
-- entries happen on WHERE clause, Citus cannot
-- pushdown
-- Likely that the colocation checks should be
-- improved
SELECT
u1.user_id, u2.user_id
FROM
users_table u1, users_table u2
WHERE
u1.value_1 < u2.value_1 AND
(SELECT
count(*)
FROM
events_table e1
WHERE
e1.user_id = u2.user_id AND
u1.user_id = u2.user_id) > 10
ORDER BY 1,2;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- create a view that contains correlated subquery
CREATE TEMPORARY VIEW correlated_subquery_view AS
SELECT u1.user_id
FROM users_table u1
WHERE (SELECT COUNT(DISTINCT e1.value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id
) > 0;
SELECT sum(user_id) FROM correlated_subquery_view;
sum
---------------------------------------------------------------------
376
(1 row)
-- now, join the view with another correlated subquery
SELECT
sum(mx)
FROM
correlated_subquery_view
LEFT JOIN LATERAL
(SELECT max(value_2) as mx FROM events_table WHERE correlated_subquery_view.user_id = events_table.user_id) as foo ON (true);
sum
---------------------------------------------------------------------
459
(1 row)
-- as an edge case, JOIN is on false
SELECT
sum(mx)
FROM
correlated_subquery_view
LEFT JOIN LATERAL
(SELECT max(value_2) as mx FROM events_table WHERE correlated_subquery_view.user_id = events_table.user_id) as foo ON (false);
sum
---------------------------------------------------------------------
(1 row)
SELECT sum(value_1)
FROM users_table u1
WHERE (SELECT COUNT(DISTINCT e1.value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id AND false
) > 115;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
SELECT sum(value_1)
FROM users_table u1
WHERE (SELECT COUNT(DISTINCT e1.value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id
) > 115 AND false;
sum
---------------------------------------------------------------------
(1 row)
SET client_min_messages TO DEFAULT; SET client_min_messages TO DEFAULT;
DROP TABLE local_table; DROP TABLE local_table;
DROP SCHEMA subquery_in_where CASCADE; DROP SCHEMA subquery_in_where CASCADE;

View File

@ -413,7 +413,14 @@ DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT user_id FROM public.users
DEBUG: generating subplan XXX_2 for CTE cte_1: SELECT user_id FROM public.users_table DEBUG: generating subplan XXX_2 for CTE cte_1: SELECT user_id FROM public.users_table
DEBUG: generating subplan XXX_3 for subquery SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 UNION SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 DEBUG: generating subplan XXX_3 for subquery SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1 UNION SELECT cte_1.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) cte_1
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT DISTINCT user_id FROM public.events_table WHERE (event_type OPERATOR(pg_catalog.=) ANY (SELECT users_table.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo, public.users_table WHERE ((users_table.value_2 OPERATOR(pg_catalog.=) foo.user_id) AND (events_table.user_id OPERATOR(pg_catalog.=) users_table.user_id)) LIMIT 5)) ORDER BY user_id DESC DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT DISTINCT user_id FROM public.events_table WHERE (event_type OPERATOR(pg_catalog.=) ANY (SELECT users_table.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo, public.users_table WHERE ((users_table.value_2 OPERATOR(pg_catalog.=) foo.user_id) AND (events_table.user_id OPERATOR(pg_catalog.=) users_table.user_id)) LIMIT 5)) ORDER BY user_id DESC
ERROR: cannot push down this subquery user_id
DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query ---------------------------------------------------------------------
5
4
3
2
1
(5 rows)
SET client_min_messages TO DEFAULT; SET client_min_messages TO DEFAULT;
SET search_path TO public; SET search_path TO public;

View File

@ -235,7 +235,7 @@ FROM articles_hash_mx, (SELECT id, word_count FROM articles_hash_mx) AS test
WHERE test.id = articles_hash_mx.id and articles_hash_mx.author_id = 1 WHERE test.id = articles_hash_mx.id and articles_hash_mx.author_id = 1
ORDER BY articles_hash_mx.id; ORDER BY articles_hash_mx.id;
-- subqueries are not supported in SELECT clause -- subqueries in SELECT clause
SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash_mx a2 WHERE a.id = a2.id LIMIT 1) SELECT a.title AS name, (SELECT a2.id FROM articles_single_shard_hash_mx a2 WHERE a.id = a2.id LIMIT 1)
AS special_price FROM articles_hash_mx a; AS special_price FROM articles_hash_mx a;

View File

@ -1119,7 +1119,7 @@ FROM
WHERE WHERE
"users"."user_id" = "some_recent_users"."user_id" AND "users"."user_id" = "some_recent_users"."user_id" AND
users.value_2 > 1 and users.value_2 < 3 users.value_2 > 1 and users.value_2 < 3
LIMIT 1) "some_users_data" ORDER BY 1 LIMIT 1) "some_users_data"
ON TRUE ON TRUE
ORDER BY ORDER BY
user_id user_id
@ -1128,9 +1128,7 @@ limit 50;
-- reset subquery_pushdown -- reset subquery_pushdown
SET citus.subquery_pushdown to OFF; SET citus.subquery_pushdown to OFF;
-- we recursively plan recent_events_1 -- mixture of recursively planned subqueries and correlated subqueries
-- but not some_users_data since it has a reference
-- from an outer query which is not recursively planned
SELECT "some_users_data".user_id, lastseen SELECT "some_users_data".user_id, lastseen
FROM FROM
(SELECT user_id, max(time) AS lastseen (SELECT user_id, max(time) AS lastseen
@ -1158,15 +1156,12 @@ FROM
WHERE WHERE
"users"."value_1" = "some_recent_users"."user_id" AND "users"."value_1" = "some_recent_users"."user_id" AND
users.value_2 > 1 and users.value_2 < 3 users.value_2 > 1 and users.value_2 < 3
LIMIT 1) "some_users_data" ORDER BY 1 LIMIT 1) "some_users_data"
ON TRUE ON TRUE
ORDER BY ORDER BY
user_id user_id
limit 50; limit 50;
-- we recursively plan some queries but fail in the end
-- since some_users_data since it has a reference
-- from an outer query which is not recursively planned
SELECT "some_users_data".user_id, lastseen SELECT "some_users_data".user_id, lastseen
FROM FROM
(SELECT 2 * user_id as user_id, max(time) AS lastseen (SELECT 2 * user_id as user_id, max(time) AS lastseen
@ -1194,7 +1189,7 @@ FROM
WHERE WHERE
"users"."user_id" = "some_recent_users"."user_id" AND "users"."user_id" = "some_recent_users"."user_id" AND
users.value_2 > 1 and users.value_2 < 3 users.value_2 > 1 and users.value_2 < 3
LIMIT 1) "some_users_data" ORDER BY 1 LIMIT 1) "some_users_data"
ON TRUE ON TRUE
ORDER BY ORDER BY
user_id user_id
@ -1251,7 +1246,7 @@ FROM
WHERE WHERE
"users"."user_id" = "some_recent_users"."user_id" AND "users"."user_id" = "some_recent_users"."user_id" AND
"users"."value_2" > 4 "users"."value_2" > 4
LIMIT 1) "some_users_data" ORDER BY 1 LIMIT 1) "some_users_data"
ON TRUE ON TRUE
ORDER BY ORDER BY
lastseen DESC lastseen DESC
@ -1306,7 +1301,7 @@ SELECT "some_users_data".user_id, MAX(lastseen), count(*)
WHERE WHERE
"users"."user_id" = "some_recent_users"."user_id" AND "users"."user_id" = "some_recent_users"."user_id" AND
"users"."value_2" > 4 "users"."value_2" > 4
LIMIT 1) "some_users_data" ON true ORDER BY 1 LIMIT 1) "some_users_data" ON true
GROUP BY 1 GROUP BY 1
ORDER BY 2, 1 DESC ORDER BY 2, 1 DESC
LIMIT 10; LIMIT 10;
@ -1360,7 +1355,7 @@ FROM
WHERE WHERE
"users"."user_id" = "some_recent_users"."user_id" AND "users"."user_id" = "some_recent_users"."user_id" AND
"users"."value_2" > 4 "users"."value_2" > 4
LIMIT 1) "some_users_data" ON true ORDER BY 1 LIMIT 1) "some_users_data" ON true
ORDER BY ORDER BY
lastseen DESC lastseen DESC
LIMIT 10) "some_users" LIMIT 10) "some_users"
@ -1418,7 +1413,7 @@ FROM
WHERE WHERE
"users"."user_id" = "some_recent_users"."user_id" AND "users"."user_id" = "some_recent_users"."user_id" AND
"users"."value_2" > 4 "users"."value_2" > 4
LIMIT 1) "some_users_data" ON true ORDER BY 1 LIMIT 1) "some_users_data" ON true
ORDER BY ORDER BY
lastseen DESC lastseen DESC
LIMIT 10) "some_users" LIMIT 10) "some_users"
@ -1484,9 +1479,7 @@ ORDER BY
user_id DESC, lastseen DESC user_id DESC, lastseen DESC
LIMIT 10; LIMIT 10;
-- not pushdownable since lower LATERAL JOIN is not on the partition key -- complex lateral join between inner join and correlated subquery
-- not recursively plannable due to LATERAL join where there is a reference
-- from an outer query
SELECT user_id, lastseen SELECT user_id, lastseen
FROM FROM
(SELECT (SELECT
@ -1532,7 +1525,7 @@ FROM
WHERE WHERE
"users"."value_1" = "some_recent_users"."user_id" AND "users"."value_1" = "some_recent_users"."user_id" AND
"users"."value_2" > 4 "users"."value_2" > 4
LIMIT 1) "some_users_data" ON true ORDER BY 1 LIMIT 1) "some_users_data" ON true
ORDER BY ORDER BY
lastseen DESC lastseen DESC
LIMIT 10) "some_users" LIMIT 10) "some_users"
@ -2033,9 +2026,7 @@ ORDER BY
LIMIT 10; LIMIT 10;
SET citus.subquery_pushdown to OFF; SET citus.subquery_pushdown to OFF;
-- not pushdownable since lower LATERAL JOIN is not on the partition key -- on side of the lateral join can be recursively plannen, then pushed down
-- not recursively plannable due to LATERAL join where there is a reference
-- from an outer query
SELECT * SELECT *
FROM FROM
(SELECT (SELECT
@ -2066,7 +2057,7 @@ FROM
WHERE WHERE
"users"."value_2" = "some_recent_users"."user_id" AND "users"."value_2" = "some_recent_users"."user_id" AND
value_2 > 4 value_2 > 4
LIMIT 1) "some_users_data" ON true ORDER BY 1 LIMIT 1) "some_users_data" ON true
ORDER BY ORDER BY
value_2 DESC value_2 DESC
LIMIT 10) "some_users" LIMIT 10) "some_users"

View File

@ -78,46 +78,6 @@ FROM
ON(foo.value_2 = bar.value_2); ON(foo.value_2 = bar.value_2);
-- Aggregates in subquery without partition column can be planned recursively
-- unless there is a reference to an outer query
SELECT
*
FROM
users_table
WHERE
user_id IN
(
SELECT
SUM(events_table.user_id)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id
)
;
-- Having qual without group by on partition column can be planned recursively
-- unless there is a reference to an outer query
SELECT
*
FROM
users_table
WHERE
user_id IN
(
SELECT
SUM(events_table.user_id)
FROM
events_table
WHERE
events_table.user_id = users_table.user_id
HAVING
MIN(value_2) > 2
)
;
-- We do not support GROUPING SETS in subqueries -- We do not support GROUPING SETS in subqueries
-- This also includes ROLLUP or CUBE clauses -- This also includes ROLLUP or CUBE clauses
SELECT * FROM (SELECT user_id, value_1 FROM users_table GROUP BY GROUPING SETS ((user_id), (value_1))) s; SELECT * FROM (SELECT user_id, value_1 FROM users_table GROUP BY GROUPING SETS ((user_id), (value_1))) s;

View File

@ -502,6 +502,43 @@ WHERE value_1 IN
(SELECT value_1 (SELECT value_1
FROM users_Table) OR (EXISTS (SELECT * FROM events_table)); FROM users_Table) OR (EXISTS (SELECT * FROM events_table));
-- correlated subquery with aggregate in WHERE
SELECT
*
FROM
users_table
WHERE
user_id IN
(
SELECT
SUM(events_table.user_id)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id
)
;
-- correlated subquery with aggregate in HAVING
SELECT
*
FROM
users_table
WHERE
user_id IN
(
SELECT
SUM(events_table.user_id)
FROM
events_table
WHERE
events_table.user_id = users_table.user_id
HAVING
MIN(value_2) > 2
)
;
-- Local tables also planned recursively, so using it as part of the FROM clause -- Local tables also planned recursively, so using it as part of the FROM clause
-- make the clause recurring -- make the clause recurring
CREATE TABLE local_table(id int, value_1 int); CREATE TABLE local_table(id int, value_1 int);
@ -542,6 +579,277 @@ IN
FROM FROM
local_table); local_table);
-- basic NOT IN correlated subquery
SELECT
count(*)
FROM
events_table e
WHERE
value_2 NOT IN (SELECT value_2 FROM users_table WHERE user_id = e.user_id);
-- correlated subquery with limit
SELECT
count(*)
FROM
events_table e
WHERE
value_2 IN (SELECT value_2 FROM users_table WHERE user_id = e.user_id ORDER BY value_2 LIMIT 1);
-- correlated subquery with distinct
SELECT
count(*)
FROM
events_table e
WHERE
value_2 IN (SELECT DISTINCT (value_3) FROM users_table WHERE user_id = e.user_id);
-- correlated subquery with aggregate
SELECT
count(*)
FROM
events_table e
WHERE
value_2 = (SELECT max(value_2) FROM users_table WHERE user_id = e.user_id);
-- correlated subquery with window function
SELECT
count(*)
FROM
events_table e
WHERE
value_2 IN (SELECT row_number() OVER () FROM users_table WHERE user_id = e.user_id);
-- correlated subquery with group by
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) FROM users_table WHERE user_id = e.user_id GROUP BY value_2);
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) FROM users_table WHERE user_id = e.user_id GROUP BY value_2);
-- correlated subquery with group by
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2);
-- correlated subquery with having
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2 HAVING min(value_3) > (SELECT 1));
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2 HAVING min(value_3) > (SELECT e.value_3));
-- nested correlated subquery
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN (SELECT * FROM users_table WHERE user_id = e.user_id) u USING (user_id)
WHERE u.value_2 > 3
GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3);
-- not co-located correlated subquery
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN (SELECT * FROM users_table WHERE value_2 = e.user_id) u USING (user_id)
WHERE u.value_2 > 3
GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3);
-- cartesian correlated subquery
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id)
WHERE u.value_2 > 3
GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3);
-- even more subtle cartesian correlated subquery
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id)
WHERE u.value_2 > 3
GROUP BY u.value_2 HAVING min(r.value_3) > e.value_3);
-- not a correlated subquery, uses recursive planning
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id)
WHERE u.value_2 > 3
GROUP BY r.value_2 HAVING min(r.value_3) > 0);
-- two levels of correlation should also allow
-- merge step in the subquery
SELECT sum(value_1)
FROM users_table u
WHERE EXISTS
(SELECT 1
FROM events_table e
WHERE u.user_id = e.user_id AND
EXISTS
(SELECT 1
FROM users_table u2
WHERE u2.user_id = u.user_id AND u2.value_1 = 5
LIMIT 1));
-- correlated subquery in WHERE, with a slightly
-- different syntax that the result of the subquery
-- is compared with a constant
SELECT sum(value_1)
FROM users_table u1
WHERE (SELECT COUNT(DISTINCT e1.value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id
) > 115;
-- a correlated subquery which requires merge step
-- can be pushed down on UPDATE/DELETE queries as well
-- rollback to keep the rest of the tests unchanged
BEGIN;
UPDATE users_table u1
SET value_1 = (SELECT count(DISTINCT value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id);
DELETE FROM users_table u1 WHERE (SELECT count(DISTINCT value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id) > 10;
ROLLBACK;
-- a correlated anti-join can also be pushed down even if the subquery
-- has a LIMIT
SELECT avg(value_1)
FROM users_table u
WHERE NOT EXISTS
(SELECT 'XXX'
FROM events_table e
WHERE u.user_id = e.user_id and e.value_2 > 10000 LIMIT 1);
-- a [correlated] lateral join can also be pushed down even if the subquery
-- has an aggregate wout a GROUP BY
SELECT
max(min_of_val_2), max(u1.value_1)
FROM
users_table u1
LEFT JOIN LATERAL
(SELECT min(e1.value_2) as min_of_val_2 FROM events_table e1 WHERE e1.user_id = u1.user_id) as foo ON (true);
-- a self join is followed by a correlated subquery
EXPLAIN (COSTS OFF)
SELECT
*
FROM
users_table u1 JOIN users_table u2 USING (user_id)
WHERE
u1.value_1 < u2.value_1 AND
(SELECT
count(*)
FROM
events_table e1
WHERE
e1.user_id = u2.user_id) > 10;
-- when the colocated join of the FROM clause
-- entries happen on WHERE clause, Citus cannot
-- pushdown
-- Likely that the colocation checks should be
-- improved
SELECT
u1.user_id, u2.user_id
FROM
users_table u1, users_table u2
WHERE
u1.value_1 < u2.value_1 AND
(SELECT
count(*)
FROM
events_table e1
WHERE
e1.user_id = u2.user_id AND
u1.user_id = u2.user_id) > 10
ORDER BY 1,2;
-- create a view that contains correlated subquery
CREATE TEMPORARY VIEW correlated_subquery_view AS
SELECT u1.user_id
FROM users_table u1
WHERE (SELECT COUNT(DISTINCT e1.value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id
) > 0;
SELECT sum(user_id) FROM correlated_subquery_view;
-- now, join the view with another correlated subquery
SELECT
sum(mx)
FROM
correlated_subquery_view
LEFT JOIN LATERAL
(SELECT max(value_2) as mx FROM events_table WHERE correlated_subquery_view.user_id = events_table.user_id) as foo ON (true);
-- as an edge case, JOIN is on false
SELECT
sum(mx)
FROM
correlated_subquery_view
LEFT JOIN LATERAL
(SELECT max(value_2) as mx FROM events_table WHERE correlated_subquery_view.user_id = events_table.user_id) as foo ON (false);
SELECT sum(value_1)
FROM users_table u1
WHERE (SELECT COUNT(DISTINCT e1.value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id AND false
) > 115;
SELECT sum(value_1)
FROM users_table u1
WHERE (SELECT COUNT(DISTINCT e1.value_2)
FROM events_table e1
WHERE e1.user_id = u1.user_id
) > 115 AND false;
SET client_min_messages TO DEFAULT; SET client_min_messages TO DEFAULT;
DROP TABLE local_table; DROP TABLE local_table;