From e5d5c7be930c9c46d78208019f6e81913c37cc04 Mon Sep 17 00:00:00 2001 From: Jelte Fennema Date: Fri, 11 Mar 2022 11:59:18 +0100 Subject: [PATCH] Start erroring out for unsupported lateral subqueries (#5753) With the introduction of #4385 we inadvertently started allowing and pushing down certain lateral subqueries that were unsafe to push down. To be precise the type of LATERAL subqueries that is unsafe to push down has all of the following properties: 1. The lateral subquery contains some non recurring tuples 2. The lateral subquery references a recurring tuple from outside of the subquery (recurringRelids) 3. The lateral subquery requires a merge step (e.g. a LIMIT) 4. The reference to the recurring tuple should be something else than an equality check on the distribution column, e.g. equality on a non distribution column. Property number four is considered both hard to detect and probably not used very often. Thus this PR ignores property number four and causes query planning to error out if the first three properties hold. Fixes #5327 --- .../planner/query_pushdown_planning.c | 353 +++++++++++++++- .../regress/expected/distributed_planning.out | 387 ++++++++++++++++++ .../regress/expected/local_table_join.out | 7 +- .../multi_subquery_complex_queries.out | 81 +--- .../unsupported_lateral_subqueries.out | 208 ++++++++++ src/test/regress/multi_schedule | 2 +- src/test/regress/sql/distributed_planning.sql | 310 ++++++++++++++ .../sql/unsupported_lateral_subqueries.sql | 197 +++++++++ 8 files changed, 1455 insertions(+), 90 deletions(-) create mode 100644 src/test/regress/expected/unsupported_lateral_subqueries.out create mode 100644 src/test/regress/sql/unsupported_lateral_subqueries.sql diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index f78cec339..5ad7887e9 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -45,6 +45,8 @@ #include "parser/parsetree.h" +#define INVALID_RELID -1 + /* * RecurringTuplesType is used to distinguish different types of expressions * that always produce the same set of tuples when a shard is queried. We make @@ -61,6 +63,17 @@ typedef enum RecurringTuplesType RECURRING_TUPLES_VALUES } RecurringTuplesType; +/* + * RelidsReferenceWalkerContext is used to find Vars in a (sub)query that + * refer to certain relids from the upper query. + */ +typedef struct RelidsReferenceWalkerContext +{ + int level; + Relids relids; + int foundRelid; +} RelidsReferenceWalkerContext; + /* Config variable managed via guc.c */ bool SubqueryPushdown = false; /* is subquery pushdown enabled */ @@ -76,7 +89,9 @@ static RecurringTuplesType FromClauseRecurringTupleType(Query *queryTree); static DeferredErrorMessage * DeferredErrorIfUnsupportedRecurringTuplesJoin( PlannerRestrictionContext *plannerRestrictionContext); static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree); -static DeferredErrorMessage * DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree); +static DeferredErrorMessage * DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree, bool + lateral, + char *referencedThing); static bool ExtractSetOperationStatementWalker(Node *node, List **setOperationList); static RecurringTuplesType FetchFirstRecurType(PlannerInfo *plannerInfo, Relids relids); @@ -90,7 +105,12 @@ static List * CreateSubqueryTargetListAndAdjustVars(List *columnList); static AttrNumber FindResnoForVarInTargetList(List *targetList, int varno, int varattno); static bool RelationInfoContainsOnlyRecurringTuples(PlannerInfo *plannerInfo, Relids relids); +static DeferredErrorMessage * DeferredErrorIfUnsupportedLateralSubquery( + PlannerInfo *plannerInfo, Relids recurringRelIds, Relids nonRecurringRelIds); static Var * PartitionColumnForPushedDownSubquery(Query *query); +static bool ContainsReferencesToRelids(Query *query, Relids relids, int *foundRelid); +static bool ContainsReferencesToRelidsWalker(Node *node, + RelidsReferenceWalkerContext *context); /* @@ -844,6 +864,49 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin( break; } } + else if (joinType == JOIN_INNER && plannerInfo->hasLateralRTEs) + { + /* + * Sometimes we cannot push down INNER JOINS when they have only + * recurring tuples on one side and a lateral on the other side. + * See comment on DeferredErrorIfUnsupportedLateralSubquery for + * details. + * + * When planning inner joins postgres can move RTEs from left to + * right and from right to left. So we don't know on which side the + * lateral join wil appear. Thus we try to find a side of the join + * that only contains recurring tuples. And then we check the other + * side to see if it contains an unsupported lateral join. + * + */ + if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, innerrelRelids)) + { + DeferredErrorMessage *deferredError = + DeferredErrorIfUnsupportedLateralSubquery(plannerInfo, + innerrelRelids, + outerrelRelids); + if (deferredError) + { + return deferredError; + } + } + else if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrelRelids)) + { + /* + * This branch uses "else if" instead of "if", because if both + * sides contain only recurring tuples there will never be an + * unsupported lateral subquery. + */ + DeferredErrorMessage *deferredError = + DeferredErrorIfUnsupportedLateralSubquery(plannerInfo, + outerrelRelids, + innerrelRelids); + if (deferredError) + { + return deferredError; + } + } + } } if (recurType == RECURRING_TUPLES_REFERENCE_TABLE) @@ -950,7 +1013,8 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi */ if (!ContainsReferencesToOuterQuery(subqueryTree)) { - deferredError = DeferErrorIfSubqueryRequiresMerge(subqueryTree); + deferredError = DeferErrorIfSubqueryRequiresMerge(subqueryTree, false, + "another query"); if (deferredError) { return deferredError; @@ -1028,24 +1092,29 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi * column, etc.). */ static DeferredErrorMessage * -DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree) +DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree, bool lateral, + char *referencedThing) { bool preconditionsSatisfied = true; char *errorDetail = NULL; + char *lateralString = lateral ? "lateral " : ""; + if (subqueryTree->limitOffset) { preconditionsSatisfied = false; - errorDetail = "Offset clause is currently unsupported when a subquery " - "references a column from another query"; + errorDetail = psprintf("Offset clause is currently unsupported when a %ssubquery " + "references a column from %s", lateralString, + referencedThing); } /* limit is not supported when SubqueryPushdown is not set */ if (subqueryTree->limitCount && !SubqueryPushdown) { preconditionsSatisfied = false; - errorDetail = "Limit in subquery is currently unsupported when a " - "subquery references a column from another query"; + errorDetail = psprintf("Limit clause is currently unsupported when a " + "%ssubquery references a column from %s", lateralString, + referencedThing); } /* group clause list must include partition column */ @@ -1060,9 +1129,9 @@ DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree) if (!groupOnPartitionColumn) { preconditionsSatisfied = false; - errorDetail = "Group by list without partition column is currently " - "unsupported when a subquery references a column " - "from another query"; + errorDetail = psprintf("Group by list without partition column is currently " + "unsupported when a %ssubquery references a column " + "from %s", lateralString, referencedThing); } } @@ -1070,17 +1139,18 @@ DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree) if (subqueryTree->hasAggs && (subqueryTree->groupClause == NULL)) { preconditionsSatisfied = false; - errorDetail = "Aggregates without group by are currently unsupported " - "when a subquery references a column from another query"; + errorDetail = psprintf("Aggregates without group by are currently unsupported " + "when a %ssubquery references a column from %s", + lateralString, referencedThing); } /* having clause without group by on partition column is not supported */ if (subqueryTree->havingQual && (subqueryTree->groupClause == NULL)) { preconditionsSatisfied = false; - errorDetail = "Having qual without group by on partition column is " - "currently unsupported when a subquery references " - "a column from another query"; + errorDetail = psprintf("Having qual without group by on partition column is " + "currently unsupported when a %ssubquery references " + "a column from %s", lateralString, referencedThing); } /* @@ -1397,6 +1467,259 @@ RelationInfoContainsOnlyRecurringTuples(PlannerInfo *plannerInfo, Relids relids) } +/* + * RecurringTypeDescription returns a discriptive string for the given + * recurType. This string can be used in error messages to help the users + * understand why a query cannot be planned. + */ +static char * +RecurringTypeDescription(RecurringTuplesType recurType) +{ + switch (recurType) + { + case RECURRING_TUPLES_REFERENCE_TABLE: + { + return "a reference table"; + } + + case RECURRING_TUPLES_FUNCTION: + { + return "a table function"; + } + + case RECURRING_TUPLES_EMPTY_JOIN_TREE: + { + return "a subquery without FROM"; + } + + case RECURRING_TUPLES_RESULT_FUNCTION: + { + return "complex subqueries, CTEs or local tables"; + } + + case RECURRING_TUPLES_VALUES: + { + return "a VALUES clause"; + } + + case RECURRING_TUPLES_INVALID: + { + /* + * This branch should never be hit, but it's here just in case it + * happens. + */ + return "an unknown recurring tuple"; + } + } + + /* + * This should never be hit, but is needed to fix compiler warnings. + */ + return "an unknown recurring tuple"; +} + + +/* + * ContainsReferencesToRelids determines whether the given query contains + * any references that point to columns of the given relids. The given relids + * should be from exactly one query level above the given query. + * + * If the function returns true, then foundRelid is set to the first relid that + * was referenced. + * + * There are some queries where it cannot easily be determined if the relids + * are used, e.g because the query contains placeholder vars. In those cases + * this function returns true, because it's better to error out than to return + * wrong results. But in these cases foundRelid is set to INVALID_RELID. + */ +static bool +ContainsReferencesToRelids(Query *query, Relids relids, int *foundRelid) +{ + RelidsReferenceWalkerContext context = { 0 }; + context.level = 1; + context.relids = relids; + context.foundRelid = INVALID_RELID; + int flags = 0; + + if (query_tree_walker(query, ContainsReferencesToRelidsWalker, + &context, flags)) + { + *foundRelid = context.foundRelid; + return true; + } + return false; +} + + +/* + * ContainsReferencesToRelidsWalker determines whether the given query + * contains any Vars that reference the relids in the context. + * + * ContainsReferencesToRelidsWalker recursively descends into subqueries + * and increases the level by 1 before recursing. + */ +static bool +ContainsReferencesToRelidsWalker(Node *node, RelidsReferenceWalkerContext *context) +{ + if (node == NULL) + { + return false; + } + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + if (var->varlevelsup == context->level && bms_is_member(var->varno, + context->relids)) + { + context->foundRelid = var->varno; + return true; + } + + return false; + } + else if (IsA(node, Aggref)) + { + if (((Aggref *) node)->agglevelsup > context->level) + { + /* + * TODO: Only return true when aggref points to an aggregate that + * uses vars from a recurring tuple. + */ + return true; + } + } + else if (IsA(node, GroupingFunc)) + { + if (((GroupingFunc *) node)->agglevelsup > context->level) + { + /* + * TODO: Only return true when groupingfunc points to a grouping + * func that uses vars from a recurring tuple. + */ + return true; + } + + return false; + } + else if (IsA(node, PlaceHolderVar)) + { + if (((PlaceHolderVar *) node)->phlevelsup > context->level) + { + /* + * TODO: Only return true when aggref points to a placeholdervar + * that uses vars from a recurring tuple. + */ + return true; + } + } + else if (IsA(node, Query)) + { + Query *query = (Query *) node; + int flags = 0; + + context->level += 1; + bool found = query_tree_walker(query, ContainsReferencesToRelidsWalker, + context, flags); + context->level -= 1; + + return found; + } + + return expression_tree_walker(node, ContainsReferencesToRelidsWalker, + context); +} + + +/* + * DeferredErrorIfUnsupportedLateralSubquery returns true if + * notFullyRecurringRelids contains a lateral subquery that we do not support. + * + * If there is an inner join with a lateral subquery we cannot + * push it down when the following properties all hold: + * 1. The lateral subquery contains some non recurring tuples + * 2. The lateral subquery references a recurring tuple from + * outside of the subquery (recurringRelids) + * 3. The lateral subquery requires a merge step (e.g. a LIMIT) + * 4. The reference to the recurring tuple should be something else than an + * equality check on the distribution column, e.g. equality on a non + * distribution column. + * + * Property number four is considered both hard to detect and + * probably not used very often, so we only check for 1, 2 and 3. + */ +static DeferredErrorMessage * +DeferredErrorIfUnsupportedLateralSubquery(PlannerInfo *plannerInfo, + Relids recurringRelids, + Relids notFullyRecurringRelids) +{ + int relationId = -1; + while ((relationId = bms_next_member(notFullyRecurringRelids, relationId)) >= 0) + { + RangeTblEntry *rangeTableEntry = plannerInfo->simple_rte_array[relationId]; + + if (!rangeTableEntry->lateral) + { + continue; + } + + /* TODO: What about others kinds? */ + if (rangeTableEntry->rtekind == RTE_SUBQUERY) + { + /* property number 1, contains non-recurring tuples */ + if (!FindNodeMatchingCheckFunctionInRangeTableList( + list_make1(rangeTableEntry), IsDistributedTableRTE)) + { + continue; + } + + /* property number 2, references recurring tuple */ + int recurringRelid = INVALID_RELID; + if (!ContainsReferencesToRelids(rangeTableEntry->subquery, recurringRelids, + &recurringRelid)) + { + continue; + } + + char *recurTypeDescription = + "an aggregate, grouping func or placeholder var coming from the outer query"; + if (recurringRelid != INVALID_RELID) + { + RangeTblEntry *recurringRangeTableEntry = + plannerInfo->simple_rte_array[recurringRelid]; + RecurringTuplesType recurType = RECURRING_TUPLES_INVALID; + ContainsRecurringRTE(recurringRangeTableEntry, &recurType); + recurTypeDescription = RecurringTypeDescription(recurType); + + /* + * Add the alias for all recuring tuples where it is useful to + * see them. We don't add it for VALUES and intermediate + * results, because there the aliases are currently hardcoded + * strings anyway. + */ + if (recurType != RECURRING_TUPLES_VALUES && + recurType != RECURRING_TUPLES_RESULT_FUNCTION) + { + recurTypeDescription = psprintf("%s (%s)", recurTypeDescription, + recurringRangeTableEntry->eref-> + aliasname); + } + } + + /* property number 3, has a merge step */ + DeferredErrorMessage *deferredError = DeferErrorIfSubqueryRequiresMerge( + rangeTableEntry->subquery, true, recurTypeDescription); + if (deferredError) + { + return deferredError; + } + } + } + + return NULL; +} + + /* * FetchFirstRecurType checks whether the relationInfo * contains any recurring table expression, namely a reference table, diff --git a/src/test/regress/expected/distributed_planning.out b/src/test/regress/expected/distributed_planning.out index 42dac188d..f05e46d3a 100644 --- a/src/test/regress/expected/distributed_planning.out +++ b/src/test/regress/expected/distributed_planning.out @@ -831,3 +831,390 @@ SELECT count(*), t1.event FROM date_part_table t1 JOIN date_part_table t2 USING 4 | 1 (5 rows) +TRUNCATE test; +TRUNCATE ref; +insert into test(x, y) SELECT 1, i FROM generate_series(1, 10) i; +insert into test(x, y) SELECT 3, i FROM generate_series(11, 40) i; +insert into test(x, y) SELECT i, 1 FROM generate_series(1, 10) i; +insert into test(x, y) SELECT i, 3 FROM generate_series(11, 40) i; +insert into ref(a, b) SELECT i, 1 FROM generate_series(1, 10) i; +insert into ref(a, b) SELECT i, 3 FROM generate_series(11, 40) i; +insert into ref(a, b) SELECT 1, i FROM generate_series(1, 10) i; +insert into ref(a, b) SELECT 3, i FROM generate_series(11, 40) i; +SELECT count(*) +FROM test, + LATERAL ( + SELECT + ref.a + FROM ref + WHERE + ref.b = test.x + LIMIT 2 + ) q; + count +--------------------------------------------------------------------- + 122 +(1 row) + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + ref.a + FROM ref + WHERE + ref.b = test.y + LIMIT 2 + ) q; + count +--------------------------------------------------------------------- + 122 +(1 row) + +-- Since the only correlates on the distribution column, this can be safely +-- pushed down. But this is currently considered to hard to detect, so we fail. +-- +-- SELECT count(*) +-- FROM ref, +-- LATERAL ( +-- SELECT +-- test.x +-- FROM test +-- WHERE +-- test.x = ref.a +-- LIMIT 2 +-- ) q; +-- This returns wrong results when pushed down. Instead of returning 2 rows, +-- for each row in the reference table. It would return (2 * number of shards) +-- rows for each row in the reference table. +-- See issue #5327 +-- +-- SELECT count(*) +-- FROM ref, +-- LATERAL ( +-- SELECT +-- test.y +-- FROM test +-- WHERE +-- test.y = ref.a +-- LIMIT 2 +-- ) q; +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + LIMIT 2 + ) q; + count +--------------------------------------------------------------------- + 122 +(1 row) + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + test_2.y + FROM test test_2 + WHERE + test_2.x = test.x + LIMIT 2 + ) q; + count +--------------------------------------------------------------------- + 122 +(1 row) + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + LIMIT 2 + ) q JOIN test ON test.x = q.y; + count +--------------------------------------------------------------------- + 2202 +(1 row) + +-- Would require repartitioning to work with subqueries +-- +-- SELECT count(*) +-- FROM test, +-- LATERAL ( +-- SELECT +-- test_2.x +-- FROM test test_2 +-- WHERE +-- test_2.x = test.y +-- LIMIT 2 +-- ) q ; +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + LIMIT 2 + ) q +; + count +--------------------------------------------------------------------- + 1222 +(1 row) + +SELECT count(*) +FROM ref JOIN test on ref.b = test.y, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + LIMIT 2 + ) q +; + count +--------------------------------------------------------------------- + 1222 +(1 row) + +-- Too complex joins for Citus to handle currently +-- +-- SELECT count(*) +-- FROM ref JOIN test on ref.b = test.x, +-- LATERAL ( +-- SELECT +-- test_2.x +-- FROM test test_2 +-- WHERE +-- test_2.x = ref.a +-- LIMIT 2 +-- ) q +-- ; +-- Would require repartitioning to work with subqueries +-- +-- SELECT count(*) +-- FROM ref JOIN test on ref.b = test.x, +-- LATERAL ( +-- SELECT +-- test_2.y +-- FROM test test_2 +-- WHERE +-- test_2.y = ref.a +-- LIMIT 2 +-- ) q +-- ; +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = test.x + LIMIT 2 + ) q +; + count +--------------------------------------------------------------------- + 2202 +(1 row) + +-- Without LIMIT clauses +SELECT count(*) +FROM test, + LATERAL ( + SELECT + ref.a + FROM ref + WHERE + ref.b = test.x + ) q; + count +--------------------------------------------------------------------- + 1120 +(1 row) + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + ref.a + FROM ref + WHERE + ref.b = test.y + ) q; + count +--------------------------------------------------------------------- + 1120 +(1 row) + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.x + FROM test + WHERE + test.x = ref.a + ) q; + count +--------------------------------------------------------------------- + 1120 +(1 row) + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + ) q; + count +--------------------------------------------------------------------- + 1120 +(1 row) + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + ) q; + count +--------------------------------------------------------------------- + 1120 +(1 row) + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + test_2.y + FROM test test_2 + WHERE + test_2.x = test.x + ) q; + count +--------------------------------------------------------------------- + 1120 +(1 row) + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = test.y + ) q ; + count +--------------------------------------------------------------------- + 1120 +(1 row) + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + ) q +; + count +--------------------------------------------------------------------- + 2960 +(1 row) + +SELECT count(*) +FROM ref JOIN test on ref.b = test.y, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + ) q +; + count +--------------------------------------------------------------------- + 2960 +(1 row) + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = ref.a + ) q +; + count +--------------------------------------------------------------------- + 2960 +(1 row) + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.y + FROM test test_2 + WHERE + test_2.y = ref.a + ) q +; + count +--------------------------------------------------------------------- + 2960 +(1 row) + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = test.x + ) q +; + count +--------------------------------------------------------------------- + 31160 +(1 row) + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + ) q JOIN test ON test.x = q.y; + count +--------------------------------------------------------------------- + 31160 +(1 row) + diff --git a/src/test/regress/expected/local_table_join.out b/src/test/regress/expected/local_table_join.out index f2abb1066..effe23b0d 100644 --- a/src/test/regress/expected/local_table_join.out +++ b/src/test/regress/expected/local_table_join.out @@ -1392,11 +1392,8 @@ DEBUG: generating subplan XXX_1 for subquery SELECT true AS bool FROM pg_am LIM DEBUG: Wrapping relation "custom_pg_type" to a subquery DEBUG: generating subplan XXX_2 for subquery SELECT typdefault FROM local_table_join.custom_pg_type WHERE true DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT typdefault FROM (SELECT subq_1.typdefault FROM (SELECT custom_pg_type.typdefault FROM (SELECT custom_pg_type_1.typdefault FROM (SELECT intermediate_result.typdefault FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(typdefault text)) custom_pg_type_1) custom_pg_type, LATERAL (SELECT tbl.a FROM local_table_join.tbl WHERE (custom_pg_type.typdefault OPERATOR(pg_catalog.>) 'a'::text) LIMIT 1) subq_0 WHERE (SELECT intermediate_result.bool FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(bool boolean))) subq_1) subq_2 - typdefault ---------------------------------------------------------------------- - b -(1 row) - +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables -- Not supported because of 4470 select typdefault from ( select typdefault from ( diff --git a/src/test/regress/expected/multi_subquery_complex_queries.out b/src/test/regress/expected/multi_subquery_complex_queries.out index 5432f27c4..3d91da2a3 100644 --- a/src/test/regress/expected/multi_subquery_complex_queries.out +++ b/src/test/regress/expected/multi_subquery_complex_queries.out @@ -1274,16 +1274,8 @@ FROM ORDER BY user_id limit 50; - user_id | lastseen ---------------------------------------------------------------------- - 1 | Thu Nov 23 18:08:26.550729 2017 - 2 | Thu Nov 23 17:26:14.563216 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 6 | Thu Nov 23 18:08:26.550729 2017 -(6 rows) - +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables SELECT "some_users_data".user_id, lastseen FROM (SELECT 2 * user_id as user_id, max(time) AS lastseen @@ -1316,12 +1308,8 @@ FROM ORDER BY user_id limit 50; - user_id | lastseen ---------------------------------------------------------------------- - 4 | Thu Nov 23 17:26:14.563216 2017 - 6 | Thu Nov 23 18:08:26.550729 2017 -(2 rows) - +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables -- LATERAL JOINs used with INNER JOINs SET citus.subquery_pushdown to ON; NOTICE: Setting citus.subquery_pushdown flag is discouraged becuase it forces the planner to pushdown certain queries, skipping relevant correctness checks. @@ -1514,24 +1502,8 @@ DEBUG: Router planner cannot handle multi-shard select queries DEBUG: push down of limit count: 10 DEBUG: generating subplan XXX_2 for subquery SELECT filter_users_1.user_id, last_events_1."time" AS lastseen FROM ((SELECT user_where_1_1.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_1 OPERATOR(pg_catalog.>) 2))) user_where_1_1 JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) user_where_1_join_1 ON ((user_where_1_1.user_id OPERATOR(pg_catalog.<>) user_where_1_join_1.user_id)))) filter_users_1 JOIN LATERAL (SELECT events.user_id, events."time" FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.user_id OPERATOR(pg_catalog.=) filter_users_1.user_id)) ORDER BY events."time" DESC LIMIT 1) last_events_1 ON (true)) ORDER BY last_events_1."time" DESC LIMIT 10 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: push down of limit count: 10 -DEBUG: generating subplan XXX_3 for subquery SELECT some_users_data.user_id, some_recent_users.lastseen FROM ((SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_recent_users JOIN LATERAL (SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.=) some_recent_users.user_id) AND (users.value_2 OPERATOR(pg_catalog.>) 4)) ORDER BY users.user_id LIMIT 1) some_users_data ON (true)) ORDER BY some_recent_users.lastseen DESC LIMIT 10 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, lastseen FROM (SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_users ORDER BY user_id DESC, lastseen DESC LIMIT 10 -DEBUG: Creating router plan - user_id | lastseen ---------------------------------------------------------------------- - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 -(10 rows) - +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables SET citus.enable_repartition_joins to ON; SET client_min_messages TO DEBUG1; -- recursively planner since the inner JOIN is not on the partition key @@ -1590,23 +1562,8 @@ LIMIT 10; DEBUG: generating subplan XXX_1 for subquery SELECT user_id, value_1 FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_2 OPERATOR(pg_catalog.>) 3)) DEBUG: push down of limit count: 10 DEBUG: generating subplan XXX_2 for subquery SELECT filter_users_1.user_id, last_events_1."time" AS lastseen FROM ((SELECT user_where_1_1.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_1 OPERATOR(pg_catalog.>) 2))) user_where_1_1 JOIN (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) user_where_1_join_1 ON ((user_where_1_1.user_id OPERATOR(pg_catalog.=) user_where_1_join_1.value_1)))) filter_users_1 JOIN LATERAL (SELECT events.user_id, events."time" FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.user_id OPERATOR(pg_catalog.=) filter_users_1.user_id)) ORDER BY events."time" DESC LIMIT 1) last_events_1 ON (true)) ORDER BY last_events_1."time" DESC LIMIT 10 -DEBUG: push down of limit count: 10 -DEBUG: generating subplan XXX_3 for subquery SELECT some_users_data.user_id, some_recent_users.lastseen FROM ((SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_recent_users JOIN LATERAL (SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.=) some_recent_users.user_id) AND (users.value_2 OPERATOR(pg_catalog.>) 4)) ORDER BY users.user_id LIMIT 1) some_users_data ON (true)) ORDER BY some_recent_users.lastseen DESC LIMIT 10 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, lastseen FROM (SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_users ORDER BY user_id DESC, lastseen DESC LIMIT 10 - user_id | lastseen ---------------------------------------------------------------------- - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 -(10 rows) - +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables SET citus.enable_repartition_joins to OFF; RESET client_min_messages; -- not supported since upper LATERAL JOIN is not equi join @@ -1716,20 +1673,8 @@ FROM ORDER BY user_id DESC, lastseen DESC LIMIT 10; - user_id | lastseen ---------------------------------------------------------------------- - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 -(10 rows) - +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables -- NESTED INNER JOINs SELECT count(*) AS value, "generated_group_field" @@ -2371,10 +2316,8 @@ FROM ORDER BY value_2 DESC, user_id DESC LIMIT 10; - user_id | value_2 ---------------------------------------------------------------------- -(0 rows) - +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables -- lets test some unsupported set operations -- not supported since we use INTERSECT SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType diff --git a/src/test/regress/expected/unsupported_lateral_subqueries.out b/src/test/regress/expected/unsupported_lateral_subqueries.out new file mode 100644 index 000000000..9c613a199 --- /dev/null +++ b/src/test/regress/expected/unsupported_lateral_subqueries.out @@ -0,0 +1,208 @@ +CREATE SCHEMA unsupported_lateral_joins; +SET search_path TO unsupported_lateral_joins; +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +SET citus.next_shard_id TO 13354100; +CREATE TABLE test(x bigint, y bigint); +SELECT create_distributed_table('test','x'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE ref(a bigint, b bigint); +SELECT create_reference_table('ref'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +insert into test(x, y) SELECT 1, i FROM generate_series(1, 10) i; +insert into test(x, y) SELECT 3, i FROM generate_series(11, 40) i; +insert into test(x, y) SELECT i, 1 FROM generate_series(1, 10) i; +insert into test(x, y) SELECT i, 3 FROM generate_series(11, 40) i; +insert into ref(a, b) SELECT i, 1 FROM generate_series(1, 10) i; +insert into ref(a, b) SELECT i, 3 FROM generate_series(11, 40) i; +insert into ref(a, b) SELECT 1, i FROM generate_series(1, 10) i; +insert into ref(a, b) SELECT 3, i FROM generate_series(11, 40) i; +-- The following queries return wrong results when pushed down. Instead of +-- returning 2 rows, for each row in ref table. They would return (2 * number +-- of shards) rows for each row in the reference table. See issue #5327 +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a reference table (ref) +SELECT count(*) +FROM (VALUES (1), (3)) ref(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a VALUES clause +WITH ref(a) as (select y from test) +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables +SELECT count(*) +FROM generate_series(1, 3) ref(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a table function (ref) +SELECT count(*) +FROM (SELECT generate_series(1, 3)) ref(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a subquery without FROM (ref) +-- make sure right error message is chosen +SELECT count(*) +FROM ref ref_table, + (VALUES (1), (3)) rec_values(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref_table.a + LIMIT 2 + ) q; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a reference table (ref_table) +SELECT count(*) +FROM ref as ref_table, + (VALUES (1), (3)) ref_values(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref_values.a + LIMIT 2 + ) q; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a VALUES clause +SELECT count(*) FROM + ref ref_outer, + LATERAL ( + SELECT * FROM + LATERAL ( SELECT * + FROM ref ref_inner, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref_outer.a + LIMIT 2 + ) q + ) q2 + ) q3; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a reference table (ref_outer) +SELECT count(*) FROM + ref ref_outer, + LATERAL ( + SELECT * FROM + LATERAL ( SELECT * + FROM ref ref_inner, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref_inner.a + LIMIT 2 + ) q + ) q2 + ) q3; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a reference table (ref_inner) +-- Since this only correlates on the distribution column, this can be safely +-- pushed down. But this is currently considered to hard to detect, so we fail. +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.x + FROM test + WHERE + test.x = ref.a + LIMIT 2 + ) q; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a reference table (ref) +-- Would require repartitioning to work with subqueries +SELECT count(*) +FROM test, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = test.y + LIMIT 2 + ) q ; +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +-- Too complex joins for Citus to handle currently +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = ref.a + LIMIT 2 + ) q +; +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +-- Would require repartitioning to work with subqueries +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.y + FROM test test_2 + WHERE + test_2.y = ref.a + LIMIT 2 + ) q +; +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +SET client_min_messages TO WARNING; +DROP SCHEMA unsupported_lateral_joins CASCADE; diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 48e166deb..088a7b8bd 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -80,7 +80,7 @@ test: multi_array_agg multi_limit_clause multi_orderby_limit_pushdown test: multi_jsonb_agg multi_jsonb_object_agg multi_json_agg multi_json_object_agg bool_agg ch_bench_having chbenchmark_all_queries expression_reference_join anonymous_columns test: ch_bench_subquery_repartition test: multi_agg_type_conversion multi_count_type_conversion recursive_relation_planning_restriction_pushdown -test: multi_partition_pruning single_hash_repartition_join +test: multi_partition_pruning single_hash_repartition_join unsupported_lateral_subqueries test: multi_join_pruning multi_hash_pruning intermediate_result_pruning test: multi_null_minmax_value_pruning cursors test: modification_correctness diff --git a/src/test/regress/sql/distributed_planning.sql b/src/test/regress/sql/distributed_planning.sql index b388ad390..b19654ff4 100644 --- a/src/test/regress/sql/distributed_planning.sql +++ b/src/test/regress/sql/distributed_planning.sql @@ -359,3 +359,313 @@ SELECT count(*), event FROM date_part_table WHERE event_time > '2020-01-05' GROU SELECT count(*), event FROM date_part_table WHERE user_id = 12 AND event_time = '2020-01-12 12:00:00' GROUP BY event ORDER BY count(*) DESC, event DESC LIMIT 5; SELECT count(*), t1.event FROM date_part_table t1 JOIN date_part_table t2 USING (user_id) WHERE t1.user_id = 1 AND t2.event_time > '2020-01-03' GROUP BY t1.event ORDER BY count(*) DESC, t1.event DESC LIMIT 5; +TRUNCATE test; +TRUNCATE ref; +insert into test(x, y) SELECT 1, i FROM generate_series(1, 10) i; +insert into test(x, y) SELECT 3, i FROM generate_series(11, 40) i; +insert into test(x, y) SELECT i, 1 FROM generate_series(1, 10) i; +insert into test(x, y) SELECT i, 3 FROM generate_series(11, 40) i; + +insert into ref(a, b) SELECT i, 1 FROM generate_series(1, 10) i; +insert into ref(a, b) SELECT i, 3 FROM generate_series(11, 40) i; +insert into ref(a, b) SELECT 1, i FROM generate_series(1, 10) i; +insert into ref(a, b) SELECT 3, i FROM generate_series(11, 40) i; + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + ref.a + FROM ref + WHERE + ref.b = test.x + LIMIT 2 + ) q; + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + ref.a + FROM ref + WHERE + ref.b = test.y + LIMIT 2 + ) q; + +-- Since the only correlates on the distribution column, this can be safely +-- pushed down. But this is currently considered to hard to detect, so we fail. +-- +-- SELECT count(*) +-- FROM ref, +-- LATERAL ( +-- SELECT +-- test.x +-- FROM test +-- WHERE +-- test.x = ref.a +-- LIMIT 2 +-- ) q; + +-- This returns wrong results when pushed down. Instead of returning 2 rows, +-- for each row in the reference table. It would return (2 * number of shards) +-- rows for each row in the reference table. +-- See issue #5327 +-- +-- SELECT count(*) +-- FROM ref, +-- LATERAL ( +-- SELECT +-- test.y +-- FROM test +-- WHERE +-- test.y = ref.a +-- LIMIT 2 +-- ) q; + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + LIMIT 2 + ) q; + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + test_2.y + FROM test test_2 + WHERE + test_2.x = test.x + LIMIT 2 + ) q; + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + LIMIT 2 + ) q JOIN test ON test.x = q.y; + +-- Would require repartitioning to work with subqueries +-- +-- SELECT count(*) +-- FROM test, +-- LATERAL ( +-- SELECT +-- test_2.x +-- FROM test test_2 +-- WHERE +-- test_2.x = test.y +-- LIMIT 2 +-- ) q ; + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + LIMIT 2 + ) q +; + +SELECT count(*) +FROM ref JOIN test on ref.b = test.y, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + LIMIT 2 + ) q +; + +-- Too complex joins for Citus to handle currently +-- +-- SELECT count(*) +-- FROM ref JOIN test on ref.b = test.x, +-- LATERAL ( +-- SELECT +-- test_2.x +-- FROM test test_2 +-- WHERE +-- test_2.x = ref.a +-- LIMIT 2 +-- ) q +-- ; + +-- Would require repartitioning to work with subqueries +-- +-- SELECT count(*) +-- FROM ref JOIN test on ref.b = test.x, +-- LATERAL ( +-- SELECT +-- test_2.y +-- FROM test test_2 +-- WHERE +-- test_2.y = ref.a +-- LIMIT 2 +-- ) q +-- ; + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = test.x + LIMIT 2 + ) q +; + +-- Without LIMIT clauses +SELECT count(*) +FROM test, + LATERAL ( + SELECT + ref.a + FROM ref + WHERE + ref.b = test.x + ) q; + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + ref.a + FROM ref + WHERE + ref.b = test.y + ) q; + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.x + FROM test + WHERE + test.x = ref.a + ) q; + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + ) q; + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + ) q; + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + test_2.y + FROM test test_2 + WHERE + test_2.x = test.x + ) q; + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = test.y + ) q ; + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + ) q +; + +SELECT count(*) +FROM ref JOIN test on ref.b = test.y, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + ) q +; + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = ref.a + ) q +; + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.y + FROM test test_2 + WHERE + test_2.y = ref.a + ) q +; + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = test.x + ) q +; + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + ) q JOIN test ON test.x = q.y; + diff --git a/src/test/regress/sql/unsupported_lateral_subqueries.sql b/src/test/regress/sql/unsupported_lateral_subqueries.sql new file mode 100644 index 000000000..3e33f107d --- /dev/null +++ b/src/test/regress/sql/unsupported_lateral_subqueries.sql @@ -0,0 +1,197 @@ +CREATE SCHEMA unsupported_lateral_joins; +SET search_path TO unsupported_lateral_joins; +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +SET citus.next_shard_id TO 13354100; + +CREATE TABLE test(x bigint, y bigint); +SELECT create_distributed_table('test','x'); + +CREATE TABLE ref(a bigint, b bigint); +SELECT create_reference_table('ref'); + +insert into test(x, y) SELECT 1, i FROM generate_series(1, 10) i; +insert into test(x, y) SELECT 3, i FROM generate_series(11, 40) i; +insert into test(x, y) SELECT i, 1 FROM generate_series(1, 10) i; +insert into test(x, y) SELECT i, 3 FROM generate_series(11, 40) i; + +insert into ref(a, b) SELECT i, 1 FROM generate_series(1, 10) i; +insert into ref(a, b) SELECT i, 3 FROM generate_series(11, 40) i; +insert into ref(a, b) SELECT 1, i FROM generate_series(1, 10) i; +insert into ref(a, b) SELECT 3, i FROM generate_series(11, 40) i; + +-- The following queries return wrong results when pushed down. Instead of +-- returning 2 rows, for each row in ref table. They would return (2 * number +-- of shards) rows for each row in the reference table. See issue #5327 +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; + +SELECT count(*) +FROM (VALUES (1), (3)) ref(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; + +WITH ref(a) as (select y from test) +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; + +SELECT count(*) +FROM generate_series(1, 3) ref(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; + +SELECT count(*) +FROM (SELECT generate_series(1, 3)) ref(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; + + +-- make sure right error message is chosen +SELECT count(*) +FROM ref ref_table, + (VALUES (1), (3)) rec_values(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref_table.a + LIMIT 2 + ) q; + +SELECT count(*) +FROM ref as ref_table, + (VALUES (1), (3)) ref_values(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref_values.a + LIMIT 2 + ) q; + +SELECT count(*) FROM + ref ref_outer, + LATERAL ( + SELECT * FROM + LATERAL ( SELECT * + FROM ref ref_inner, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref_outer.a + LIMIT 2 + ) q + ) q2 + ) q3; + +SELECT count(*) FROM + ref ref_outer, + LATERAL ( + SELECT * FROM + LATERAL ( SELECT * + FROM ref ref_inner, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref_inner.a + LIMIT 2 + ) q + ) q2 + ) q3; + + + + +-- Since this only correlates on the distribution column, this can be safely +-- pushed down. But this is currently considered to hard to detect, so we fail. +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.x + FROM test + WHERE + test.x = ref.a + LIMIT 2 + ) q; + +-- Would require repartitioning to work with subqueries +SELECT count(*) +FROM test, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = test.y + LIMIT 2 + ) q ; + +-- Too complex joins for Citus to handle currently +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = ref.a + LIMIT 2 + ) q +; + +-- Would require repartitioning to work with subqueries +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.y + FROM test test_2 + WHERE + test_2.y = ref.a + LIMIT 2 + ) q +; + +SET client_min_messages TO WARNING; +DROP SCHEMA unsupported_lateral_joins CASCADE;