diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index f78cec339..5ad7887e9 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -45,6 +45,8 @@ #include "parser/parsetree.h" +#define INVALID_RELID -1 + /* * RecurringTuplesType is used to distinguish different types of expressions * that always produce the same set of tuples when a shard is queried. We make @@ -61,6 +63,17 @@ typedef enum RecurringTuplesType RECURRING_TUPLES_VALUES } RecurringTuplesType; +/* + * RelidsReferenceWalkerContext is used to find Vars in a (sub)query that + * refer to certain relids from the upper query. + */ +typedef struct RelidsReferenceWalkerContext +{ + int level; + Relids relids; + int foundRelid; +} RelidsReferenceWalkerContext; + /* Config variable managed via guc.c */ bool SubqueryPushdown = false; /* is subquery pushdown enabled */ @@ -76,7 +89,9 @@ static RecurringTuplesType FromClauseRecurringTupleType(Query *queryTree); static DeferredErrorMessage * DeferredErrorIfUnsupportedRecurringTuplesJoin( PlannerRestrictionContext *plannerRestrictionContext); static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree); -static DeferredErrorMessage * DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree); +static DeferredErrorMessage * DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree, bool + lateral, + char *referencedThing); static bool ExtractSetOperationStatementWalker(Node *node, List **setOperationList); static RecurringTuplesType FetchFirstRecurType(PlannerInfo *plannerInfo, Relids relids); @@ -90,7 +105,12 @@ static List * CreateSubqueryTargetListAndAdjustVars(List *columnList); static AttrNumber FindResnoForVarInTargetList(List *targetList, int varno, int varattno); static bool RelationInfoContainsOnlyRecurringTuples(PlannerInfo *plannerInfo, Relids relids); +static DeferredErrorMessage * DeferredErrorIfUnsupportedLateralSubquery( + PlannerInfo *plannerInfo, Relids recurringRelIds, Relids nonRecurringRelIds); static Var * PartitionColumnForPushedDownSubquery(Query *query); +static bool ContainsReferencesToRelids(Query *query, Relids relids, int *foundRelid); +static bool ContainsReferencesToRelidsWalker(Node *node, + RelidsReferenceWalkerContext *context); /* @@ -844,6 +864,49 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin( break; } } + else if (joinType == JOIN_INNER && plannerInfo->hasLateralRTEs) + { + /* + * Sometimes we cannot push down INNER JOINS when they have only + * recurring tuples on one side and a lateral on the other side. + * See comment on DeferredErrorIfUnsupportedLateralSubquery for + * details. + * + * When planning inner joins postgres can move RTEs from left to + * right and from right to left. So we don't know on which side the + * lateral join wil appear. Thus we try to find a side of the join + * that only contains recurring tuples. And then we check the other + * side to see if it contains an unsupported lateral join. + * + */ + if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, innerrelRelids)) + { + DeferredErrorMessage *deferredError = + DeferredErrorIfUnsupportedLateralSubquery(plannerInfo, + innerrelRelids, + outerrelRelids); + if (deferredError) + { + return deferredError; + } + } + else if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrelRelids)) + { + /* + * This branch uses "else if" instead of "if", because if both + * sides contain only recurring tuples there will never be an + * unsupported lateral subquery. + */ + DeferredErrorMessage *deferredError = + DeferredErrorIfUnsupportedLateralSubquery(plannerInfo, + outerrelRelids, + innerrelRelids); + if (deferredError) + { + return deferredError; + } + } + } } if (recurType == RECURRING_TUPLES_REFERENCE_TABLE) @@ -950,7 +1013,8 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi */ if (!ContainsReferencesToOuterQuery(subqueryTree)) { - deferredError = DeferErrorIfSubqueryRequiresMerge(subqueryTree); + deferredError = DeferErrorIfSubqueryRequiresMerge(subqueryTree, false, + "another query"); if (deferredError) { return deferredError; @@ -1028,24 +1092,29 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi * column, etc.). */ static DeferredErrorMessage * -DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree) +DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree, bool lateral, + char *referencedThing) { bool preconditionsSatisfied = true; char *errorDetail = NULL; + char *lateralString = lateral ? "lateral " : ""; + if (subqueryTree->limitOffset) { preconditionsSatisfied = false; - errorDetail = "Offset clause is currently unsupported when a subquery " - "references a column from another query"; + errorDetail = psprintf("Offset clause is currently unsupported when a %ssubquery " + "references a column from %s", lateralString, + referencedThing); } /* limit is not supported when SubqueryPushdown is not set */ if (subqueryTree->limitCount && !SubqueryPushdown) { preconditionsSatisfied = false; - errorDetail = "Limit in subquery is currently unsupported when a " - "subquery references a column from another query"; + errorDetail = psprintf("Limit clause is currently unsupported when a " + "%ssubquery references a column from %s", lateralString, + referencedThing); } /* group clause list must include partition column */ @@ -1060,9 +1129,9 @@ DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree) if (!groupOnPartitionColumn) { preconditionsSatisfied = false; - errorDetail = "Group by list without partition column is currently " - "unsupported when a subquery references a column " - "from another query"; + errorDetail = psprintf("Group by list without partition column is currently " + "unsupported when a %ssubquery references a column " + "from %s", lateralString, referencedThing); } } @@ -1070,17 +1139,18 @@ DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree) if (subqueryTree->hasAggs && (subqueryTree->groupClause == NULL)) { preconditionsSatisfied = false; - errorDetail = "Aggregates without group by are currently unsupported " - "when a subquery references a column from another query"; + errorDetail = psprintf("Aggregates without group by are currently unsupported " + "when a %ssubquery references a column from %s", + lateralString, referencedThing); } /* having clause without group by on partition column is not supported */ if (subqueryTree->havingQual && (subqueryTree->groupClause == NULL)) { preconditionsSatisfied = false; - errorDetail = "Having qual without group by on partition column is " - "currently unsupported when a subquery references " - "a column from another query"; + errorDetail = psprintf("Having qual without group by on partition column is " + "currently unsupported when a %ssubquery references " + "a column from %s", lateralString, referencedThing); } /* @@ -1397,6 +1467,259 @@ RelationInfoContainsOnlyRecurringTuples(PlannerInfo *plannerInfo, Relids relids) } +/* + * RecurringTypeDescription returns a discriptive string for the given + * recurType. This string can be used in error messages to help the users + * understand why a query cannot be planned. + */ +static char * +RecurringTypeDescription(RecurringTuplesType recurType) +{ + switch (recurType) + { + case RECURRING_TUPLES_REFERENCE_TABLE: + { + return "a reference table"; + } + + case RECURRING_TUPLES_FUNCTION: + { + return "a table function"; + } + + case RECURRING_TUPLES_EMPTY_JOIN_TREE: + { + return "a subquery without FROM"; + } + + case RECURRING_TUPLES_RESULT_FUNCTION: + { + return "complex subqueries, CTEs or local tables"; + } + + case RECURRING_TUPLES_VALUES: + { + return "a VALUES clause"; + } + + case RECURRING_TUPLES_INVALID: + { + /* + * This branch should never be hit, but it's here just in case it + * happens. + */ + return "an unknown recurring tuple"; + } + } + + /* + * This should never be hit, but is needed to fix compiler warnings. + */ + return "an unknown recurring tuple"; +} + + +/* + * ContainsReferencesToRelids determines whether the given query contains + * any references that point to columns of the given relids. The given relids + * should be from exactly one query level above the given query. + * + * If the function returns true, then foundRelid is set to the first relid that + * was referenced. + * + * There are some queries where it cannot easily be determined if the relids + * are used, e.g because the query contains placeholder vars. In those cases + * this function returns true, because it's better to error out than to return + * wrong results. But in these cases foundRelid is set to INVALID_RELID. + */ +static bool +ContainsReferencesToRelids(Query *query, Relids relids, int *foundRelid) +{ + RelidsReferenceWalkerContext context = { 0 }; + context.level = 1; + context.relids = relids; + context.foundRelid = INVALID_RELID; + int flags = 0; + + if (query_tree_walker(query, ContainsReferencesToRelidsWalker, + &context, flags)) + { + *foundRelid = context.foundRelid; + return true; + } + return false; +} + + +/* + * ContainsReferencesToRelidsWalker determines whether the given query + * contains any Vars that reference the relids in the context. + * + * ContainsReferencesToRelidsWalker recursively descends into subqueries + * and increases the level by 1 before recursing. + */ +static bool +ContainsReferencesToRelidsWalker(Node *node, RelidsReferenceWalkerContext *context) +{ + if (node == NULL) + { + return false; + } + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + if (var->varlevelsup == context->level && bms_is_member(var->varno, + context->relids)) + { + context->foundRelid = var->varno; + return true; + } + + return false; + } + else if (IsA(node, Aggref)) + { + if (((Aggref *) node)->agglevelsup > context->level) + { + /* + * TODO: Only return true when aggref points to an aggregate that + * uses vars from a recurring tuple. + */ + return true; + } + } + else if (IsA(node, GroupingFunc)) + { + if (((GroupingFunc *) node)->agglevelsup > context->level) + { + /* + * TODO: Only return true when groupingfunc points to a grouping + * func that uses vars from a recurring tuple. + */ + return true; + } + + return false; + } + else if (IsA(node, PlaceHolderVar)) + { + if (((PlaceHolderVar *) node)->phlevelsup > context->level) + { + /* + * TODO: Only return true when aggref points to a placeholdervar + * that uses vars from a recurring tuple. + */ + return true; + } + } + else if (IsA(node, Query)) + { + Query *query = (Query *) node; + int flags = 0; + + context->level += 1; + bool found = query_tree_walker(query, ContainsReferencesToRelidsWalker, + context, flags); + context->level -= 1; + + return found; + } + + return expression_tree_walker(node, ContainsReferencesToRelidsWalker, + context); +} + + +/* + * DeferredErrorIfUnsupportedLateralSubquery returns true if + * notFullyRecurringRelids contains a lateral subquery that we do not support. + * + * If there is an inner join with a lateral subquery we cannot + * push it down when the following properties all hold: + * 1. The lateral subquery contains some non recurring tuples + * 2. The lateral subquery references a recurring tuple from + * outside of the subquery (recurringRelids) + * 3. The lateral subquery requires a merge step (e.g. a LIMIT) + * 4. The reference to the recurring tuple should be something else than an + * equality check on the distribution column, e.g. equality on a non + * distribution column. + * + * Property number four is considered both hard to detect and + * probably not used very often, so we only check for 1, 2 and 3. + */ +static DeferredErrorMessage * +DeferredErrorIfUnsupportedLateralSubquery(PlannerInfo *plannerInfo, + Relids recurringRelids, + Relids notFullyRecurringRelids) +{ + int relationId = -1; + while ((relationId = bms_next_member(notFullyRecurringRelids, relationId)) >= 0) + { + RangeTblEntry *rangeTableEntry = plannerInfo->simple_rte_array[relationId]; + + if (!rangeTableEntry->lateral) + { + continue; + } + + /* TODO: What about others kinds? */ + if (rangeTableEntry->rtekind == RTE_SUBQUERY) + { + /* property number 1, contains non-recurring tuples */ + if (!FindNodeMatchingCheckFunctionInRangeTableList( + list_make1(rangeTableEntry), IsDistributedTableRTE)) + { + continue; + } + + /* property number 2, references recurring tuple */ + int recurringRelid = INVALID_RELID; + if (!ContainsReferencesToRelids(rangeTableEntry->subquery, recurringRelids, + &recurringRelid)) + { + continue; + } + + char *recurTypeDescription = + "an aggregate, grouping func or placeholder var coming from the outer query"; + if (recurringRelid != INVALID_RELID) + { + RangeTblEntry *recurringRangeTableEntry = + plannerInfo->simple_rte_array[recurringRelid]; + RecurringTuplesType recurType = RECURRING_TUPLES_INVALID; + ContainsRecurringRTE(recurringRangeTableEntry, &recurType); + recurTypeDescription = RecurringTypeDescription(recurType); + + /* + * Add the alias for all recuring tuples where it is useful to + * see them. We don't add it for VALUES and intermediate + * results, because there the aliases are currently hardcoded + * strings anyway. + */ + if (recurType != RECURRING_TUPLES_VALUES && + recurType != RECURRING_TUPLES_RESULT_FUNCTION) + { + recurTypeDescription = psprintf("%s (%s)", recurTypeDescription, + recurringRangeTableEntry->eref-> + aliasname); + } + } + + /* property number 3, has a merge step */ + DeferredErrorMessage *deferredError = DeferErrorIfSubqueryRequiresMerge( + rangeTableEntry->subquery, true, recurTypeDescription); + if (deferredError) + { + return deferredError; + } + } + } + + return NULL; +} + + /* * FetchFirstRecurType checks whether the relationInfo * contains any recurring table expression, namely a reference table, diff --git a/src/test/regress/expected/distributed_planning.out b/src/test/regress/expected/distributed_planning.out index 42dac188d..f05e46d3a 100644 --- a/src/test/regress/expected/distributed_planning.out +++ b/src/test/regress/expected/distributed_planning.out @@ -831,3 +831,390 @@ SELECT count(*), t1.event FROM date_part_table t1 JOIN date_part_table t2 USING 4 | 1 (5 rows) +TRUNCATE test; +TRUNCATE ref; +insert into test(x, y) SELECT 1, i FROM generate_series(1, 10) i; +insert into test(x, y) SELECT 3, i FROM generate_series(11, 40) i; +insert into test(x, y) SELECT i, 1 FROM generate_series(1, 10) i; +insert into test(x, y) SELECT i, 3 FROM generate_series(11, 40) i; +insert into ref(a, b) SELECT i, 1 FROM generate_series(1, 10) i; +insert into ref(a, b) SELECT i, 3 FROM generate_series(11, 40) i; +insert into ref(a, b) SELECT 1, i FROM generate_series(1, 10) i; +insert into ref(a, b) SELECT 3, i FROM generate_series(11, 40) i; +SELECT count(*) +FROM test, + LATERAL ( + SELECT + ref.a + FROM ref + WHERE + ref.b = test.x + LIMIT 2 + ) q; + count +--------------------------------------------------------------------- + 122 +(1 row) + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + ref.a + FROM ref + WHERE + ref.b = test.y + LIMIT 2 + ) q; + count +--------------------------------------------------------------------- + 122 +(1 row) + +-- Since the only correlates on the distribution column, this can be safely +-- pushed down. But this is currently considered to hard to detect, so we fail. +-- +-- SELECT count(*) +-- FROM ref, +-- LATERAL ( +-- SELECT +-- test.x +-- FROM test +-- WHERE +-- test.x = ref.a +-- LIMIT 2 +-- ) q; +-- This returns wrong results when pushed down. Instead of returning 2 rows, +-- for each row in the reference table. It would return (2 * number of shards) +-- rows for each row in the reference table. +-- See issue #5327 +-- +-- SELECT count(*) +-- FROM ref, +-- LATERAL ( +-- SELECT +-- test.y +-- FROM test +-- WHERE +-- test.y = ref.a +-- LIMIT 2 +-- ) q; +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + LIMIT 2 + ) q; + count +--------------------------------------------------------------------- + 122 +(1 row) + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + test_2.y + FROM test test_2 + WHERE + test_2.x = test.x + LIMIT 2 + ) q; + count +--------------------------------------------------------------------- + 122 +(1 row) + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + LIMIT 2 + ) q JOIN test ON test.x = q.y; + count +--------------------------------------------------------------------- + 2202 +(1 row) + +-- Would require repartitioning to work with subqueries +-- +-- SELECT count(*) +-- FROM test, +-- LATERAL ( +-- SELECT +-- test_2.x +-- FROM test test_2 +-- WHERE +-- test_2.x = test.y +-- LIMIT 2 +-- ) q ; +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + LIMIT 2 + ) q +; + count +--------------------------------------------------------------------- + 1222 +(1 row) + +SELECT count(*) +FROM ref JOIN test on ref.b = test.y, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + LIMIT 2 + ) q +; + count +--------------------------------------------------------------------- + 1222 +(1 row) + +-- Too complex joins for Citus to handle currently +-- +-- SELECT count(*) +-- FROM ref JOIN test on ref.b = test.x, +-- LATERAL ( +-- SELECT +-- test_2.x +-- FROM test test_2 +-- WHERE +-- test_2.x = ref.a +-- LIMIT 2 +-- ) q +-- ; +-- Would require repartitioning to work with subqueries +-- +-- SELECT count(*) +-- FROM ref JOIN test on ref.b = test.x, +-- LATERAL ( +-- SELECT +-- test_2.y +-- FROM test test_2 +-- WHERE +-- test_2.y = ref.a +-- LIMIT 2 +-- ) q +-- ; +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = test.x + LIMIT 2 + ) q +; + count +--------------------------------------------------------------------- + 2202 +(1 row) + +-- Without LIMIT clauses +SELECT count(*) +FROM test, + LATERAL ( + SELECT + ref.a + FROM ref + WHERE + ref.b = test.x + ) q; + count +--------------------------------------------------------------------- + 1120 +(1 row) + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + ref.a + FROM ref + WHERE + ref.b = test.y + ) q; + count +--------------------------------------------------------------------- + 1120 +(1 row) + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.x + FROM test + WHERE + test.x = ref.a + ) q; + count +--------------------------------------------------------------------- + 1120 +(1 row) + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + ) q; + count +--------------------------------------------------------------------- + 1120 +(1 row) + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + ) q; + count +--------------------------------------------------------------------- + 1120 +(1 row) + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + test_2.y + FROM test test_2 + WHERE + test_2.x = test.x + ) q; + count +--------------------------------------------------------------------- + 1120 +(1 row) + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = test.y + ) q ; + count +--------------------------------------------------------------------- + 1120 +(1 row) + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + ) q +; + count +--------------------------------------------------------------------- + 2960 +(1 row) + +SELECT count(*) +FROM ref JOIN test on ref.b = test.y, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + ) q +; + count +--------------------------------------------------------------------- + 2960 +(1 row) + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = ref.a + ) q +; + count +--------------------------------------------------------------------- + 2960 +(1 row) + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.y + FROM test test_2 + WHERE + test_2.y = ref.a + ) q +; + count +--------------------------------------------------------------------- + 2960 +(1 row) + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = test.x + ) q +; + count +--------------------------------------------------------------------- + 31160 +(1 row) + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + ) q JOIN test ON test.x = q.y; + count +--------------------------------------------------------------------- + 31160 +(1 row) + diff --git a/src/test/regress/expected/local_table_join.out b/src/test/regress/expected/local_table_join.out index f2abb1066..effe23b0d 100644 --- a/src/test/regress/expected/local_table_join.out +++ b/src/test/regress/expected/local_table_join.out @@ -1392,11 +1392,8 @@ DEBUG: generating subplan XXX_1 for subquery SELECT true AS bool FROM pg_am LIM DEBUG: Wrapping relation "custom_pg_type" to a subquery DEBUG: generating subplan XXX_2 for subquery SELECT typdefault FROM local_table_join.custom_pg_type WHERE true DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT typdefault FROM (SELECT subq_1.typdefault FROM (SELECT custom_pg_type.typdefault FROM (SELECT custom_pg_type_1.typdefault FROM (SELECT intermediate_result.typdefault FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(typdefault text)) custom_pg_type_1) custom_pg_type, LATERAL (SELECT tbl.a FROM local_table_join.tbl WHERE (custom_pg_type.typdefault OPERATOR(pg_catalog.>) 'a'::text) LIMIT 1) subq_0 WHERE (SELECT intermediate_result.bool FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(bool boolean))) subq_1) subq_2 - typdefault ---------------------------------------------------------------------- - b -(1 row) - +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables -- Not supported because of 4470 select typdefault from ( select typdefault from ( diff --git a/src/test/regress/expected/multi_subquery_complex_queries.out b/src/test/regress/expected/multi_subquery_complex_queries.out index 5432f27c4..3d91da2a3 100644 --- a/src/test/regress/expected/multi_subquery_complex_queries.out +++ b/src/test/regress/expected/multi_subquery_complex_queries.out @@ -1274,16 +1274,8 @@ FROM ORDER BY user_id limit 50; - user_id | lastseen ---------------------------------------------------------------------- - 1 | Thu Nov 23 18:08:26.550729 2017 - 2 | Thu Nov 23 17:26:14.563216 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 6 | Thu Nov 23 18:08:26.550729 2017 -(6 rows) - +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables SELECT "some_users_data".user_id, lastseen FROM (SELECT 2 * user_id as user_id, max(time) AS lastseen @@ -1316,12 +1308,8 @@ FROM ORDER BY user_id limit 50; - user_id | lastseen ---------------------------------------------------------------------- - 4 | Thu Nov 23 17:26:14.563216 2017 - 6 | Thu Nov 23 18:08:26.550729 2017 -(2 rows) - +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables -- LATERAL JOINs used with INNER JOINs SET citus.subquery_pushdown to ON; NOTICE: Setting citus.subquery_pushdown flag is discouraged becuase it forces the planner to pushdown certain queries, skipping relevant correctness checks. @@ -1514,24 +1502,8 @@ DEBUG: Router planner cannot handle multi-shard select queries DEBUG: push down of limit count: 10 DEBUG: generating subplan XXX_2 for subquery SELECT filter_users_1.user_id, last_events_1."time" AS lastseen FROM ((SELECT user_where_1_1.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_1 OPERATOR(pg_catalog.>) 2))) user_where_1_1 JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) user_where_1_join_1 ON ((user_where_1_1.user_id OPERATOR(pg_catalog.<>) user_where_1_join_1.user_id)))) filter_users_1 JOIN LATERAL (SELECT events.user_id, events."time" FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.user_id OPERATOR(pg_catalog.=) filter_users_1.user_id)) ORDER BY events."time" DESC LIMIT 1) last_events_1 ON (true)) ORDER BY last_events_1."time" DESC LIMIT 10 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: push down of limit count: 10 -DEBUG: generating subplan XXX_3 for subquery SELECT some_users_data.user_id, some_recent_users.lastseen FROM ((SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_recent_users JOIN LATERAL (SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.=) some_recent_users.user_id) AND (users.value_2 OPERATOR(pg_catalog.>) 4)) ORDER BY users.user_id LIMIT 1) some_users_data ON (true)) ORDER BY some_recent_users.lastseen DESC LIMIT 10 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, lastseen FROM (SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_users ORDER BY user_id DESC, lastseen DESC LIMIT 10 -DEBUG: Creating router plan - user_id | lastseen ---------------------------------------------------------------------- - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 -(10 rows) - +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables SET citus.enable_repartition_joins to ON; SET client_min_messages TO DEBUG1; -- recursively planner since the inner JOIN is not on the partition key @@ -1590,23 +1562,8 @@ LIMIT 10; DEBUG: generating subplan XXX_1 for subquery SELECT user_id, value_1 FROM public.users_table users WHERE ((user_id OPERATOR(pg_catalog.>) 1) AND (user_id OPERATOR(pg_catalog.<) 4) AND (value_2 OPERATOR(pg_catalog.>) 3)) DEBUG: push down of limit count: 10 DEBUG: generating subplan XXX_2 for subquery SELECT filter_users_1.user_id, last_events_1."time" AS lastseen FROM ((SELECT user_where_1_1.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 1) AND (users.user_id OPERATOR(pg_catalog.<) 4) AND (users.value_1 OPERATOR(pg_catalog.>) 2))) user_where_1_1 JOIN (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) user_where_1_join_1 ON ((user_where_1_1.user_id OPERATOR(pg_catalog.=) user_where_1_join_1.value_1)))) filter_users_1 JOIN LATERAL (SELECT events.user_id, events."time" FROM public.events_table events WHERE ((events.user_id OPERATOR(pg_catalog.>) 1) AND (events.user_id OPERATOR(pg_catalog.<) 4) AND (events.user_id OPERATOR(pg_catalog.=) filter_users_1.user_id)) ORDER BY events."time" DESC LIMIT 1) last_events_1 ON (true)) ORDER BY last_events_1."time" DESC LIMIT 10 -DEBUG: push down of limit count: 10 -DEBUG: generating subplan XXX_3 for subquery SELECT some_users_data.user_id, some_recent_users.lastseen FROM ((SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_recent_users JOIN LATERAL (SELECT users.user_id FROM public.users_table users WHERE ((users.user_id OPERATOR(pg_catalog.=) some_recent_users.user_id) AND (users.value_2 OPERATOR(pg_catalog.>) 4)) ORDER BY users.user_id LIMIT 1) some_users_data ON (true)) ORDER BY some_recent_users.lastseen DESC LIMIT 10 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, lastseen FROM (SELECT intermediate_result.user_id, intermediate_result.lastseen FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, lastseen timestamp without time zone)) some_users ORDER BY user_id DESC, lastseen DESC LIMIT 10 - user_id | lastseen ---------------------------------------------------------------------- - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 - 3 | Thu Nov 23 18:08:26.550729 2017 -(10 rows) - +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables SET citus.enable_repartition_joins to OFF; RESET client_min_messages; -- not supported since upper LATERAL JOIN is not equi join @@ -1716,20 +1673,8 @@ FROM ORDER BY user_id DESC, lastseen DESC LIMIT 10; - user_id | lastseen ---------------------------------------------------------------------- - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 - 5 | Thu Nov 23 17:26:14.563216 2017 -(10 rows) - +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables -- NESTED INNER JOINs SELECT count(*) AS value, "generated_group_field" @@ -2371,10 +2316,8 @@ FROM ORDER BY value_2 DESC, user_id DESC LIMIT 10; - user_id | value_2 ---------------------------------------------------------------------- -(0 rows) - +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables -- lets test some unsupported set operations -- not supported since we use INTERSECT SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType diff --git a/src/test/regress/expected/unsupported_lateral_subqueries.out b/src/test/regress/expected/unsupported_lateral_subqueries.out new file mode 100644 index 000000000..9c613a199 --- /dev/null +++ b/src/test/regress/expected/unsupported_lateral_subqueries.out @@ -0,0 +1,208 @@ +CREATE SCHEMA unsupported_lateral_joins; +SET search_path TO unsupported_lateral_joins; +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +SET citus.next_shard_id TO 13354100; +CREATE TABLE test(x bigint, y bigint); +SELECT create_distributed_table('test','x'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE ref(a bigint, b bigint); +SELECT create_reference_table('ref'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +insert into test(x, y) SELECT 1, i FROM generate_series(1, 10) i; +insert into test(x, y) SELECT 3, i FROM generate_series(11, 40) i; +insert into test(x, y) SELECT i, 1 FROM generate_series(1, 10) i; +insert into test(x, y) SELECT i, 3 FROM generate_series(11, 40) i; +insert into ref(a, b) SELECT i, 1 FROM generate_series(1, 10) i; +insert into ref(a, b) SELECT i, 3 FROM generate_series(11, 40) i; +insert into ref(a, b) SELECT 1, i FROM generate_series(1, 10) i; +insert into ref(a, b) SELECT 3, i FROM generate_series(11, 40) i; +-- The following queries return wrong results when pushed down. Instead of +-- returning 2 rows, for each row in ref table. They would return (2 * number +-- of shards) rows for each row in the reference table. See issue #5327 +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a reference table (ref) +SELECT count(*) +FROM (VALUES (1), (3)) ref(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a VALUES clause +WITH ref(a) as (select y from test) +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from complex subqueries, CTEs or local tables +SELECT count(*) +FROM generate_series(1, 3) ref(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a table function (ref) +SELECT count(*) +FROM (SELECT generate_series(1, 3)) ref(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a subquery without FROM (ref) +-- make sure right error message is chosen +SELECT count(*) +FROM ref ref_table, + (VALUES (1), (3)) rec_values(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref_table.a + LIMIT 2 + ) q; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a reference table (ref_table) +SELECT count(*) +FROM ref as ref_table, + (VALUES (1), (3)) ref_values(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref_values.a + LIMIT 2 + ) q; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a VALUES clause +SELECT count(*) FROM + ref ref_outer, + LATERAL ( + SELECT * FROM + LATERAL ( SELECT * + FROM ref ref_inner, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref_outer.a + LIMIT 2 + ) q + ) q2 + ) q3; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a reference table (ref_outer) +SELECT count(*) FROM + ref ref_outer, + LATERAL ( + SELECT * FROM + LATERAL ( SELECT * + FROM ref ref_inner, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref_inner.a + LIMIT 2 + ) q + ) q2 + ) q3; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a reference table (ref_inner) +-- Since this only correlates on the distribution column, this can be safely +-- pushed down. But this is currently considered to hard to detect, so we fail. +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.x + FROM test + WHERE + test.x = ref.a + LIMIT 2 + ) q; +ERROR: cannot push down this subquery +DETAIL: Limit clause is currently unsupported when a lateral subquery references a column from a reference table (ref) +-- Would require repartitioning to work with subqueries +SELECT count(*) +FROM test, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = test.y + LIMIT 2 + ) q ; +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +-- Too complex joins for Citus to handle currently +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = ref.a + LIMIT 2 + ) q +; +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +-- Would require repartitioning to work with subqueries +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.y + FROM test test_2 + WHERE + test_2.y = ref.a + LIMIT 2 + ) q +; +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +SET client_min_messages TO WARNING; +DROP SCHEMA unsupported_lateral_joins CASCADE; diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 48e166deb..088a7b8bd 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -80,7 +80,7 @@ test: multi_array_agg multi_limit_clause multi_orderby_limit_pushdown test: multi_jsonb_agg multi_jsonb_object_agg multi_json_agg multi_json_object_agg bool_agg ch_bench_having chbenchmark_all_queries expression_reference_join anonymous_columns test: ch_bench_subquery_repartition test: multi_agg_type_conversion multi_count_type_conversion recursive_relation_planning_restriction_pushdown -test: multi_partition_pruning single_hash_repartition_join +test: multi_partition_pruning single_hash_repartition_join unsupported_lateral_subqueries test: multi_join_pruning multi_hash_pruning intermediate_result_pruning test: multi_null_minmax_value_pruning cursors test: modification_correctness diff --git a/src/test/regress/sql/distributed_planning.sql b/src/test/regress/sql/distributed_planning.sql index b388ad390..b19654ff4 100644 --- a/src/test/regress/sql/distributed_planning.sql +++ b/src/test/regress/sql/distributed_planning.sql @@ -359,3 +359,313 @@ SELECT count(*), event FROM date_part_table WHERE event_time > '2020-01-05' GROU SELECT count(*), event FROM date_part_table WHERE user_id = 12 AND event_time = '2020-01-12 12:00:00' GROUP BY event ORDER BY count(*) DESC, event DESC LIMIT 5; SELECT count(*), t1.event FROM date_part_table t1 JOIN date_part_table t2 USING (user_id) WHERE t1.user_id = 1 AND t2.event_time > '2020-01-03' GROUP BY t1.event ORDER BY count(*) DESC, t1.event DESC LIMIT 5; +TRUNCATE test; +TRUNCATE ref; +insert into test(x, y) SELECT 1, i FROM generate_series(1, 10) i; +insert into test(x, y) SELECT 3, i FROM generate_series(11, 40) i; +insert into test(x, y) SELECT i, 1 FROM generate_series(1, 10) i; +insert into test(x, y) SELECT i, 3 FROM generate_series(11, 40) i; + +insert into ref(a, b) SELECT i, 1 FROM generate_series(1, 10) i; +insert into ref(a, b) SELECT i, 3 FROM generate_series(11, 40) i; +insert into ref(a, b) SELECT 1, i FROM generate_series(1, 10) i; +insert into ref(a, b) SELECT 3, i FROM generate_series(11, 40) i; + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + ref.a + FROM ref + WHERE + ref.b = test.x + LIMIT 2 + ) q; + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + ref.a + FROM ref + WHERE + ref.b = test.y + LIMIT 2 + ) q; + +-- Since the only correlates on the distribution column, this can be safely +-- pushed down. But this is currently considered to hard to detect, so we fail. +-- +-- SELECT count(*) +-- FROM ref, +-- LATERAL ( +-- SELECT +-- test.x +-- FROM test +-- WHERE +-- test.x = ref.a +-- LIMIT 2 +-- ) q; + +-- This returns wrong results when pushed down. Instead of returning 2 rows, +-- for each row in the reference table. It would return (2 * number of shards) +-- rows for each row in the reference table. +-- See issue #5327 +-- +-- SELECT count(*) +-- FROM ref, +-- LATERAL ( +-- SELECT +-- test.y +-- FROM test +-- WHERE +-- test.y = ref.a +-- LIMIT 2 +-- ) q; + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + LIMIT 2 + ) q; + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + test_2.y + FROM test test_2 + WHERE + test_2.x = test.x + LIMIT 2 + ) q; + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + LIMIT 2 + ) q JOIN test ON test.x = q.y; + +-- Would require repartitioning to work with subqueries +-- +-- SELECT count(*) +-- FROM test, +-- LATERAL ( +-- SELECT +-- test_2.x +-- FROM test test_2 +-- WHERE +-- test_2.x = test.y +-- LIMIT 2 +-- ) q ; + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + LIMIT 2 + ) q +; + +SELECT count(*) +FROM ref JOIN test on ref.b = test.y, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + LIMIT 2 + ) q +; + +-- Too complex joins for Citus to handle currently +-- +-- SELECT count(*) +-- FROM ref JOIN test on ref.b = test.x, +-- LATERAL ( +-- SELECT +-- test_2.x +-- FROM test test_2 +-- WHERE +-- test_2.x = ref.a +-- LIMIT 2 +-- ) q +-- ; + +-- Would require repartitioning to work with subqueries +-- +-- SELECT count(*) +-- FROM ref JOIN test on ref.b = test.x, +-- LATERAL ( +-- SELECT +-- test_2.y +-- FROM test test_2 +-- WHERE +-- test_2.y = ref.a +-- LIMIT 2 +-- ) q +-- ; + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = test.x + LIMIT 2 + ) q +; + +-- Without LIMIT clauses +SELECT count(*) +FROM test, + LATERAL ( + SELECT + ref.a + FROM ref + WHERE + ref.b = test.x + ) q; + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + ref.a + FROM ref + WHERE + ref.b = test.y + ) q; + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.x + FROM test + WHERE + test.x = ref.a + ) q; + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + ) q; + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + ) q; + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + test_2.y + FROM test test_2 + WHERE + test_2.x = test.x + ) q; + +SELECT count(*) +FROM test, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = test.y + ) q ; + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + ) q +; + +SELECT count(*) +FROM ref JOIN test on ref.b = test.y, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + ) q +; + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = ref.a + ) q +; + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.y + FROM test test_2 + WHERE + test_2.y = ref.a + ) q +; + +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = test.x + ) q +; + +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + ref_2.b y + FROM ref ref_2 + WHERE + ref_2.b = ref.a + ) q JOIN test ON test.x = q.y; + diff --git a/src/test/regress/sql/unsupported_lateral_subqueries.sql b/src/test/regress/sql/unsupported_lateral_subqueries.sql new file mode 100644 index 000000000..3e33f107d --- /dev/null +++ b/src/test/regress/sql/unsupported_lateral_subqueries.sql @@ -0,0 +1,197 @@ +CREATE SCHEMA unsupported_lateral_joins; +SET search_path TO unsupported_lateral_joins; +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +SET citus.next_shard_id TO 13354100; + +CREATE TABLE test(x bigint, y bigint); +SELECT create_distributed_table('test','x'); + +CREATE TABLE ref(a bigint, b bigint); +SELECT create_reference_table('ref'); + +insert into test(x, y) SELECT 1, i FROM generate_series(1, 10) i; +insert into test(x, y) SELECT 3, i FROM generate_series(11, 40) i; +insert into test(x, y) SELECT i, 1 FROM generate_series(1, 10) i; +insert into test(x, y) SELECT i, 3 FROM generate_series(11, 40) i; + +insert into ref(a, b) SELECT i, 1 FROM generate_series(1, 10) i; +insert into ref(a, b) SELECT i, 3 FROM generate_series(11, 40) i; +insert into ref(a, b) SELECT 1, i FROM generate_series(1, 10) i; +insert into ref(a, b) SELECT 3, i FROM generate_series(11, 40) i; + +-- The following queries return wrong results when pushed down. Instead of +-- returning 2 rows, for each row in ref table. They would return (2 * number +-- of shards) rows for each row in the reference table. See issue #5327 +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; + +SELECT count(*) +FROM (VALUES (1), (3)) ref(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; + +WITH ref(a) as (select y from test) +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; + +SELECT count(*) +FROM generate_series(1, 3) ref(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; + +SELECT count(*) +FROM (SELECT generate_series(1, 3)) ref(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref.a + LIMIT 2 + ) q; + + +-- make sure right error message is chosen +SELECT count(*) +FROM ref ref_table, + (VALUES (1), (3)) rec_values(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref_table.a + LIMIT 2 + ) q; + +SELECT count(*) +FROM ref as ref_table, + (VALUES (1), (3)) ref_values(a), + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref_values.a + LIMIT 2 + ) q; + +SELECT count(*) FROM + ref ref_outer, + LATERAL ( + SELECT * FROM + LATERAL ( SELECT * + FROM ref ref_inner, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref_outer.a + LIMIT 2 + ) q + ) q2 + ) q3; + +SELECT count(*) FROM + ref ref_outer, + LATERAL ( + SELECT * FROM + LATERAL ( SELECT * + FROM ref ref_inner, + LATERAL ( + SELECT + test.y + FROM test + WHERE + test.y = ref_inner.a + LIMIT 2 + ) q + ) q2 + ) q3; + + + + +-- Since this only correlates on the distribution column, this can be safely +-- pushed down. But this is currently considered to hard to detect, so we fail. +SELECT count(*) +FROM ref, + LATERAL ( + SELECT + test.x + FROM test + WHERE + test.x = ref.a + LIMIT 2 + ) q; + +-- Would require repartitioning to work with subqueries +SELECT count(*) +FROM test, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = test.y + LIMIT 2 + ) q ; + +-- Too complex joins for Citus to handle currently +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.x + FROM test test_2 + WHERE + test_2.x = ref.a + LIMIT 2 + ) q +; + +-- Would require repartitioning to work with subqueries +SELECT count(*) +FROM ref JOIN test on ref.b = test.x, + LATERAL ( + SELECT + test_2.y + FROM test test_2 + WHERE + test_2.y = ref.a + LIMIT 2 + ) q +; + +SET client_min_messages TO WARNING; +DROP SCHEMA unsupported_lateral_joins CASCADE;