From f339450a9d024666dced97c8353a34290a4b2a87 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Fri, 18 Nov 2022 17:06:02 +0300 Subject: [PATCH 1/5] Phase - I: recursively plan non-recurring relations --- .../distributed/planner/distributed_planner.c | 2 + .../planner/local_distributed_join_planner.c | 5 +- .../distributed/planner/recursive_planning.c | 335 ++++++++++++ .../relation_restriction_equivalence.c | 1 + src/include/distributed/distributed_planner.h | 1 + .../local_distributed_join_planner.h | 3 + .../expected/citus_local_tables_queries.out | 9 +- .../expected/citus_local_tables_queries_0.out | 9 +- .../citus_local_tables_queries_mx.out | 8 +- src/test/regress/expected/cross_join.out | 58 ++- .../expected/local_dist_join_mixed.out | 13 +- .../regress/expected/local_table_join.out | 13 +- .../regress/expected/mixed_relkind_tests.out | 13 +- .../expected/multi_dropped_column_aliases.out | 20 +- .../expected/multi_outer_join_reference.out | 62 ++- .../expected/multi_shard_update_delete.out | 27 +- ...ulti_subquery_complex_reference_clause.out | 489 +++++++++++++----- ...lti_subquery_in_where_reference_clause.out | 3 +- src/test/regress/expected/multi_view.out | 18 +- .../expected/non_colocated_subquery_joins.out | 27 +- .../set_operation_and_local_tables.out | 13 +- src/test/regress/expected/set_operations.out | 30 +- .../regress/expected/sqlancer_failures.out | 7 +- src/test/regress/expected/values.out | 28 +- src/test/regress/expected/with_join.out | 39 +- .../sql/citus_local_tables_queries.sql | 1 - .../sql/citus_local_tables_queries_mx.sql | 2 +- src/test/regress/sql/cross_join.sql | 2 +- src/test/regress/sql/mixed_relkind_tests.sql | 2 - .../sql/multi_dropped_column_aliases.sql | 5 +- .../sql/multi_outer_join_reference.sql | 9 +- .../regress/sql/multi_shard_update_delete.sql | 15 +- ...ulti_subquery_complex_reference_clause.sql | 35 +- src/test/regress/sql/multi_view.sql | 9 +- .../sql/non_colocated_subquery_joins.sql | 2 +- 
src/test/regress/sql/values.sql | 4 +- src/test/regress/sql/with_join.sql | 6 +- 37 files changed, 1045 insertions(+), 280 deletions(-) diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 917ab0540..b02317773 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -1845,6 +1845,8 @@ multi_join_restriction_hook(PlannerInfo *root, */ joinRestrictionContext->hasSemiJoin = joinRestrictionContext->hasSemiJoin || extra->sjinfo->jointype == JOIN_SEMI; + joinRestrictionContext->hasOuterJoin = joinRestrictionContext->hasOuterJoin || + IS_OUTER_JOIN(extra->sjinfo->jointype); MemoryContextSwitchTo(oldMemoryContext); } diff --git a/src/backend/distributed/planner/local_distributed_join_planner.c b/src/backend/distributed/planner/local_distributed_join_planner.c index 9f93b447c..449b96195 100644 --- a/src/backend/distributed/planner/local_distributed_join_planner.c +++ b/src/backend/distributed/planner/local_distributed_join_planner.c @@ -173,9 +173,6 @@ typedef enum ConversionChoice static bool HasConstantFilterOnUniqueColumn(RangeTblEntry *rangeTableEntry, RelationRestriction *relationRestriction); -static List * RequiredAttrNumbersForRelation(RangeTblEntry *relationRte, - PlannerRestrictionContext * - plannerRestrictionContext); static ConversionCandidates * CreateConversionCandidates(PlannerRestrictionContext * plannerRestrictionContext, List *rangeTableList, @@ -474,7 +471,7 @@ AppendUniqueIndexColumnsToList(Form_pg_index indexForm, List **uniqueIndexGroups * The function could be optimized by not adding the columns that only appear * WHERE clause as a filter (e.g., not a join clause). 
*/ -static List * +List * RequiredAttrNumbersForRelation(RangeTblEntry *rangeTableEntry, PlannerRestrictionContext *plannerRestrictionContext) { diff --git a/src/backend/distributed/planner/recursive_planning.c b/src/backend/distributed/planner/recursive_planning.c index 2ebaba829..28b856b2c 100644 --- a/src/backend/distributed/planner/recursive_planning.c +++ b/src/backend/distributed/planner/recursive_planning.c @@ -137,6 +137,7 @@ static bool ShouldRecursivelyPlanNonColocatedSubqueries(Query *subquery, RecursivePlanningContext * context); static bool ContainsSubquery(Query *query); +static bool ShouldRecursivelyPlanOuterJoins(RecursivePlanningContext *context); static void RecursivelyPlanNonColocatedSubqueries(Query *subquery, RecursivePlanningContext *context); static void RecursivelyPlanNonColocatedJoinWalker(Node *joinNode, @@ -149,6 +150,11 @@ static void RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query, colocatedJoinChecker, RecursivePlanningContext * recursivePlanningContext); +static bool RecursivelyPlanRecurringTupleOuterJoinWalker(Node *node, Query *query, + RecursivePlanningContext *context); +static void RecursivelyPlanDistributedJoinNode(Node *node, Query *query, + RecursivePlanningContext *context); +static bool IsRTERefRecurring(RangeTblRef *rangeTableRef, Query *query); static List * SublinkListFromWhere(Query *originalQuery); static bool ExtractSublinkWalker(Node *node, List **sublinkList); static bool ShouldRecursivelyPlanSublinks(Query *query); @@ -359,6 +365,22 @@ RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext *context RecursivelyPlanLocalTableJoins(query, context); } + /* + * Similarly, logical planner cannot handle outer joins when the outer rel + * is recurring, such as "<recurring> LEFT JOIN <distributed>". In that case, + * we convert distributed table into a subquery and recursively plan inner + * side of the outer join.
That way, inner rel gets converted into an intermediate + * result and logical planner can handle the new query since it's of the form + * "<recurring> LEFT JOIN <recurring>". + * + * See DeferredErrorIfUnsupportedRecurringTuplesJoin for the supported join + * types. + */ + if (ShouldRecursivelyPlanOuterJoins(context)) + { + RecursivelyPlanRecurringTupleOuterJoinWalker((Node *) query->jointree, + query, context); + } return NULL; } @@ -440,6 +462,25 @@ ContainsSubquery(Query *query) } +/* + * ShouldRecursivelyPlanOuterJoins returns true if the JoinRestrictionContext + * that given RecursivePlanningContext holds implies that the query has outer + * join(s) that might need to be recursively planned. + */ +static bool +ShouldRecursivelyPlanOuterJoins(RecursivePlanningContext *context) +{ + if (!context || !context->plannerRestrictionContext || + !context->plannerRestrictionContext->joinRestrictionContext) + { + ereport(ERROR, (errmsg("unexpectedly got NULL pointer in recursive " + "planning context"))); + } + + return context->plannerRestrictionContext->joinRestrictionContext->hasOuterJoin; +} + + /* * RecursivelyPlanNonColocatedSubqueries gets a query which includes one or more * other subqueries that are not joined on their distribution keys. The function @@ -599,6 +640,300 @@ RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query, } +/* + * RecursivelyPlanRecurringTupleOuterJoinWalker descends into a join tree and + * recursively plans all non-recurring (i.e., distributed) rels that + * participate in an outer join expression together with a recurring rel, + * such as <distributed> in "<recurring> LEFT JOIN <distributed>", i.e., + * where the recurring rel causes returning recurring tuples from the worker + * nodes. + * + * Returns true if given node is recurring. + * + * See RecursivelyPlanDistributedJoinNode() function for the explanation on + * what it means for a node to be "recurring" or "distributed".
+ */ +static bool +RecursivelyPlanRecurringTupleOuterJoinWalker(Node *node, Query *query, + RecursivePlanningContext * + recursivePlanningContext) +{ + if (node == NULL) + { + return false; + } + else if (IsA(node, FromExpr)) + { + FromExpr *fromExpr = (FromExpr *) node; + ListCell *fromExprCell; + + /* search for join trees in each FROM element */ + foreach(fromExprCell, fromExpr->fromlist) + { + Node *fromElement = (Node *) lfirst(fromExprCell); + + RecursivelyPlanRecurringTupleOuterJoinWalker(fromElement, query, + recursivePlanningContext); + } + + /* + * Can only appear during the top-level call and top-level callers + * are not interested in the return value. Even more, we can't tell + * whether a FromExpr is recurring or not. + */ + return false; + } + else if (IsA(node, JoinExpr)) + { + JoinExpr *joinExpr = (JoinExpr *) node; + + Node *leftNode = joinExpr->larg; + Node *rightNode = joinExpr->rarg; + + /* + * There may be recursively plannable outer joins deeper in the join tree. + * + * We first handle the sub join trees and then the top level one since the + * top level join expression might not require recursive planning after + * handling the sub join trees. + */ + bool leftNodeRecurs = + RecursivelyPlanRecurringTupleOuterJoinWalker(leftNode, query, + recursivePlanningContext); + bool rightNodeRecurs = + RecursivelyPlanRecurringTupleOuterJoinWalker(rightNode, query, + recursivePlanningContext); + switch (joinExpr->jointype) + { + case JOIN_LEFT: + { + /* left join */ + if (leftNodeRecurs && !rightNodeRecurs) + { + ereport(DEBUG1, (errmsg("recursively planning right side of " + "the left join since the outer side " + "is a recurring rel"))); + RecursivelyPlanDistributedJoinNode(rightNode, query, + recursivePlanningContext); + } + + /* + * A LEFT JOIN is recurring if the lhs is recurring. 
+ * Note that we should have converted the rhs into a recurring + * one too if the lhs is recurring, but this anyway has no + * effect when deciding whether a LEFT JOIN is recurring. + */ + return leftNodeRecurs; + } + + case JOIN_RIGHT: + { + /* <distributed> right join <recurring> */ + if (!leftNodeRecurs && rightNodeRecurs) + { + ereport(DEBUG1, (errmsg("recursively planning left side of " + "the right join since the outer side " + "is a recurring rel"))); + RecursivelyPlanDistributedJoinNode(leftNode, query, + recursivePlanningContext); + } + + /* + * Similar to LEFT JOINs, a RIGHT JOIN is recurring if the rhs + * is recurring. + */ + return rightNodeRecurs; + } + + case JOIN_FULL: + { + /* + * <recurring> full join <distributed> + * <distributed> full join <recurring> + */ + if (leftNodeRecurs && !rightNodeRecurs) + { + ereport(DEBUG1, (errmsg("recursively planning right side of " + "the full join since the other side " + "is a recurring rel"))); + RecursivelyPlanDistributedJoinNode(rightNode, query, + recursivePlanningContext); + } + else if (!leftNodeRecurs && rightNodeRecurs) + { + ereport(DEBUG1, (errmsg("recursively planning left side of " + "the full join since the other side " + "is a recurring rel"))); + RecursivelyPlanDistributedJoinNode(leftNode, query, + recursivePlanningContext); + } + + /* + * A FULL JOIN is recurring if either side of the join is + * recurring. As in other outer join types, it doesn't matter + * whether the other side was / became recurring or not. + */ + return leftNodeRecurs || rightNodeRecurs; + } + + case JOIN_INNER: + { + /* + * We don't need to recursively plan non-outer joins and we + * already descended into sub join trees to handle outer joins + * buried in them.
+ */ + return leftNodeRecurs && rightNodeRecurs; + } + + default: + { + ereport(ERROR, (errmsg("got unexpected join type (%d) when recursively " + "planning a join", + joinExpr->jointype))); + } + } + } + else if (IsA(node, RangeTblRef)) + { + return IsRTERefRecurring((RangeTblRef *) node, query); + } + else + { + ereport(ERROR, errmsg("got unexpected node type (%d) when recursively " + "planning a join", + nodeTag(node))); + } +} + + +/* + * RecursivelyPlanDistributedJoinNode is a helper function for + * RecursivelyPlanRecurringTupleOuterJoinWalker that recursively plans given + * distributed node that is known to be inner side of an outer join. + * + * We call a node "distributed" if it points to a distributed table or a + * more complex object (i.e., a join tree or a subquery) that can be pushed + * down to the worker nodes directly. For a join, this means that it's either + * an INNER join where any side of it is a distributed table / a distributed + * sub join tree, or an OUTER join where the outer side is a distributed table + * / a distributed sub join tree. + */ +static void +RecursivelyPlanDistributedJoinNode(Node *node, Query *query, + RecursivePlanningContext *recursivePlanningContext) +{ + if (IsA(node, JoinExpr)) + { + /* + * XXX: This, for example, means that RecursivelyPlanRecurringTupleOuterJoinWalker + * needs to plan inner side, i.e., <distributed> INNER JOIN <distributed>, + * of the following join: + * + * <recurring> LEFT JOIN (<distributed> INNER JOIN <distributed>) + * + * However, this would require moving part of the join tree into a + * subquery but this implies that we need to rebuild the rtable and + * re-point all the Vars to the new rtable indexes. We have not + * implemented that yet.
+ */ + ereport(DEBUG4, (errmsg("recursive planner cannot plan distributed sub " + "join nodes yet"))); + return; + } + + if (!IsA(node, RangeTblRef)) + { + ereport(ERROR, (errmsg("unexpected join node type (%d)", + nodeTag(node)))); + } + + RangeTblRef *rangeTableRef = (RangeTblRef *) node; + if (IsRTERefRecurring(rangeTableRef, query)) + { + /* + * Not the top-level callers but RecursivelyPlanDistributedJoinNode + * might call itself for recurring nodes and need to skip them. + */ + return; + } + + RangeTblEntry *distributedRte = rt_fetch(rangeTableRef->rtindex, + query->rtable); + if (distributedRte->rtekind == RTE_RELATION) + { + ereport(DEBUG1, (errmsg("recursively planning distributed relation %s " + "since it is part of a distributed join node " + "that is outer joined with a recurring rel", + GetRelationNameAndAliasName(distributedRte)))); + + PlannerRestrictionContext *restrictionContext = + GetPlannerRestrictionContext(recursivePlanningContext); + List *requiredAttributes = + RequiredAttrNumbersForRelation(distributedRte, restrictionContext); + + ReplaceRTERelationWithRteSubquery(distributedRte, requiredAttributes, + recursivePlanningContext); + } + else if (distributedRte->rtekind == RTE_SUBQUERY) + { + /* + * XXX: Similar to JoinExpr, we don't know how to recursively plan distributed + * subqueries within join expressions yet. + */ + ereport(DEBUG4, (errmsg("recursive planner cannot plan distributed " + "subqueries within join expressions yet"))); + return; + } + else + { + /* + * We don't expect RecursivelyPlanRecurringTupleOuterJoinWalker to try recursively + * plan such an RTE. + */ + ereport(ERROR, errmsg("got unexpected RTE type (%d) when recursively " + "planning a join", + distributedRte->rtekind)); + } +} + + +/* + * IsRTERefRecurring returns true if given rte reference points to a recurring + * rte. + * + * If an rte points to a table, then we call it recurring if the table is not + * a distributed table. 
Otherwise, e.g., if it points to a query, then we call it + * recurring if none of the rtes that belong to the query point to a distributed + * table. + * + * Note that it's safe to assume a subquery is not recurring if we have a rte reference + * to a distributed table somewhere in the query tree. For example, considering + * the subquery (q) of the following query: + * SELECT * FROM ref LEFT JOIN (SELECT * FROM ref LEFT JOIN dist) q, + * one might think that it's not appropriate to call IsRTERefRecurring for subquery + * (q). However, this is already not the case because this function is called + * in the context of recursive planning and hence any query that contains + * rtes pointing to distributed tables and that cannot be pushed down to worker + * nodes should've been recursively planned already. This is because the recursive + * planner processes the queries in bottom-up fashion. For this reason, the subquery + * in the example should've already been converted to the following before we check + * the rte reference that points to the subquery (q): + * SELECT * FROM ref LEFT JOIN (SELECT * FROM ref LEFT JOIN (SELECT * FROM read_intermediate_result()) dist_1) + * That way, we wouldn't incorrectly say that (SELECT * FROM ref LEFT JOIN dist) is a + * distributed subquery (due to having a reference to a distributed table). + */ +static bool +IsRTERefRecurring(RangeTblRef *rangeTableRef, Query *query) +{ + int rangeTableIndex = rangeTableRef->rtindex; + List *rangeTableList = query->rtable; + RangeTblEntry *rangeTableEntry = rt_fetch(rangeTableIndex, rangeTableList); + return !FindNodeMatchingCheckFunctionInRangeTableList(list_make1(rangeTableEntry), + IsDistributedTableRTE); +} + + /* * SublinkListFromWhere finds the subquery nodes in the where clause of the given query.
Note * that the function should be called on the original query given that postgres diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index ff6b90dc2..15c08f8b8 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -2230,6 +2230,7 @@ FilterJoinRestrictionContext(JoinRestrictionContext *joinRestrictionContext, Rel * the same query and as these values are calculated per-query basis. */ filtererdJoinRestrictionContext->hasSemiJoin = joinRestrictionContext->hasSemiJoin; + filtererdJoinRestrictionContext->hasOuterJoin = joinRestrictionContext->hasOuterJoin; return filtererdJoinRestrictionContext; } diff --git a/src/include/distributed/distributed_planner.h b/src/include/distributed/distributed_planner.h index 31de463f0..f0057064b 100644 --- a/src/include/distributed/distributed_planner.h +++ b/src/include/distributed/distributed_planner.h @@ -72,6 +72,7 @@ typedef struct JoinRestrictionContext { List *joinRestrictionList; bool hasSemiJoin; + bool hasOuterJoin; } JoinRestrictionContext; typedef struct JoinRestriction diff --git a/src/include/distributed/local_distributed_join_planner.h b/src/include/distributed/local_distributed_join_planner.h index dd74c8fb1..f2108f603 100644 --- a/src/include/distributed/local_distributed_join_planner.h +++ b/src/include/distributed/local_distributed_join_planner.h @@ -30,5 +30,8 @@ extern int LocalTableJoinPolicy; extern bool ShouldConvertLocalTableJoinsToSubqueries(List *rangeTableList); extern void RecursivelyPlanLocalTableJoins(Query *query, RecursivePlanningContext *context); +extern List * RequiredAttrNumbersForRelation(RangeTblEntry *relationRte, + PlannerRestrictionContext * + plannerRestrictionContext); #endif /* LOCAL_DISTRIBUTED_JOIN_PLANNER_H */ diff --git a/src/test/regress/expected/citus_local_tables_queries.out 
b/src/test/regress/expected/citus_local_tables_queries.out index 8f6c47393..f14607155 100644 --- a/src/test/regress/expected/citus_local_tables_queries.out +++ b/src/test/regress/expected/citus_local_tables_queries.out @@ -497,9 +497,14 @@ NOTICE: executing the command locally: SELECT count(*) AS count FROM (((citus_l 1296 (1 row) --- not supported direct outer join SELECT count(*) FROM citus_local_table LEFT JOIN distributed_table ON (true); -ERROR: cannot pushdown the subquery +NOTICE: executing the command locally: SELECT NULL::integer AS "dummy-1" FROM citus_local_table_queries.citus_local_table_1509001 citus_local_table WHERE true +NOTICE: executing the command locally: SELECT count(*) AS count FROM ((SELECT NULL::integer AS a, NULL::integer AS b FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) citus_local_table_1) citus_local_table LEFT JOIN (SELECT NULL::integer AS a, NULL::integer AS b FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) distributed_table_1) distributed_table ON (true)) + count +--------------------------------------------------------------------- + 36 +(1 row) + -- distinct in subquery on CTE WITH one_row AS ( SELECT a from citus_local_table WHERE b = 1 diff --git a/src/test/regress/expected/citus_local_tables_queries_0.out b/src/test/regress/expected/citus_local_tables_queries_0.out index 4b8d3411e..3bac4fbee 100644 --- a/src/test/regress/expected/citus_local_tables_queries_0.out +++ b/src/test/regress/expected/citus_local_tables_queries_0.out @@ -497,9 +497,14 @@ NOTICE: executing the command locally: SELECT count(*) AS count FROM (((citus_l 1296 (1 row) --- not supported direct outer join SELECT count(*) FROM citus_local_table LEFT JOIN distributed_table ON (true); -ERROR: cannot pushdown the subquery +NOTICE: executing the 
command locally: SELECT NULL::integer AS "dummy-1" FROM citus_local_table_queries.citus_local_table_1509001 citus_local_table WHERE true +NOTICE: executing the command locally: SELECT count(*) AS count FROM ((SELECT NULL::integer AS a, NULL::integer AS b FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) citus_local_table_1) citus_local_table LEFT JOIN (SELECT NULL::integer AS a, NULL::integer AS b FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) distributed_table_1) distributed_table ON (true)) + count +--------------------------------------------------------------------- + 36 +(1 row) + -- distinct in subquery on CTE WITH one_row AS ( SELECT a from citus_local_table WHERE b = 1 diff --git a/src/test/regress/expected/citus_local_tables_queries_mx.out b/src/test/regress/expected/citus_local_tables_queries_mx.out index 2eff2cd1d..9d0b4a051 100644 --- a/src/test/regress/expected/citus_local_tables_queries_mx.out +++ b/src/test/regress/expected/citus_local_tables_queries_mx.out @@ -430,9 +430,13 @@ SELECT count(*) FROM reference_table 1296 (1 row) --- not supported direct outer join +-- supported outer join SELECT count(*) FROM citus_local_table LEFT JOIN distributed_table ON (true); -ERROR: cannot pushdown the subquery + count +--------------------------------------------------------------------- + 36 +(1 row) + -- distinct in subquery on CTE WITH one_row AS ( SELECT a from citus_local_table WHERE b = 1 diff --git a/src/test/regress/expected/cross_join.out b/src/test/regress/expected/cross_join.out index 279c4ab2f..5887a9ae3 100644 --- a/src/test/regress/expected/cross_join.out +++ b/src/test/regress/expected/cross_join.out @@ -90,28 +90,49 @@ SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table r -- two reference tables CROSS 
JOINNed, and later JOINED with distributed tables -- but the reference table CROSS JOIN is in the outer side of the JOIN with the distributed table --- so we cannot pushdown +-- so this is supported by recursively planning the distributed table (users_table) SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 606 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table ON (ref1.id = users_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 606 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id != users_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 3030 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id > 0); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 3636 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (users_table.user_id > 0); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 3636 +(1 row) + SELECT 
count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON true; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 3636 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON false; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 36 +(1 row) + -- a reference tables CROSS JOINed with a distribted table, and later JOINED with distributed tables on distribution keys -- so safe to pushdown SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (u1.user_id = u2.user_id); @@ -167,8 +188,11 @@ SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table r -- this is the same query as the above, but this time the outer query is also LEFT JOIN, meaning that Postgres -- cannot eliminate the outer join SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) LEFT JOIN users_table u2 ON (u2.user_id = users_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 11802 +(1 row) + -- cross join that goes through non-colocated subquery logic -- for the "events_table" subquery as both distributed tables -- do not have JOIN on the distribution key diff --git a/src/test/regress/expected/local_dist_join_mixed.out b/src/test/regress/expected/local_dist_join_mixed.out index cc709b982..20287ee35 100644 --- a/src/test/regress/expected/local_dist_join_mixed.out +++ 
b/src/test/regress/expected/local_dist_join_mixed.out @@ -1212,9 +1212,16 @@ DEBUG: push down of limit count: 1 SELECT id, name FROM local LEFT JOIN distributed USING (id) ORDER BY 1 LIMIT 1; DEBUG: Wrapping relation "local" to a subquery DEBUG: generating subplan XXX_1 for subquery SELECT id FROM local_dist_join_mixed.local WHERE true -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT local.id, distributed.name FROM ((SELECT NULL::integer AS "dummy-1", local_1.id, NULL::integer AS "dummy-3", NULL::text AS title, NULL::integer AS "dummy-5" FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint)) local_1) local LEFT JOIN local_dist_join_mixed.distributed USING (id)) ORDER BY local.id LIMIT 1 -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "distributed" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "distributed" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT id, name FROM local_dist_join_mixed.distributed WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT local.id, distributed.name FROM ((SELECT NULL::integer AS "dummy-1", local_1.id, NULL::integer AS "dummy-3", NULL::text AS title, NULL::integer AS "dummy-5" FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id bigint)) local_1) local LEFT JOIN (SELECT NULL::integer AS "dummy-1", distributed_1.id, distributed_1.name, NULL::timestamp with time zone AS created_at FROM (SELECT intermediate_result.id, intermediate_result.name FROM read_intermediate_result('XXX_2'::text, 
'binary'::citus_copy_format) intermediate_result(id bigint, name text)) distributed_1) distributed USING (id)) ORDER BY local.id LIMIT 1 + id | name +--------------------------------------------------------------------- + 0 | 0 +(1 row) + SELECT foo1.id FROM diff --git a/src/test/regress/expected/local_table_join.out b/src/test/regress/expected/local_table_join.out index 3c2f73393..202cccb42 100644 --- a/src/test/regress/expected/local_table_join.out +++ b/src/test/regress/expected/local_table_join.out @@ -1577,9 +1577,16 @@ SELECT 1 AS res FROM table2 RIGHT JOIN (SELECT 1 FROM table1, table2) AS sub1 ON DEBUG: Wrapping relation "table1" to a subquery DEBUG: generating subplan XXX_1 for subquery SELECT NULL::integer AS "dummy-1" FROM local_table_join.table1 WHERE true DEBUG: generating subplan XXX_2 for subquery SELECT 1 FROM (SELECT NULL::integer AS a FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) table1_1) table1, local_table_join.table2 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 AS res FROM (local_table_join.table2 RIGHT JOIN (SELECT intermediate_result."?column?" FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result("?column?" 
integer)) sub1("?column?") ON (false)) -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "table2" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "table2" to a subquery +DEBUG: generating subplan XXX_3 for subquery SELECT NULL::integer AS "dummy-1" FROM local_table_join.table2 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT 1 AS res FROM ((SELECT NULL::integer AS a FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) table2_1) table2 RIGHT JOIN (SELECT intermediate_result."?column?" FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result("?column?" 
integer)) sub1("?column?") ON (false)) + res +--------------------------------------------------------------------- + 1 +(1 row) + ROLLBACK; BEGIN; SELECT create_reference_table('table1'); diff --git a/src/test/regress/expected/mixed_relkind_tests.out b/src/test/regress/expected/mixed_relkind_tests.out index 20cb6ebac..b168cd7be 100644 --- a/src/test/regress/expected/mixed_relkind_tests.out +++ b/src/test/regress/expected/mixed_relkind_tests.out @@ -349,10 +349,17 @@ DEBUG: Wrapping relation "citus_local_table" to a subquery DEBUG: generating subplan XXX_1 for subquery SELECT NULL::integer AS "dummy-1" FROM mixed_relkind_tests.citus_local_table WHERE true DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.a FROM (mixed_relkind_tests.partitioned_distributed_table foo JOIN (SELECT NULL::integer AS a FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) citus_local_table_1) citus_local_table ON (true)) DEBUG: performing repartitioned INSERT ... 
SELECT --- should fail SELECT COUNT(*) FROM reference_table LEFT JOIN partitioned_distributed_table ON true; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "partitioned_distributed_table" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "partitioned_distributed_table" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT NULL::integer AS "dummy-1" FROM mixed_relkind_tests.partitioned_distributed_table WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (mixed_relkind_tests.reference_table LEFT JOIN (SELECT NULL::integer AS a, NULL::integer AS b FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) partitioned_distributed_table_1) partitioned_distributed_table ON (true)) + count +--------------------------------------------------------------------- + 468 +(1 row) + -- non-colocated subquery should work SELECT COUNT(*) FROM (SELECT *, random() FROM partitioned_distributed_table) AS foo, diff --git a/src/test/regress/expected/multi_dropped_column_aliases.out b/src/test/regress/expected/multi_dropped_column_aliases.out index c3f250d62..99b82bf40 100644 --- a/src/test/regress/expected/multi_dropped_column_aliases.out +++ b/src/test/regress/expected/multi_dropped_column_aliases.out @@ -51,12 +51,24 @@ SELECT count(*) FROM customer, orders WHERE c_custkey = o_custkey; 1956 (1 row) --- Test joinExpr aliases by performing an outer-join. This code path is --- currently not exercised, but we are adding this test to catch this bug when --- we start supporting outer joins. +-- Test joinExpr aliases by performing an outer-join. 
SELECT c_custkey FROM (customer LEFT OUTER JOIN orders ON (c_custkey = o_custkey)) AS test(c_custkey, c_nationkey) INNER JOIN lineitem ON (test.c_custkey = l_orderkey) +ORDER BY 1 LIMIT 10; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns + c_custkey +--------------------------------------------------------------------- + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 +(10 rows) + diff --git a/src/test/regress/expected/multi_outer_join_reference.out b/src/test/regress/expected/multi_outer_join_reference.out index d75f67089..1e705d14d 100644 --- a/src/test/regress/expected/multi_outer_join_reference.out +++ b/src/test/regress/expected/multi_outer_join_reference.out @@ -208,13 +208,17 @@ FROM 20 | 0 (1 row) --- Right join should be disallowed in this case +-- Right join is allowed as we recursively plan the distributed table (multi_outer_join_left_hash) SELECT min(r_custkey), max(r_custkey) FROM multi_outer_join_left_hash a RIGHT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join +LOG: join order: [ "multi_outer_join_left_hash" ] + min | max +--------------------------------------------------------------------- + 1 | 15 +(1 row) + -- Reverse right join should be same as left join SELECT min(l_custkey), max(l_custkey) @@ -319,13 +323,17 @@ FROM 25 | 1 (1 row) --- Right join should not be allowed in this case +-- Right join should be allowed in this case as we recursively plan the distributed table (multi_outer_join_left_hash SELECT min(r_custkey), max(r_custkey) FROM multi_outer_join_left_hash a RIGHT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join +LOG: join order: [ "multi_outer_join_left_hash" ] + min | max 
+--------------------------------------------------------------------- + 11 | 30 +(1 row) + -- Reverse right join should be same as left join SELECT min(l_custkey), max(l_custkey) @@ -600,14 +608,48 @@ ORDER BY 1,2 DESC; | 16 (15 rows) --- full outer join should error out for mismatched shards +-- full outer join should work as we recursively plan the distributed table (multi_outer_join_left_hash SELECT l_custkey, t_custkey FROM multi_outer_join_left_hash l1 - FULL JOIN multi_outer_join_third_reference t1 ON (l1.l_custkey = t1.t_custkey); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + FULL JOIN multi_outer_join_third_reference t1 ON (l1.l_custkey = t1.t_custkey) +ORDER BY 1,2; +LOG: join order: [ "multi_outer_join_left_hash" ] + l_custkey | t_custkey +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 + 21 | 21 + 22 | 22 + 23 | 23 + 24 | 24 + 25 | 25 + 26 | 26 + 27 | 27 + 28 | 28 + 29 | 29 + 30 | 30 + | 16 + | 17 + | 18 + | 19 + | 20 +(30 rows) + -- inner join + single shard left join should work SELECT l_custkey, r_custkey, t_custkey diff --git a/src/test/regress/expected/multi_shard_update_delete.out b/src/test/regress/expected/multi_shard_update_delete.out index 1d494ecea..916db808e 100644 --- a/src/test/regress/expected/multi_shard_update_delete.out +++ b/src/test/regress/expected/multi_shard_update_delete.out @@ -664,15 +664,28 @@ WHERE user_id IN INTERSECT SELECT user_id FROM events_test_table); --- Reference tables can not locate on the outer part of the outer join +-- Reference tables can locate on the outer part of the outer join +-- Note that we don't need to sort the output because +-- citus.sort_returning is enabled by default during +-- regression tests. 
UPDATE users_test_table SET value_1 = 4 -WHERE user_id IN - (SELECT DISTINCT e2.user_id - FROM users_reference_copy_table - LEFT JOIN users_test_table e2 ON (e2.user_id = users_reference_copy_table.value_1)) RETURNING *; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join +WHERE user_id IN ( + SELECT DISTINCT e2.user_id + FROM users_reference_copy_table + LEFT JOIN users_test_table e2 ON (e2.user_id = users_reference_copy_table.value_1) +) +RETURNING *; + user_id | value_1 | value_2 | value_3 +--------------------------------------------------------------------- + 5 | 4 | 1 | 0 + 5 | 4 | 1 | 0 + 5 | 4 | 1 | 0 + 6 | 4 | 11 | 0 + 6 | 4 | 15 | 0 + 16 | 4 | | 0 +(6 rows) + -- Volatile functions are also not supported UPDATE users_test_table SET value_2 = 5 diff --git a/src/test/regress/expected/multi_subquery_complex_reference_clause.out b/src/test/regress/expected/multi_subquery_complex_reference_clause.out index 65ff161ca..22a64ccb5 100644 --- a/src/test/regress/expected/multi_subquery_complex_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_complex_reference_clause.out @@ -76,12 +76,20 @@ ORDER BY 1; 7 (10 rows) --- Shouldn't work, reference table at the outer side is not allowed +-- Should work, reference table at the outer side is allowed SELECT * FROM - (SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table + (SELECT random() > 2 FROM users_ref_test_table LEFT JOIN user_buy_test_table ON users_ref_test_table.id = user_buy_test_table.user_id) subquery_1; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + ?column? 
+--------------------------------------------------------------------- + f + f + f + f + f + f +(6 rows) + -- Should work, reference table at the inner side is allowed SELECT count(*) FROM (SELECT random() FROM users_ref_test_table RIGHT JOIN user_buy_test_table @@ -91,12 +99,20 @@ SELECT count(*) FROM 4 (1 row) --- Shouldn't work, reference table at the outer side is not allowed +-- Should work, reference table at the outer side is allowed SELECT * FROM - (SELECT random() FROM user_buy_test_table RIGHT JOIN users_ref_test_table + (SELECT random() > 2 FROM user_buy_test_table RIGHT JOIN users_ref_test_table ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + ?column? +--------------------------------------------------------------------- + f + f + f + f + f + f +(6 rows) + -- Equi join test with reference table on non-partition keys SELECT count(*) FROM (SELECT random() FROM user_buy_test_table JOIN users_ref_test_table @@ -270,16 +286,22 @@ ON user_buy_test_table.item_id = users_ref_test_table.id; 4 (1 row) --- table function cannot be the outer relationship in an outer join +-- table function can be the outer relationship in an outer join SELECT count(*) FROM (SELECT random() FROM user_buy_test_table RIGHT JOIN generate_series(1,10) AS users_ref_test_table(id) ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; -ERROR: cannot pushdown the subquery -DETAIL: There exist a table function in the outer part of the outer join + count +--------------------------------------------------------------------- + 16 +(1 row) + SELECT count(*) FROM user_buy_test_table RIGHT JOIN (SELECT * FROM generate_series(1,10) id) users_ref_test_table ON user_buy_test_table.item_id = users_ref_test_table.id; -ERROR: cannot pushdown the subquery -DETAIL: There exist a table function in the outer part of the outer join + count 
+--------------------------------------------------------------------- + 10 +(1 row) + -- volatile functions can be used as table expressions through recursive planning SET client_min_messages TO DEBUG; SELECT count(*) FROM @@ -351,11 +373,14 @@ ON user_buy_test_table.item_id = users_ref_test_table.id; 4 (1 row) --- subquery without FROM cannot be the outer relationship in an outer join +-- subquery without FROM can be the outer relationship in an outer join SELECT count(*) FROM user_buy_test_table RIGHT JOIN (SELECT 5 AS id) users_ref_test_table ON user_buy_test_table.item_id = users_ref_test_table.id; -ERROR: cannot pushdown the subquery -DETAIL: There exist a subquery without FROM in the outer part of the outer join + count +--------------------------------------------------------------------- + 1 +(1 row) + -- can perform a union with subquery without FROM -- with pulling data to coordinator SET client_min_messages TO DEBUG; @@ -498,8 +523,8 @@ DEBUG: Router planner cannot handle multi-shard select queries 1 (4 rows) --- query can be pushed down when a reference table inside union query is --- joined with a distributed table. reference table cannot be at +-- query is supported when a reference table inside union query is +-- joined with a distributed table. reference table can be at -- the outer part. 
SELECT * FROM (SELECT user_id FROM users_ref_test_table ref LEFT JOIN user_buy_test_table dis @@ -508,8 +533,26 @@ SELECT * FROM SELECT user_id FROM user_buy_test_table) sub ORDER BY 1 DESC; DEBUG: Router planner cannot handle multi-shard select queries -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "user_buy_test_table" "dis" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "user_buy_test_table" "dis" to a subquery +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.user_buy_test_table dis WHERE true +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM public.user_buy_test_table +DEBUG: Creating router plan +DEBUG: generating subplan XXX_3 for subquery SELECT dis.user_id FROM (public.users_ref_test_table ref LEFT JOIN (SELECT dis_1.user_id, NULL::integer AS item_id, NULL::integer AS buy_count FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) dis_1) dis ON ((ref.id OPERATOR(pg_catalog.=) dis.user_id))) UNION SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) sub ORDER BY user_id DESC +DEBUG: Creating router plan + user_id +--------------------------------------------------------------------- + + 7 + 3 + 2 + 1 +(5 rows) 
+ RESET client_min_messages; -- should be able to pushdown since reference table is in the -- inner part of the left join @@ -533,7 +576,7 @@ FROM 6 | 210 (6 rows) --- should not be able to pushdown since reference table is in the +-- supported even if the reference table is in the -- direct outer part of the left join SELECT user_id, sum(value_1) @@ -545,18 +588,29 @@ FROM LEFT JOIN events_table ON (events_table.user_id = users_table.user_id) ) as foo GROUP BY user_id ORDER BY 2 DESC LIMIT 10; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join --- should not be able to pushdown since reference table is in the + user_id | sum +--------------------------------------------------------------------- + | + 2 | 31248 + 3 | 15120 + 4 | 14994 + 5 | 8694 + 1 | 7590 +(6 rows) + +-- supported even if the reference table is in the -- direct outer part of the left join wrapped into a subquery SELECT - * + COUNT(*) = 1581 FROM (SELECT *, random() FROM events_reference_table) as ref_all LEFT JOIN users_table ON (users_table.user_id = ref_all.value_2); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join --- should not be able to pushdown since reference table is in the + ?column? 
+--------------------------------------------------------------------- + t +(1 row) + +-- supported even if the reference table is in the -- outer part of the left join SELECT user_id, sum(value_1) @@ -568,8 +622,16 @@ FROM LEFT JOIN events_table ON (events_table.user_id = users_table.user_id) ) as foo GROUP BY user_id ORDER BY 2 DESC LIMIT 10; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + user_id | sum +--------------------------------------------------------------------- + | + 2 | 31248 + 3 | 15120 + 4 | 14994 + 5 | 8694 + 1 | 7590 +(6 rows) + -- should be able to pushdown since reference table is in the -- inner part of the left join SELECT * FROM @@ -1466,13 +1528,16 @@ ORDER BY types; 3 | 120 (4 rows) --- just a sanity check that we don't allow this if the reference table is on the +-- just a sanity check that we allow this even if the reference table is on the -- left part of the left join SELECT count(*) FROM (SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 12 +(1 row) + -- we do allow non equi join among subqueries via recursive planning SET client_min_messages TO DEBUG1; SELECT count(*) FROM @@ -1961,73 +2026,124 @@ LIMIT 5; 6 (1 row) --- outer part of the LEFT JOIN consists only reference tables, so we cannot push down +-- supported even if the outer part of the LEFT JOIN consists only reference tables -- we have different combinations for ON condition, true/false/two column join/single column filter SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON true; -ERROR: cannot pushdown the subquery -DETAIL: There exist a 
reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 24 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON true; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 24 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON false; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 6 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON false; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 6 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id > 5); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 9 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (user_buy_test_table.user_id > 5); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 6 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN 
users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 6 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 6 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 6 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join --- outer part of the LEFT JOIN consists only reference tables within a subquery, so we cannot push down + count +--------------------------------------------------------------------- + 6 +(1 row) + +-- outer part of the LEFT JOIN consists only reference tables within a subquery -- we have different combinations for ON condition, true/false/two column join/single column filter SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON true; -ERROR: cannot pushdown the subquery -DETAIL: There 
exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 24 +(1 row) + SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON true; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 24 +(1 row) + SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON false; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 6 +(1 row) + SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON false; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 6 +(1 row) + SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (foo.id > 5); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 9 +(1 row) + SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (user_buy_test_table.user_id > 19); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference 
table in the outer part of the outer join + count +--------------------------------------------------------------------- + 6 +(1 row) + SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 6 +(1 row) + -- one example where unsupported outer join is deep inside a subquery SELECT *, random() FROM ( SELECT *,random() FROM user_buy_test_table WHERE user_id > ( SELECT count(*) FROM (SELECT *,random() FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as bar) as foo LEFT JOIN (SELECT *, random() FROM (SELECT *,random() FROM user_buy_test_table d1 JOIN user_buy_test_table d2 USING (user_id)) as bar_inner ) as bar ON true)) as boo; ERROR: cannot pushdown the subquery DETAIL: There exist a reference table in the outer part of the outer join --- In theory, we should be able to pushdown this query --- however, as the LEFT JOIN condition is between a reference table and the distributed table --- Postgres generates a LEFT JOIN alternative among those tables SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + count +--------------------------------------------------------------------- + 3 +(1 row) + -- same as the above query, but this time LEFT JOIN condition is between distributed tables -- so Postgres doesn't generate join restriction 
between reference and distributed tables SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.user_id = user_buy_test_table.user_id); @@ -2036,10 +2152,13 @@ SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER 3 (1 row) --- outer part of the LEFT JOIN consists only intermediate result due to LIMIT, so we cannot push down +-- outer part of the LEFT JOIN consists only intermediate result due to LIMIT but this is still supported SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LIMIT 5) as foo LEFT JOIN user_buy_test_table ON true; -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + count +--------------------------------------------------------------------- + 20 +(1 row) + -- should be fine as OUTER part is the distributed table SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON true; count @@ -2062,123 +2181,212 @@ SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table r -- left outer part of the FULL JOIN consists only reference tables, so we cannot push down -- we have different combinations for ON condition, true/false/two column join/single column filter SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON true; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 24 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN 
user_buy_test_table ON true; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 24 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON true; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 24 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON false; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 10 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON false; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 10 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (ref1.id > 5); ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (user_buy_test_table.user_id > 5); ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id); -ERROR: cannot pushdown the 
subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 7 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 7 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 7 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 7 +(1 row) + -- right outer part of the FULL JOIN consists only reference tables, so we cannot push down -- we have different combinations for ON condition, true/false/two column join/single column filter SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON true; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 24 +(1 row) + SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = 
ref2.id ON true; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 24 +(1 row) + SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON true; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 24 +(1 row) + SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON false; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 10 +(1 row) + SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON false; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 10 +(1 row) + SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (ref1.id > 5); ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (user_buy_test_table.user_id > 5); ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (ref1.id = user_buy_test_table.user_id); -ERROR: cannot pushdown the subquery 
-DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 7 +(1 row) + SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (ref2.id = user_buy_test_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 7 +(1 row) + SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (ref1.id = user_buy_test_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 7 +(1 row) + SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (ref2.id = user_buy_test_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 7 +(1 row) + -- left outer part of the FULL JOIN consists only reference tables within a subquery, so we cannot push down -- we have different combinations for ON condition, true/false/two column join/single column filter SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON true; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 24 +(1 row) + SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 
LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON true; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 24 +(1 row) + SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON false; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 10 +(1 row) + SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON false; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 10 +(1 row) + SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON (foo.id > 5); ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON (user_buy_test_table.user_id > 19); ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + 
count +--------------------------------------------------------------------- + 7 +(1 row) + -- right outer part of the FULL JOIN consists only reference tables within a subquery, so we cannot push down -- we have different combinations for ON condition, true/false/two column join/single column filter SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON true; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 24 +(1 row) + SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON true; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 24 +(1 row) + SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON false; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 10 +(1 row) + SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON false; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 10 +(1 row) + SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table 
ref2 on ref1.id = ref2.id) as foo ON (foo.id > 5); ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON (user_buy_test_table.user_id > 19); ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON (foo.id = user_buy_test_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 7 +(1 row) + -- one example where unsupported outer join is deep inside a subquery SELECT *, random() FROM ( SELECT *,random() FROM user_buy_test_table WHERE user_id > ( SELECT count(*) FROM (SELECT *,random() FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as bar) as foo FULL JOIN (SELECT *, random() FROM (SELECT *,random() FROM user_buy_test_table d1 JOIN user_buy_test_table d2 USING (user_id)) as bar_inner ) as bar ON true)) as boo; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + user_id | item_id | buy_count | random | random +--------------------------------------------------------------------- +(0 rows) + -- In theory, we should be able to pushdown this query -- however, as the FULL JOIN condition is between a reference table and the distributed table -- Postgres generates a FULL JOIN alternative among those tables SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo FULL JOIN user_buy_test_table ON 
(foo.id = user_buy_test_table.user_id); -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + count +--------------------------------------------------------------------- + 4 +(1 row) + -- same as the above query, but this time FULL JOIN condition is between distributed tables -- so Postgres doesn't generate join restriction between reference and distributed tables SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo FULL JOIN user_buy_test_table ON (foo.user_id = user_buy_test_table.user_id); @@ -2189,8 +2397,11 @@ SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER -- left outer part of the FULL JOIN consists only intermediate result due to LIMIT, so we cannot push down SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LIMIT 5) as foo FULL JOIN user_buy_test_table ON true; -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + count +--------------------------------------------------------------------- + 20 +(1 row) + DROP TABLE user_buy_test_table; DROP TABLE users_ref_test_table; DROP TABLE users_return_test_table; diff --git a/src/test/regress/expected/multi_subquery_in_where_reference_clause.out b/src/test/regress/expected/multi_subquery_in_where_reference_clause.out index 74fd8a624..d4ada83d3 100644 --- a/src/test/regress/expected/multi_subquery_in_where_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_in_where_reference_clause.out @@ -162,8 +162,7 @@ WHERE ) ORDER BY user_id LIMIT 3; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join +ERROR: correlated subqueries are not supported 
when the FROM clause contains a CTE or subquery -- subqueries in WHERE with IN operator without equality SELECT users_table.user_id, count(*) diff --git a/src/test/regress/expected/multi_view.out b/src/test/regress/expected/multi_view.out index fafa242df..11f78ea34 100644 --- a/src/test/regress/expected/multi_view.out +++ b/src/test/regress/expected/multi_view.out @@ -486,17 +486,23 @@ ORDER BY 2 DESC, 1; 5 | NO (3 rows) --- event vs table non-partition-key join is not supported --- given that we cannot recursively plan tables yet -SELECT * FROM +-- event vs table non-partition-key join is supported +-- given that we can recursively plan events_table +SELECT count(*), user_id, done_event FROM (SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event FROM recent_users ru LEFT JOIN events_table et ON(ru.user_id = et.event_type) ) s1 -ORDER BY 2 DESC, 1; -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table +GROUP BY user_id, done_event +ORDER BY 1,2,3; + count | user_id | done_event +--------------------------------------------------------------------- + 7 | 5 | YES + 23 | 3 | YES + 24 | 1 | YES +(3 rows) + -- create a select only view CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 1 and value_1 <3; CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id); diff --git a/src/test/regress/expected/non_colocated_subquery_joins.out b/src/test/regress/expected/non_colocated_subquery_joins.out index 1b7b1b1e7..a4d66aba2 100644 --- a/src/test/regress/expected/non_colocated_subquery_joins.out +++ b/src/test/regress/expected/non_colocated_subquery_joins.out @@ -987,7 +987,7 @@ DEBUG: skipping recursive planning for the subquery since it contains reference ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution 
columns -- similar to the above, make sure that we skip recursive planning when -- the subquery contains only intermediate results -SELECT * +SELECT COUNT(*) = 176 FROM ( SELECT * FROM( @@ -1026,9 +1026,18 @@ DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_5 for subquery SELECT user_id, "time", event_type, value_2, value_3, value_4 FROM public.events_table WHERE (value_2 OPERATOR(pg_catalog.>) 2) DEBUG: Creating router plan DEBUG: generating subplan XXX_6 for subquery SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint) INTERSECT SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT users_table_limited.user_id, users_table_limited."time", users_table_limited.value_1, users_table_limited.value_2, users_table_limited.value_3, users_table_limited.value_4, foo.user_id, foo."time", foo.event_type, foo.value_2, foo.value_3, foo.value_4, foo.user_id_1 AS user_id, foo.time_1 AS "time", foo.value_1, foo.value_2_1 AS value_2, foo.value_3_1 AS value_3, foo.value_4_1 AS value_4 FROM ((SELECT users_table_union.user_id, users_table_union."time", users_table_union.value_1, users_table_union.value_2, users_table_union.value_3, users_table_union.value_4 FROM (SELECT 
intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table_union) users_table_limited JOIN LATERAL (SELECT bar.user_id, bar."time", bar.event_type, bar.value_2, bar.value_3, bar.value_4, u2.user_id, u2."time", u2.value_1, u2.value_2, u2.value_3, u2.value_4 FROM ((SELECT events_table.user_id, events_table."time", events_table.event_type, events_table.value_2, events_table.value_3, events_table.value_4 FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_6'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint)) events_table WHERE (events_table.user_id OPERATOR(pg_catalog.=) users_table_limited.user_id)) bar LEFT JOIN public.users_table u2 ON ((u2.user_id OPERATOR(pg_catalog.=) bar.value_2)))) foo(user_id, "time", event_type, value_2, value_3, value_4, user_id_1, time_1, value_1, value_2_1, value_3_1, value_4_1) ON (true)) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "users_table" "u2" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "users_table" "u2" to a subquery DEBUG: Router planner cannot handle multi-shard select queries -ERROR: cannot pushdown the subquery +DEBUG: generating subplan XXX_7 for subquery SELECT user_id FROM public.users_table u2 WHERE true 
+DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT (count(*) OPERATOR(pg_catalog.=) 176) FROM ((SELECT users_table_union.user_id, users_table_union."time", users_table_union.value_1, users_table_union.value_2, users_table_union.value_3, users_table_union.value_4 FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table_union) users_table_limited JOIN LATERAL (SELECT bar.user_id, bar."time", bar.event_type, bar.value_2, bar.value_3, bar.value_4, u2.user_id, u2."time", u2.value_1, u2.value_2, u2.value_3, u2.value_4 FROM ((SELECT events_table.user_id, events_table."time", events_table.event_type, events_table.value_2, events_table.value_3, events_table.value_4 FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_6'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint)) events_table WHERE (events_table.user_id OPERATOR(pg_catalog.=) users_table_limited.user_id)) bar LEFT JOIN (SELECT u2_1.user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_7'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) u2_1) u2 ON ((u2.user_id OPERATOR(pg_catalog.=) bar.value_2)))) foo(user_id, "time", event_type, 
value_2, value_3, value_4, user_id_1, time_1, value_1, value_2_1, value_3_1, value_4_1) ON (true)) +DEBUG: Creating router plan + ?column? +--------------------------------------------------------------------- + t +(1 row) + -- similar to the above, but this time there are multiple -- non-colocated subquery joins one of them contains lateral -- join @@ -1071,8 +1080,18 @@ DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_5 for subquery SELECT user_id, "time", event_type, value_2, value_3, value_4 FROM public.events_table WHERE (value_2 OPERATOR(pg_catalog.>) 2) DEBUG: Creating router plan DEBUG: generating subplan XXX_6 for subquery SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint) INTERSECT SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "users_table" "u2" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "users_table" "u2" to a subquery DEBUG: Router planner cannot handle multi-shard select queries -ERROR: cannot pushdown the subquery +DEBUG: generating subplan XXX_7 for subquery SELECT user_id FROM 
public.users_table u2 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.events_table WHERE (NOT (user_id OPERATOR(pg_catalog.=) ANY (SELECT users_table_limited.user_id FROM ((SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table_limited JOIN LATERAL (SELECT bar.user_id, bar."time", bar.event_type, bar.value_2, bar.value_3, bar.value_4, u2.user_id, u2."time", u2.value_1, u2.value_2, u2.value_3, u2.value_4 FROM ((SELECT events_table_1.user_id, events_table_1."time", events_table_1.event_type, events_table_1.value_2, events_table_1.value_3, events_table_1.value_4 FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_6'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint)) events_table_1 WHERE (events_table_1.user_id OPERATOR(pg_catalog.=) users_table_limited.user_id)) bar LEFT JOIN (SELECT u2_1.user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_7'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) u2_1) u2 ON ((u2.user_id OPERATOR(pg_catalog.=) bar.value_2)))) foo(user_id, "time", event_type, value_2, value_3, value_4, user_id_1, time_1, value_1, 
value_2_1, value_3_1, value_4_1) ON (true))))) +DEBUG: Router planner cannot handle multi-shard select queries + count +--------------------------------------------------------------------- + 62 +(1 row) + -- make sure that non-colocated subquery joins work fine in -- modifications CREATE TABLE table1 (id int, tenant_id int); diff --git a/src/test/regress/expected/set_operation_and_local_tables.out b/src/test/regress/expected/set_operation_and_local_tables.out index 40c7d618c..92cde1148 100644 --- a/src/test/regress/expected/set_operation_and_local_tables.out +++ b/src/test/regress/expected/set_operation_and_local_tables.out @@ -250,10 +250,17 @@ DEBUG: push down of limit count: 1 DEBUG: generating subplan XXX_2 for subquery SELECT x, y FROM recursive_set_local.test ORDER BY x LIMIT 1 DEBUG: Creating router plan DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) INTERSECT SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT u.x, u.y, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u LEFT JOIN recursive_set_local.test USING (x)) ORDER BY u.x, u.y +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "test" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "test" to a subquery DEBUG: Router planner cannot handle multi-shard select queries -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the 
outer part of an outer join with a distributed table +DEBUG: generating subplan XXX_4 for subquery SELECT x, y FROM recursive_set_local.test WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT u.x, u.y, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u LEFT JOIN (SELECT test_1.x, test_1.y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) test_1) test USING (x)) ORDER BY u.x, u.y +DEBUG: Creating router plan + x | y | y +--------------------------------------------------------------------- +(0 rows) + -- we replace some queries including the local query, the intermediate result is on the inner part of an outer join SELECT * FROM ((SELECT * FROM local_test) INTERSECT (SELECT * FROM test ORDER BY x LIMIT 1)) u RIGHT JOIN test USING (x) ORDER BY 1,2; DEBUG: Local tables cannot be used in distributed queries. 
diff --git a/src/test/regress/expected/set_operations.out b/src/test/regress/expected/set_operations.out index 250c8ae7c..9c9d22fb9 100644 --- a/src/test/regress/expected/set_operations.out +++ b/src/test/regress/expected/set_operations.out @@ -505,10 +505,20 @@ DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_2 for subquery SELECT x, y FROM recursive_union.test DEBUG: Creating router plan DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) UNION ALL SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT u.x, u.y, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u LEFT JOIN recursive_union.test USING (x)) ORDER BY u.x, u.y +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "test" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "test" to a subquery DEBUG: Router planner cannot handle multi-shard select queries -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table +DEBUG: generating subplan XXX_4 for subquery SELECT x, y FROM recursive_union.test WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT u.x, u.y, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(x 
integer, y integer)) u LEFT JOIN (SELECT test_1.x, test_1.y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) test_1) test USING (x)) ORDER BY u.x, u.y +DEBUG: Creating router plan + x | y | y +--------------------------------------------------------------------- + 1 | 1 | 1 + 1 | 1 | 1 + 2 | 2 | 2 +(3 rows) + -- unions in a join without partition column equality (column names from first query are used for join) SELECT * FROM ((SELECT x, y FROM test) UNION (SELECT y, x FROM test)) u JOIN test USING (x) ORDER BY 1,2; DEBUG: Router planner cannot handle multi-shard select queries @@ -686,10 +696,18 @@ DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_2 for subquery SELECT x, y FROM recursive_union.test DEBUG: Creating router plan DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) INTERSECT SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT u.x, u.y, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u LEFT JOIN recursive_union.test USING (x)) ORDER BY u.x, u.y +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "test" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "test" to a subquery DEBUG: Router planner cannot handle multi-shard select queries -ERROR: cannot pushdown the 
subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table +DEBUG: generating subplan XXX_4 for subquery SELECT x, y FROM recursive_union.test WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT u.x, u.y, test.y FROM ((SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) u LEFT JOIN (SELECT test_1.x, test_1.y FROM (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) test_1) test USING (x)) ORDER BY u.x, u.y +DEBUG: Creating router plan + x | y | y +--------------------------------------------------------------------- + 1 | 1 | 1 +(1 row) + -- distributed table in WHERE clause is recursively planned SELECT * FROM ((SELECT * FROM test) UNION (SELECT * FROM ref WHERE a IN (SELECT x FROM test))) u ORDER BY 1,2; DEBUG: Router planner cannot handle multi-shard select queries diff --git a/src/test/regress/expected/sqlancer_failures.out b/src/test/regress/expected/sqlancer_failures.out index 234eadd87..207e71d56 100644 --- a/src/test/regress/expected/sqlancer_failures.out +++ b/src/test/regress/expected/sqlancer_failures.out @@ -149,8 +149,11 @@ SELECT create_reference_table('t10'); SELECT count(*) FROM ( SELECT ALL t7.c1, t7.c0, t8.c1, t10.c1, t8.c0 FROM t7 CROSS JOIN t10 FULL OUTER JOIN t8 ON 
(((((((('[832125354,1134163512)'::int4range)*('(0,2106623281)'::int4range)))-('(-600267905,509840582]'::int4range)))*('(-365203965,1662828182)'::int4range)))&<((((((('(-1286467417,697584012]'::int4range)*('[-1691485781,1341103963)'::int4range)))*((('(-1768368435,1719707648)'::int4range)*('(139536997,1275813540]'::int4range)))))*((((('[-2103910157,-1961746758)'::int4range)*('[-834534078,533073939)'::int4range)))*((('[-1030552151,552856781]'::int4range)*('[-1109419376,1205173697]'::int4range)))))))) ) AS foo; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 0 +(1 row) + CREATE TABLE reference_table(id int, it_name varchar(25), k_no int); SELECT create_reference_table('reference_table'); create_reference_table diff --git a/src/test/regress/expected/values.out b/src/test/regress/expected/values.out index cc26f7432..ad5f8a911 100644 --- a/src/test/regress/expected/values.out +++ b/src/test/regress/expected/values.out @@ -248,7 +248,7 @@ ON (key = num); 101 (1 row) --- VALUES with unsupported OUTER join +-- VALUES with supported OUTER join (since test_values is recursively planned) SELECT count(*) FROM @@ -256,8 +256,16 @@ FROM RIGHT JOIN (SELECT a,b FROM (VALUES (1, 'one'), (2, 'two'), (3, 'three')) as t(a,b) ) as foo (num,letter) ON (key = num); -ERROR: cannot pushdown the subquery -DETAIL: There exist a VALUES clause in the outer part of the outer join +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "test_values" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "test_values" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT key FROM values_subquery.test_values WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS 
count FROM ((SELECT test_values_1.key, NULL::text AS value, NULL::jsonb AS data FROM (SELECT intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer)) test_values_1) test_values RIGHT JOIN (SELECT t.a, t.b FROM (VALUES (1,'one'::text), (2,'two'::text), (3,'three'::text)) t(a, b)) foo(num, letter) ON ((test_values.key OPERATOR(pg_catalog.=) foo.num))) + count +--------------------------------------------------------------------- + 3 +(1 row) + -- values with router queries SELECT count(*) @@ -456,12 +464,20 @@ SELECT count(*) FROM 296 (1 row) --- VALUES cannot be the right relationship in a join +-- VALUES can be the right relationship in a join SELECT count(*) FROM (SELECT random() FROM test_values RIGHT JOIN (SELECT a, b FROM (VALUES (1, 'one'), (2, 'two'), (3, 'three')) as t(a,b)) as values_data(a,b) ON test_values.key > values_data.a) subquery_1; -ERROR: cannot pushdown the subquery -DETAIL: There exist a VALUES clause in the outer part of the outer join +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "test_values" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "test_values" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT key FROM values_subquery.test_values WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT random() AS random FROM ((SELECT test_values_1.key, NULL::text AS value, NULL::jsonb AS data FROM (SELECT intermediate_result.key FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer)) test_values_1) test_values RIGHT JOIN (SELECT t.a, t.b FROM (VALUES (1,'one'::text), (2,'two'::text), (3,'three'::text)) t(a, b)) values_data(a, b) ON ((test_values.key OPERATOR(pg_catalog.>) values_data.a)))) 
subquery_1 + count +--------------------------------------------------------------------- + 294 +(1 row) + -- subquery IN WHERE clause need to be recursively planned -- but it is correlated so cannot be pushed down SELECT diff --git a/src/test/regress/expected/with_join.out b/src/test/regress/expected/with_join.out index dd985fbb1..23cafd2cc 100644 --- a/src/test/regress/expected/with_join.out +++ b/src/test/regress/expected/with_join.out @@ -151,7 +151,7 @@ LIMIT 2 (5 rows) --- cte LEFT JOIN distributed_table should error out +-- cte LEFT JOIN distributed_table should work -- as long as the CTE is recursively planned WITH cte AS MATERIALIZED ( SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1 @@ -166,8 +166,15 @@ ORDER BY 1,2,3 LIMIT 5; -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + user_id | time | event_type +--------------------------------------------------------------------- + 1 | Wed Nov 22 22:51:43.132261 2017 | 0 + 1 | Wed Nov 22 22:51:43.132261 2017 | 0 + 1 | Wed Nov 22 22:51:43.132261 2017 | 1 + 1 | Wed Nov 22 22:51:43.132261 2017 | 1 + 1 | Wed Nov 22 22:51:43.132261 2017 | 2 +(5 rows) + -- cte RIGHT JOIN distributed_table should work WITH cte AS MATERIALIZED ( SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1 @@ -214,7 +221,7 @@ LIMIT 1 | Thu Nov 23 09:26:42.145043 2017 | 2 (5 rows) --- distributed_table RIGHT JOIN cte should error out +-- distributed_table RIGHT JOIN cte should work WITH cte AS MATERIALIZED ( SELECT * FROM users_table WHERE value_1 = 1 ORDER BY value_1 ) @@ -228,9 +235,16 @@ ORDER BY 1,2,3 LIMIT 5; -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table --- cte FULL JOIN distributed_table should error out + user_id | time | event_type 
+--------------------------------------------------------------------- + 1 | Thu Nov 23 09:26:42.145043 2017 | 0 + 1 | Thu Nov 23 09:26:42.145043 2017 | 0 + 1 | Thu Nov 23 09:26:42.145043 2017 | 1 + 1 | Thu Nov 23 09:26:42.145043 2017 | 1 + 1 | Thu Nov 23 09:26:42.145043 2017 | 2 +(5 rows) + +-- cte FULL JOIN distributed_table should work WITH cte AS MATERIALIZED ( SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1 ) @@ -244,8 +258,15 @@ ORDER BY 1,2,3 LIMIT 5; -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + user_id | time | event_type +--------------------------------------------------------------------- + 1 | Wed Nov 22 22:51:43.132261 2017 | 0 + 1 | Wed Nov 22 22:51:43.132261 2017 | 0 + 1 | Wed Nov 22 22:51:43.132261 2017 | 1 + 1 | Wed Nov 22 22:51:43.132261 2017 | 1 + 1 | Wed Nov 22 22:51:43.132261 2017 | 2 +(5 rows) + -- Joins with reference tables are planned as router queries WITH cte AS MATERIALIZED ( SELECT value_2, max(user_id) AS user_id FROM users_table WHERE value_2 = 1 GROUP BY value_2 HAVING count(*) > 1 diff --git a/src/test/regress/sql/citus_local_tables_queries.sql b/src/test/regress/sql/citus_local_tables_queries.sql index 5f1d561b3..adae17118 100644 --- a/src/test/regress/sql/citus_local_tables_queries.sql +++ b/src/test/regress/sql/citus_local_tables_queries.sql @@ -210,7 +210,6 @@ SELECT count(*) FROM reference_table LEFT JOIN postgres_local_table ON (true) LEFT JOIN reference_table r2 ON (true); --- not supported direct outer join SELECT count(*) FROM citus_local_table LEFT JOIN distributed_table ON (true); -- distinct in subquery on CTE diff --git a/src/test/regress/sql/citus_local_tables_queries_mx.sql b/src/test/regress/sql/citus_local_tables_queries_mx.sql index cad6a0386..8bdb41a19 100644 --- a/src/test/regress/sql/citus_local_tables_queries_mx.sql +++ b/src/test/regress/sql/citus_local_tables_queries_mx.sql @@ -203,7 
+203,7 @@ SELECT count(*) FROM reference_table LEFT JOIN postgres_local_table ON (true) LEFT JOIN reference_table r2 ON (true); --- not supported direct outer join +-- supported outer join SELECT count(*) FROM citus_local_table LEFT JOIN distributed_table ON (true); -- distinct in subquery on CTE diff --git a/src/test/regress/sql/cross_join.sql b/src/test/regress/sql/cross_join.sql index 3e776368b..0726a79e1 100644 --- a/src/test/regress/sql/cross_join.sql +++ b/src/test/regress/sql/cross_join.sql @@ -33,7 +33,7 @@ SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table r -- two reference tables CROSS JOINNed, and later JOINED with distributed tables -- but the reference table CROSS JOIN is in the outer side of the JOIN with the distributed table --- so we cannot pushdown +-- so this is supported by recursively planning the distributed table (users_table) SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id); SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table ON (ref1.id = users_table.user_id); SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id != users_table.user_id); diff --git a/src/test/regress/sql/mixed_relkind_tests.sql b/src/test/regress/sql/mixed_relkind_tests.sql index 8e258b7d1..6b7463cfd 100644 --- a/src/test/regress/sql/mixed_relkind_tests.sql +++ b/src/test/regress/sql/mixed_relkind_tests.sql @@ -103,10 +103,8 @@ INSERT INTO partitioned_distributed_table SELECT foo.* FROM partitioned_distribu INSERT INTO partitioned_distributed_table SELECT foo.* FROM distributed_table AS foo JOIN citus_local_table ON (true); INSERT INTO distributed_table SELECT foo.a FROM partitioned_distributed_table AS foo JOIN citus_local_table ON (true); --- should fail SELECT COUNT(*) FROM reference_table LEFT JOIN partitioned_distributed_table ON 
true; - -- non-colocated subquery should work SELECT COUNT(*) FROM (SELECT *, random() FROM partitioned_distributed_table) AS foo, diff --git a/src/test/regress/sql/multi_dropped_column_aliases.sql b/src/test/regress/sql/multi_dropped_column_aliases.sql index 7e6729351..204a895f1 100644 --- a/src/test/regress/sql/multi_dropped_column_aliases.sql +++ b/src/test/regress/sql/multi_dropped_column_aliases.sql @@ -22,12 +22,11 @@ SELECT * FROM customer LIMIT 2; -- Verify joins work with dropped columns. SELECT count(*) FROM customer, orders WHERE c_custkey = o_custkey; --- Test joinExpr aliases by performing an outer-join. This code path is --- currently not exercised, but we are adding this test to catch this bug when --- we start supporting outer joins. +-- Test joinExpr aliases by performing an outer-join. SELECT c_custkey FROM (customer LEFT OUTER JOIN orders ON (c_custkey = o_custkey)) AS test(c_custkey, c_nationkey) INNER JOIN lineitem ON (test.c_custkey = l_orderkey) +ORDER BY 1 LIMIT 10; diff --git a/src/test/regress/sql/multi_outer_join_reference.sql b/src/test/regress/sql/multi_outer_join_reference.sql index 6aa2658b7..9c824736b 100644 --- a/src/test/regress/sql/multi_outer_join_reference.sql +++ b/src/test/regress/sql/multi_outer_join_reference.sql @@ -159,7 +159,7 @@ FROM multi_outer_join_left_hash a LEFT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey AND l_custkey = -1 /* nonexistant */); --- Right join should be disallowed in this case +-- Right join is allowed as we recursively plan the distributed table (multi_outer_join_left_hash) SELECT min(r_custkey), max(r_custkey) FROM @@ -259,7 +259,7 @@ FROM ON (l_custkey = r_custkey AND r_custkey = 21); --- Right join should not be allowed in this case +-- Right join should be allowed in this case as we recursively plan the distributed table (multi_outer_join_left_hash SELECT min(r_custkey), max(r_custkey) FROM @@ -377,12 +377,13 @@ WHERE l_custkey is NULL or r_custkey is NULL ORDER BY 1,2 DESC; 
--- full outer join should error out for mismatched shards +-- full outer join should work as we recursively plan the distributed table (multi_outer_join_left_hash SELECT l_custkey, t_custkey FROM multi_outer_join_left_hash l1 - FULL JOIN multi_outer_join_third_reference t1 ON (l1.l_custkey = t1.t_custkey); + FULL JOIN multi_outer_join_third_reference t1 ON (l1.l_custkey = t1.t_custkey) +ORDER BY 1,2; -- inner join + single shard left join should work SELECT diff --git a/src/test/regress/sql/multi_shard_update_delete.sql b/src/test/regress/sql/multi_shard_update_delete.sql index e0777ed27..a15ddc3d0 100644 --- a/src/test/regress/sql/multi_shard_update_delete.sql +++ b/src/test/regress/sql/multi_shard_update_delete.sql @@ -568,13 +568,18 @@ WHERE user_id IN SELECT user_id FROM events_test_table); --- Reference tables can not locate on the outer part of the outer join +-- Reference tables can locate on the outer part of the outer join +-- Note that we don't need to sort the output because +-- citus.sort_returning is enabled by default during +-- regression tests. 
UPDATE users_test_table SET value_1 = 4 -WHERE user_id IN - (SELECT DISTINCT e2.user_id - FROM users_reference_copy_table - LEFT JOIN users_test_table e2 ON (e2.user_id = users_reference_copy_table.value_1)) RETURNING *; +WHERE user_id IN ( + SELECT DISTINCT e2.user_id + FROM users_reference_copy_table + LEFT JOIN users_test_table e2 ON (e2.user_id = users_reference_copy_table.value_1) +) +RETURNING *; -- Volatile functions are also not supported UPDATE users_test_table diff --git a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql index 56cc4455d..64ff78c48 100644 --- a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql +++ b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql @@ -42,9 +42,9 @@ SELECT subquery_1.user_id FROM ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_1 ORDER BY 1; --- Shouldn't work, reference table at the outer side is not allowed +-- Should work, reference table at the outer side is allowed SELECT * FROM - (SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table + (SELECT random() > 2 FROM users_ref_test_table LEFT JOIN user_buy_test_table ON users_ref_test_table.id = user_buy_test_table.user_id) subquery_1; -- Should work, reference table at the inner side is allowed @@ -52,9 +52,9 @@ SELECT count(*) FROM (SELECT random() FROM users_ref_test_table RIGHT JOIN user_buy_test_table ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1; --- Shouldn't work, reference table at the outer side is not allowed +-- Should work, reference table at the outer side is allowed SELECT * FROM - (SELECT random() FROM user_buy_test_table RIGHT JOIN users_ref_test_table + (SELECT random() > 2 FROM user_buy_test_table RIGHT JOIN users_ref_test_table ON user_buy_test_table.user_id = users_ref_test_table.id) subquery_1; -- Equi join test with reference table on non-partition keys @@ -166,7 +166,7 @@ SELECT 
count(*) FROM SELECT count(*) FROM user_buy_test_table LEFT JOIN (SELECT * FROM generate_series(1,10) id) users_ref_test_table ON user_buy_test_table.item_id = users_ref_test_table.id; --- table function cannot be the outer relationship in an outer join +-- table function can be the outer relationship in an outer join SELECT count(*) FROM (SELECT random() FROM user_buy_test_table RIGHT JOIN generate_series(1,10) AS users_ref_test_table(id) ON user_buy_test_table.item_id > users_ref_test_table.id) subquery_1; @@ -208,7 +208,7 @@ ON user_buy_test_table.item_id = users_ref_test_table.id; SELECT count(*) FROM user_buy_test_table LEFT JOIN (SELECT 5 AS id) users_ref_test_table ON user_buy_test_table.item_id = users_ref_test_table.id; --- subquery without FROM cannot be the outer relationship in an outer join +-- subquery without FROM can be the outer relationship in an outer join SELECT count(*) FROM user_buy_test_table RIGHT JOIN (SELECT 5 AS id) users_ref_test_table ON user_buy_test_table.item_id = users_ref_test_table.id; @@ -276,8 +276,8 @@ SELECT * FROM SELECT user_id FROM user_buy_test_table) sub ORDER BY 1 DESC; --- query can be pushed down when a reference table inside union query is --- joined with a distributed table. reference table cannot be at +-- query is supported when a reference table inside union query is +-- joined with a distributed table. reference table can be at -- the outer part. 
SELECT * FROM (SELECT user_id FROM users_ref_test_table ref LEFT JOIN user_buy_test_table dis @@ -300,7 +300,7 @@ FROM ) as foo GROUP BY user_id ORDER BY 2 DESC LIMIT 10; --- should not be able to pushdown since reference table is in the +-- supported even if the reference table is in the -- direct outer part of the left join SELECT user_id, sum(value_1) @@ -313,15 +313,15 @@ FROM ) as foo GROUP BY user_id ORDER BY 2 DESC LIMIT 10; --- should not be able to pushdown since reference table is in the +-- supported even if the reference table is in the -- direct outer part of the left join wrapped into a subquery SELECT - * + COUNT(*) = 1581 FROM (SELECT *, random() FROM events_reference_table) as ref_all LEFT JOIN users_table ON (users_table.user_id = ref_all.value_2); --- should not be able to pushdown since reference table is in the +-- supported even if the reference table is in the -- outer part of the left join SELECT user_id, sum(value_1) @@ -1082,7 +1082,7 @@ INNER JOIN GROUP BY types ORDER BY types; --- just a sanity check that we don't allow this if the reference table is on the +-- just a sanity check that we allow this even if the reference table is on the -- left part of the left join SELECT count(*) FROM (SELECT random() FROM users_ref_test_table LEFT JOIN user_buy_test_table @@ -1425,7 +1425,7 @@ JOIN ORDER BY 1 LIMIT 5; --- outer part of the LEFT JOIN consists only reference tables, so we cannot push down +-- supported even if the outer part of the LEFT JOIN consists only reference tables -- we have different combinations for ON condition, true/false/two column join/single column filter SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON true; SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON true; @@ -1439,7 +1439,7 @@ SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN 
users_ref_test_table re SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id LEFT JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id); --- outer part of the LEFT JOIN consists only reference tables within a subquery, so we cannot push down +-- outer part of the LEFT JOIN consists only reference tables within a subquery -- we have different combinations for ON condition, true/false/two column join/single column filter SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON true; SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON true; @@ -1454,16 +1454,13 @@ SELECT *, random() FROM ( SELECT *,random() FROM user_buy_test_table WHERE user_id > ( SELECT count(*) FROM (SELECT *,random() FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as bar) as foo LEFT JOIN (SELECT *, random() FROM (SELECT *,random() FROM user_buy_test_table d1 JOIN user_buy_test_table d2 USING (user_id)) as bar_inner ) as bar ON true)) as boo; --- In theory, we should be able to pushdown this query --- however, as the LEFT JOIN condition is between a reference table and the distributed table --- Postgres generates a LEFT JOIN alternative among those tables SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id); -- same as the above query, but this time LEFT JOIN condition is between distributed tables -- so Postgres doesn't generate join restriction between reference and distributed tables SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER 
JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.user_id = user_buy_test_table.user_id); --- outer part of the LEFT JOIN consists only intermediate result due to LIMIT, so we cannot push down +-- outer part of the LEFT JOIN consists only intermediate result due to LIMIT but this is still supported SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LIMIT 5) as foo LEFT JOIN user_buy_test_table ON true; -- should be fine as OUTER part is the distributed table diff --git a/src/test/regress/sql/multi_view.sql b/src/test/regress/sql/multi_view.sql index 3165f50c4..d80ed5c97 100644 --- a/src/test/regress/sql/multi_view.sql +++ b/src/test/regress/sql/multi_view.sql @@ -231,15 +231,16 @@ SELECT * FROM ) s1 ORDER BY 2 DESC, 1; --- event vs table non-partition-key join is not supported --- given that we cannot recursively plan tables yet -SELECT * FROM +-- event vs table non-partition-key join is supported +-- given that we can recursively plan events_table +SELECT count(*), user_id, done_event FROM (SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event FROM recent_users ru LEFT JOIN events_table et ON(ru.user_id = et.event_type) ) s1 -ORDER BY 2 DESC, 1; +GROUP BY user_id, done_event +ORDER BY 1,2,3; -- create a select only view CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 1 and value_1 <3; diff --git a/src/test/regress/sql/non_colocated_subquery_joins.sql b/src/test/regress/sql/non_colocated_subquery_joins.sql index 0c8953d2c..e1d25a691 100644 --- a/src/test/regress/sql/non_colocated_subquery_joins.sql +++ b/src/test/regress/sql/non_colocated_subquery_joins.sql @@ -724,7 +724,7 @@ $$); -- similar to the above, make sure that we skip recursive planning when -- the subquery contains only intermediate results -SELECT * +SELECT COUNT(*) = 176 FROM ( SELECT * FROM( diff --git 
a/src/test/regress/sql/values.sql b/src/test/regress/sql/values.sql index 51328d4ff..4a5bb8352 100644 --- a/src/test/regress/sql/values.sql +++ b/src/test/regress/sql/values.sql @@ -159,7 +159,7 @@ FROM (SELECT a,b FROM (VALUES (1, 'one'), (2, 'two'), (3, 'three')) as t(a,b) ) as foo (num,letter) ON (key = num); --- VALUES with unsupported OUTER join +-- VALUES with supported OUTER join (since test_values is recursively planned) SELECT count(*) FROM @@ -299,7 +299,7 @@ SELECT count(*) FROM (SELECT random() FROM test_values LEFT JOIN (SELECT a, b FROM (VALUES (1, 'one'), (2, 'two'), (3, 'three')) as t(a,b)) as values_data(a,b) ON test_values.key > values_data.a) subquery_1; --- VALUES cannot be the right relationship in a join +-- VALUES can be the right relationship in a join SELECT count(*) FROM (SELECT random() FROM test_values RIGHT JOIN (SELECT a, b FROM (VALUES (1, 'one'), (2, 'two'), (3, 'three')) as t(a,b)) as values_data(a,b) ON test_values.key > values_data.a) subquery_1; diff --git a/src/test/regress/sql/with_join.sql b/src/test/regress/sql/with_join.sql index 2f6268c8e..2f721af68 100644 --- a/src/test/regress/sql/with_join.sql +++ b/src/test/regress/sql/with_join.sql @@ -126,7 +126,7 @@ ORDER BY LIMIT 5; --- cte LEFT JOIN distributed_table should error out +-- cte LEFT JOIN distributed_table should work -- as long as the CTE is recursively planned WITH cte AS MATERIALIZED ( SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1 @@ -173,7 +173,7 @@ ORDER BY LIMIT 5; --- distributed_table RIGHT JOIN cte should error out +-- distributed_table RIGHT JOIN cte should work WITH cte AS MATERIALIZED ( SELECT * FROM users_table WHERE value_1 = 1 ORDER BY value_1 ) @@ -188,7 +188,7 @@ ORDER BY LIMIT 5; --- cte FULL JOIN distributed_table should error out +-- cte FULL JOIN distributed_table should work WITH cte AS MATERIALIZED ( SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1 ) From f52381387e0454d5bc78644f45ced4b32e70933c Mon Sep 17 00:00:00 2001 
From: Onur Tirtir Date: Tue, 22 Nov 2022 18:35:33 +0300 Subject: [PATCH 2/5] Phase - II: recursively plan non-recurring subqueries too --- .../distributed/planner/recursive_planning.c | 12 +-- src/test/regress/expected/cte_inline.out | 35 +++------ src/test/regress/expected/cte_inline_0.out | 35 +++------ ...lti_insert_select_non_pushable_queries.out | 32 ++++++-- ...ulti_subquery_complex_reference_clause.out | 75 ++++++++++++++---- .../multi_subquery_in_where_clause.out | 9 ++- .../expected/non_colocated_subquery_joins.out | 13 +++- .../expected/subqueries_not_supported.out | 27 +++++-- src/test/regress/expected/with_basics.out | 76 +++++++++++++++---- src/test/regress/sql/cte_inline.sql | 4 - ...lti_insert_select_non_pushable_queries.sql | 10 ++- ...ulti_subquery_complex_reference_clause.sql | 7 +- .../sql/non_colocated_subquery_joins.sql | 2 - .../regress/sql/subqueries_not_supported.sql | 8 +- src/test/regress/sql/with_basics.sql | 6 +- 15 files changed, 229 insertions(+), 122 deletions(-) diff --git a/src/backend/distributed/planner/recursive_planning.c b/src/backend/distributed/planner/recursive_planning.c index 28b856b2c..779ee79fa 100644 --- a/src/backend/distributed/planner/recursive_planning.c +++ b/src/backend/distributed/planner/recursive_planning.c @@ -878,12 +878,14 @@ RecursivelyPlanDistributedJoinNode(Node *node, Query *query, else if (distributedRte->rtekind == RTE_SUBQUERY) { /* - * XXX: Similar to JoinExpr, we don't know how to recursively plan distributed - * subqueries within join expressions yet. + * We don't try logging the subquery here because RecursivelyPlanSubquery + * will anyway do so if the query doesn't reference the outer query. 
*/ - ereport(DEBUG4, (errmsg("recursive planner cannot plan distributed " - "subqueries within join expressions yet"))); - return; + ereport(DEBUG1, (errmsg("recursively planning the distributed subquery " + "since it is part of a distributed join node " + "that is outer joined with a recurring rel"))); + + RecursivelyPlanSubquery(distributedRte->subquery, recursivePlanningContext); } else { diff --git a/src/test/regress/expected/cte_inline.out b/src/test/regress/expected/cte_inline.out index 7e46227d2..39d48e915 100644 --- a/src/test/regress/expected/cte_inline.out +++ b/src/test/regress/expected/cte_inline.out @@ -1357,22 +1357,17 @@ DEBUG: Router planner cannot handle multi-shard select queries { "8" : "test18", "8" : "test28", "8" : "test38", "8" : "test48", "8" : "test58", "8" : "test68", "8" : "test78", "8" : "test8", "8" : "test88", "8" : "test98" } (1 row) --- this test can only work if the CTE is recursively --- planned WITH b AS (SELECT * FROM test_table) SELECT count(*) FROM (SELECT key as x FROM test_table OFFSET 0) as ref LEFT JOIN b ON (ref.x = b.key); DEBUG: CTE b is going to be inlined via distributed planning DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for subquery SELECT key AS x FROM cte_inline.test_table OFFSET 0 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) b ON ((ref.x OPERATOR(pg_catalog.=) b.key))) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Router 
planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE b: SELECT key, value, other_value FROM cte_inline.test_table -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_2 for subquery SELECT key AS x FROM cte_inline.test_table OFFSET 0 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((ref.x OPERATOR(pg_catalog.=) b.key))) +DEBUG: generating subplan XXX_2 for subquery SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((ref.x OPERATOR(pg_catalog.=) b.key))) DEBUG: Creating router plan count --------------------------------------------------------------------- @@ -1397,8 +1392,6 @@ DEBUG: Router planner cannot handle multi-shard select queries 480 (1 row) --- cte a has to be recursively planned because of OFFSET 0 --- after that, cte b also requires recursive planning WITH a AS (SELECT * FROM test_table OFFSET 0), b AS (SELECT * FROM test_table) SELECT min(a.key) FROM a 
LEFT JOIN b ON (a.value = b.value); @@ -1407,13 +1400,10 @@ DEBUG: CTE b is going to be inlined via distributed planning DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM cte_inline.test_table OFFSET 0 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT min(a.key) AS min FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a LEFT JOIN (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) b ON ((a.value OPERATOR(pg_catalog.=) b.value))) -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value, other_value FROM cte_inline.test_table OFFSET 0 -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_2 for CTE b: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for subquery SELECT key, value, other_value FROM cte_inline.test_table DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT min(a.key) AS min FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a LEFT JOIN (SELECT 
intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((a.value OPERATOR(pg_catalog.=) b.value))) DEBUG: Creating router plan min @@ -1450,14 +1440,11 @@ DEBUG: CTE cte_2 is going to be inlined via distributed planning DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for subquery SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT test_table.value FROM cte_inline.test_table WHERE (test_table.key OPERATOR(pg_catalog.>) 1)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3 +DEBUG: recursively planning left side of the full join since the other side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1) -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_2 for CTE cte_2: SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_1 FULL 
JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3 +DEBUG: generating subplan XXX_2 for subquery SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3 DEBUG: Creating router plan value --------------------------------------------------------------------- diff --git a/src/test/regress/expected/cte_inline_0.out b/src/test/regress/expected/cte_inline_0.out index ab2b91791..a727d4d21 100644 --- a/src/test/regress/expected/cte_inline_0.out +++ b/src/test/regress/expected/cte_inline_0.out @@ -1357,22 +1357,17 @@ DEBUG: Router planner cannot handle multi-shard select queries { "8" : "test18", "8" : "test28", "8" : "test38", "8" : "test48", "8" : "test58", "8" : "test68", "8" : "test78", "8" : "test8", "8" : "test88", "8" : "test98" } (1 row) --- this test can only work if the CTE is recursively --- planned WITH b AS (SELECT * FROM test_table) SELECT count(*) FROM (SELECT key as x FROM test_table OFFSET 0) as ref LEFT JOIN b ON (ref.x = b.key); DEBUG: CTE b is going to be inlined via distributed planning DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for subquery SELECT key AS x FROM cte_inline.test_table OFFSET 0 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 
'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) b ON ((ref.x OPERATOR(pg_catalog.=) b.key))) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE b: SELECT key, value, other_value FROM cte_inline.test_table -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_2 for subquery SELECT key AS x FROM cte_inline.test_table OFFSET 0 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((ref.x OPERATOR(pg_catalog.=) b.key))) +DEBUG: generating subplan XXX_2 for subquery SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b 
ON ((ref.x OPERATOR(pg_catalog.=) b.key))) DEBUG: Creating router plan count --------------------------------------------------------------------- @@ -1397,8 +1392,6 @@ DEBUG: Router planner cannot handle multi-shard select queries 480 (1 row) --- cte a has to be recursively planned because of OFFSET 0 --- after that, cte b also requires recursive planning WITH a AS (SELECT * FROM test_table OFFSET 0), b AS (SELECT * FROM test_table) SELECT min(a.key) FROM a LEFT JOIN b ON (a.value = b.value); @@ -1407,13 +1400,10 @@ DEBUG: CTE b is going to be inlined via distributed planning DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM cte_inline.test_table OFFSET 0 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT min(a.key) AS min FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a LEFT JOIN (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) b ON ((a.value OPERATOR(pg_catalog.=) b.value))) -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value, other_value FROM cte_inline.test_table OFFSET 0 -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_2 for CTE b: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Router planner cannot handle multi-shard select 
queries +DEBUG: generating subplan XXX_2 for subquery SELECT key, value, other_value FROM cte_inline.test_table DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT min(a.key) AS min FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((a.value OPERATOR(pg_catalog.=) b.value))) DEBUG: Creating router plan min @@ -1450,14 +1440,11 @@ DEBUG: CTE cte_2 is going to be inlined via distributed planning DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for subquery SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT test_table.value FROM cte_inline.test_table WHERE (test_table.key OPERATOR(pg_catalog.>) 1)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3 +DEBUG: recursively planning left side of the full join since the other side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1) -DEBUG: Router planner cannot 
handle multi-shard select queries -DEBUG: generating subplan XXX_2 for CTE cte_2: SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3 +DEBUG: generating subplan XXX_2 for subquery SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3 DEBUG: Creating router plan value --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_insert_select_non_pushable_queries.out b/src/test/regress/expected/multi_insert_select_non_pushable_queries.out index 222157a87..fc3f62385 100644 --- a/src/test/regress/expected/multi_insert_select_non_pushable_queries.out +++ b/src/test/regress/expected/multi_insert_select_non_pushable_queries.out @@ -107,7 +107,7 @@ FROM ( ERROR: the query contains a join that requires repartitioning HINT: Set citus.enable_repartition_joins to on to enable repartitioning -- the LEFT JOIN conditon is not on the partition column (i.e., is it part_key divided by 2) --- still, recursive planning will kick in to plan some part of the query +-- but, we can plan the 
query thanks to recursive planning SET client_min_messages TO DEBUG1; INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg ) SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event) @@ -145,14 +145,22 @@ FROM ( AND e.event_type IN (106, 107, 108) ) t2 ON (t1.user_id = (t2.user_id)/2) GROUP BY t1.user_id, hasdone_event -) t GROUP BY user_id, hasdone_event; +) t GROUP BY user_id, hasdone_event +RETURNING user_id, value_1_agg, value_2_agg; DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries DEBUG: generating subplan XXX_1 for subquery SELECT u.user_id, 'step=>1'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.>=) 10) AND (u.user_id OPERATOR(pg_catalog.<=) 25) AND (e.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[100, 101, 102]))) DEBUG: generating subplan XXX_2 for subquery SELECT u.user_id, 'step=>2'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.>=) 10) AND (u.user_id OPERATOR(pg_catalog.<=) 25) AND (e.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[103, 104, 105]))) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) UNION SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) DEBUG: generating subplan XXX_1 for subquery SELECT u.user_id, 'step=>1'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id 
OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.>=) 10) AND (u.user_id OPERATOR(pg_catalog.<=) 25) AND (e.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[100, 101, 102]))) UNION SELECT u.user_id, 'step=>2'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.>=) 10) AND (u.user_id OPERATOR(pg_catalog.<=) 25) AND (e.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[103, 104, 105]))) -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT DISTINCT user_id, 'Has done event'::text AS hasdone_event FROM public.events_table e WHERE ((user_id OPERATOR(pg_catalog.>=) 10) AND (user_id OPERATOR(pg_catalog.<=) 25) AND (event_type OPERATOR(pg_catalog.=) ANY (ARRAY[106, 107, 108]))) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, int4(sum(array_length(events_table, 1))) AS value_1_agg, length(hasdone_event) AS value_2_agg FROM (SELECT t1.user_id, array_agg(t1.event ORDER BY t1."time") AS events_table, COALESCE(t2.hasdone_event, 'Has not done event'::text) AS hasdone_event FROM ((SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone)) t1 LEFT JOIN (SELECT intermediate_result.user_id, intermediate_result.hasdone_event FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, hasdone_event text)) t2 ON 
((t1.user_id OPERATOR(pg_catalog.=) (t2.user_id OPERATOR(pg_catalog./) 2)))) GROUP BY t1.user_id, t2.hasdone_event) t GROUP BY user_id, hasdone_event +DEBUG: Collecting INSERT ... SELECT results on coordinator + user_id | value_1_agg | value_2_agg +--------------------------------------------------------------------- +(0 rows) + RESET client_min_messages; --------------------------------------------------------------------- --------------------------------------------------------------------- @@ -229,7 +237,7 @@ ORDER BY ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- not pushable since the JOIN condition is not equi JOIN -- (subquery_1 JOIN subquery_2) --- still, recursive planning will kick in +-- but, we can plan the query thanks to recursive planning SET client_min_messages TO DEBUG1; INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg) SELECT @@ -295,14 +303,22 @@ WHERE GROUP BY count_pay, user_id ORDER BY - count_pay; + count_pay +RETURNING user_id, value_1_agg, value_2_agg; DEBUG: Set operations are not allowed in distributed INSERT ... 
SELECT queries DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id, 'action=>1'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (users_table.user_id OPERATOR(pg_catalog.>=) 10) AND (users_table.user_id OPERATOR(pg_catalog.<=) 70) AND (events_table.event_type OPERATOR(pg_catalog.>) 10) AND (events_table.event_type OPERATOR(pg_catalog.<) 12)) DEBUG: generating subplan XXX_2 for subquery SELECT users_table.user_id, 'action=>2'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (users_table.user_id OPERATOR(pg_catalog.>=) 10) AND (users_table.user_id OPERATOR(pg_catalog.<=) 70) AND (events_table.event_type OPERATOR(pg_catalog.>) 12) AND (events_table.event_type OPERATOR(pg_catalog.<) 14)) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) UNION SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id, 'action=>1'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (users_table.user_id OPERATOR(pg_catalog.>=) 10) AND (users_table.user_id OPERATOR(pg_catalog.<=) 70) AND (events_table.event_type OPERATOR(pg_catalog.>) 10) AND (events_table.event_type OPERATOR(pg_catalog.<) 12)) UNION SELECT users_table.user_id, 
'action=>2'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (users_table.user_id OPERATOR(pg_catalog.>=) 10) AND (users_table.user_id OPERATOR(pg_catalog.<=) 70) AND (events_table.event_type OPERATOR(pg_catalog.>) 12) AND (events_table.event_type OPERATOR(pg_catalog.<) 14)) -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT user_id, count(*) AS count_pay FROM public.users_table WHERE ((user_id OPERATOR(pg_catalog.>=) 10) AND (user_id OPERATOR(pg_catalog.<=) 70) AND (value_1 OPERATOR(pg_catalog.>) 15) AND (value_1 OPERATOR(pg_catalog.<) 17)) GROUP BY user_id HAVING (count(*) OPERATOR(pg_catalog.>) 1) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, int4(avg(array_length(events_table, 1))) AS value_1_agg, int4(count_pay) AS value_2_agg FROM (SELECT subquery_1.user_id, array_agg(subquery_1.event ORDER BY subquery_1."time") AS events_table, COALESCE(subquery_2.count_pay, (0)::bigint) AS count_pay FROM ((SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone)) subquery_1 LEFT JOIN (SELECT intermediate_result.user_id, intermediate_result.count_pay FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, count_pay bigint)) subquery_2 ON ((subquery_1.user_id OPERATOR(pg_catalog.>) subquery_2.user_id))) 
GROUP BY subquery_1.user_id, subquery_2.count_pay) subquery_top WHERE (array_ndims(events_table) OPERATOR(pg_catalog.>) 0) GROUP BY count_pay, user_id ORDER BY count_pay +DEBUG: Collecting INSERT ... SELECT results on coordinator + user_id | value_1_agg | value_2_agg +--------------------------------------------------------------------- +(0 rows) + RESET client_min_messages; --------------------------------------------------------------------- --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_subquery_complex_reference_clause.out b/src/test/regress/expected/multi_subquery_complex_reference_clause.out index 22a64ccb5..6fe7c1570 100644 --- a/src/test/regress/expected/multi_subquery_complex_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_complex_reference_clause.out @@ -679,8 +679,14 @@ SELECT * FROM WHERE user_id > 2 and value_2 = 1) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN (SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + user_id +--------------------------------------------------------------------- + + 3 + 5 + 4 +(4 rows) + -- we could even support the following where the subquery -- on the outer part of the left join contains a reference table SELECT max(events_all.cnt), events_all.usr_id @@ -1126,7 +1132,7 @@ count(*) AS cnt, "generated_group_field" 84 | 0 (6 rows) - -- RIGHT JOINs used with INNER JOINs should error out since reference table exist in the + -- RIGHT JOINs used with INNER JOINs should work even if the reference table exist in the -- right side of the RIGHT JOIN. 
SELECT count(*) AS cnt, "generated_group_field" @@ -1164,8 +1170,16 @@ count(*) AS cnt, "generated_group_field" ORDER BY cnt DESC, generated_group_field ASC LIMIT 10; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + cnt | generated_group_field +--------------------------------------------------------------------- + 1007 | 2 + 952 | 5 + 773 | 1 + 696 | 3 + 433 | 4 + 190 | 0 +(6 rows) + -- right join where the inner part of the join includes a reference table -- joined with hash partitioned table using non-equi join SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event @@ -1316,8 +1330,27 @@ FROM ORDER BY user_id limit 50; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + user_id | lastseen +--------------------------------------------------------------------- + 1 | + 1 | + 2 | Thu Nov 23 17:26:14.563216 2017 + 2 | Thu Nov 23 17:26:14.563216 2017 + 2 | Thu Nov 23 17:26:14.563216 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 4 | + 4 | + 4 | + 5 | + 5 | + 5 | + 5 | + 5 | + 6 | +(17 rows) + -- -- UNIONs and JOINs with reference tables, should error out -- @@ -1553,9 +1586,6 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c 67 (1 row) --- we could not push this query not due to non colocated --- subqueries (i.e., they are recursively planned) --- but due to outer join restrictions SELECT count(*) AS cnt, "generated_group_field" FROM @@ -1593,9 +1623,20 @@ count(*) AS cnt, "generated_group_field" cnt DESC, generated_group_field ASC LIMIT 10; DEBUG: generating subplan XXX_1 for subquery SELECT user_id, value_2 AS generated_group_field FROM public.users_table users -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS cnt, generated_group_field FROM (SELECT "eventQuery".user_id, 
random() AS random, "eventQuery".generated_group_field FROM (SELECT multi_group_wrapper_1."time", multi_group_wrapper_1.event_user_id, multi_group_wrapper_1.user_id, left_group_by_1.generated_group_field, random() AS random FROM ((SELECT temp_data_queries."time", temp_data_queries.event_user_id, user_filters_1.user_id FROM ((SELECT events."time", events.user_id AS event_user_id FROM public.events_table events WHERE (events.user_id OPERATOR(pg_catalog.>) 2)) temp_data_queries JOIN (SELECT users.user_id FROM public.users_reference_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 2) AND (users.value_2 OPERATOR(pg_catalog.=) 5))) user_filters_1 ON ((temp_data_queries.event_user_id OPERATOR(pg_catalog.<) user_filters_1.user_id)))) multi_group_wrapper_1 RIGHT JOIN (SELECT intermediate_result.user_id, intermediate_result.generated_group_field FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, generated_group_field integer)) left_group_by_1 ON ((left_group_by_1.user_id OPERATOR(pg_catalog.>) multi_group_wrapper_1.event_user_id)))) "eventQuery") "pushedDownQuery" GROUP BY generated_group_field ORDER BY (count(*)) DESC, generated_group_field LIMIT 10 -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT temp_data_queries."time", temp_data_queries.event_user_id, user_filters_1.user_id FROM ((SELECT events."time", events.user_id AS event_user_id FROM public.events_table events WHERE (events.user_id OPERATOR(pg_catalog.>) 2)) temp_data_queries JOIN (SELECT users.user_id FROM public.users_reference_table users WHERE 
((users.user_id OPERATOR(pg_catalog.>) 2) AND (users.value_2 OPERATOR(pg_catalog.=) 5))) user_filters_1 ON ((temp_data_queries.event_user_id OPERATOR(pg_catalog.<) user_filters_1.user_id))) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS cnt, generated_group_field FROM (SELECT "eventQuery".user_id, random() AS random, "eventQuery".generated_group_field FROM (SELECT multi_group_wrapper_1."time", multi_group_wrapper_1.event_user_id, multi_group_wrapper_1.user_id, left_group_by_1.generated_group_field, random() AS random FROM ((SELECT intermediate_result."time", intermediate_result.event_user_id, intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result("time" timestamp without time zone, event_user_id integer, user_id integer)) multi_group_wrapper_1 RIGHT JOIN (SELECT intermediate_result.user_id, intermediate_result.generated_group_field FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, generated_group_field integer)) left_group_by_1 ON ((left_group_by_1.user_id OPERATOR(pg_catalog.>) multi_group_wrapper_1.event_user_id)))) "eventQuery") "pushedDownQuery" GROUP BY generated_group_field ORDER BY (count(*)) DESC, generated_group_field LIMIT 10 + cnt | generated_group_field +--------------------------------------------------------------------- + 2042 | 1 + 1675 | 2 + 1470 | 4 + 1259 | 3 + 941 | 0 + 686 | 5 +(6 rows) + RESET client_min_messages; -- two hash partitioned relations are not joined -- on partiton keys although reference table is fine @@ -2132,12 +2173,14 @@ SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INN 6 (1 row) --- one example where unsupported outer join is deep inside a subquery +-- one example where supported outer join is deep inside a subquery SELECT *, random() FROM ( SELECT *,random() FROM user_buy_test_table WHERE user_id > ( SELECT count(*) FROM (SELECT 
*,random() FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as bar) as foo LEFT JOIN (SELECT *, random() FROM (SELECT *,random() FROM user_buy_test_table d1 JOIN user_buy_test_table d2 USING (user_id)) as bar_inner ) as bar ON true)) as boo; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + user_id | item_id | buy_count | random | random +--------------------------------------------------------------------- +(0 rows) + SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id); count --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_subquery_in_where_clause.out b/src/test/regress/expected/multi_subquery_in_where_clause.out index b314f72d5..834cef505 100644 --- a/src/test/regress/expected/multi_subquery_in_where_clause.out +++ b/src/test/regress/expected/multi_subquery_in_where_clause.out @@ -534,8 +534,13 @@ WHERE GROUP BY user_id HAVING count(*) > 1 AND sum(value_2) > 29 ORDER BY 1; -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + user_id +--------------------------------------------------------------------- + 2 + 3 + 4 +(3 rows) + -- NOT EXISTS query has non-equi join SELECT user_id, array_length(events_table, 1) FROM ( diff --git a/src/test/regress/expected/non_colocated_subquery_joins.out b/src/test/regress/expected/non_colocated_subquery_joins.out index a4d66aba2..d03d4ecf3 100644 --- a/src/test/regress/expected/non_colocated_subquery_joins.out +++ b/src/test/regress/expected/non_colocated_subquery_joins.out @@ -715,8 +715,6 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) 
AS c (1 row) -- recursive planning should kick in for outer joins as well --- but this time recursive planning might convert the query --- into a not supported join SELECT true AS valid FROM explain_json_2($$ SELECT @@ -729,8 +727,15 @@ SELECT true AS valid FROM explain_json_2($$ $$); DEBUG: function does not have co-located tables DEBUG: generating subplan XXX_1 for subquery SELECT value_2, random() AS random FROM public.users_table -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT users_table.value_2, random() AS random FROM public.users_table) u1 RIGHT JOIN (SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u2 USING (value_2)) -ERROR: cannot pushdown the subquery +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT value_2, random() AS random FROM public.users_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u1 RIGHT JOIN (SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u2 USING (value_2)) + valid +--------------------------------------------------------------------- + t +(1 row) + -- set operations may produce not very efficient plans -- although we could have picked a as our anchor subquery, -- we pick foo in this case and recursively plan 
a diff --git a/src/test/regress/expected/subqueries_not_supported.out b/src/test/regress/expected/subqueries_not_supported.out index b69256866..1a5895a54 100644 --- a/src/test/regress/expected/subqueries_not_supported.out +++ b/src/test/regress/expected/subqueries_not_supported.out @@ -70,19 +70,34 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM ERROR: cannot handle complex subqueries when the router executor is disabled SET citus.enable_router_execution TO true; -- OUTER JOINs where the outer part is recursively planned and not the other way --- around is not supported +-- around are supported SELECT foo.value_2 +INTO result_table FROM - (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4) LIMIT 5) as foo + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4) ORDER BY users_table.value_2 LIMIT 5) as foo LEFT JOIN (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar ON(foo.value_2 = bar.value_2); DEBUG: push down of limit count: 5 -DEBUG: generating subplan XXX_1 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) LIMIT 5 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.value_2 FROM ((SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo LEFT JOIN (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar ON 
((foo.value_2 OPERATOR(pg_catalog.=) bar.value_2))) -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table +DEBUG: generating subplan XXX_1 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) ORDER BY users_table.value_2 LIMIT 5 +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.value_2 FROM ((SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo LEFT JOIN (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) bar ON ((foo.value_2 OPERATOR(pg_catalog.=) bar.value_2))) +SELECT COUNT(*) = 60 FROM result_table WHERE value_2 = 0; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT COUNT(*) = 0 FROM result_table WHERE value_2 != 0; + ?column? 
+--------------------------------------------------------------------- + t +(1 row) + +DROP TABLE result_table; -- We do not support GROUPING SETS in subqueries -- This also includes ROLLUP or CUBE clauses SELECT * FROM (SELECT user_id, value_1 FROM users_table GROUP BY GROUPING SETS ((user_id), (value_1))) s; diff --git a/src/test/regress/expected/with_basics.out b/src/test/regress/expected/with_basics.out index 0d0070fd6..4eefb8837 100644 --- a/src/test/regress/expected/with_basics.out +++ b/src/test/regress/expected/with_basics.out @@ -840,8 +840,18 @@ LEFT JOIN WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id ORDER BY 2 DESC, 1 DESC; -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + user_id | user_id +--------------------------------------------------------------------- + 7 | + 6 | + 4 | + 2 | + 8 | 8 + 5 | 5 + 3 | 3 + 1 | 1 +(8 rows) + -- similar query as the above, but this time -- use NOT EXITS, which is pretty common struct WITH distinct_undistribured AS @@ -855,9 +865,20 @@ LEFT JOIN WHERE NOT EXISTS (SELECT NULL FROM distinct_undistribured - WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id; -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id +ORDER BY 1,2; + user_id | user_id +--------------------------------------------------------------------- + 1 | + 2 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | +(8 rows) + -- same NOT EXISTS struct, but with CTE -- so should work WITH 
distinct_undistribured AS ( @@ -905,8 +926,11 @@ LEFT JOIN (SELECT NULL FROM distinct_undistribured d2 WHERE d1.user_id = d2.user_id )) AS bar USING (user_id); -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + count +--------------------------------------------------------------------- + 8 +(1 row) + -- should work fine with materialized ctes WITH distinct_undistribured AS MATERIALIZED ( SELECT DISTINCT user_id @@ -951,8 +975,18 @@ LEFT JOIN WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id ORDER BY 2 DESC, 1 DESC; -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + user_id | user_id +--------------------------------------------------------------------- + 7 | + 6 | + 4 | + 2 | + 8 | 8 + 5 | 5 + 3 | 3 + 1 | 1 +(8 rows) + WITH distinct_undistribured AS MATERIALIZED (SELECT DISTINCT user_id FROM test_cte) @@ -964,9 +998,20 @@ LEFT JOIN WHERE NOT EXISTS (SELECT NULL FROM distinct_undistribured - WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id; -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id +ORDER BY 1,2; + user_id | user_id +--------------------------------------------------------------------- + 1 | + 2 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | +(8 rows) + -- NOT EXISTS struct, with cte inlining disabled WITH distinct_undistribured AS MATERIALIZED( SELECT DISTINCT user_id @@ 
-1013,8 +1058,11 @@ LEFT JOIN (SELECT NULL FROM distinct_undistribured d2 WHERE d1.user_id = d2.user_id )) AS bar USING (user_id); -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + count +--------------------------------------------------------------------- + 8 +(1 row) + -- some test with failures WITH a AS MATERIALIZED (SELECT * FROM users_table LIMIT 10) SELECT user_id/0 FROM users_table JOIN a USING (user_id); diff --git a/src/test/regress/sql/cte_inline.sql b/src/test/regress/sql/cte_inline.sql index ab11c3749..862a8510b 100644 --- a/src/test/regress/sql/cte_inline.sql +++ b/src/test/regress/sql/cte_inline.sql @@ -566,8 +566,6 @@ EXECUTE retry_planning(4); EXECUTE retry_planning(5); EXECUTE retry_planning(6); --- this test can only work if the CTE is recursively --- planned WITH b AS (SELECT * FROM test_table) SELECT count(*) FROM (SELECT key as x FROM test_table OFFSET 0) as ref LEFT JOIN b ON (ref.x = b.key); @@ -578,8 +576,6 @@ WITH a AS (SELECT * FROM test_table), b AS (SELECT * FROM test_table) SELECT count(*) FROM a LEFT JOIN b ON (a.value = b.value); --- cte a has to be recursively planned because of OFFSET 0 --- after that, cte b also requires recursive planning WITH a AS (SELECT * FROM test_table OFFSET 0), b AS (SELECT * FROM test_table) SELECT min(a.key) FROM a LEFT JOIN b ON (a.value = b.value); diff --git a/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql b/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql index 399883ed9..b4654144b 100644 --- a/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql +++ b/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql @@ -112,7 +112,7 @@ FROM ( ) t GROUP BY user_id, hasdone_event; -- the LEFT JOIN conditon is not on the partition column (i.e., is it part_key divided by 2) --- still, recursive planning will kick in to plan some part of the query 
+-- but, we can plan the query thanks to recursive planning SET client_min_messages TO DEBUG1; INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg ) @@ -153,7 +153,8 @@ FROM ( ) t2 ON (t1.user_id = (t2.user_id)/2) GROUP BY t1.user_id, hasdone_event -) t GROUP BY user_id, hasdone_event; +) t GROUP BY user_id, hasdone_event +RETURNING user_id, value_1_agg, value_2_agg; RESET client_min_messages; ------------------------------------ @@ -232,7 +233,7 @@ ORDER BY -- not pushable since the JOIN condition is not equi JOIN -- (subquery_1 JOIN subquery_2) --- still, recursive planning will kick in +-- but, we can plan the query thanks to recursive planning SET client_min_messages TO DEBUG1; INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg) SELECT @@ -298,7 +299,8 @@ WHERE GROUP BY count_pay, user_id ORDER BY - count_pay; + count_pay +RETURNING user_id, value_1_agg, value_2_agg; RESET client_min_messages; ------------------------------------ diff --git a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql index 64ff78c48..b722bd8f9 100644 --- a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql +++ b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql @@ -723,7 +723,7 @@ count(*) AS cnt, "generated_group_field" cnt DESC, generated_group_field ASC LIMIT 10; - -- RIGHT JOINs used with INNER JOINs should error out since reference table exist in the + -- RIGHT JOINs used with INNER JOINs should work even if the reference table exist in the -- right side of the RIGHT JOIN. 
SELECT count(*) AS cnt, "generated_group_field" @@ -1097,9 +1097,6 @@ SELECT count(*) FROM ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_2 WHERE subquery_1.user_id != subquery_2.user_id ; --- we could not push this query not due to non colocated --- subqueries (i.e., they are recursively planned) --- but due to outer join restrictions SELECT count(*) AS cnt, "generated_group_field" FROM @@ -1449,7 +1446,7 @@ SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INN SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (user_buy_test_table.user_id > 19); SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id); --- one example where unsupported outer join is deep inside a subquery +-- one example where supported outer join is deep inside a subquery SELECT *, random() FROM ( SELECT *,random() FROM user_buy_test_table WHERE user_id > ( SELECT count(*) FROM (SELECT *,random() FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as bar) as foo LEFT JOIN (SELECT *, random() FROM (SELECT *,random() FROM user_buy_test_table d1 JOIN user_buy_test_table d2 USING (user_id)) as bar_inner ) as bar ON true)) as boo; diff --git a/src/test/regress/sql/non_colocated_subquery_joins.sql b/src/test/regress/sql/non_colocated_subquery_joins.sql index e1d25a691..bde8f5b0a 100644 --- a/src/test/regress/sql/non_colocated_subquery_joins.sql +++ b/src/test/regress/sql/non_colocated_subquery_joins.sql @@ -532,8 +532,6 @@ $$); -- recursive planning should kick in for outer joins as well --- but this time recursive planning might convert the query --- into a not supported join SELECT true AS valid FROM 
explain_json_2($$ SELECT diff --git a/src/test/regress/sql/subqueries_not_supported.sql b/src/test/regress/sql/subqueries_not_supported.sql index 242623a3f..1360a9313 100644 --- a/src/test/regress/sql/subqueries_not_supported.sql +++ b/src/test/regress/sql/subqueries_not_supported.sql @@ -68,15 +68,19 @@ FROM SET citus.enable_router_execution TO true; -- OUTER JOINs where the outer part is recursively planned and not the other way --- around is not supported +-- around are supported SELECT foo.value_2 +INTO result_table FROM - (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4) LIMIT 5) as foo + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4) ORDER BY users_table.value_2 LIMIT 5) as foo LEFT JOIN (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar ON(foo.value_2 = bar.value_2); +SELECT COUNT(*) = 60 FROM result_table WHERE value_2 = 0; +SELECT COUNT(*) = 0 FROM result_table WHERE value_2 != 0; +DROP TABLE result_table; -- We do not support GROUPING SETS in subqueries -- This also includes ROLLUP or CUBE clauses diff --git a/src/test/regress/sql/with_basics.sql b/src/test/regress/sql/with_basics.sql index 8c0bf2823..ff3c8ebce 100644 --- a/src/test/regress/sql/with_basics.sql +++ b/src/test/regress/sql/with_basics.sql @@ -573,7 +573,8 @@ LEFT JOIN WHERE NOT EXISTS (SELECT NULL FROM distinct_undistribured - WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id; + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id +ORDER BY 1,2; -- same NOT EXISTS struct, but with CTE -- so should work @@ -658,7 +659,8 
@@ LEFT JOIN WHERE NOT EXISTS (SELECT NULL FROM distinct_undistribured - WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id; + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id +ORDER BY 1,2; -- NOT EXISTS struct, with cte inlining disabled WITH distinct_undistribured AS MATERIALIZED( From e7e48812896742c22787b2edc0c485a302f19690 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Tue, 29 Nov 2022 17:38:06 +0300 Subject: [PATCH 3/5] Phase - III: recursively plan non-recurring sub join trees too --- .../distributed/planner/recursive_planning.c | 29 ++-- .../regress/expected/multi_outer_join.out | 40 ++++- .../expected/multi_outer_join_reference.out | 43 ++++- .../regress/expected/sqlancer_failures.out | 160 +++++++++++++----- src/test/regress/sql/multi_outer_join.sql | 2 +- .../sql/multi_outer_join_reference.sql | 5 +- src/test/regress/sql/sqlancer_failures.sql | 27 ++- 7 files changed, 238 insertions(+), 68 deletions(-) diff --git a/src/backend/distributed/planner/recursive_planning.c b/src/backend/distributed/planner/recursive_planning.c index 779ee79fa..b74b59de3 100644 --- a/src/backend/distributed/planner/recursive_planning.c +++ b/src/backend/distributed/planner/recursive_planning.c @@ -826,19 +826,28 @@ RecursivelyPlanDistributedJoinNode(Node *node, Query *query, if (IsA(node, JoinExpr)) { /* - * XXX: This, for example, means that RecursivelyPlanRecurringTupleOuterJoins - * needs to plan inner side, i.e., <distributed> INNER JOIN <distributed>, - * of the following join: + * This, for example, means that RecursivelyPlanRecurringTupleOuterJoinWalker + * needs to plan inner side, i.e., "<distributed> INNER JOIN <distributed>", + * of the following join: + * <recurring> LEFT JOIN (<distributed> JOIN <distributed>) * - * <recurring> LEFT JOIN (<distributed> INNER JOIN <distributed>) + * XXX: Ideally, we should handle such a sub join tree by moving + * it into a subquery "as a
whole" but this implies that we need to + * rebuild the rtable and re-point all the Vars to the new rtable + * indexes, so we've not implemented that yet. * - * However, this would require moving part of the join tree into a - * subquery but this implies that we need to rebuild the rtable and - * re-point all the Vars to the new rtable indexes. We have not - * implemented that yet. + * Instead, we recursively plan all the distributed tables in that + * sub join tree. This is much less efficient than the other + * approach (since we lose the opportunity to push down the whole + * sub join tree into the workers) but is easier to implement. */ - ereport(DEBUG4, (errmsg("recursive planner cannot plan distributed sub " - "join nodes yet"))); + + RecursivelyPlanDistributedJoinNode(((JoinExpr *) node)->larg, + query, recursivePlanningContext); + + RecursivelyPlanDistributedJoinNode(((JoinExpr *) node)->rarg, + query, recursivePlanningContext); + return; } diff --git a/src/test/regress/expected/multi_outer_join.out b/src/test/regress/expected/multi_outer_join.out index 8884c876f..17b13773d 100644 --- a/src/test/regress/expected/multi_outer_join.out +++ b/src/test/regress/expected/multi_outer_join.out @@ -406,7 +406,7 @@ ORDER BY l_custkey, r_custkey, t_custkey; 30 | 30 | 30 (17 rows) --- Right join with single shard right most table should error out +-- Right join with single shard right most table should work SELECT l_custkey, r_custkey, t_custkey FROM @@ -414,8 +414,42 @@ FROM LEFT JOIN multi_outer_join_right r1 ON (l1.l_custkey = r1.r_custkey) RIGHT JOIN multi_outer_join_third_reference t1 ON (r1.r_custkey = t1.t_custkey) ORDER BY l_custkey, r_custkey, t_custkey; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join +LOG: join order: [ "multi_outer_join_left" ] +LOG: join order: [ "multi_outer_join_right" ] + l_custkey | r_custkey | t_custkey
+--------------------------------------------------------------------- + 11 | 11 | 11 + 12 | 12 | 12 + 14 | 14 | 14 + 16 | 16 | 16 + 17 | 17 | 17 + 18 | 18 | 18 + 20 | 20 | 20 + 21 | 21 | 21 + 22 | 22 | 22 + 24 | 24 | 24 + 26 | 26 | 26 + 27 | 27 | 27 + 28 | 28 | 28 + 30 | 30 | 30 + | | 1 + | | 2 + | | 3 + | | 4 + | | 5 + | | 6 + | | 7 + | | 8 + | | 9 + | | 10 + | | 13 + | | 15 + | | 19 + | | 23 + | | 25 + | | 29 +(30 rows) + -- Right join with single shard left most table should work SELECT t_custkey, r_custkey, l_custkey diff --git a/src/test/regress/expected/multi_outer_join_reference.out b/src/test/regress/expected/multi_outer_join_reference.out index 1e705d14d..aca91bda7 100644 --- a/src/test/regress/expected/multi_outer_join_reference.out +++ b/src/test/regress/expected/multi_outer_join_reference.out @@ -401,15 +401,50 @@ ORDER BY 1; 30 | 30 | 30 (25 rows) --- Right join with single shard right most table should error out +-- Right join with single shard right most table should work SELECT l_custkey, r_custkey, t_custkey FROM multi_outer_join_left_hash l1 LEFT JOIN multi_outer_join_right_hash r1 ON (l1.l_custkey = r1.r_custkey) - RIGHT JOIN multi_outer_join_third_reference t1 ON (r1.r_custkey = t1.t_custkey); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + RIGHT JOIN multi_outer_join_third_reference t1 ON (r1.r_custkey = t1.t_custkey) +ORDER BY 1,2,3; +LOG: join order: [ "multi_outer_join_left_hash" ] +LOG: join order: [ "multi_outer_join_right_hash" ] + l_custkey | r_custkey | t_custkey +--------------------------------------------------------------------- + 11 | 11 | 11 + 12 | 12 | 12 + 13 | 13 | 13 + 14 | 14 | 14 + 15 | 15 | 15 + 21 | 21 | 21 + 22 | 22 | 22 + 23 | 23 | 23 + 24 | 24 | 24 + 25 | 25 | 25 + 26 | 26 | 26 + 27 | 27 | 27 + 28 | 28 | 28 + 29 | 29 | 29 + 30 | 30 | 30 + | | 1 + | | 2 + | | 3 + | | 4 + | | 5 + | | 6 + | | 7 + | | 8 + | | 9 + | | 10 + | | 16 + | | 17 + | | 18 + | | 19 + 
| | 20 +(30 rows) + -- Right join with single shard left most table should work SELECT t_custkey, r_custkey, l_custkey diff --git a/src/test/regress/expected/sqlancer_failures.out b/src/test/regress/expected/sqlancer_failures.out index 207e71d56..d4d06a4e4 100644 --- a/src/test/regress/expected/sqlancer_failures.out +++ b/src/test/regress/expected/sqlancer_failures.out @@ -168,33 +168,55 @@ SELECT create_distributed_table('distributed_table', 'user_id'); (1 row) +INSERT INTO distributed_table VALUES +(1, 10), +(2, 22), +(3, 34), +(7, 40); +INSERT INTO reference_table VALUES +(1, '100'), +(null, '202'), +(4, '300'), +(null, '401'), +(null, '402'); -- postgres plans below queries by evaluating joins as below: -- L -- / \ -- ref L -- / \ -- dist ref --- so we should error out as reference table is in the outer part of the top level (left) outer join SELECT count(*) FROM distributed_table a LEFT JOIN reference_table b ON (true) RIGHT JOIN reference_table c ON (true); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 100 +(1 row) + SELECT count(*) FROM distributed_table a LEFT JOIN (SELECT * FROM reference_table OFFSET 0) b ON (true) RIGHT JOIN (SELECT * FROM reference_table OFFSET 0) c ON (true); -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + count +--------------------------------------------------------------------- + 100 +(1 row) + SELECT count(*) FROM distributed_table a LEFT JOIN reference_table b ON (true) RIGHT JOIN reference_table c ON (c.id > 0); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 43 +(1 row) + SELECT count(*) FROM distributed_table a LEFT JOIN (SELECT * 
FROM reference_table OFFSET 0) b ON (true) RIGHT JOIN (SELECT * FROM reference_table OFFSET 0) c ON (c.id > 0); -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + count +--------------------------------------------------------------------- + 43 +(1 row) + -- drop existing sqlancer tables before next tests DROP TABLE t0, t1, t2, t3, t4 CASCADE; CREATE TABLE tbl1(a REAL, b FLOAT, c money); @@ -258,14 +280,16 @@ SELECT create_reference_table('t4'); -- t1(ref) L -- / \ -- t0(dist) t4(ref) --- -- so we should error out SELECT count(*) FROM ( SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0 LEFT OUTER JOIN t4 ON CAST(masklen('142.158.96.44') AS BOOLEAN) RIGHT OUTER JOIN t1 ON ((0.024767844)::MONEY) BETWEEN (t1.c1) AND (CAST(0.0602135 AS MONEY)) ) AS foo; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 0 +(1 row) + -- first subquery has the same join tree as above, so we should error out SELECT count(*) FROM ( SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0 @@ -281,21 +305,29 @@ UNION ALL SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0 RIGHT OUTER JOIN t1 ON ((0.024767844)::MONEY) BETWEEN (t1.c1) AND ((0.0602135)::MONEY) WHERE (NOT (((t0.c0)LIKE((t4.c0))))) ISNULL ) AS foo; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 0 +(1 row) + -- unsupported outer JOIN inside a subquery in WHERE clause SELECT * FROM distributed_table WHERE buy_count > ( SELECT count(*) FROM distributed_table a LEFT JOIN reference_table b ON (true) RIGHT JOIN reference_table c ON (false)); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + user_id | item_id | 
buy_count +--------------------------------------------------------------------- +(0 rows) + -- unsupported outer JOIN via subqueries SELECT count(*) FROM (SELECT *, random() FROM distributed_table) AS a LEFT JOIN (SELECT *, random() FROM reference_table) AS b ON (true) RIGHT JOIN (SELECT *, random() FROM reference_table) AS c ON (false); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 5 +(1 row) + -- unsupported outer JOIN in a sublevel subquery SELECT count(*) @@ -310,8 +342,11 @@ JOIN RIGHT JOIN reference_table c ON (true) ) AS unsupported_join ON (true); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 400 +(1 row) + SELECT count(*) FROM @@ -325,32 +360,41 @@ JOIN RIGHT JOIN (SELECT * FROM reference_table OFFSET 0) c ON (true) ) AS unsupported_join ON (true); -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + count +--------------------------------------------------------------------- + 400 +(1 row) + -- unsupported outer JOIN in a sublevel INNER JOIN SELECT - unsupported_join.* + COUNT(unsupported_join.*) FROM (distributed_table a LEFT JOIN reference_table b ON (true) RIGHT JOIN reference_table c ON (true)) as unsupported_join (x,y,z,t,e,f,q) JOIN (reference_table d JOIN reference_table e ON(true)) ON (true); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 2500 +(1 row) + -- unsupported outer JOIN in a sublevel LEFT JOIN SELECT - unsupported_join.* + COUNT(unsupported_join.*) FROM (distributed_table a LEFT JOIN reference_table 
b ON (true) RIGHT JOIN reference_table c ON (true)) as unsupported_join LEFT JOIN (reference_table d JOIN reference_table e ON(true)) ON (true); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 2500 +(1 row) + SELECT - unsupported_join.* + COUNT(unsupported_join.*) FROM (distributed_table a LEFT JOIN (SELECT * FROM reference_table OFFSET 0) b ON (true) @@ -363,21 +407,27 @@ LEFT JOIN ON(true) ) ON (true); -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + count +--------------------------------------------------------------------- + 2500 +(1 row) + -- unsupported outer JOIN in a sublevel RIGHT JOIN SELECT - unsupported_join.* + COUNT(unsupported_join.*) FROM (distributed_table a LEFT JOIN reference_table b ON (true) RIGHT JOIN reference_table c ON (false)) as unsupported_join RIGHT JOIN (reference_table d JOIN reference_table e ON(true)) ON (true); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + count +--------------------------------------------------------------------- + 125 +(1 row) + SELECT - unsupported_join.* + COUNT(unsupported_join.*) FROM (distributed_table a LEFT JOIN (SELECT * FROM reference_table OFFSET 0) b ON (true) @@ -390,9 +440,12 @@ RIGHT JOIN ON(true) ) ON (true); -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table -EXPLAIN SELECT + count +--------------------------------------------------------------------- + 125 +(1 row) + +EXPLAIN (COSTS OFF) SELECT unsupported_join.* FROM (distributed_table a @@ -400,7 +453,34 @@ FROM RIGHT JOIN reference_table c ON (true)) as unsupported_join (x,y,z,t,e,f,q) JOIN (reference_table d JOIN 
reference_table e ON(true)) ON (d.id > 0); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + -> Distributed Subplan XXX_1 + -> Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on distributed_table_92862439 a + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Nested Loop + -> Nested Loop + -> Nested Loop Left Join + -> Seq Scan on reference_table_92862438 c + -> Nested Loop Left Join + -> Function Scan on read_intermediate_result intermediate_result + -> Materialize + -> Seq Scan on reference_table_92862438 b + -> Materialize + -> Seq Scan on reference_table_92862438 d + Filter: (id > 0) + -> Materialize + -> Seq Scan on reference_table_92862438 e +(25 rows) + SET client_min_messages TO WARNING; DROP SCHEMA sqlancer_failures CASCADE; diff --git a/src/test/regress/sql/multi_outer_join.sql b/src/test/regress/sql/multi_outer_join.sql index 549e0ae2c..5f911dc3b 100644 --- a/src/test/regress/sql/multi_outer_join.sql +++ b/src/test/regress/sql/multi_outer_join.sql @@ -302,7 +302,7 @@ FROM LEFT JOIN multi_outer_join_third_reference t1 ON (r1.r_custkey = t1.t_custkey) ORDER BY l_custkey, r_custkey, t_custkey; --- Right join with single shard right most table should error out +-- Right join with single shard right most table should work SELECT l_custkey, r_custkey, t_custkey FROM diff --git a/src/test/regress/sql/multi_outer_join_reference.sql b/src/test/regress/sql/multi_outer_join_reference.sql index 9c824736b..04a9c23e2 100644 --- a/src/test/regress/sql/multi_outer_join_reference.sql +++ b/src/test/regress/sql/multi_outer_join_reference.sql @@ -302,13 +302,14 @@ FROM LEFT JOIN multi_outer_join_third_reference t1 ON (r1.r_custkey = t1.t_custkey) ORDER 
BY 1; --- Right join with single shard right most table should error out +-- Right join with single shard right most table should work SELECT l_custkey, r_custkey, t_custkey FROM multi_outer_join_left_hash l1 LEFT JOIN multi_outer_join_right_hash r1 ON (l1.l_custkey = r1.r_custkey) - RIGHT JOIN multi_outer_join_third_reference t1 ON (r1.r_custkey = t1.t_custkey); + RIGHT JOIN multi_outer_join_third_reference t1 ON (r1.r_custkey = t1.t_custkey) +ORDER BY 1,2,3; -- Right join with single shard left most table should work SELECT diff --git a/src/test/regress/sql/sqlancer_failures.sql b/src/test/regress/sql/sqlancer_failures.sql index afb7b909f..d003d58be 100644 --- a/src/test/regress/sql/sqlancer_failures.sql +++ b/src/test/regress/sql/sqlancer_failures.sql @@ -64,13 +64,25 @@ SELECT create_reference_table('reference_table'); CREATE TABLE distributed_table(user_id int, item_id int, buy_count int); SELECT create_distributed_table('distributed_table', 'user_id'); +INSERT INTO distributed_table VALUES +(1, 10), +(2, 22), +(3, 34), +(7, 40); + +INSERT INTO reference_table VALUES +(1, '100'), +(null, '202'), +(4, '300'), +(null, '401'), +(null, '402'); + -- postgres plans below queries by evaluating joins as below: -- L -- / \ -- ref L -- / \ -- dist ref --- so we should error out as reference table is in the outer part of the top level (left) outer join SELECT count(*) FROM distributed_table a LEFT JOIN reference_table b ON (true) @@ -123,7 +135,6 @@ SELECT create_reference_table('t4'); -- t1(ref) L -- / \ -- t0(dist) t4(ref) --- -- so we should error out SELECT count(*) FROM ( SELECT ALL t4.c1, t0.c0, t0.c1 FROM ONLY t0 LEFT OUTER JOIN t4 ON CAST(masklen('142.158.96.44') AS BOOLEAN) @@ -189,7 +200,7 @@ ON (true); -- unsupported outer JOIN in a sublevel INNER JOIN SELECT - unsupported_join.* + COUNT(unsupported_join.*) FROM (distributed_table a LEFT JOIN reference_table b ON (true) @@ -199,7 +210,7 @@ JOIN -- unsupported outer JOIN in a sublevel LEFT JOIN SELECT - 
unsupported_join.* + COUNT(unsupported_join.*) FROM (distributed_table a LEFT JOIN reference_table b ON (true) @@ -208,7 +219,7 @@ LEFT JOIN (reference_table d JOIN reference_table e ON(true)) ON (true); SELECT - unsupported_join.* + COUNT(unsupported_join.*) FROM (distributed_table a LEFT JOIN (SELECT * FROM reference_table OFFSET 0) b ON (true) @@ -224,7 +235,7 @@ ON (true); -- unsupported outer JOIN in a sublevel RIGHT JOIN SELECT - unsupported_join.* + COUNT(unsupported_join.*) FROM (distributed_table a LEFT JOIN reference_table b ON (true) @@ -233,7 +244,7 @@ RIGHT JOIN (reference_table d JOIN reference_table e ON(true)) ON (true); SELECT - unsupported_join.* + COUNT(unsupported_join.*) FROM (distributed_table a LEFT JOIN (SELECT * FROM reference_table OFFSET 0) b ON (true) @@ -247,7 +258,7 @@ RIGHT JOIN ) ON (true); -EXPLAIN SELECT +EXPLAIN (COSTS OFF) SELECT unsupported_join.* FROM (distributed_table a From 2803470b589cc193c84b924075819586ce5387b6 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Thu, 1 Dec 2022 18:18:55 +0300 Subject: [PATCH 4/5] Add lateral join checks for outer joins and drop the useless ones for semi joins --- .../planner/query_pushdown_planning.c | 128 ++++++++---------- .../distributed/planner/recursive_planning.c | 29 +++- .../regress/expected/multi_insert_select.out | 8 +- .../expected/multi_insert_select_0.out | 8 +- ...ulti_subquery_complex_reference_clause.out | 10 +- ...lti_subquery_in_where_reference_clause.out | 3 +- 6 files changed, 90 insertions(+), 96 deletions(-) diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index 7dca6579f..5cae19497 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -105,6 +105,7 @@ static List * CreateSubqueryTargetListAndAdjustVars(List *columnList); static AttrNumber FindResnoForVarInTargetList(List *targetList, int varno, int 
varattno); static bool RelationInfoContainsOnlyRecurringTuples(PlannerInfo *plannerInfo, Relids relids); +static char * RecurringTypeDescription(RecurringTuplesType recurType); static DeferredErrorMessage * DeferredErrorIfUnsupportedLateralSubquery( PlannerInfo *plannerInfo, Relids recurringRelIds, Relids nonRecurringRelIds); static Var * PartitionColumnForPushedDownSubquery(Query *query); @@ -603,7 +604,6 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, return error; } - /* we shouldn't allow reference tables in the outer part of outer joins */ error = DeferredErrorIfUnsupportedRecurringTuplesJoin(plannerRestrictionContext); if (error) { @@ -643,7 +643,8 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery, * sublinks into joins. * * In some cases, sublinks are pulled up and converted into outer joins. Those cases - * are already handled with DeferredErrorIfUnsupportedRecurringTuplesJoin(). + * are either handled by RecursivelyPlanRecurringTupleOuterJoinWalker() or rejected + * with an error in DeferredErrorIfUnsupportedRecurringTuplesJoin(). * * If the sublinks are not pulled up, we should still error out in if the expression * in the FROM clause would recur for every shard in a subquery on the WHERE clause. @@ -751,20 +752,11 @@ FromClauseRecurringTupleType(Query *queryTree) /* - * DeferredErrorIfUnsupportedRecurringTuplesJoin returns true if there exists a outer join - * between reference table and distributed tables which does not follow - * the rules : - * - Reference tables can not be located in the outer part of the semi join or the - * anti join. Otherwise, we may have duplicate results. Although getting duplicate - * results is not possible by checking the equality on the column of the reference - * table and partition column of distributed table, we still keep these checks. - * Because, using the reference table in the outer part of the semi join or anti - * join is not very common.
- * - Reference tables can not be located in the outer part of the left join - * (Note that PostgreSQL converts right joins to left joins. While converting - * join types, innerrel and outerrel are also switched.) Otherwise we will - * definitely have duplicate rows. Beside, reference tables can not be used - * with full outer joins because of the same reason. + * DeferredErrorIfUnsupportedRecurringTuplesJoin returns a DeferredError if + * there exists a join between a recurring rel (such as reference tables + * and intermediate_results) and a non-recurring rel (such as distributed tables + * and subqueries that we can push-down to worker nodes) that can return an + * incorrect result set due to recurring tuples coming from the recurring rel. */ static DeferredErrorMessage * DeferredErrorIfUnsupportedRecurringTuplesJoin( @@ -783,31 +775,48 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin( Relids innerrelRelids = joinRestriction->innerrelRelids; Relids outerrelRelids = joinRestriction->outerrelRelids; - if (joinType == JOIN_SEMI || joinType == JOIN_ANTI || joinType == JOIN_LEFT) + /* + * This loop aims to determine whether this join is between a recurring + * rel and a non-recurring rel, and if so, whether it can yield an incorrect + * result set due to recurring tuples. + * + * For outer joins, this can only happen if it's a lateral outer join + * where the inner distributed subquery references the recurring outer + * rel. This is because such outer joins should not appear here, as + * the recursive planner (RecursivelyPlanRecurringTupleOuterJoinWalker) + * should have already planned the non-recurring side if it wasn't a + * lateral join. For this reason, if the outer join is between a recurring + * rel --on the outer side-- and a non-recurring rel --on the other side--, + * we throw an error assuming that it's a lateral outer join.
+ * Also note that, in the context of outer joins, we only check left outer + * and full outer joins because PostgreSQL converts right joins to left + * joins before passing them through "set_join_pathlist_hook"s. + * + * For semi / anti joins, we anyway throw an error when the inner + * side is a distributed subquery that references a recurring outer rel + * (in the FROM clause) thanks to DeferErrorIfFromClauseRecurs. And when + * the inner side is a recurring rel and the outer side a non-recurring + * one, then the non-recurring side can't reference the recurring side + * anyway. + * + * For those reasons, here we perform the lateral join checks below only for + * outer (except anti) / inner joins but not for anti / semi joins. + */ + + if (joinType == JOIN_LEFT) { - /* - * If there are only recurring tuples on the inner side of a join then - * we can push it down, regardless of whether the outer side is - * recurring or not. Otherwise, we check the outer side for recurring - * tuples. - */ if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, innerrelRelids)) { + /* inner side only contains recurring rels */ continue; } - - /* - * If the outer side of the join doesn't have any distributed tables - * (e.g., contains only recurring tuples), Citus should not pushdown - * the query. The reason is that recurring tuples on every shard would - * be added to the result, which is wrong. - */ if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrelRelids)) { /* - * Find the first (or only) recurring RTE to give a meaningful - * error to the user. + * Inner side contains distributed rels but the outer side only + * contains recurring rels, must be an unsupported lateral outer + * join.
*/ recurType = FetchFirstRecurType(plannerInfo, outerrelRelids); @@ -816,11 +825,6 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin( } else if (joinType == JOIN_FULL) { - /* - * If one of the outer or inner side contains recurring tuples and the other side - * contains nonrecurring tuples, then duplicate results can exist in the result. - * Thus, Citus should not pushdown the query. - */ bool innerContainOnlyRecurring = RelationInfoContainsOnlyRecurringTuples(plannerInfo, innerrelRelids); bool outerContainOnlyRecurring = @@ -829,8 +833,9 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin( if (innerContainOnlyRecurring && !outerContainOnlyRecurring) { /* - * Find the first (or only) recurring RTE to give a meaningful - * error to the user. + * Right side contains distributed rels but the left side only + * contains recurring rels, must be an unsupported lateral outer + * join. */ recurType = FetchFirstRecurType(plannerInfo, innerrelRelids); @@ -840,8 +845,9 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin( if (!innerContainOnlyRecurring && outerContainOnlyRecurring) { /* - * Find the first (or only) recurring RTE to give a meaningful - * error to the user. + * Left side contains distributed rels but the right side only + * contains recurring rels, must be an unsupported lateral outer + * join. */ recurType = FetchFirstRecurType(plannerInfo, outerrelRelids); @@ -856,7 +862,7 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin( * See comment on DeferredErrorIfUnsupportedLateralSubquery for * details. * - * When planning inner joins postgres can move RTEs from left to + * When planning inner joins, postgres can move RTEs from left to * right and from right to left. So we don't know on which side the * lateral join wil appear. Thus we try to find a side of the join * that only contains recurring tuples. 
And then we check the other @@ -893,41 +899,13 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin( } } - if (recurType == RECURRING_TUPLES_REFERENCE_TABLE) + if (recurType != RECURRING_TUPLES_INVALID) { + char *errmsg = psprintf("cannot perform a lateral outer join when " + "a distributed subquery references %s", + RecurringTypeDescription(recurType)); return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "cannot pushdown the subquery", - "There exist a reference table in the outer " - "part of the outer join", NULL); - } - else if (recurType == RECURRING_TUPLES_FUNCTION) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "cannot pushdown the subquery", - "There exist a table function in the outer " - "part of the outer join", NULL); - } - else if (recurType == RECURRING_TUPLES_EMPTY_JOIN_TREE) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "cannot pushdown the subquery", - "There exist a subquery without FROM in the outer " - "part of the outer join", NULL); - } - else if (recurType == RECURRING_TUPLES_RESULT_FUNCTION) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "cannot pushdown the subquery", - "Complex subqueries, CTEs and local tables cannot be in " - "the outer part of an outer join with a distributed table", - NULL); - } - else if (recurType == RECURRING_TUPLES_VALUES) - { - return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, - "cannot pushdown the subquery", - "There exist a VALUES clause in the outer " - "part of the outer join", NULL); + errmsg, NULL, NULL); } return NULL; diff --git a/src/backend/distributed/planner/recursive_planning.c b/src/backend/distributed/planner/recursive_planning.c index b74b59de3..f540ae7aa 100644 --- a/src/backend/distributed/planner/recursive_planning.c +++ b/src/backend/distributed/planner/recursive_planning.c @@ -372,9 +372,6 @@ RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext *context * side of the outer join. 
That way, inner rel gets converted into an intermediate * result and logical planner can handle the new query since it's of the from * "<recurring> LEFT JOIN <recurring>". - * - * See DeferredErrorIfUnsupportedRecurringTuplesJoin for the supported join - * types. */ if (ShouldRecursivelyPlanOuterJoins(context)) { @@ -719,7 +716,7 @@ RecursivelyPlanRecurringTupleOuterJoinWalker(Node *node, Query *query, /* * A LEFT JOIN is recurring if the lhs is recurring. - * Note that we should have converted the rhs into a recurring + * Note that we might have converted the rhs into a recurring * one too if the lhs is recurring, but this anyway has no * effects when deciding whether a LEFT JOIN is recurring. */ @@ -812,6 +809,11 @@ RecursivelyPlanRecurringTupleOuterJoinWalker(Node *node, Query *query, * RecursivelyPlanRecurringTupleOuterJoinWalker that recursively plans given * distributed node that is known to be inner side of an outer join. * + * Fails to do so if the distributed join node references the recurring one. + * In that case, we don't throw an error here but instead we let + * DeferredErrorIfUnsupportedRecurringTuplesJoin do so for a better error + * message. + * * We call a node "distributed" if it points to a distributed table or a * more complex object (i.e., a join tree or a subquery) that can be pushed * down to the worker nodes directly. For a join, this means that it's either @@ -894,7 +896,24 @@ RecursivelyPlanDistributedJoinNode(Node *node, Query *query, "since it is part of a distributed join node " "that is outer joined with a recurring rel"))); - RecursivelyPlanSubquery(distributedRte->subquery, recursivePlanningContext); + bool recursivelyPlanned = RecursivelyPlanSubquery(distributedRte->subquery, + recursivePlanningContext); + if (!recursivelyPlanned) + { + /* + * RecursivelyPlanSubquery fails to plan a subquery only if it + * contains references to the outer query.
This means that, we can't + * plan such outer joins (like ) + * if it's a LATERAL join where the distributed side is a subquery that + * references the outer side, as in, + * + * SELECT * FROM reference + * LEFT JOIN LATERAL + * (SELECT * FROM distributed WHERE reference.b > distributed.b) q + * USING (a); + */ + Assert(ContainsReferencesToOuterQuery(distributedRte->subquery)); + } } else { diff --git a/src/test/regress/expected/multi_insert_select.out b/src/test/regress/expected/multi_insert_select.out index b1bf7a5e6..1dd326ccc 100644 --- a/src/test/regress/expected/multi_insert_select.out +++ b/src/test/regress/expected/multi_insert_select.out @@ -1839,8 +1839,6 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4, ON (f.id = f2.id) WHERE f.id IN (SELECT user_id FROM raw_events_second)); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join -- cannot push down since join is not equi join (f.id > f2.id) INSERT INTO raw_events_second (user_id) @@ -1869,8 +1867,6 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4, ON (f.id > f2.id) WHERE f.id IN (SELECT user_id FROM raw_events_second)); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join -- we currently not support grouping sets INSERT INTO agg_events (user_id, @@ -1978,7 +1974,7 @@ INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) SELECT count(*) FROM raw_events_second; count --------------------------------------------------------------------- - 36 + 45 (1 row) INSERT INTO raw_events_second SELECT * FROM test_view; @@ -1988,7 +1984,7 @@ INSERT INTO raw_events_second SELECT * FROM test_view WHERE user_id = 17 GROUP B SELECT count(*) FROM raw_events_second; count --------------------------------------------------------------------- - 38 + 47 (1 row) -- intermediate results (CTEs) should be allowed when doing INSERT...SELECT within a CTE diff --git 
a/src/test/regress/expected/multi_insert_select_0.out b/src/test/regress/expected/multi_insert_select_0.out index 2947f8be0..edf2f19ed 100644 --- a/src/test/regress/expected/multi_insert_select_0.out +++ b/src/test/regress/expected/multi_insert_select_0.out @@ -1839,8 +1839,6 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4, ON (f.id = f2.id) WHERE f.id IN (SELECT user_id FROM raw_events_second)); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join -- cannot push down since join is not equi join (f.id > f2.id) INSERT INTO raw_events_second (user_id) @@ -1869,8 +1867,6 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4, ON (f.id > f2.id) WHERE f.id IN (SELECT user_id FROM raw_events_second)); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join -- we currently not support grouping sets INSERT INTO agg_events (user_id, @@ -1978,7 +1974,7 @@ INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) SELECT count(*) FROM raw_events_second; count --------------------------------------------------------------------- - 36 + 45 (1 row) INSERT INTO raw_events_second SELECT * FROM test_view; @@ -1988,7 +1984,7 @@ INSERT INTO raw_events_second SELECT * FROM test_view WHERE user_id = 17 GROUP B SELECT count(*) FROM raw_events_second; count --------------------------------------------------------------------- - 38 + 47 (1 row) -- intermediate results (CTEs) should be allowed when doing INSERT...SELECT within a CTE diff --git a/src/test/regress/expected/multi_subquery_complex_reference_clause.out b/src/test/regress/expected/multi_subquery_complex_reference_clause.out index 6fe7c1570..7db33834f 100644 --- a/src/test/regress/expected/multi_subquery_complex_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_complex_reference_clause.out @@ -489,8 +489,14 @@ SELECT * FROM SELECT user_id FROM user_buy_test_table) sub ORDER 
BY 1 DESC; DEBUG: Router planner cannot handle multi-shard select queries -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + user_id +--------------------------------------------------------------------- + 7 + 3 + 2 + 1 +(4 rows) + SELECT * FROM (SELECT user_id FROM users_ref_test_table ref JOIN user_buy_test_table dis on (ref.id = dis.user_id) diff --git a/src/test/regress/expected/multi_subquery_in_where_reference_clause.out b/src/test/regress/expected/multi_subquery_in_where_reference_clause.out index d4ada83d3..52cbe3917 100644 --- a/src/test/regress/expected/multi_subquery_in_where_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_in_where_reference_clause.out @@ -361,8 +361,7 @@ SELECT user_id, value_2 FROM users_table WHERE HAVING sum(submit_card_info) > 0 ) ORDER BY 1, 2; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join +ERROR: cannot perform a lateral outer join when a distributed subquery references a reference table -- non-partition key equality with reference table SELECT user_id, count(*) From b177975371a1349158c8c39e5fc789c651a3bb6a Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Tue, 29 Nov 2022 13:50:06 +0300 Subject: [PATCH 5/5] Add new regression tests --- ...arbitrary_configs_recurring_outer_join.out | 119 + .../regress/expected/recurring_outer_join.out | 2023 +++++++++++++++++ src/test/regress/multi_1_schedule | 1 + ...arbitrary_configs_recurring_outer_join.sql | 74 + src/test/regress/sql/recurring_outer_join.sql | 1030 +++++++++ src/test/regress/sql_schedule | 2 +- 6 files changed, 3248 insertions(+), 1 deletion(-) create mode 100644 src/test/regress/expected/arbitrary_configs_recurring_outer_join.out create mode 100644 src/test/regress/expected/recurring_outer_join.out create mode 100644 src/test/regress/sql/arbitrary_configs_recurring_outer_join.sql create mode 100644 
src/test/regress/sql/recurring_outer_join.sql diff --git a/src/test/regress/expected/arbitrary_configs_recurring_outer_join.out b/src/test/regress/expected/arbitrary_configs_recurring_outer_join.out new file mode 100644 index 000000000..4df6ea698 --- /dev/null +++ b/src/test/regress/expected/arbitrary_configs_recurring_outer_join.out @@ -0,0 +1,119 @@ +SET search_path TO local_dist_join_mixed; +SELECT COUNT(*) FROM reference LEFT JOIN distributed USING (id); + count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT COUNT(*) FROM distributed RIGHT JOIN reference USING (id); + count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT COUNT(*) FROM reference FULL JOIN distributed USING (id); + count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT COUNT(*) FROM distributed FULL JOIN reference USING (id); + count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT COUNT(*) FROM distributed FULL JOIN reference USING (id); + count +--------------------------------------------------------------------- + 101 +(1 row) + +-- distributed side is a subquery +SELECT COUNT(*) FROM reference LEFT JOIN (SELECT * FROM distributed) q USING (id); + count +--------------------------------------------------------------------- + 101 +(1 row) + +-- distributed side is a join tree +SELECT COUNT(*) FROM reference LEFT JOIN (distributed t1 JOIN distributed t2 USING (id)) q USING (id); + count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT COUNT(*) FROM reference LEFT JOIN (distributed t1 LEFT JOIN distributed t2 USING (id)) q USING (id); + count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT COUNT(*) FROM +-- 2) right side is distributed but t1 is recurring, hence what +-- makes the right side distributed (t4) is 
recursively planned +reference t1 +LEFT JOIN +( + distributed t4 + JOIN + -- 1) t6 is recursively planned since the outer side is recurring + (SELECT t6.id FROM distributed t6 RIGHT JOIN reference t7 USING(id)) t5 + USING(id) +) q +USING(id) +-- 3) outer side of the join tree became recurring, hence t8 is +-- recursively planned too +LEFT JOIN +distributed t8 +USING (id) +WHERE t8.id IS NULL; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT COUNT(*) FROM +local t1 +LEFT JOIN +-- 2) t6 subquery is distributed so needs to be recursively planned +-- because t1 is first recursively planned +( + SELECT * FROM + (SELECT * FROM reference t2 JOIN distributed t3 USING (id)) p + JOIN + -- 1) t5 is recursively planned since the outer side is recurring + (SELECT * FROM reference t4 LEFT JOIN distributed t5 USING (id)) q + USING(id) +) t6 +USING (id); + count +--------------------------------------------------------------------- + 101 +(1 row) + +BEGIN; + WITH cte AS ( + DELETE FROM distributed + USING ( + SELECT t1.id, t1.id*3 FROM reference t1 + LEFT JOIN + ( + SELECT * FROM distributed t2 WHERE EXISTS ( + SELECT * FROM distributed t4 + WHERE t4.id = t2.id + ) + ) t3 + USING (id) + ) q + WHERE distributed.id = q.id AND + distributed.id > 65 + RETURNING * + ) + SELECT COUNT(*) FROM cte; + count +--------------------------------------------------------------------- + 35 +(1 row) + +ROLLBACK; diff --git a/src/test/regress/expected/recurring_outer_join.out b/src/test/regress/expected/recurring_outer_join.out new file mode 100644 index 000000000..3cd7cc6dc --- /dev/null +++ b/src/test/regress/expected/recurring_outer_join.out @@ -0,0 +1,2023 @@ +CREATE SCHEMA recurring_outer_join; +SET search_path TO recurring_outer_join; +SET citus.next_shard_id TO 1520000; +SET citus.shard_count TO 32; +-- idempotently add node to allow this test to run without add_coordinator +SET client_min_messages TO WARNING; +SELECT 1 FROM 
citus_add_node('localhost', :master_port, groupid => 0); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SET client_min_messages TO DEBUG1; +CREATE TABLE dist_1 (a int, b int); +SELECT create_distributed_table('dist_1', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO dist_1 VALUES +(1, 10), +(1, 11), +(1, 12), +(2, 20), +(2, 21), +(2, 22), +(2, 23), +(3, 30), +(3, 31), +(3, 32), +(3, 33), +(3, 34), +(7, 40), +(7, 41), +(7, 42); +CREATE TABLE dist_2_columnar(LIKE dist_1) USING columnar; +INSERT INTO dist_2_columnar SELECT * FROM dist_1; +DEBUG: Flushing Stripe of size 15 +SELECT create_distributed_table('dist_2_columnar', 'a'); +DEBUG: pathlist hook for columnar table am +CONTEXT: SQL statement "SELECT TRUE FROM recurring_outer_join.dist_2_columnar LIMIT 1" +NOTICE: Copying data from local table... +DEBUG: Copied 15 rows +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. 
+HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$recurring_outer_join.dist_2_columnar$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist_3_partitioned(LIKE dist_1) PARTITION BY RANGE(a); +CREATE TABLE dist_3_partitioned_p1 PARTITION OF dist_3_partitioned FOR VALUES FROM (0) TO (2); +CREATE TABLE dist_3_partitioned_p2 PARTITION OF dist_3_partitioned FOR VALUES FROM (2) TO (4); +CREATE TABLE dist_3_partitioned_p3 PARTITION OF dist_3_partitioned FOR VALUES FROM (4) TO (100); +SELECT create_distributed_table('dist_3_partitioned', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO dist_3_partitioned SELECT * FROM dist_1; +CREATE TABLE ref_1 (a int, b int); +SELECT create_reference_table('ref_1'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO ref_1 VALUES +(1, 100), +(1, 11), +(null, 102), +(2, 200), +(2, 21), +(null, 202), +(2, 203), +(4, 300), +(4, 301), +(null, 302), +(4, 303), +(4, 304), +(null, 400), +(null, 401), +(null, 402); +CREATE TABLE local_1 (a int, b int); +INSERT INTO local_1 VALUES +(null, 1000), +(1, 11), +(1, 100), +(5, 2000), +(5, 2001), +(5, 2002), +(null, 2003), +(6, 3000), +(6, 3001), +(6, 3002), +(null, 3003), +(6, 3004), +(null, 4000), +(null, 4001), +(null, 4002); +CREATE TABLE citus_local_1(LIKE local_1); +INSERT INTO citus_local_1 SELECT * FROM local_1; +SELECT citus_add_local_table_to_metadata('citus_local_1'); + citus_add_local_table_to_metadata +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist_4_different_colocation_group(LIKE dist_1); +INSERT INTO dist_4_different_colocation_group SELECT * FROM local_1; +DELETE FROM dist_4_different_colocation_group WHERE a IS NULL; +SELECT 
create_distributed_table('dist_4_different_colocation_group', 'a', colocate_with=>'none'); +NOTICE: Copying data from local table... +DEBUG: Copied 9 rows +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$recurring_outer_join.dist_4_different_colocation_group$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist_5_with_pkey(LIKE dist_1); +INSERT INTO dist_5_with_pkey VALUES +(1, 11), +(2, 22), +(3, 34), +(7, 40); +SELECT create_distributed_table('dist_5_with_pkey', 'a'); +NOTICE: Copying data from local table... +DEBUG: Copied 4 rows +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$recurring_outer_join.dist_5_with_pkey$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE dist_5_with_pkey ADD CONSTRAINT pkey_1 PRIMARY KEY (a); +DEBUG: ALTER TABLE / ADD PRIMARY KEY will create implicit index "pkey_1" for table "dist_5_with_pkey" +DEBUG: verifying table "dist_5_with_pkey" +-- +-- basic cases +-- +SELECT COUNT(*) FROM ref_1 LEFT JOIN dist_1 USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 LEFT JOIN (SELECT dist_1_1.a, NULL::integer AS b 
FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) dist_1_1) dist_1 USING (a)) + count +--------------------------------------------------------------------- + 28 +(1 row) + +SELECT COUNT(*) FROM ref_1 LEFT JOIN dist_1 USING (a,b); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 LEFT JOIN (SELECT dist_1_1.a, dist_1_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) dist_1_1) dist_1 USING (a, b)) + count +--------------------------------------------------------------------- + 15 +(1 row) + +SELECT COUNT(*) FROM dist_1 RIGHT JOIN ref_1 USING (a); +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT dist_1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) dist_1_1) dist_1 RIGHT JOIN recurring_outer_join.ref_1 USING (a)) + count 
+--------------------------------------------------------------------- + 28 +(1 row) + +SELECT COUNT(*) FROM ref_1 FULL JOIN dist_1 USING (a); +DEBUG: recursively planning right side of the full join since the other side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 FULL JOIN (SELECT dist_1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) dist_1_1) dist_1 USING (a)) + count +--------------------------------------------------------------------- + 36 +(1 row) + +SELECT COUNT(*) FROM dist_1 FULL JOIN ref_1 USING (a); +DEBUG: recursively planning left side of the full join since the other side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT dist_1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) dist_1_1) dist_1 FULL JOIN recurring_outer_join.ref_1 USING (a)) + count +--------------------------------------------------------------------- + 36 +(1 row) + +SELECT COUNT(*) FROM dist_1 FULL JOIN ref_1 USING (a,b); +DEBUG: recursively planning left side of the full join since the other side is a recurring rel 
+DEBUG: recursively planning distributed relation "dist_1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT dist_1_1.a, dist_1_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) dist_1_1) dist_1 FULL JOIN recurring_outer_join.ref_1 USING (a, b)) + count +--------------------------------------------------------------------- + 28 +(1 row) + +-- distributed side is a subquery +SELECT COUNT(*) FROM ref_1 LEFT JOIN (SELECT * FROM dist_1) q USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) q USING (a)) + count +--------------------------------------------------------------------- + 28 +(1 row) + +-- distributed side is a join tree +SELECT COUNT(*) FROM ref_1 LEFT JOIN (dist_1 t1 JOIN dist_1 t2 USING (a)) q USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation 
"dist_1" "t1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t1 WHERE true +DEBUG: recursively planning distributed relation "dist_1" "t2" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t2" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 t2 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 LEFT JOIN ((SELECT t1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t1_1) t1 JOIN (SELECT t2_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t2_1) t2 USING (a)) q(a, b, b_1) USING (a)) + count +--------------------------------------------------------------------- + 76 +(1 row) + +SELECT COUNT(*) FROM ref_1 LEFT JOIN (dist_1 t1 LEFT JOIN dist_1 t2 USING (a)) q USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t1 WHERE true +DEBUG: recursively planning distributed relation "dist_1" "t2" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t2" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 t2 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 LEFT JOIN 
((SELECT t1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t1_1) t1 LEFT JOIN (SELECT t2_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t2_1) t2 USING (a)) q(a, b, b_1) USING (a)) + count +--------------------------------------------------------------------- + 76 +(1 row) + +-- use functions/VALUES clauses/intrermediate results as the recurring rel + -- values clause + SELECT COUNT(*) FROM (SELECT a, b FROM (VALUES (1, 'one'), (2, 'two'), (3, 'three')) as t(a,b)) recurring LEFT JOIN dist_1 USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT t.a, t.b FROM (VALUES (1,'one'::text), (2,'two'::text), (3,'three'::text)) t(a, b)) recurring LEFT JOIN (SELECT dist_1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) dist_1_1) dist_1 USING (a)) + count +--------------------------------------------------------------------- + 12 +(1 row) + + -- generate_series() + SELECT COUNT(*) FROM dist_1 RIGHT JOIN (SELECT a FROM generate_series(1, 10) a) recurring USING (a); +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" since it is part of a distributed join node that is outer joined with a 
recurring rel +DEBUG: Wrapping relation "dist_1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT dist_1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) dist_1_1) dist_1 RIGHT JOIN (SELECT a.a FROM generate_series(1, 10) a(a)) recurring USING (a)) + count +--------------------------------------------------------------------- + 21 +(1 row) + + -- materialized cte + WITH dist_1_materialized AS MATERIALIZED ( + SELECT * FROM dist_1 + ) + SELECT COUNT(*) FROM dist_1 RIGHT JOIN dist_1_materialized USING (a); +DEBUG: generating subplan XXX_1 for CTE dist_1_materialized: SELECT a, b FROM recurring_outer_join.dist_1 +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT dist_1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) dist_1_1) dist_1 RIGHT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) dist_1_materialized USING (a)) + count +--------------------------------------------------------------------- + 59 +(1 row) + + -- offset in the subquery + SELECT COUNT(*) FROM dist_1 t1 RIGHT JOIN (SELECT * FROM dist_1 OFFSET 0) t2 USING (a); 
+DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 OFFSET 0 +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t1" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 t1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT t1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t1_1) t1 RIGHT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t2 USING (a)) + count +--------------------------------------------------------------------- + 59 +(1 row) + + -- limit in the subquery + SELECT COUNT(*) FROM dist_1 t1 RIGHT JOIN (SELECT * FROM dist_1 ORDER BY 1,2 LIMIT 2) t2 USING (a); +DEBUG: push down of limit count: 2 +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 ORDER BY a, b LIMIT 2 +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t1" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 t1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT t1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) 
intermediate_result(a integer)) t1_1) t1 RIGHT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t2 USING (a)) + count +--------------------------------------------------------------------- + 6 +(1 row) + + -- local-distributed join as the recurring rel + -- + -- We plan local-distributed join by converting local_1 into an intermediate result + -- and hence it becomes a recurring rel. Then we convert distributed - inner side of + -- the right join (dist_1) into an intermediate result too and this makes rhs of the + -- full join a recurring rel. And finally, we convert lhs of the full join (t1) into + -- an intermediate result too. + SELECT COUNT(*) FROM dist_1 t1 FULL JOIN (dist_1 RIGHT JOIN local_1 USING(a)) t2 USING (a); +DEBUG: Wrapping relation "local_1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.local_1 WHERE true +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 WHERE true +DEBUG: recursively planning left side of the full join since the other side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t1" to a subquery +DEBUG: generating subplan XXX_3 for subquery SELECT a FROM recurring_outer_join.dist_1 t1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT t1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM 
read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t1_1) t1 FULL JOIN ((SELECT dist_1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) dist_1_1) dist_1 RIGHT JOIN (SELECT local_1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) local_1_1) local_1 USING (a)) t2(a, b, b_1) USING (a)) + count +--------------------------------------------------------------------- + 43 +(1 row) + + SELECT COUNT(*) FROM dist_1 t1 FULL JOIN (dist_1 RIGHT JOIN citus_local_1 USING(a)) t2 USING (a); +DEBUG: Wrapping relation "citus_local_1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.citus_local_1 WHERE true +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 WHERE true +DEBUG: recursively planning left side of the full join since the other side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t1" to a subquery +DEBUG: generating subplan XXX_3 for subquery SELECT a FROM recurring_outer_join.dist_1 t1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT t1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t1_1) t1 FULL JOIN 
((SELECT dist_1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) dist_1_1) dist_1 RIGHT JOIN (SELECT citus_local_1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) citus_local_1_1) citus_local_1 USING (a)) t2(a, b, b_1) USING (a)) + count +--------------------------------------------------------------------- + 43 +(1 row) + + -- subqury without FROM + SELECT COUNT(*) FROM dist_1 t1 RIGHT JOIN (SELECT generate_series(1,10) AS a) t2 USING (a); +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT t1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t1_1) t1 RIGHT JOIN (SELECT generate_series(1, 10) AS a) t2 USING (a)) + count +--------------------------------------------------------------------- + 21 +(1 row) + +-- such semi joins / anti joins are supported too + -- reference table + SELECT COUNT(*) FROM + ref_1 t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE t1.a IN (SELECT a FROM dist_1 t3); + count +--------------------------------------------------------------------- + 18 +(1 row) + + -- not supported because we join t3 (inner rel of the anti join) with a column + -- of reference table, not with the distribution column of the other distributed + -- table (t2) + SELECT COUNT(*) FROM + 
ref_1 t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE NOT EXISTS (SELECT * FROM dist_1 t3 WHERE t1.a = a); +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns + -- supported because the semi join is performed based on distribution keys + -- of the distributed tables + SELECT COUNT(*) FROM + ref_1 t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE NOT EXISTS (SELECT * FROM dist_1 t3 WHERE t2.a = a); + count +--------------------------------------------------------------------- + 0 +(1 row) + + -- values clause + SELECT COUNT(*) FROM + (SELECT a, b FROM (VALUES (1, 'one'), (2, 'two'), (3, 'three')) as t(a,b)) t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE EXISTS (SELECT * FROM dist_1 t3 WHERE t1.a = a); + count +--------------------------------------------------------------------- + 12 +(1 row) + + -- offset in the subquery + SELECT COUNT(*) FROM + (SELECT * FROM dist_1 OFFSET 0) t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE t1.a IN (SELECT a FROM dist_1 t3); +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 OFFSET 0 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t1 JOIN recurring_outer_join.dist_1 t2 ON ((t1.a OPERATOR(pg_catalog.=) t2.a))) WHERE (t1.a OPERATOR(pg_catalog.=) ANY (SELECT t3.a FROM recurring_outer_join.dist_1 t3)) + count +--------------------------------------------------------------------- + 59 +(1 row) + + -- local-distributed join as the recurring rel + SELECT COUNT(*) FROM + (dist_1 RIGHT JOIN local_1 USING(a)) t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE t1.a IN (SELECT a FROM dist_1 t3); +DEBUG: Wrapping relation "local_1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.local_1 
WHERE true +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (((SELECT dist_1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) dist_1_1) dist_1 RIGHT JOIN (SELECT local_1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) local_1_1) local_1 USING (a)) t1(a, b, b_1) JOIN recurring_outer_join.dist_1 t2 ON ((t1.a OPERATOR(pg_catalog.=) t2.a))) WHERE (t1.a OPERATOR(pg_catalog.=) ANY (SELECT t3.a FROM recurring_outer_join.dist_1 t3)) + count +--------------------------------------------------------------------- + 18 +(1 row) + + -- materialized cte + WITH dist_1_materialized AS MATERIALIZED ( + SELECT * FROM dist_1 + ) + SELECT COUNT(*) FROM + dist_1_materialized t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE t1.a IN (SELECT a FROM dist_1 t3); +DEBUG: generating subplan XXX_1 for CTE dist_1_materialized: SELECT a, b FROM recurring_outer_join.dist_1 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t1 JOIN recurring_outer_join.dist_1 t2 ON ((t1.a OPERATOR(pg_catalog.=) t2.a))) WHERE (t1.a OPERATOR(pg_catalog.=) ANY (SELECT t3.a FROM recurring_outer_join.dist_1 t3)) + count 
+--------------------------------------------------------------------- + 59 +(1 row) + + WITH dist_1_materialized AS MATERIALIZED ( + SELECT * FROM dist_1 + ) + SELECT COUNT(*) FROM + dist_1_materialized t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE EXISTS (SELECT a FROM dist_1 t3 WHERE t3.a = t1.a); +DEBUG: generating subplan XXX_1 for CTE dist_1_materialized: SELECT a, b FROM recurring_outer_join.dist_1 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t1 JOIN recurring_outer_join.dist_1 t2 ON ((t1.a OPERATOR(pg_catalog.=) t2.a))) WHERE (EXISTS (SELECT t3.a FROM recurring_outer_join.dist_1 t3 WHERE (t3.a OPERATOR(pg_catalog.=) t1.a))) + count +--------------------------------------------------------------------- + 59 +(1 row) + + -- not supported because we anti-join t3 --inner rel-- with a column + -- of t1 (intermediate result) --outer-rel-- + WITH dist_1_materialized AS MATERIALIZED ( + SELECT * FROM dist_1 + ) + SELECT COUNT(*) FROM + dist_1_materialized t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE NOT EXISTS (SELECT a FROM dist_1 t3 WHERE t3.a = t1.a); +DEBUG: generating subplan XXX_1 for CTE dist_1_materialized: SELECT a, b FROM recurring_outer_join.dist_1 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t1 JOIN recurring_outer_join.dist_1 t2 ON ((t1.a OPERATOR(pg_catalog.=) t2.a))) WHERE (NOT (EXISTS (SELECT t3.a FROM recurring_outer_join.dist_1 t3 WHERE (t3.a OPERATOR(pg_catalog.=) t1.a)))) +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns + -- so this is 
supported because now t3 is joined with t2, not t1 + WITH dist_1_materialized AS MATERIALIZED ( + SELECT a AS a_alias, b AS b_alias FROM dist_1 + ) + SELECT COUNT(*) FROM + dist_1_materialized t1 + JOIN dist_1 t2 + ON (t1.a_alias = t2.a) + WHERE NOT EXISTS (SELECT a FROM dist_1 t3 WHERE t3.a = t2.a); +DEBUG: generating subplan XXX_1 for CTE dist_1_materialized: SELECT a AS a_alias, b AS b_alias FROM recurring_outer_join.dist_1 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.a_alias, intermediate_result.b_alias FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a_alias integer, b_alias integer)) t1 JOIN recurring_outer_join.dist_1 t2 ON ((t1.a_alias OPERATOR(pg_catalog.=) t2.a))) WHERE (NOT (EXISTS (SELECT t3.a FROM recurring_outer_join.dist_1 t3 WHERE (t3.a OPERATOR(pg_catalog.=) t2.a)))) + count +--------------------------------------------------------------------- + 0 +(1 row) + + WITH dist_1_materialized AS MATERIALIZED ( + SELECT a AS a_alias, b AS b_alias FROM dist_1 + ) + SELECT COUNT(*) FROM + dist_1_materialized t1 + JOIN dist_1 t2 + ON (t1.a_alias = t2.a) + WHERE t1.a_alias NOT IN (SELECT a FROM dist_1 t3); +DEBUG: generating subplan XXX_1 for CTE dist_1_materialized: SELECT a AS a_alias, b AS b_alias FROM recurring_outer_join.dist_1 +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 t3 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.a_alias, intermediate_result.b_alias FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a_alias integer, b_alias integer)) t1 JOIN recurring_outer_join.dist_1 t2 ON ((t1.a_alias OPERATOR(pg_catalog.=) t2.a))) WHERE (NOT (t1.a_alias OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) 
intermediate_result(a integer)))) + count +--------------------------------------------------------------------- + 0 +(1 row) + + -- generate_series() + SELECT COUNT(*) FROM + (SELECT a FROM generate_series(1, 10) a) t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE t1.a IN (SELECT a FROM dist_1 t3); + count +--------------------------------------------------------------------- + 15 +(1 row) + + -- subqury without FROM + SELECT COUNT(*) FROM + (SELECT generate_series(1,10) AS a) t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE t1.a IN (SELECT a FROM dist_1 t3); + count +--------------------------------------------------------------------- + 15 +(1 row) + +-- together with correlated subqueries +SELECT COUNT(*) FROM ref_1 t1 +LEFT JOIN dist_1 t2 USING (a,b) +WHERE EXISTS (SELECT * FROM dist_1 t3 WHERE t1.a = t3.a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t2" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t2" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t2 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT t2_1.a, t2_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t2_1) t2 USING (a, b)) WHERE (EXISTS (SELECT t3.a, t3.b FROM recurring_outer_join.dist_1 t3 WHERE (t1.a OPERATOR(pg_catalog.=) t3.a))) +ERROR: correlated subqueries are not supported when the FROM clause contains a reference table +SELECT COUNT(*) FROM dist_1 t1 +RIGHT JOIN ref_1 t2 USING (a,b) +WHERE EXISTS (SELECT * FROM dist_1 t3 WHERE t2.a = t3.a); +DEBUG: recursively planning left side of the right join since the outer side is a 
recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT t1_1.a, t1_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t1_1) t1 RIGHT JOIN recurring_outer_join.ref_1 t2 USING (a, b)) WHERE (EXISTS (SELECT t3.a, t3.b FROM recurring_outer_join.dist_1 t3 WHERE (t2.a OPERATOR(pg_catalog.=) t3.a))) +ERROR: correlated subqueries are not supported when the FROM clause contains a CTE or subquery +-- "dist_1 t2" can't contribute to result set of the right join with +-- a tuple having "(t2.a) a = NULL" because t2 is in the inner side of +-- right join. For this reason, Postgres knows that <t2.a = t1.a> can +-- never evaluate to true (because <NULL = t1.a> never yields "true") +-- and replaces the right join with an inner join. +-- And as a result, we can push-down the query without having to go +-- through recursive planning. +SELECT COUNT(*) FROM dist_1 t1 +WHERE EXISTS ( + SELECT * FROM dist_1 t2 + RIGHT JOIN ref_1 t3 USING (a) + WHERE t2.a = t1.a +); + count +--------------------------------------------------------------------- + 7 +(1 row) + +-- same here, Postgres converts the left join into an inner one +SELECT foo.* FROM +ref_1 r1, +LATERAL +( + SELECT * FROM ref_1 r2 + LEFT JOIN dist_1 + USING (a) + WHERE r1.a > dist_1.b +) as foo; + a | b | b +--------------------------------------------------------------------- +(0 rows) + +-- Qual is the same but top-level join is an anti-join. Right join +-- stays as is and hence requires recursive planning. 
+SELECT COUNT(*) FROM dist_1 t1 +WHERE NOT EXISTS ( + SELECT * FROM dist_1 t2 + RIGHT JOIN ref_1 t3 USING (a) + WHERE t2.a = t1.a +); +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t2" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t2" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t2 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM recurring_outer_join.dist_1 t1 WHERE (NOT (EXISTS (SELECT t3.a, t2.b, t3.b FROM ((SELECT t2_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t2_1) t2 RIGHT JOIN recurring_outer_join.ref_1 t3 USING (a)) WHERE (t2.a OPERATOR(pg_catalog.=) t1.a)))) + count +--------------------------------------------------------------------- + 8 +(1 row) + +-- This time the semi-join qual is <t3.a = t1.a> (not <t2.a = t1.a>) +-- where t3 is the outer rel of the right join. Hence Postgres can't +-- replace right join with an inner join and so we recursively plan +-- inner side of the right join since the outer side is a recurring +-- rel. 
+SELECT COUNT(*) FROM dist_1 t1 +WHERE EXISTS ( + SELECT * FROM dist_1 t2 + RIGHT JOIN ref_1 t3 USING (a) + WHERE t3.a = t1.a +); +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t2" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t2" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t2 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM recurring_outer_join.dist_1 t1 WHERE (EXISTS (SELECT t3.a, t2.b, t3.b FROM ((SELECT t2_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t2_1) t2 RIGHT JOIN recurring_outer_join.ref_1 t3 USING (a)) WHERE (t3.a OPERATOR(pg_catalog.=) t1.a))) + count +--------------------------------------------------------------------- + 7 +(1 row) + +SELECT COUNT(*) FROM dist_1 t1 +WHERE NOT EXISTS ( + SELECT * FROM dist_1 t2 + RIGHT JOIN ref_1 t3 USING (a) + WHERE t3.a = t1.a +); +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t2" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t2" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t2 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM recurring_outer_join.dist_1 t1 WHERE (NOT (EXISTS (SELECT t3.a, t2.b, t3.b FROM ((SELECT t2_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t2_1) t2 RIGHT JOIN 
recurring_outer_join.ref_1 t3 USING (a)) WHERE (t3.a OPERATOR(pg_catalog.=) t1.a)))) + count +--------------------------------------------------------------------- + 8 +(1 row) + +-- +-- more complex cases +-- +SELECT COUNT(*) FROM +-- 1) right side is distributed but t1 is recurring, hence what +-- makes the right side distributed (t3) is recursively planned +ref_1 t1 +LEFT JOIN +(ref_1 t2 RIGHT JOIN dist_1 t3(x,y) ON (t2.a=t3.x)) t5 +USING(a) +-- 2) outer side of the join tree became recurring, hence t4 is +-- recursively planned too +LEFT JOIN +dist_1 t4 +ON (t4.a = t5.a AND t4.b = t5.b) +WHERE t4.b IS NULL; +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t3" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t3" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT x AS a FROM recurring_outer_join.dist_1 t3(x, y) WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t4" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t4" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t4 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((recurring_outer_join.ref_1 t1 LEFT JOIN (recurring_outer_join.ref_1 t2(a_1, b) RIGHT JOIN (SELECT t3_1.x AS a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t3_1(x)) t3(x, y) ON ((t2.a_1 OPERATOR(pg_catalog.=) t3.x))) t5(a, b, x, y) USING (a)) LEFT JOIN (SELECT t4_1.a, t4_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM 
read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t4_1) t4 ON (((t4.a OPERATOR(pg_catalog.=) t5.a) AND (t4.b OPERATOR(pg_catalog.=) t5.b)))) WHERE (t4.b IS NULL) + count +--------------------------------------------------------------------- + 40 +(1 row) + +SELECT COUNT(*) FROM +-- 2) right side is distributed but t1 is recurring, hence what +-- makes the right side distributed (t4) is recursively planned +ref_1 t1 +LEFT JOIN +( + dist_1 t4 + JOIN + -- 1) t6 is recursively planned since the outer side is recurring + (SELECT t6.a FROM dist_1 t6 RIGHT JOIN ref_1 t7 USING(a)) t5 + USING(a) +) q +USING(a) +-- 3) outer side of the join tree became recurring, hence t8 is +-- recursively planned too +LEFT JOIN +dist_1 t8 +USING (a) +WHERE t8.b IS NULL; +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t6" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t6" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t6 WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t4" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t4" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 t4 WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t8" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t8" to a subquery +DEBUG: generating subplan XXX_3 for subquery SELECT a, b FROM 
recurring_outer_join.dist_1 t8 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((recurring_outer_join.ref_1 t1 LEFT JOIN ((SELECT t4_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t4_1) t4 JOIN (SELECT t6.a FROM ((SELECT t6_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t6_1) t6 RIGHT JOIN recurring_outer_join.ref_1 t7 USING (a))) t5 USING (a)) q USING (a)) LEFT JOIN (SELECT t8_1.a, t8_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t8_1) t8 USING (a)) WHERE (t8.b IS NULL) + count +--------------------------------------------------------------------- + 10 +(1 row) + +SELECT COUNT(*) FROM +ref_1 t1 +-- all distributed tables in the rhs will be recursively planned +-- in the order of t3, t4, t5 +LEFT JOIN +( + ref_1 t2 + JOIN + dist_1 t3 + USING (a) + JOIN + (dist_1 t4 JOIN dist_1 t5 USING (a)) + USING(a) +) +USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t3" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t3" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t3 WHERE true +DEBUG: recursively planning distributed relation "dist_1" "t4" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t4" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 t4 WHERE true +DEBUG: recursively planning distributed 
relation "dist_1" "t5" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t5" to a subquery +DEBUG: generating subplan XXX_3 for subquery SELECT a FROM recurring_outer_join.dist_1 t5 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t1 LEFT JOIN ((recurring_outer_join.ref_1 t2 JOIN (SELECT t3_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t3_1) t3 USING (a)) JOIN ((SELECT t4_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t4_1) t4 JOIN (SELECT t5_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t5_1) t5 USING (a)) USING (a)) USING (a)) + count +--------------------------------------------------------------------- + 694 +(1 row) + +-- Even if dist_1 and dist_4_different_colocation_group belong to different +-- colocation groups, we can run query without doing a repartition join as +-- we first decide recursively planning lhs of the right join because rhs +-- (ref_1) is a recurring rel. And while doing so, we anyway recursively plan +-- the distributed tables in the subjoin tree individually hence the whole join +-- tree becomes: +-- RIGHT JOIN +-- / \ +-- intermediate_result_for_dist_1 ref_1 +-- JOIN +-- intermediate_result_for_dist_4_different_colocation_group +-- +-- When we decide implementing the optimization noted in +-- RecursivelyPlanDistributedJoinNode in an XXX comment, then this query would +-- require enabling repartition joins. 
+SELECT COUNT(*) FROM +dist_1 JOIN dist_4_different_colocation_group USING(a) +RIGHT JOIN ref_1 USING(a); +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 WHERE true +DEBUG: recursively planning distributed relation "dist_4_different_colocation_group" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_4_different_colocation_group" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_4_different_colocation_group WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (((SELECT dist_1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) dist_1_1) dist_1 JOIN (SELECT dist_4_different_colocation_group_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) dist_4_different_colocation_group_1) dist_4_different_colocation_group USING (a)) RIGHT JOIN recurring_outer_join.ref_1 USING (a)) + count +--------------------------------------------------------------------- + 25 +(1 row) + +SELECT COUNT(*) FROM +ref_1 t1 +LEFT JOIN +-- 2) t6 subquery is distributed so needs to be recursively planned +-- because t1 is recurring +( + SELECT * FROM + (SELECT * FROM ref_1 t2 JOIN dist_1 t3 USING (a) WHERE t3.b IS NULL) p + JOIN + -- 1) t5 is recursively planned since the outer side is recurring + (SELECT * FROM ref_1 t4 LEFT JOIN dist_1 t5 USING (a)) q + USING(a) +) t6 +USING (a); +DEBUG: 
recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t5" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t5" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t5 WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT p.a, p.b, p.b_1 AS b, q.b, q.b_1 AS b FROM ((SELECT t2.a, t2.b, t3.b FROM (recurring_outer_join.ref_1 t2 JOIN recurring_outer_join.dist_1 t3 USING (a)) WHERE (t3.b IS NULL)) p(a, b, b_1) JOIN (SELECT t4.a, t4.b, t5.b FROM (recurring_outer_join.ref_1 t4 LEFT JOIN (SELECT t5_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t5_1) t5 USING (a))) q(a, b, b_1) USING (a)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b, intermediate_result.b_1 AS b, intermediate_result.b_2 AS b, intermediate_result.b_3 AS b FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer, b_1 integer, b_2 integer, b_3 integer)) t6(a, b, b_1, b_2, b_3) USING (a)) + count +--------------------------------------------------------------------- + 15 +(1 row) + +-- No need to recursively plan dist_5_with_pkey thanks to +-- pkey optimizations done by Postgres. 
+SELECT COUNT(*) FROM ref_1 LEFT JOIN dist_5_with_pkey USING(a); + count +--------------------------------------------------------------------- + 15 +(1 row) + +-- Similarly, <dist_1.a IN (1,4)> implies that "dist_1.a" cannot be NULL +-- and hence Postgres converts the LEFT JOIN into an INNER JOIN form. +-- For this reason, we don't need to recursively plan dist_1. +SELECT COUNT(*) FROM ref_1 LEFT JOIN dist_1 USING(a) WHERE dist_1.a IN (1,4); + count +--------------------------------------------------------------------- + 6 +(1 row) + +SELECT COUNT(*) FROM +ref_1 t1 +LEFT JOIN +-- 2) t6 subquery is distributed so needs to be recursively planned +-- because t1 is recurring +( + SELECT * FROM + (SELECT * FROM ref_1 t2 JOIN dist_3_partitioned t3 USING (a) WHERE t3.b IS NULL) p + JOIN + -- 1) t5 is recursively planned since the outer side is recurring + (SELECT * FROM ref_1 t4 LEFT JOIN dist_1 t5 USING (a)) q + USING(a) +) t6 +USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t5" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t5" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t5 WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT p.a, p.b, p.b_1 AS b, q.b, q.b_1 AS b FROM ((SELECT t2.a, t2.b, t3.b FROM (recurring_outer_join.ref_1 t2 JOIN recurring_outer_join.dist_3_partitioned t3 USING (a)) WHERE (t3.b IS NULL)) p(a, b, b_1) JOIN (SELECT t4.a, t4.b, t5.b FROM (recurring_outer_join.ref_1 t4 LEFT JOIN (SELECT t5_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM 
read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t5_1) t5 USING (a))) q(a, b, b_1) USING (a)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b, intermediate_result.b_1 AS b, intermediate_result.b_2 AS b, intermediate_result.b_3 AS b FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer, b_1 integer, b_2 integer, b_3 integer)) t6(a, b, b_1, b_2, b_3) USING (a)) + count +--------------------------------------------------------------------- + 15 +(1 row) + +SELECT COUNT(t1.a), t1.b FROM +ref_1 t1 +LEFT JOIN +-- 2) t6 subquery is distributed so needs to be recursively planned +-- because t1 is recurring +( + SELECT * FROM + (SELECT * FROM ref_1 t2 JOIN dist_3_partitioned t3 USING (a) WHERE t3.b IS NULL) p + JOIN + -- 1) t5 is recursively planned since the outer side is recurring + (SELECT * FROM ref_1 t4 LEFT JOIN dist_1 t5 USING (a)) q + USING(a) +) t6 +USING (a) +GROUP BY (t1.b) +HAVING t1.b > 200 +ORDER BY 1,2; +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t5" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t5" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t5 WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT p.a, p.b, p.b_1 AS b, q.b, q.b_1 AS b FROM ((SELECT t2.a, t2.b, t3.b FROM (recurring_outer_join.ref_1 t2 JOIN 
recurring_outer_join.dist_3_partitioned t3 USING (a)) WHERE (t3.b IS NULL)) p(a, b, b_1) JOIN (SELECT t4.a, t4.b, t5.b FROM (recurring_outer_join.ref_1 t4 LEFT JOIN (SELECT t5_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t5_1) t5 USING (a))) q(a, b, b_1) USING (a)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(t1.a) AS count, t1.b FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b, intermediate_result.b_1 AS b, intermediate_result.b_2 AS b, intermediate_result.b_3 AS b FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer, b_1 integer, b_2 integer, b_3 integer)) t6(a, b, b_1, b_2, b_3) USING (a)) GROUP BY t1.b HAVING (t1.b OPERATOR(pg_catalog.>) 200) ORDER BY (count(t1.a)), t1.b + count | b +--------------------------------------------------------------------- + 0 | 202 + 0 | 302 + 0 | 400 + 0 | 401 + 0 | 402 + 1 | 203 + 1 | 300 + 1 | 301 + 1 | 303 + 1 | 304 +(10 rows) + +SELECT COUNT(t1.a), t1.b FROM +ref_1 t1 +LEFT JOIN +-- 2) t6 subquery is distributed so needs to be recursively planned +-- because t1 is recurring +( + SELECT * FROM + (SELECT * FROM ref_1 t2 JOIN dist_3_partitioned t3 USING (a) WHERE t3.b IS NULL) p + JOIN + -- 1) t5 is recursively planned since the outer side is recurring + (SELECT * FROM ref_1 t4 LEFT JOIN dist_1 t5 USING (a)) q + USING(a) +) t6 +USING (a) +GROUP BY (t1.b) +HAVING ( + EXISTS ( + SELECT * FROM ref_1 t6 + LEFT JOIN dist_1 t7 USING (a) + WHERE t7.b > 10 + ) +) +ORDER BY 1,2; +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t7" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t7" to a subquery +DEBUG: 
generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t7 WHERE (b OPERATOR(pg_catalog.>) 10) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t5" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t5" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 t5 WHERE true +DEBUG: generating subplan XXX_3 for subquery SELECT t6.a, t6.b, t7.b FROM (recurring_outer_join.ref_1 t6 LEFT JOIN (SELECT t7_1.a, t7_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t7_1) t7 USING (a)) WHERE (t7.b OPERATOR(pg_catalog.>) 10) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_4 for subquery SELECT p.a, p.b, p.b_1 AS b, q.b, q.b_1 AS b FROM ((SELECT t2.a, t2.b, t3.b FROM (recurring_outer_join.ref_1 t2 JOIN recurring_outer_join.dist_3_partitioned t3 USING (a)) WHERE (t3.b IS NULL)) p(a, b, b_1) JOIN (SELECT t4.a, t4.b, t5.b FROM (recurring_outer_join.ref_1 t4 LEFT JOIN (SELECT t5_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t5_1) t5 USING (a))) q(a, b, b_1) USING (a)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(t1.a) AS count, t1.b FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b, intermediate_result.b_1 AS b, intermediate_result.b_2 AS b, intermediate_result.b_3 AS b FROM 
read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer, b_1 integer, b_2 integer, b_3 integer)) t6(a, b, b_1, b_2, b_3) USING (a)) GROUP BY t1.b HAVING (EXISTS (SELECT intermediate_result.a, intermediate_result.b, intermediate_result.b_1 AS b FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer, b_1 integer))) ORDER BY (count(t1.a)), t1.b + count | b +--------------------------------------------------------------------- + 0 | 102 + 0 | 202 + 0 | 302 + 0 | 400 + 0 | 401 + 0 | 402 + 1 | 11 + 1 | 21 + 1 | 100 + 1 | 200 + 1 | 203 + 1 | 300 + 1 | 301 + 1 | 303 + 1 | 304 +(15 rows) + +SELECT COUNT(*) FROM +citus_local_1 t1 +LEFT JOIN +-- 2) t6 subquery is distributed so needs to be recursively planned +-- because t1 is first recursively planned +( + SELECT * FROM + (SELECT * FROM ref_1 t2 JOIN dist_1 t3 USING (a) WHERE t3.b IS NULL) p + JOIN + -- 1) t5 is recursively planned since the outer side is recurring + (SELECT * FROM ref_1 t4 LEFT JOIN dist_1 t5 USING (a)) q + USING(a) +) t6 +USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t5" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t5" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t5 WHERE true +DEBUG: Wrapping relation "citus_local_1" "t1" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.citus_local_1 t1 WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_3 for subquery SELECT p.a, p.b, p.b_1 
AS b, q.b, q.b_1 AS b FROM ((SELECT t2.a, t2.b, t3.b FROM (recurring_outer_join.ref_1 t2 JOIN recurring_outer_join.dist_1 t3 USING (a)) WHERE (t3.b IS NULL)) p(a, b, b_1) JOIN (SELECT t4.a, t4.b, t5.b FROM (recurring_outer_join.ref_1 t4 LEFT JOIN (SELECT t5_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t5_1) t5 USING (a))) q(a, b, b_1) USING (a)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT t1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t1_1) t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b, intermediate_result.b_1 AS b, intermediate_result.b_2 AS b, intermediate_result.b_3 AS b FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer, b_1 integer, b_2 integer, b_3 integer)) t6(a, b, b_1, b_2, b_3) USING (a)) + count +--------------------------------------------------------------------- + 15 +(1 row) + +SELECT COUNT(*) FROM +-- 2) t1 is recursively planned because the outer side (t2) is +-- converted into a recurring rel +dist_2_columnar t1 +RIGHT JOIN +( + -- 1) t4 is recursively planned since the outer side is recurring + ref_1 t3 LEFT JOIN dist_1 t4 USING(a) +) t2 +USING (a); +DEBUG: pathlist hook for columnar table am +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t4" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t4" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t4 WHERE true +DEBUG: recursively planning left side of the right join since the outer side is a 
recurring rel +DEBUG: recursively planning distributed relation "dist_2_columnar" "t1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_2_columnar" "t1" to a subquery +DEBUG: pathlist hook for columnar table am +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_2_columnar t1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT t1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t1_1) t1 RIGHT JOIN (recurring_outer_join.ref_1 t3 LEFT JOIN (SELECT t4_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t4_1) t4 USING (a)) t2(a, b, b_1) USING (a)) + count +--------------------------------------------------------------------- + 76 +(1 row) + +SELECT COUNT(*) FROM +-- 3) t1 is recursively planned because the outer side (t2) is +-- converted into a recurring rel +dist_1 t1 +RIGHT JOIN +( + -- 2) t6 is recursively planned because now it's part of a distributed + -- inner join node that is about to be outer joined with t3 + ref_1 t3 + LEFT JOIN + ( + -- 1-a) t4 is recursively planned since the outer side is recurring + (ref_1 t5 LEFT JOIN dist_1 t4 USING(a)) + JOIN + dist_1 t6 + USING(a) + JOIN + -- 1-b) t8 is recursively planned since the outer side is recurring + (ref_1 t7 LEFT JOIN dist_1 t8 USING(a)) + USING(a) + ) + USING(a) +) t2 +USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t4" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t4" to a subquery +DEBUG: generating subplan XXX_1 for subquery 
SELECT a FROM recurring_outer_join.dist_1 t4 WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t8" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t8" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 t8 WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t6" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t6" to a subquery +DEBUG: generating subplan XXX_3 for subquery SELECT a FROM recurring_outer_join.dist_1 t6 WHERE true +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t1" to a subquery +DEBUG: generating subplan XXX_4 for subquery SELECT a FROM recurring_outer_join.dist_1 t1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT t1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t1_1) t1 RIGHT JOIN (recurring_outer_join.ref_1 t3 LEFT JOIN (((recurring_outer_join.ref_1 t5 LEFT JOIN (SELECT t4_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t4_1) t4 USING (a)) JOIN (SELECT t6_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) 
intermediate_result(a integer)) t6_1) t6 USING (a)) JOIN (recurring_outer_join.ref_1 t7 LEFT JOIN (SELECT t8_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t8_1) t8 USING (a)) USING (a)) USING (a)) t2(a, b, b_1, b_2, b_3, b_4, b_5) USING (a)) + count +--------------------------------------------------------------------- + 7570 +(1 row) + +SELECT COUNT(*) FROM +ref_1 t6 +LEFT JOIN +( + ref_1 t1 + LEFT JOIN + ( + -- t3 is a distributed join tree so needs to be recursively planned + -- because t2 is recurring + ref_1 t2 LEFT JOIN (dist_1 t7 JOIN dist_1 t8 USING (a)) t3 USING(a) + JOIN + ref_1 t5 + USING(a) + ) + USING(a) +) +USING(a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t7" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t7" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t7 WHERE true +DEBUG: recursively planning distributed relation "dist_1" "t8" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t8" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 t8 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t6 LEFT JOIN (recurring_outer_join.ref_1 t1 LEFT JOIN ((recurring_outer_join.ref_1 t2 LEFT JOIN ((SELECT t7_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t7_1) t7 JOIN (SELECT t8_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 
'binary'::citus_copy_format) intermediate_result(a integer)) t8_1) t8 USING (a)) t3(a, b, b_1) USING (a)) JOIN recurring_outer_join.ref_1 t5 USING (a)) USING (a)) USING (a)) + count +--------------------------------------------------------------------- + 1702 +(1 row) + +SELECT COUNT(*) FROM +ref_1 t6 +LEFT JOIN +( + ref_1 t1 + LEFT JOIN + ( + -- t4 subquery is distributed so needs to be recursively planned + -- because t2 is recurring + ref_1 t2 LEFT JOIN (SELECT * FROM dist_1 t3) t4 USING(a) + JOIN + ref_1 t5 + USING(a) + ) + USING(a) +) +USING(a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t3 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t6 LEFT JOIN (recurring_outer_join.ref_1 t1 LEFT JOIN ((recurring_outer_join.ref_1 t2 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t4 USING (a)) JOIN recurring_outer_join.ref_1 t5 USING (a)) USING (a)) USING (a)) + count +--------------------------------------------------------------------- + 634 +(1 row) + +SELECT COUNT(*) FROM +ref_1 t6 +LEFT JOIN +( + ref_1 t1 + LEFT JOIN + ( + -- t4 subquery is distributed so needs to be recursively planned + -- because t2 is recurring + ref_1 t2 LEFT JOIN (SELECT * FROM dist_3_partitioned t3) t4 USING(a) + JOIN + ref_1 t5 + USING(a) + ) + USING(a) +) +USING(a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: 
generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_3_partitioned t3 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t6 LEFT JOIN (recurring_outer_join.ref_1 t1 LEFT JOIN ((recurring_outer_join.ref_1 t2 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t4 USING (a)) JOIN recurring_outer_join.ref_1 t5 USING (a)) USING (a)) USING (a)) + count +--------------------------------------------------------------------- + 634 +(1 row) + +-- cannot recursively plan because t3 (inner - distributed) +-- references t1 (outer - recurring) +SELECT COUNT(*) FROM ref_1 t1 LEFT JOIN LATERAL (SELECT * FROM dist_1 t2 WHERE t1.b < t2.b) t3 USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +ERROR: cannot perform a lateral outer join when a distributed subquery references a reference table +SELECT COUNT(*) FROM (SELECT * FROM dist_1 OFFSET 100) t1 LEFT JOIN LATERAL (SELECT * FROM dist_1 t2 WHERE t1.b < t2.b) t3 USING (a); +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 OFFSET 100 +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t1 LEFT JOIN LATERAL (SELECT t2.a, t2.b FROM 
recurring_outer_join.dist_1 t2 WHERE (t1.b OPERATOR(pg_catalog.<) t2.b)) t3 USING (a)) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +ERROR: cannot perform a lateral outer join when a distributed subquery references complex subqueries, CTEs or local tables +SELECT COUNT(*) FROM local_1 t1 LEFT JOIN LATERAL (SELECT * FROM dist_1 t2 WHERE t1.b < t2.b) t3 USING (a); +DEBUG: Wrapping relation "local_1" "t1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.local_1 t1 WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT t1_1.a, t1_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t1_1) t1 LEFT JOIN LATERAL (SELECT t2.a, t2.b FROM recurring_outer_join.dist_1 t2 WHERE (t1.b OPERATOR(pg_catalog.<) t2.b)) t3 USING (a)) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +ERROR: cannot perform a lateral outer join when a distributed subquery references complex subqueries, CTEs or local tables +SELECT COUNT(*) FROM (SELECT 1 a, generate_series(1,2) b) t1 LEFT JOIN LATERAL (SELECT * FROM dist_1 t2 WHERE t1.b < t2.b) t3 USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively 
planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +ERROR: cannot perform a lateral outer join when a distributed subquery references a subquery without FROM +SELECT COUNT(*) FROM (ref_1 t10 JOIN ref_1 t11 USING(a,b)) t1 LEFT JOIN LATERAL (SELECT * FROM dist_1 t2 WHERE t1.b < t2.b) t3 USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +ERROR: cannot perform a lateral outer join when a distributed subquery references a reference table +-- cannot plan because the query in the WHERE clause of t3 +-- (inner - distributed) references t1 (outer - recurring) +SELECT COUNT(*) FROM ref_1 t1 +LEFT JOIN LATERAL +( + SELECT * FROM dist_1 t2 WHERE EXISTS ( + SELECT * FROM dist_1 t4 + WHERE t4.a = t2.a AND t4.b > t1.b + ) +) t3 +USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +ERROR: cannot perform a lateral outer join when a distributed subquery references a reference table +-- can recursively plan after dropping (t4.b > t1.b) qual from t3 +SELECT COUNT(*) FROM ref_1 t1 +LEFT JOIN +( + SELECT * FROM dist_1 t2 WHERE EXISTS ( + SELECT * FROM dist_1 t4 + WHERE t4.a = t2.a + ) +) t3 +USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t2 WHERE (EXISTS (SELECT t4.a, t4.b FROM recurring_outer_join.dist_1 t4 WHERE (t4.a OPERATOR(pg_catalog.=) 
t2.a))) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t3 USING (a)) + count +--------------------------------------------------------------------- + 28 +(1 row) + +-- same test using a view, can be recursively planned +CREATE VIEW my_view_1 AS +SELECT * FROM dist_1 t2 WHERE EXISTS ( + SELECT * FROM dist_1 t4 + WHERE t4.a = t2.a +); +SELECT COUNT(*) FROM ref_1 t1 +LEFT JOIN +my_view_1 t3 +USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_1 for subquery SELECT t2.a, t2.b FROM recurring_outer_join.dist_1 t2 WHERE (EXISTS (SELECT t4.a, t4.b FROM recurring_outer_join.dist_1 t4 WHERE (t4.a OPERATOR(pg_catalog.=) t2.a))) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t3 USING (a)) + count +--------------------------------------------------------------------- + 28 +(1 row) + +SELECT COUNT(*) FROM +ref_1 t6 +LEFT JOIN +( + ref_1 t1 + LEFT JOIN + ( + -- t4 subquery is distributed so needs to be recursively planned + -- because t2 is recurring. + -- However, we fail to recursively plan t4 because it references + -- t6. 
+ ref_1 t2 LEFT JOIN LATERAL (SELECT * FROM dist_2_columnar t3 WHERE t3.a > t6.a) t4 USING(a) + JOIN + ref_1 t5 + USING(a) + ) + USING(a) +) +USING(a); +DEBUG: pathlist hook for columnar table am +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +ERROR: cannot perform a lateral outer join when a distributed subquery references a reference table +SELECT COUNT(*) FROM +ref_1 t6 +LEFT JOIN +( + ref_1 t1 + LEFT JOIN + ( + -- t4 subquery is distributed so needs to be recursively planned + -- because t2 is recurring. + -- Even if the query says t2 is lateral joined with t4, t4 doesn't + -- reference anywhere else and hence can be planned recursively. + ref_1 t2 LEFT JOIN LATERAL (SELECT * FROM dist_1 t3) t4 USING(a) + JOIN + ref_1 t5 + USING(a) + ) + USING(a) +) +USING(a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t3 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t6 LEFT JOIN (recurring_outer_join.ref_1 t1 LEFT JOIN ((recurring_outer_join.ref_1 t2 LEFT JOIN LATERAL (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t4 USING (a)) JOIN recurring_outer_join.ref_1 t5 USING (a)) USING (a)) USING (a)) + count +--------------------------------------------------------------------- + 634 +(1 row) + +-- since t1 is recurring and t6 is distributed, all the distributed +-- tables in t6 will be recursively planned +SELECT COUNT(*) 
FROM ref_1 t1 +LEFT JOIN +( + ((SELECT * FROM ref_1 WHERE a > 1) t2 JOIN dist_1 t3 USING (a)) + JOIN + (dist_1 t4 JOIN dist_1 t5 USING (a)) + USING(a) +) t6 +USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t3" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t3" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t3 WHERE true +DEBUG: recursively planning distributed relation "dist_1" "t4" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t4" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 t4 WHERE true +DEBUG: recursively planning distributed relation "dist_1" "t5" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t5" to a subquery +DEBUG: generating subplan XXX_3 for subquery SELECT a FROM recurring_outer_join.dist_1 t5 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (((SELECT ref_1.a, ref_1.b FROM recurring_outer_join.ref_1 WHERE (ref_1.a OPERATOR(pg_catalog.>) 1)) t2 JOIN (SELECT t3_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t3_1) t3 USING (a)) JOIN ((SELECT t4_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t4_1) t4 JOIN (SELECT t5_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t5_1) 
t5 USING (a)) USING (a)) t6(a, b, b_1, b_2, b_3) USING (a)) + count +--------------------------------------------------------------------- + 588 +(1 row) + +BEGIN; + -- same test but this time should fail due to + -- citus.max_intermediate_result_size + SET LOCAL citus.max_intermediate_result_size TO "0.5kB"; + SELECT COUNT(*) FROM ref_1 t1 + LEFT JOIN + ( + ((SELECT * FROM ref_1 WHERE a > 1) t2 JOIN dist_1 t3 USING (a)) + JOIN + (dist_1 t4 JOIN dist_1 t5 USING (a)) + USING(a) + ) t6 + USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t3" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t3" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t3 WHERE true +DEBUG: recursively planning distributed relation "dist_1" "t4" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t4" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 t4 WHERE true +DEBUG: recursively planning distributed relation "dist_1" "t5" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t5" to a subquery +DEBUG: generating subplan XXX_3 for subquery SELECT a FROM recurring_outer_join.dist_1 t5 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (((SELECT ref_1.a, ref_1.b FROM recurring_outer_join.ref_1 WHERE (ref_1.a OPERATOR(pg_catalog.>) 1)) t2 JOIN (SELECT t3_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t3_1) t3 USING (a)) JOIN ((SELECT t4_1.a, NULL::integer AS b FROM 
(SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t4_1) t4 JOIN (SELECT t5_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t5_1) t5 USING (a)) USING (a)) t6(a, b, b_1, b_2, b_3) USING (a)) +ERROR: the intermediate result size exceeds citus.max_intermediate_result_size (currently 0 kB) +DETAIL: Citus restricts the size of intermediate results of complex subqueries and CTEs to avoid accidentally pulling large result sets into once place. +HINT: To run the current query, set citus.max_intermediate_result_size to a higher value or -1 to disable. +ROLLBACK; +-- Same test using some views, can be recursively planned too. +-- Since t1 is recurring and t6 is distributed, all the distributed +-- tables in t6 will be recursively planned. +CREATE VIEW my_view_2 AS +(SELECT * FROM ref_1 WHERE a > 1); +CREATE VIEW my_view_3 AS +(SELECT * FROM ref_1); +SELECT COUNT(*) FROM my_view_3 t1 +LEFT JOIN +( + (my_view_2 t2 JOIN dist_1 t3 USING (a)) + JOIN + (dist_1 t4 JOIN dist_1 t5 USING (a)) + USING(a) +) t6 +USING (a); +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t3" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t3" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t3 WHERE true +DEBUG: recursively planning distributed relation "dist_1" "t4" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t4" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 t4 WHERE true +DEBUG: recursively planning distributed relation "dist_1" "t5" since 
it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t5" to a subquery +DEBUG: generating subplan XXX_3 for subquery SELECT a FROM recurring_outer_join.dist_1 t5 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT ref_1.a, ref_1.b FROM recurring_outer_join.ref_1) t1 LEFT JOIN (((SELECT ref_1.a, ref_1.b FROM recurring_outer_join.ref_1 WHERE (ref_1.a OPERATOR(pg_catalog.>) 1)) t2 JOIN (SELECT t3_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t3_1) t3 USING (a)) JOIN ((SELECT t4_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t4_1) t4 JOIN (SELECT t5_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t5_1) t5 USING (a)) USING (a)) t6(a, b, b_1, b_2, b_3) USING (a)) + count +--------------------------------------------------------------------- + 588 +(1 row) + +SELECT COUNT(*) FROM ref_1 t1 +-- 2) Since t8 is distributed and t1 is recurring, t8 needs be converted +-- to a recurring rel too. For this reason, subquery t8 is recursively +-- planned because t7 is recurring already. 
+LEFT JOIN +( + SELECT * FROM (SELECT * FROM ref_1 t2 RIGHT JOIN dist_1 t3 USING (a)) AS t4 + JOIN + -- 1) subquery t6 is recursively planned because t5 is recurring + (SELECT * FROM ref_1 t5 LEFT JOIN (SELECT * FROM dist_2_columnar WHERE b < 150) t6 USING (a)) as t7 + USING(a) +) t8 +USING (a); +DEBUG: pathlist hook for columnar table am +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: pathlist hook for columnar table am +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_2_columnar WHERE (b OPERATOR(pg_catalog.<) 150) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT t4.a, t4.b, t4.b_1 AS b, t7.b, t7.b_1 AS b FROM ((SELECT t3.a, t2.b, t3.b FROM (recurring_outer_join.ref_1 t2 RIGHT JOIN recurring_outer_join.dist_1 t3 USING (a))) t4(a, b, b_1) JOIN (SELECT t5.a, t5.b, t6.b FROM (recurring_outer_join.ref_1 t5 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t6 USING (a))) t7(a, b, b_1) USING (a)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b, intermediate_result.b_1 AS b, intermediate_result.b_2 AS b, intermediate_result.b_3 AS b FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer, b_1 integer, b_2 integer, b_3 integer)) t8(a, b, b_1, b_2, b_3) USING (a)) + count 
+--------------------------------------------------------------------- + 514 +(1 row) + +-- same test using a prepared statement +PREPARE recurring_outer_join_p1 AS +SELECT COUNT(*) FROM ref_1 t1 +-- 2) Since t8 is distributed and t1 is recurring, t8 needs be converted +-- to a recurring rel too. For this reason, subquery t8 is recursively +-- planned because t7 is recurring already. +LEFT JOIN +( + SELECT * FROM (SELECT * FROM ref_1 t2 RIGHT JOIN dist_1 t3 USING (a)) AS t4 + JOIN + -- 1) subquery t6 is recursively planned because t5 is recurring + (SELECT * FROM ref_1 t5 LEFT JOIN (SELECT * FROM dist_2_columnar WHERE b < $1) t6 USING (a)) as t7 + USING(a) +) t8 +USING (a); +EXECUTE recurring_outer_join_p1(0); +DEBUG: pathlist hook for columnar table am +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: pathlist hook for columnar table am +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_2_columnar WHERE (b OPERATOR(pg_catalog.<) 0) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT t4.a, t4.b, t4.b_1 AS b, t7.b, t7.b_1 AS b FROM ((SELECT t3.a, t2.b, t3.b FROM (recurring_outer_join.ref_1 t2 RIGHT JOIN recurring_outer_join.dist_1 t3 USING (a))) t4(a, b, b_1) JOIN (SELECT t5.a, t5.b, t6.b FROM (recurring_outer_join.ref_1 t5 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t6 USING (a))) t7(a, b, b_1) USING (a)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: 
SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b, intermediate_result.b_1 AS b, intermediate_result.b_2 AS b, intermediate_result.b_3 AS b FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer, b_1 integer, b_2 integer, b_3 integer)) t8(a, b, b_1, b_2, b_3) USING (a)) + count +--------------------------------------------------------------------- + 142 +(1 row) + +EXECUTE recurring_outer_join_p1(100); +DEBUG: pathlist hook for columnar table am +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: pathlist hook for columnar table am +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_2_columnar WHERE (b OPERATOR(pg_catalog.<) 100) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT t4.a, t4.b, t4.b_1 AS b, t7.b, t7.b_1 AS b FROM ((SELECT t3.a, t2.b, t3.b FROM (recurring_outer_join.ref_1 t2 RIGHT JOIN recurring_outer_join.dist_1 t3 USING (a))) t4(a, b, b_1) JOIN (SELECT t5.a, t5.b, t6.b FROM (recurring_outer_join.ref_1 t5 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t6 USING (a))) t7(a, b, b_1) USING (a)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b, intermediate_result.b_1 AS b, intermediate_result.b_2 AS 
b, intermediate_result.b_3 AS b FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer, b_1 integer, b_2 integer, b_3 integer)) t8(a, b, b_1, b_2, b_3) USING (a)) + count +--------------------------------------------------------------------- + 514 +(1 row) + +EXECUTE recurring_outer_join_p1(100); +DEBUG: pathlist hook for columnar table am +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: pathlist hook for columnar table am +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_2_columnar WHERE (b OPERATOR(pg_catalog.<) 100) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT t4.a, t4.b, t4.b_1 AS b, t7.b, t7.b_1 AS b FROM ((SELECT t3.a, t2.b, t3.b FROM (recurring_outer_join.ref_1 t2 RIGHT JOIN recurring_outer_join.dist_1 t3 USING (a))) t4(a, b, b_1) JOIN (SELECT t5.a, t5.b, t6.b FROM (recurring_outer_join.ref_1 t5 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t6 USING (a))) t7(a, b, b_1) USING (a)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b, intermediate_result.b_1 AS b, intermediate_result.b_2 AS b, intermediate_result.b_3 AS b FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer, b_1 integer, b_2 integer, b_3 
integer)) t8(a, b, b_1, b_2, b_3) USING (a)) + count +--------------------------------------------------------------------- + 514 +(1 row) + +EXECUTE recurring_outer_join_p1(10); +DEBUG: pathlist hook for columnar table am +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: pathlist hook for columnar table am +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_2_columnar WHERE (b OPERATOR(pg_catalog.<) 10) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT t4.a, t4.b, t4.b_1 AS b, t7.b, t7.b_1 AS b FROM ((SELECT t3.a, t2.b, t3.b FROM (recurring_outer_join.ref_1 t2 RIGHT JOIN recurring_outer_join.dist_1 t3 USING (a))) t4(a, b, b_1) JOIN (SELECT t5.a, t5.b, t6.b FROM (recurring_outer_join.ref_1 t5 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t6 USING (a))) t7(a, b, b_1) USING (a)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b, intermediate_result.b_1 AS b, intermediate_result.b_2 AS b, intermediate_result.b_3 AS b FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer, b_1 integer, b_2 integer, b_3 integer)) t8(a, b, b_1, b_2, b_3) USING (a)) + count +--------------------------------------------------------------------- + 142 +(1 row) + +EXECUTE recurring_outer_join_p1(10); 
+DEBUG: pathlist hook for columnar table am +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: pathlist hook for columnar table am +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_2_columnar WHERE (b OPERATOR(pg_catalog.<) 10) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT t4.a, t4.b, t4.b_1 AS b, t7.b, t7.b_1 AS b FROM ((SELECT t3.a, t2.b, t3.b FROM (recurring_outer_join.ref_1 t2 RIGHT JOIN recurring_outer_join.dist_1 t3 USING (a))) t4(a, b, b_1) JOIN (SELECT t5.a, t5.b, t6.b FROM (recurring_outer_join.ref_1 t5 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t6 USING (a))) t7(a, b, b_1) USING (a)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b, intermediate_result.b_1 AS b, intermediate_result.b_2 AS b, intermediate_result.b_3 AS b FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer, b_1 integer, b_2 integer, b_3 integer)) t8(a, b, b_1, b_2, b_3) USING (a)) + count +--------------------------------------------------------------------- + 142 +(1 row) + +EXECUTE recurring_outer_join_p1(1000); +DEBUG: pathlist hook for columnar table am +DEBUG: pathlist hook for columnar table am +DEBUG: recursively planning right side of the left join since the outer side is a recurring 
rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: pathlist hook for columnar table am +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_2_columnar WHERE (b OPERATOR(pg_catalog.<) 1000) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT t4.a, t4.b, t4.b_1 AS b, t7.b, t7.b_1 AS b FROM ((SELECT t3.a, t2.b, t3.b FROM (recurring_outer_join.ref_1 t2 RIGHT JOIN recurring_outer_join.dist_1 t3 USING (a))) t4(a, b, b_1) JOIN (SELECT t5.a, t5.b, t6.b FROM (recurring_outer_join.ref_1 t5 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t6 USING (a))) t7(a, b, b_1) USING (a)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b, intermediate_result.b_1 AS b, intermediate_result.b_2 AS b, intermediate_result.b_3 AS b FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer, b_1 integer, b_2 integer, b_3 integer)) t8(a, b, b_1, b_2, b_3) USING (a)) + count +--------------------------------------------------------------------- + 514 +(1 row) + +EXECUTE recurring_outer_join_p1(1000); +DEBUG: pathlist hook for columnar table am +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: pathlist hook for columnar 
table am +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_2_columnar WHERE (b OPERATOR(pg_catalog.<) 1000) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT t4.a, t4.b, t4.b_1 AS b, t7.b, t7.b_1 AS b FROM ((SELECT t3.a, t2.b, t3.b FROM (recurring_outer_join.ref_1 t2 RIGHT JOIN recurring_outer_join.dist_1 t3 USING (a))) t4(a, b, b_1) JOIN (SELECT t5.a, t5.b, t6.b FROM (recurring_outer_join.ref_1 t5 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t6 USING (a))) t7(a, b, b_1) USING (a)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b, intermediate_result.b_1 AS b, intermediate_result.b_2 AS b, intermediate_result.b_3 AS b FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer, b_1 integer, b_2 integer, b_3 integer)) t8(a, b, b_1, b_2, b_3) USING (a)) + count +--------------------------------------------------------------------- + 514 +(1 row) + +-- t5 is recursively planned because the outer side of the final +-- left join is recurring +SELECT * FROM ref_1 t1 +JOIN ref_1 t2 USING (a) +LEFT JOIN ref_1 t3 USING (a) +LEFT JOIN ref_1 t4 USING (a) +LEFT JOIN dist_1 t5 USING (a) +ORDER BY 1,2,3,4,5,6 DESC +LIMIT 5; +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t5" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping 
relation "dist_1" "t5" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t5 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT t1.a, t1.b, t2.b, t3.b, t4.b, t5.b FROM ((((recurring_outer_join.ref_1 t1 JOIN recurring_outer_join.ref_1 t2 USING (a)) LEFT JOIN recurring_outer_join.ref_1 t3 USING (a)) LEFT JOIN recurring_outer_join.ref_1 t4 USING (a)) LEFT JOIN (SELECT t5_1.a, t5_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t5_1) t5 USING (a)) ORDER BY t1.a, t1.b, t2.b, t3.b, t4.b, t5.b DESC LIMIT 5 + a | b | b | b | b | b +--------------------------------------------------------------------- + 1 | 11 | 11 | 11 | 11 | 12 + 1 | 11 | 11 | 11 | 11 | 11 + 1 | 11 | 11 | 11 | 11 | 10 + 1 | 11 | 11 | 11 | 100 | 12 + 1 | 11 | 11 | 11 | 100 | 11 +(5 rows) + +-- t6 is recursively planned because the outer side of the final +-- left join is recurring +SELECT * FROM (SELECT * FROM ref_1 ORDER BY 1,2 LIMIT 7) t1 +JOIN ref_1 t2 USING (a) +LEFT JOIN (SELECT *, random() > 1 FROM dist_1 t3) t4 USING (a) +LEFT JOIN ref_1 t5 USING (a) +LEFT JOIN dist_1 t6 USING (a) +ORDER BY 1,2,3,4,5,6,7 DESC +LIMIT 10; +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.ref_1 ORDER BY a, b LIMIT 7 +DEBUG: generating subplan XXX_2 for subquery SELECT a, b, (random() OPERATOR(pg_catalog.>) (1)::double precision) FROM recurring_outer_join.dist_1 t3 +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t6" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t6" to a subquery +DEBUG: generating subplan XXX_3 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t6 WHERE true +DEBUG: Plan XXX 
query after replacing subqueries and CTEs: SELECT t1.a, t1.b, t2.b, t4.b, t4."?column?", t5.b, t6.b FROM (((((SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t1 JOIN recurring_outer_join.ref_1 t2 USING (a)) LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b, intermediate_result."?column?" FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer, "?column?" boolean)) t4(a, b, "?column?") USING (a)) LEFT JOIN recurring_outer_join.ref_1 t5 USING (a)) LEFT JOIN (SELECT t6_1.a, t6_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t6_1) t6 USING (a)) ORDER BY t1.a, t1.b, t2.b, t4.b, t4."?column?", t5.b, t6.b DESC LIMIT 10 + a | b | b | b | ?column? | b | b +--------------------------------------------------------------------- + 1 | 11 | 11 | 10 | f | 11 | 12 + 1 | 11 | 11 | 10 | f | 11 | 11 + 1 | 11 | 11 | 10 | f | 11 | 10 + 1 | 11 | 11 | 10 | f | 100 | 12 + 1 | 11 | 11 | 10 | f | 100 | 11 + 1 | 11 | 11 | 10 | f | 100 | 10 + 1 | 11 | 11 | 11 | f | 11 | 12 + 1 | 11 | 11 | 11 | f | 11 | 11 + 1 | 11 | 11 | 11 | f | 11 | 10 + 1 | 11 | 11 | 11 | f | 100 | 12 +(10 rows) + +-- +-- Such join rels can recursively appear anywhere in the query instead +-- of simple relation rtes. 
+-- +SELECT COUNT(*) FROM + (SELECT ref_1.a, t10.b FROM ref_1 LEFT JOIN dist_1 t10 USING(b)) AS t1, + (SELECT ref_1.a, t20.b FROM ref_1 LEFT JOIN dist_1 t20 USING(b)) AS t2, + (SELECT ref_1.a, t30.b FROM ref_1 LEFT JOIN dist_1 t30 USING(b)) AS t3, + (SELECT ref_1.a, t40.b FROM ref_1 LEFT JOIN dist_1 t40 USING(b)) AS t4, + (SELECT ref_1.a, t50.b FROM ref_1 LEFT JOIN dist_1 t50 USING(b)) AS t5 +WHERE + t1.a = t5.a AND + t1.a = t4.a AND + t1.a = t3.a AND + t1.a = t2.a AND + t1.a = t1.a; +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t10" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t10" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT b FROM recurring_outer_join.dist_1 t10 WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t20" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t20" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT b FROM recurring_outer_join.dist_1 t20 WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t30" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t30" to a subquery +DEBUG: generating subplan XXX_3 for subquery SELECT b FROM recurring_outer_join.dist_1 t30 WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t40" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t40" to 
a subquery +DEBUG: generating subplan XXX_4 for subquery SELECT b FROM recurring_outer_join.dist_1 t40 WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t50" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t50" to a subquery +DEBUG: generating subplan XXX_5 for subquery SELECT b FROM recurring_outer_join.dist_1 t50 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT ref_1.a, t10.b FROM (recurring_outer_join.ref_1 LEFT JOIN (SELECT NULL::integer AS a, t10_1.b FROM (SELECT intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(b integer)) t10_1) t10 USING (b))) t1, (SELECT ref_1.a, t20.b FROM (recurring_outer_join.ref_1 LEFT JOIN (SELECT NULL::integer AS a, t20_1.b FROM (SELECT intermediate_result.b FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(b integer)) t20_1) t20 USING (b))) t2, (SELECT ref_1.a, t30.b FROM (recurring_outer_join.ref_1 LEFT JOIN (SELECT NULL::integer AS a, t30_1.b FROM (SELECT intermediate_result.b FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(b integer)) t30_1) t30 USING (b))) t3, (SELECT ref_1.a, t40.b FROM (recurring_outer_join.ref_1 LEFT JOIN (SELECT NULL::integer AS a, t40_1.b FROM (SELECT intermediate_result.b FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(b integer)) t40_1) t40 USING (b))) t4, (SELECT ref_1.a, t50.b FROM (recurring_outer_join.ref_1 LEFT JOIN (SELECT NULL::integer AS a, t50_1.b FROM (SELECT intermediate_result.b FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result(b integer)) t50_1) t50 USING (b))) t5 WHERE ((t1.a OPERATOR(pg_catalog.=) 
t5.a) AND (t1.a OPERATOR(pg_catalog.=) t4.a) AND (t1.a OPERATOR(pg_catalog.=) t3.a) AND (t1.a OPERATOR(pg_catalog.=) t2.a) AND (t1.a OPERATOR(pg_catalog.=) t1.a)) + count +--------------------------------------------------------------------- + 1299 +(1 row) + +-- subqueries in the target list +SELECT t1.b, (SELECT b FROM ref_1 WHERE t1.a = a ORDER BY a,b LIMIT 1), (SELECT t2.a) +FROM ref_1 +LEFT JOIN dist_1 t1 USING (a,b) +JOIN dist_1 t2 USING (a,b) +ORDER BY 1,2,3 LIMIT 5; +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t1" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT t1.b, (SELECT ref_1_1.b FROM recurring_outer_join.ref_1 ref_1_1 WHERE (t1.a OPERATOR(pg_catalog.=) ref_1_1.a) ORDER BY ref_1_1.a, ref_1_1.b LIMIT 1) AS b, (SELECT t2.a) AS a FROM ((recurring_outer_join.ref_1 LEFT JOIN (SELECT t1_1.a, t1_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t1_1) t1 USING (a, b)) JOIN recurring_outer_join.dist_1 t2 USING (a, b)) ORDER BY t1.b, (SELECT ref_1_1.b FROM recurring_outer_join.ref_1 ref_1_1 WHERE (t1.a OPERATOR(pg_catalog.=) ref_1_1.a) ORDER BY ref_1_1.a, ref_1_1.b LIMIT 1), (SELECT t2.a) LIMIT 5 +DEBUG: push down of limit count: 5 + b | b | a +--------------------------------------------------------------------- + 11 | 11 | 1 + 21 | 21 | 2 +(2 rows) + +WITH +outer_cte_1 AS ( + SELECT + t1.b, + -- 9) t3 is recursively planned since t2 is recurring + (SELECT a FROM ref_1 t2 LEFT JOIN dist_1 t3 USING(a,b) WHERE t2.a=t1.a ORDER BY 1 LIMIT 1) + FROM dist_1 t1 + ORDER BY 
1,2 LIMIT 10 +), +outer_cte_2 AS ( + SELECT * FROM ( + SELECT * FROM ( + SELECT * FROM ( + SELECT * FROM ( + SELECT * FROM ( + -- 10) t5 is recursively planned since t4 is recurring + SELECT * FROM ref_1 t4 + LEFT JOIN dist_1 t5 + USING(a,b) + ) AS t6 + ) AS t7 + ) AS t8 + ) AS t9 + OFFSET 0 + )AS t10 + -- 11) t11 is recursively planned since lhs of the join tree became recurring + LEFT JOIN dist_1 t11 USING (b) +) +SELECT * FROM ref_1 t36 WHERE (b,100,a) IN ( + WITH + cte_1 AS ( + WITH cte_1_inner_cte AS ( + -- 3) t12 is recursively planned because t11 is recurring + SELECT * FROM ref_1 t11 + LEFT JOIN dist_1 t12 + USING (a,b) + ) + -- 4) t14 is recursively planned because t13 is recurring + SELECT * FROM ref_1 t13 + LEFT JOIN dist_1 t14 USING (a,b) + JOIN cte_1_inner_cte t15 + USING (a,b) + OFFSET 0 + ) + -- 6) t31 is recursively planned since t35 is recurring + -- 7) t34 is recursively planned since lhs of the join tree is now recurring + SELECT + DISTINCT t31.b, + -- 1) we first search for such joins in the target list and recursively plan t33 + -- because t32 is recurring + (SELECT max(b) FROM ref_1 t32 LEFT JOIN dist_1 t33 USING(a,b) WHERE t31.a = t32.a), + (SELECT t34.a) + FROM ref_1 t35 + LEFT JOIN dist_1 t31 USING (a,b) + LEFT JOIN dist_1 t34 USING (a,b) + -- 2) cte_1 was inlided, so we then recursively check for such joins there. + -- When doing so, we first check for cte_1_inner_cte was since it was + -- also inlined. + LEFT JOIN cte_1 USING (a,b) + -- 5) Since rhs of below join is a subquery too, we recursively search + -- for such joins there and plan distributed side of all those 10 + -- joins. 
+ LEFT JOIN ( + SELECT COUNT(DISTINCT t20.a) AS a + FROM + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t20, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t21, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t22, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t23, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t24, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t25, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t26, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t27, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t28, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t29 + WHERE + t20.a = t29.a AND + t20.a = t28.a AND + t20.a = t27.a AND + t20.a = t26.a AND + t20.a = t25.a AND + t20.a = t24.a AND + t20.a = t23.a AND + t20.a = t21.a AND + t20.a = t21.a AND + t20.a = t20.a + ) AS t30 + ON (t30.a = cte_1.a) + ORDER BY 1,2,3 +) AND +-- 8) Then we search for such joins in the next (and final) qual of the WHERE clause. +-- Since both outer_cte_1 and outer_cte_2 were inlined, we will first +-- recursively check for such joins in them. 
+a NOT IN (SELECT outer_cte_1.b FROM outer_cte_1 LEFT JOIN outer_cte_2 USING (b)); +DEBUG: CTE outer_cte_1 is going to be inlined via distributed planning +DEBUG: CTE outer_cte_2 is going to be inlined via distributed planning +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_1_inner_cte is going to be inlined via distributed planning +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t33" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t33" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t33 WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t12" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t12" to a subquery +DEBUG: generating subplan XXX_2 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t12 WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t14" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t14" to a subquery +DEBUG: generating subplan XXX_3 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t14 WHERE true +DEBUG: generating subplan XXX_4 for subquery SELECT t13.a, t13.b FROM ((recurring_outer_join.ref_1 t13 LEFT JOIN (SELECT t14_1.a, t14_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t14_1) t14 USING (a, b)) JOIN (SELECT t11.a, t11.b FROM (recurring_outer_join.ref_1 t11 
LEFT JOIN (SELECT t12_1.a, t12_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t12_1) t12 USING (a, b))) t15 USING (a, b)) OFFSET 0 +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "d" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "d" to a subquery +DEBUG: generating subplan XXX_5 for subquery SELECT NULL::integer AS "dummy-1" FROM recurring_outer_join.dist_1 d WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "d" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "d" to a subquery +DEBUG: generating subplan XXX_6 for subquery SELECT NULL::integer AS "dummy-1" FROM recurring_outer_join.dist_1 d WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "d" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "d" to a subquery +DEBUG: generating subplan XXX_7 for subquery SELECT NULL::integer AS "dummy-1" FROM recurring_outer_join.dist_1 d WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "d" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "d" to a subquery +DEBUG: generating subplan XXX_8 for subquery SELECT NULL::integer AS "dummy-1" FROM recurring_outer_join.dist_1 d WHERE true +DEBUG: recursively planning right 
side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "d" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "d" to a subquery +DEBUG: generating subplan XXX_9 for subquery SELECT NULL::integer AS "dummy-1" FROM recurring_outer_join.dist_1 d WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "d" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "d" to a subquery +DEBUG: generating subplan XXX_10 for subquery SELECT NULL::integer AS "dummy-1" FROM recurring_outer_join.dist_1 d WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "d" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "d" to a subquery +DEBUG: generating subplan XXX_11 for subquery SELECT NULL::integer AS "dummy-1" FROM recurring_outer_join.dist_1 d WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "d" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "d" to a subquery +DEBUG: generating subplan XXX_12 for subquery SELECT NULL::integer AS "dummy-1" FROM recurring_outer_join.dist_1 d WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "d" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "d" to a subquery +DEBUG: generating subplan 
XXX_13 for subquery SELECT NULL::integer AS "dummy-1" FROM recurring_outer_join.dist_1 d WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "d" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "d" to a subquery +DEBUG: generating subplan XXX_14 for subquery SELECT NULL::integer AS "dummy-1" FROM recurring_outer_join.dist_1 d WHERE true +DEBUG: generating subplan XXX_15 for subquery SELECT count(DISTINCT t20.a) AS a FROM (SELECT r.a, d.b FROM (recurring_outer_join.ref_1 r LEFT JOIN (SELECT NULL::integer AS a, NULL::integer AS b FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) d_1) d USING (b)) WHERE (r.a IS NOT NULL)) t20, (SELECT r.a, d.b FROM (recurring_outer_join.ref_1 r LEFT JOIN (SELECT NULL::integer AS a, NULL::integer AS b FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_6'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) d_1) d USING (b)) WHERE (r.a IS NOT NULL)) t21, (SELECT r.a, d.b FROM (recurring_outer_join.ref_1 r LEFT JOIN (SELECT NULL::integer AS a, NULL::integer AS b FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_7'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) d_1) d USING (b)) WHERE (r.a IS NOT NULL)) t22, (SELECT r.a, d.b FROM (recurring_outer_join.ref_1 r LEFT JOIN (SELECT NULL::integer AS a, NULL::integer AS b FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_8'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) d_1) d USING (b)) WHERE (r.a IS NOT NULL)) t23, (SELECT r.a, d.b FROM (recurring_outer_join.ref_1 r LEFT JOIN (SELECT NULL::integer AS a, NULL::integer AS b FROM (SELECT 
intermediate_result."dummy-1" FROM read_intermediate_result('XXX_9'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) d_1) d USING (b)) WHERE (r.a IS NOT NULL)) t24, (SELECT r.a, d.b FROM (recurring_outer_join.ref_1 r LEFT JOIN (SELECT NULL::integer AS a, NULL::integer AS b FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_10'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) d_1) d USING (b)) WHERE (r.a IS NOT NULL)) t25, (SELECT r.a, d.b FROM (recurring_outer_join.ref_1 r LEFT JOIN (SELECT NULL::integer AS a, NULL::integer AS b FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_11'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) d_1) d USING (b)) WHERE (r.a IS NOT NULL)) t26, (SELECT r.a, d.b FROM (recurring_outer_join.ref_1 r LEFT JOIN (SELECT NULL::integer AS a, NULL::integer AS b FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_12'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) d_1) d USING (b)) WHERE (r.a IS NOT NULL)) t27, (SELECT r.a, d.b FROM (recurring_outer_join.ref_1 r LEFT JOIN (SELECT NULL::integer AS a, NULL::integer AS b FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_13'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) d_1) d USING (b)) WHERE (r.a IS NOT NULL)) t28, (SELECT r.a, d.b FROM (recurring_outer_join.ref_1 r LEFT JOIN (SELECT NULL::integer AS a, NULL::integer AS b FROM (SELECT intermediate_result."dummy-1" FROM read_intermediate_result('XXX_14'::text, 'binary'::citus_copy_format) intermediate_result("dummy-1" integer)) d_1) d USING (b)) WHERE (r.a IS NOT NULL)) t29 WHERE ((t20.a OPERATOR(pg_catalog.=) t29.a) AND (t20.a OPERATOR(pg_catalog.=) t28.a) AND (t20.a OPERATOR(pg_catalog.=) t27.a) AND (t20.a OPERATOR(pg_catalog.=) t26.a) AND (t20.a OPERATOR(pg_catalog.=) t25.a) AND (t20.a 
OPERATOR(pg_catalog.=) t24.a) AND (t20.a OPERATOR(pg_catalog.=) t23.a) AND (t20.a OPERATOR(pg_catalog.=) t21.a) AND (t20.a OPERATOR(pg_catalog.=) t21.a) AND (t20.a OPERATOR(pg_catalog.=) t20.a)) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t31" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t31" to a subquery +DEBUG: generating subplan XXX_16 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t31 WHERE true +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t34" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t34" to a subquery +DEBUG: generating subplan XXX_17 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t34 WHERE true +DEBUG: generating subplan XXX_18 for subquery SELECT DISTINCT t31.b, (SELECT max(t32.b) AS max FROM (recurring_outer_join.ref_1 t32 LEFT JOIN (SELECT t33_1.a, t33_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t33_1) t33 USING (a, b)) WHERE (t31.a OPERATOR(pg_catalog.=) t32.a)) AS max, (SELECT t34.a) AS a FROM ((((recurring_outer_join.ref_1 t35 LEFT JOIN (SELECT t31_1.a, t31_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_16'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t31_1) t31 USING (a, b)) LEFT JOIN (SELECT t34_1.a, t34_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_17'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t34_1) t34 USING (a, b)) LEFT JOIN (SELECT 
intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) cte_1 USING (a, b)) LEFT JOIN (SELECT intermediate_result.a FROM read_intermediate_result('XXX_15'::text, 'binary'::citus_copy_format) intermediate_result(a bigint)) t30 ON ((t30.a OPERATOR(pg_catalog.=) cte_1.a))) ORDER BY t31.b, (SELECT max(t32.b) AS max FROM (recurring_outer_join.ref_1 t32 LEFT JOIN (SELECT t33_1.a, t33_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t33_1) t33 USING (a, b)) WHERE (t31.a OPERATOR(pg_catalog.=) t32.a)), (SELECT t34.a) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t3" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t3" to a subquery +DEBUG: generating subplan XXX_19 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t3 WHERE true +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_20 for subquery SELECT b, (SELECT t2.a FROM (recurring_outer_join.ref_1 t2 LEFT JOIN (SELECT t3_1.a, t3_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_19'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t3_1) t3 USING (a, b)) WHERE (t2.a OPERATOR(pg_catalog.=) t1.a) ORDER BY t2.a LIMIT 1) AS a FROM recurring_outer_join.dist_1 t1 ORDER BY b, (SELECT t2.a FROM (recurring_outer_join.ref_1 t2 LEFT JOIN (SELECT t3_1.a, t3_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_19'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t3_1) t3 USING (a, b)) WHERE (t2.a OPERATOR(pg_catalog.=) t1.a) ORDER BY t2.a LIMIT 1) 
LIMIT 10 +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t5" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t5" to a subquery +DEBUG: generating subplan XXX_21 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t5 WHERE true +DEBUG: generating subplan XXX_22 for subquery SELECT a, b FROM (SELECT t8.a, t8.b FROM (SELECT t7.a, t7.b FROM (SELECT t6.a, t6.b FROM (SELECT t4.a, t4.b FROM (recurring_outer_join.ref_1 t4 LEFT JOIN (SELECT t5_1.a, t5_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_21'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t5_1) t5 USING (a, b))) t6) t7) t8) t9 OFFSET 0 +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t11" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t11" to a subquery +DEBUG: generating subplan XXX_23 for subquery SELECT b FROM recurring_outer_join.dist_1 t11 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a, b FROM recurring_outer_join.ref_1 t36 WHERE (((b, 100, a) OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.b, intermediate_result.max, intermediate_result.a FROM read_intermediate_result('XXX_18'::text, 'binary'::citus_copy_format) intermediate_result(b integer, max integer, a integer))) AND (NOT (a OPERATOR(pg_catalog.=) ANY (SELECT outer_cte_1.b FROM ((SELECT intermediate_result.b, intermediate_result.a FROM read_intermediate_result('XXX_20'::text, 'binary'::citus_copy_format) intermediate_result(b integer, a integer)) outer_cte_1 LEFT JOIN (SELECT t10.b, t10.a, t11.a FROM ((SELECT intermediate_result.a, intermediate_result.b FROM 
read_intermediate_result('XXX_22'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t10 LEFT JOIN (SELECT NULL::integer AS a, t11_1.b FROM (SELECT intermediate_result.b FROM read_intermediate_result('XXX_23'::text, 'binary'::citus_copy_format) intermediate_result(b integer)) t11_1) t11 USING (b))) outer_cte_2(b, a, a_1) USING (b)))))) + a | b +--------------------------------------------------------------------- + 1 | 11 +(1 row) + +WITH +cte_1 AS ( + SELECT COUNT(*) FROM dist_1 t1 + JOIN + ( + ( + dist_1 t2 JOIN dist_1 t3 USING (a) + ) + JOIN + ( + dist_1 t4 JOIN ( + dist_1 t5 JOIN ( + dist_1 t6 JOIN ( + ref_1 t7 LEFT JOIN dist_1 t8 USING (a) + ) USING(a) + ) USING(a) + ) USING (a) + ) USING(a) + ) USING (a) +), +cte_2 AS ( + SELECT COUNT(*) FROM dist_1 t9 + JOIN + ( + ( + dist_1 t10 JOIN dist_1 t11 USING (a) + ) + JOIN + ( + dist_1 t12 JOIN ( + dist_1 t13 JOIN ( + dist_1 t14 JOIN ( + ref_1 t15 LEFT JOIN dist_1 t16 USING (a) + ) USING(a) + ) USING(a) + ) USING (a) + ) USING(a) + ) USING (a) +) +SELECT * FROM cte_1, cte_2; +DEBUG: CTE cte_1 is going to be inlined via distributed planning +DEBUG: CTE cte_2 is going to be inlined via distributed planning +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t8" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t8" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t8 WHERE true +DEBUG: generating subplan XXX_2 for subquery SELECT count(*) AS count FROM (recurring_outer_join.dist_1 t1 JOIN ((recurring_outer_join.dist_1 t2 JOIN recurring_outer_join.dist_1 t3 USING (a)) JOIN (recurring_outer_join.dist_1 t4 JOIN (recurring_outer_join.dist_1 t5 JOIN (recurring_outer_join.dist_1 t6 JOIN (recurring_outer_join.ref_1 t7 LEFT JOIN (SELECT t8_1.a, NULL::integer AS b FROM 
(SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t8_1) t8 USING (a)) USING (a)) USING (a)) USING (a)) USING (a)) USING (a)) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t16" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t16" to a subquery +DEBUG: generating subplan XXX_3 for subquery SELECT a FROM recurring_outer_join.dist_1 t16 WHERE true +DEBUG: generating subplan XXX_4 for subquery SELECT count(*) AS count FROM (recurring_outer_join.dist_1 t9 JOIN ((recurring_outer_join.dist_1 t10 JOIN recurring_outer_join.dist_1 t11 USING (a)) JOIN (recurring_outer_join.dist_1 t12 JOIN (recurring_outer_join.dist_1 t13 JOIN (recurring_outer_join.dist_1 t14 JOIN (recurring_outer_join.ref_1 t15 LEFT JOIN (SELECT t16_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t16_1) t16 USING (a)) USING (a)) USING (a)) USING (a)) USING (a)) USING (a)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT cte_1.count, cte_2.count FROM (SELECT intermediate_result.count FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) cte_1, (SELECT intermediate_result.count FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) cte_2 + count | count +--------------------------------------------------------------------- + 53526 | 53526 +(1 row) + +-- such joins can appear within SET operations too +SELECT COUNT(*) FROM +-- 2) given that the rhs of the right join is recurring due to set +-- operation, t1 is recursively planned too +dist_1 t1 +RIGHT JOIN +( + SELECT * FROM dist_1 t2 + UNION + ( + -- 1) t3 
is recursively planned because t4 is recurring + SELECT t3.a, t3.b FROM dist_1 t3 + FULL JOIN + ref_1 t4 + USING (a) + ) +) t5 +USING(a); +DEBUG: recursively planning left side of the full join since the other side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t3" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t3" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t3 WHERE true +DEBUG: generating subplan XXX_2 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t2 +DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer) UNION SELECT t3.a, t3.b FROM ((SELECT t3_1.a, t3_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t3_1) t3 FULL JOIN recurring_outer_join.ref_1 t4 USING (a)) +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t1" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t1" to a subquery +DEBUG: generating subplan XXX_4 for subquery SELECT a FROM recurring_outer_join.dist_1 t1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT t1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t1_1) t1 RIGHT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) 
t5 USING (a)) + count +--------------------------------------------------------------------- + 60 +(1 row) + +-- simple modification queries +CREATE TABLE dist_5 (LIKE dist_1); +INSERT INTO dist_5 SELECT * FROM dist_1 WHERE a < 5; +SELECT create_distributed_table('dist_5', 'a'); +NOTICE: Copying data from local table... +DEBUG: Copied 12 rows +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$recurring_outer_join.dist_5$$) + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +BEGIN; + DELETE FROM dist_5 + USING ( + SELECT t1.a, t1.b FROM ref_1 t1 + LEFT JOIN + ( + SELECT * FROM dist_1 t2 WHERE EXISTS ( + SELECT * FROM dist_1 t4 + WHERE t4.a = t2.a + ) + ) t3 + USING (a) + ) q + WHERE dist_5.a = q.a + RETURNING *; +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t2 WHERE (EXISTS (SELECT t4.a, t4.b FROM recurring_outer_join.dist_1 t4 WHERE (t4.a OPERATOR(pg_catalog.=) t2.a))) +DEBUG: Plan XXX query after replacing subqueries and CTEs: DELETE FROM recurring_outer_join.dist_5 USING (SELECT t1.a, t1.b FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t3 USING (a))) q WHERE (dist_5.a OPERATOR(pg_catalog.=) q.a) RETURNING dist_5.a, dist_5.b, q.a, q.b + a | b | a | b +--------------------------------------------------------------------- + 1 | 10 | 1 | 11 + 1 | 11 | 1 | 11 + 1 | 12 | 1 | 11 + 2 | 20 | 2 | 203 + 2 | 21 | 2 
| 203 + 2 | 22 | 2 | 203 + 2 | 23 | 2 | 203 +(7 rows) + +ROLLBACK; +BEGIN; + UPDATE dist_5 + SET b = 10 + WHERE a IN ( + SELECT t1.a FROM ref_1 t1 + LEFT JOIN + ( + SELECT * FROM dist_1 t2 WHERE EXISTS ( + SELECT * FROM dist_1 t4 + WHERE t4.a = t2.a + ) + ) t3 + USING (a) + ) + RETURNING *; +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t2 WHERE (EXISTS (SELECT t4.a, t4.b FROM recurring_outer_join.dist_1 t4 WHERE (t4.a OPERATOR(pg_catalog.=) t2.a))) +DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE recurring_outer_join.dist_5 SET b = 10 WHERE (a OPERATOR(pg_catalog.=) ANY (SELECT t1.a FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t3 USING (a)))) RETURNING a, b + a | b +--------------------------------------------------------------------- + 1 | 10 + 1 | 10 + 1 | 10 + 2 | 10 + 2 | 10 + 2 | 10 + 2 | 10 +(7 rows) + +ROLLBACK; +-- INSERT .. SELECT: pull to coordinator +BEGIN; + DELETE FROM ref_1 WHERE a IS NULL; + INSERT INTO dist_1 + SELECT t1.* + FROM ref_1 t1 + LEFT JOIN dist_1 t2 + ON (t1.a = t2.a); +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: The target table's partition column should correspond to a partition column in the subquery. 
+DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t2" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t2" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t2 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT t1.a, t1.b FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT t2_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t2_1) t2 ON ((t1.a OPERATOR(pg_catalog.=) t2.a))) +DEBUG: Collecting INSERT ... SELECT results on coordinator +ROLLBACK; +-- INSERT .. SELECT: repartitioned (due to ) +BEGIN; + INSERT INTO dist_1 + SELECT t1.a*3, t1.b + FROM dist_1 t1 + JOIN + (ref_1 t2 LEFT JOIN dist_1 t3 USING(a)) t4 + ON (t1.a = t4.a); +DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match +DETAIL: Subquery contains an operator in the same position as the target table's partition column. +HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. 
+DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t3" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t3" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t3 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT (t1.a OPERATOR(pg_catalog.*) 3) AS a, t1.b FROM (recurring_outer_join.dist_1 t1 JOIN (recurring_outer_join.ref_1 t2 LEFT JOIN (SELECT t3_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t3_1) t3 USING (a)) t4(a, b, b_1) ON ((t1.a OPERATOR(pg_catalog.=) t4.a))) +DEBUG: performing repartitioned INSERT ... SELECT +ROLLBACK; +-- INSERT .. SELECT: repartitioned +-- should be able to push-down once https://github.com/citusdata/citus/issues/6544 is fixed +BEGIN; + INSERT INTO dist_1 + SELECT t1.* + FROM dist_1 t1 + JOIN + (ref_1 t2 LEFT JOIN dist_1 t3 USING(a)) t4 + ON (t1.a = t4.a); +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning distributed relation "dist_1" "t3" since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: Wrapping relation "dist_1" "t3" to a subquery +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t3 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT t1.a, t1.b FROM (recurring_outer_join.dist_1 t1 JOIN (recurring_outer_join.ref_1 t2 LEFT JOIN (SELECT t3_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t3_1) t3 USING (a)) t4(a, 
b, b_1) ON ((t1.a OPERATOR(pg_catalog.=) t4.a))) +DEBUG: performing repartitioned INSERT ... SELECT +ROLLBACK; +SET client_min_messages TO ERROR; +DROP SCHEMA recurring_outer_join CASCADE; +SELECT master_remove_node('localhost', :master_port); + master_remove_node +--------------------------------------------------------------------- + +(1 row) + diff --git a/src/test/regress/multi_1_schedule b/src/test/regress/multi_1_schedule index 14c02a97b..477d55098 100644 --- a/src/test/regress/multi_1_schedule +++ b/src/test/regress/multi_1_schedule @@ -198,6 +198,7 @@ test: local_dist_join_modifications test: local_table_join test: local_dist_join_mixed test: citus_local_dist_joins +test: recurring_outer_join test: pg_dump # --------- diff --git a/src/test/regress/sql/arbitrary_configs_recurring_outer_join.sql b/src/test/regress/sql/arbitrary_configs_recurring_outer_join.sql new file mode 100644 index 000000000..6f85ab08d --- /dev/null +++ b/src/test/regress/sql/arbitrary_configs_recurring_outer_join.sql @@ -0,0 +1,74 @@ +SET search_path TO local_dist_join_mixed; + +SELECT COUNT(*) FROM reference LEFT JOIN distributed USING (id); + +SELECT COUNT(*) FROM distributed RIGHT JOIN reference USING (id); + +SELECT COUNT(*) FROM reference FULL JOIN distributed USING (id); +SELECT COUNT(*) FROM distributed FULL JOIN reference USING (id); + +SELECT COUNT(*) FROM distributed FULL JOIN reference USING (id); + +-- distributed side is a subquery +SELECT COUNT(*) FROM reference LEFT JOIN (SELECT * FROM distributed) q USING (id); + +-- distributed side is a join tree +SELECT COUNT(*) FROM reference LEFT JOIN (distributed t1 JOIN distributed t2 USING (id)) q USING (id); +SELECT COUNT(*) FROM reference LEFT JOIN (distributed t1 LEFT JOIN distributed t2 USING (id)) q USING (id); + + +SELECT COUNT(*) FROM +-- 2) right side is distributed but t1 is recurring, hence what +-- makes the right side distributed (t4) is recursively planned +reference t1 +LEFT JOIN +( + distributed t4 + JOIN + -- 1) 
t6 is recursively planned since the outer side is recurring + (SELECT t6.id FROM distributed t6 RIGHT JOIN reference t7 USING(id)) t5 + USING(id) +) q +USING(id) +-- 3) outer side of the join tree became recurring, hence t8 is +-- recursively planned too +LEFT JOIN +distributed t8 +USING (id) +WHERE t8.id IS NULL; + +SELECT COUNT(*) FROM +local t1 +LEFT JOIN +-- 2) t6 subquery is distributed so needs to be recursively planned +-- because t1 is first recursively planned +( + SELECT * FROM + (SELECT * FROM reference t2 JOIN distributed t3 USING (id)) p + JOIN + -- 1) t5 is recursively planned since the outer side is recurring + (SELECT * FROM reference t4 LEFT JOIN distributed t5 USING (id)) q + USING(id) +) t6 +USING (id); + +BEGIN; + WITH cte AS ( + DELETE FROM distributed + USING ( + SELECT t1.id, t1.id*3 FROM reference t1 + LEFT JOIN + ( + SELECT * FROM distributed t2 WHERE EXISTS ( + SELECT * FROM distributed t4 + WHERE t4.id = t2.id + ) + ) t3 + USING (id) + ) q + WHERE distributed.id = q.id AND + distributed.id > 65 + RETURNING * + ) + SELECT COUNT(*) FROM cte; +ROLLBACK; diff --git a/src/test/regress/sql/recurring_outer_join.sql b/src/test/regress/sql/recurring_outer_join.sql new file mode 100644 index 000000000..e26df4b86 --- /dev/null +++ b/src/test/regress/sql/recurring_outer_join.sql @@ -0,0 +1,1030 @@ +CREATE SCHEMA recurring_outer_join; +SET search_path TO recurring_outer_join; + +SET citus.next_shard_id TO 1520000; +SET citus.shard_count TO 32; + +-- idempotently add node to allow this test to run without add_coordinator +SET client_min_messages TO WARNING; +SELECT 1 FROM citus_add_node('localhost', :master_port, groupid => 0); + +SET client_min_messages TO DEBUG1; + +CREATE TABLE dist_1 (a int, b int); +SELECT create_distributed_table('dist_1', 'a'); +INSERT INTO dist_1 VALUES +(1, 10), +(1, 11), +(1, 12), +(2, 20), +(2, 21), +(2, 22), +(2, 23), +(3, 30), +(3, 31), +(3, 32), +(3, 33), +(3, 34), +(7, 40), +(7, 41), +(7, 42); + +CREATE TABLE 
dist_2_columnar(LIKE dist_1) USING columnar; +INSERT INTO dist_2_columnar SELECT * FROM dist_1; +SELECT create_distributed_table('dist_2_columnar', 'a'); + +CREATE TABLE dist_3_partitioned(LIKE dist_1) PARTITION BY RANGE(a); +CREATE TABLE dist_3_partitioned_p1 PARTITION OF dist_3_partitioned FOR VALUES FROM (0) TO (2); +CREATE TABLE dist_3_partitioned_p2 PARTITION OF dist_3_partitioned FOR VALUES FROM (2) TO (4); +CREATE TABLE dist_3_partitioned_p3 PARTITION OF dist_3_partitioned FOR VALUES FROM (4) TO (100); +SELECT create_distributed_table('dist_3_partitioned', 'a'); +INSERT INTO dist_3_partitioned SELECT * FROM dist_1; + +CREATE TABLE ref_1 (a int, b int); +SELECT create_reference_table('ref_1'); +INSERT INTO ref_1 VALUES +(1, 100), +(1, 11), +(null, 102), +(2, 200), +(2, 21), +(null, 202), +(2, 203), +(4, 300), +(4, 301), +(null, 302), +(4, 303), +(4, 304), +(null, 400), +(null, 401), +(null, 402); + +CREATE TABLE local_1 (a int, b int); +INSERT INTO local_1 VALUES +(null, 1000), +(1, 11), +(1, 100), +(5, 2000), +(5, 2001), +(5, 2002), +(null, 2003), +(6, 3000), +(6, 3001), +(6, 3002), +(null, 3003), +(6, 3004), +(null, 4000), +(null, 4001), +(null, 4002); + +CREATE TABLE citus_local_1(LIKE local_1); +INSERT INTO citus_local_1 SELECT * FROM local_1; +SELECT citus_add_local_table_to_metadata('citus_local_1'); + +CREATE TABLE dist_4_different_colocation_group(LIKE dist_1); +INSERT INTO dist_4_different_colocation_group SELECT * FROM local_1; +DELETE FROM dist_4_different_colocation_group WHERE a IS NULL; +SELECT create_distributed_table('dist_4_different_colocation_group', 'a', colocate_with=>'none'); + +CREATE TABLE dist_5_with_pkey(LIKE dist_1); +INSERT INTO dist_5_with_pkey VALUES +(1, 11), +(2, 22), +(3, 34), +(7, 40); +SELECT create_distributed_table('dist_5_with_pkey', 'a'); +ALTER TABLE dist_5_with_pkey ADD CONSTRAINT pkey_1 PRIMARY KEY (a); + +-- +-- basic cases +-- + +SELECT COUNT(*) FROM ref_1 LEFT JOIN dist_1 USING (a); + +SELECT COUNT(*) FROM ref_1 
LEFT JOIN dist_1 USING (a,b); + +SELECT COUNT(*) FROM dist_1 RIGHT JOIN ref_1 USING (a); + +SELECT COUNT(*) FROM ref_1 FULL JOIN dist_1 USING (a); +SELECT COUNT(*) FROM dist_1 FULL JOIN ref_1 USING (a); + +SELECT COUNT(*) FROM dist_1 FULL JOIN ref_1 USING (a,b); + +-- distributed side is a subquery +SELECT COUNT(*) FROM ref_1 LEFT JOIN (SELECT * FROM dist_1) q USING (a); + +-- distributed side is a join tree +SELECT COUNT(*) FROM ref_1 LEFT JOIN (dist_1 t1 JOIN dist_1 t2 USING (a)) q USING (a); +SELECT COUNT(*) FROM ref_1 LEFT JOIN (dist_1 t1 LEFT JOIN dist_1 t2 USING (a)) q USING (a); + +-- use functions/VALUES clauses/intrermediate results as the recurring rel + + -- values clause + SELECT COUNT(*) FROM (SELECT a, b FROM (VALUES (1, 'one'), (2, 'two'), (3, 'three')) as t(a,b)) recurring LEFT JOIN dist_1 USING (a); + + -- generate_series() + SELECT COUNT(*) FROM dist_1 RIGHT JOIN (SELECT a FROM generate_series(1, 10) a) recurring USING (a); + + -- materialized cte + WITH dist_1_materialized AS MATERIALIZED ( + SELECT * FROM dist_1 + ) + SELECT COUNT(*) FROM dist_1 RIGHT JOIN dist_1_materialized USING (a); + + -- offset in the subquery + SELECT COUNT(*) FROM dist_1 t1 RIGHT JOIN (SELECT * FROM dist_1 OFFSET 0) t2 USING (a); + + -- limit in the subquery + SELECT COUNT(*) FROM dist_1 t1 RIGHT JOIN (SELECT * FROM dist_1 ORDER BY 1,2 LIMIT 2) t2 USING (a); + + -- local-distributed join as the recurring rel + -- + -- We plan local-distributed join by converting local_1 into an intermediate result + -- and hence it becomes a recurring rel. Then we convert distributed - inner side of + -- the right join (dist_1) into an intermediate result too and this makes rhs of the + -- full join a recurring rel. And finally, we convert lhs of the full join (t1) into + -- an intermediate result too. 
+ SELECT COUNT(*) FROM dist_1 t1 FULL JOIN (dist_1 RIGHT JOIN local_1 USING(a)) t2 USING (a); + + SELECT COUNT(*) FROM dist_1 t1 FULL JOIN (dist_1 RIGHT JOIN citus_local_1 USING(a)) t2 USING (a); + + -- subqury without FROM + SELECT COUNT(*) FROM dist_1 t1 RIGHT JOIN (SELECT generate_series(1,10) AS a) t2 USING (a); + +-- such semi joins / anti joins are supported too + + -- reference table + SELECT COUNT(*) FROM + ref_1 t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE t1.a IN (SELECT a FROM dist_1 t3); + + -- not supported because we join t3 (inner rel of the anti join) with a column + -- of reference table, not with the distribution column of the other distributed + -- table (t2) + SELECT COUNT(*) FROM + ref_1 t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE NOT EXISTS (SELECT * FROM dist_1 t3 WHERE t1.a = a); + + -- supported because the semi join is performed based on distribution keys + -- of the distributed tables + SELECT COUNT(*) FROM + ref_1 t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE NOT EXISTS (SELECT * FROM dist_1 t3 WHERE t2.a = a); + + -- values clause + SELECT COUNT(*) FROM + (SELECT a, b FROM (VALUES (1, 'one'), (2, 'two'), (3, 'three')) as t(a,b)) t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE EXISTS (SELECT * FROM dist_1 t3 WHERE t1.a = a); + + -- offset in the subquery + SELECT COUNT(*) FROM + (SELECT * FROM dist_1 OFFSET 0) t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE t1.a IN (SELECT a FROM dist_1 t3); + + -- local-distributed join as the recurring rel + SELECT COUNT(*) FROM + (dist_1 RIGHT JOIN local_1 USING(a)) t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE t1.a IN (SELECT a FROM dist_1 t3); + + -- materialized cte + WITH dist_1_materialized AS MATERIALIZED ( + SELECT * FROM dist_1 + ) + SELECT COUNT(*) FROM + dist_1_materialized t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE t1.a IN (SELECT a FROM dist_1 t3); + + WITH dist_1_materialized AS MATERIALIZED ( + SELECT * FROM dist_1 + ) + SELECT COUNT(*) FROM + dist_1_materialized t1 + JOIN 
dist_1 t2 + ON (t1.a = t2.a) + WHERE EXISTS (SELECT a FROM dist_1 t3 WHERE t3.a = t1.a); + + -- not supported because we anti-join t3 --inner rel-- with a column + -- of t1 (intermediate result) --outer-rel-- + WITH dist_1_materialized AS MATERIALIZED ( + SELECT * FROM dist_1 + ) + SELECT COUNT(*) FROM + dist_1_materialized t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE NOT EXISTS (SELECT a FROM dist_1 t3 WHERE t3.a = t1.a); + + -- so this is supported because now t3 is joined with t2, not t1 + WITH dist_1_materialized AS MATERIALIZED ( + SELECT a AS a_alias, b AS b_alias FROM dist_1 + ) + SELECT COUNT(*) FROM + dist_1_materialized t1 + JOIN dist_1 t2 + ON (t1.a_alias = t2.a) + WHERE NOT EXISTS (SELECT a FROM dist_1 t3 WHERE t3.a = t2.a); + + WITH dist_1_materialized AS MATERIALIZED ( + SELECT a AS a_alias, b AS b_alias FROM dist_1 + ) + SELECT COUNT(*) FROM + dist_1_materialized t1 + JOIN dist_1 t2 + ON (t1.a_alias = t2.a) + WHERE t1.a_alias NOT IN (SELECT a FROM dist_1 t3); + + -- generate_series() + SELECT COUNT(*) FROM + (SELECT a FROM generate_series(1, 10) a) t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE t1.a IN (SELECT a FROM dist_1 t3); + + -- subqury without FROM + SELECT COUNT(*) FROM + (SELECT generate_series(1,10) AS a) t1 + JOIN dist_1 t2 + ON (t1.a = t2.a) + WHERE t1.a IN (SELECT a FROM dist_1 t3); + +-- together with correlated subqueries + +SELECT COUNT(*) FROM ref_1 t1 +LEFT JOIN dist_1 t2 USING (a,b) +WHERE EXISTS (SELECT * FROM dist_1 t3 WHERE t1.a = t3.a); + +SELECT COUNT(*) FROM dist_1 t1 +RIGHT JOIN ref_1 t2 USING (a,b) +WHERE EXISTS (SELECT * FROM dist_1 t3 WHERE t2.a = t3.a); + + +-- "dist_1 t2" can't contribute to result set of the right join with +-- a tuple having "(t2.a) a = NULL" because t2 is in the inner side of +-- right join. For this reason, Postgres knows that "t2.a = t1.a" can +-- never evaluate to true (because "NULL = t1.a" never yields "true") +-- and replaces the right join with an inner join. 
+-- And as a result, we can push-down the query without having to go +-- through recursive planning. +SELECT COUNT(*) FROM dist_1 t1 +WHERE EXISTS ( + SELECT * FROM dist_1 t2 + RIGHT JOIN ref_1 t3 USING (a) + WHERE t2.a = t1.a +); + +-- same here, Postgres converts the left join into an inner one +SELECT foo.* FROM +ref_1 r1, +LATERAL +( + SELECT * FROM ref_1 r2 + LEFT JOIN dist_1 + USING (a) + WHERE r1.a > dist_1.b +) as foo; + +-- Qual is the same but top-level join is an anti-join. Right join +-- stays as is and hence requires recursive planning. +SELECT COUNT(*) FROM dist_1 t1 +WHERE NOT EXISTS ( + SELECT * FROM dist_1 t2 + RIGHT JOIN ref_1 t3 USING (a) + WHERE t2.a = t1.a +); + +-- This time the semi-join qual is "t3.a = t1.a" (not "t2.a = t1.a") +-- where t3 is the outer rel of the right join. Hence Postgres can't +-- replace right join with an inner join and so we recursively plan +-- inner side of the right join since the outer side is a recurring +-- rel. +SELECT COUNT(*) FROM dist_1 t1 +WHERE EXISTS ( + SELECT * FROM dist_1 t2 + RIGHT JOIN ref_1 t3 USING (a) + WHERE t3.a = t1.a +); + +SELECT COUNT(*) FROM dist_1 t1 +WHERE NOT EXISTS ( + SELECT * FROM dist_1 t2 + RIGHT JOIN ref_1 t3 USING (a) + WHERE t3.a = t1.a +); + +-- +-- more complex cases +-- + +SELECT COUNT(*) FROM +-- 1) right side is distributed but t1 is recurring, hence what +-- makes the right side distributed (t3) is recursively planned +ref_1 t1 +LEFT JOIN +(ref_1 t2 RIGHT JOIN dist_1 t3(x,y) ON (t2.a=t3.x)) t5 +USING(a) +-- 2) outer side of the join tree became recurring, hence t4 is +-- recursively planned too +LEFT JOIN +dist_1 t4 +ON (t4.a = t5.a AND t4.b = t5.b) +WHERE t4.b IS NULL; + +SELECT COUNT(*) FROM +-- 2) right side is distributed but t1 is recurring, hence what +-- makes the right side distributed (t4) is recursively planned +ref_1 t1 +LEFT JOIN +( + dist_1 t4 + JOIN + -- 1) t6 is recursively planned since the outer side is recurring + (SELECT t6.a FROM dist_1 t6 RIGHT JOIN ref_1 t7 USING(a)) t5 + 
USING(a) +) q +USING(a) +-- 3) outer side of the join tree became recurring, hence t8 is +-- recursively planned too +LEFT JOIN +dist_1 t8 +USING (a) +WHERE t8.b IS NULL; + +SELECT COUNT(*) FROM +ref_1 t1 +-- all distributed tables in the rhs will be recursively planned +-- in the order of t3, t4, t5 +LEFT JOIN +( + ref_1 t2 + JOIN + dist_1 t3 + USING (a) + JOIN + (dist_1 t4 JOIN dist_1 t5 USING (a)) + USING(a) +) +USING (a); + +-- Even if dist_1 and dist_4_different_colocation_group belong to different +-- colocation groups, we can run query without doing a repartition join as +-- we first decide recursively planning lhs of the right join because rhs +-- (ref_1) is a recurring rel. And while doing so, we anyway recursively plan +-- the distributed tables in the subjoin tree individually hence the whole join +-- tree becomes: +-- RIGHT JOIN +-- / \ +-- intermediate_result_for_dist_1 ref_1 +-- JOIN +-- intermediate_result_for_dist_4_different_colocation_group +-- +-- When we decide implementing the optimization noted in +-- RecursivelyPlanDistributedJoinNode in an XXX comment, then this query would +-- require enabling repartition joins. +SELECT COUNT(*) FROM +dist_1 JOIN dist_4_different_colocation_group USING(a) +RIGHT JOIN ref_1 USING(a); + +SELECT COUNT(*) FROM +ref_1 t1 +LEFT JOIN +-- 2) t6 subquery is distributed so needs to be recursively planned +-- because t1 is recurring +( + SELECT * FROM + (SELECT * FROM ref_1 t2 JOIN dist_1 t3 USING (a) WHERE t3.b IS NULL) p + JOIN + -- 1) t5 is recursively planned since the outer side is recurring + (SELECT * FROM ref_1 t4 LEFT JOIN dist_1 t5 USING (a)) q + USING(a) +) t6 +USING (a); + +-- No need to recursively plan dist_5_with_pkey thanks to +-- pkey optimizations done by Postgres. +SELECT COUNT(*) FROM ref_1 LEFT JOIN dist_5_with_pkey USING(a); + +-- Similarly, "dist_1.a IN (1,4)" implies that "dist_1.a" cannot be NULL +-- and hence Postgres converts the LEFT JOIN into an INNER JOIN form. 
+-- For this reason, we don't need to recursively plan dist_1. +SELECT COUNT(*) FROM ref_1 LEFT JOIN dist_1 USING(a) WHERE dist_1.a IN (1,4); + +SELECT COUNT(*) FROM +ref_1 t1 +LEFT JOIN +-- 2) t6 subquery is distributed so needs to be recursively planned +-- because t1 is recurring +( + SELECT * FROM + (SELECT * FROM ref_1 t2 JOIN dist_3_partitioned t3 USING (a) WHERE t3.b IS NULL) p + JOIN + -- 1) t5 is recursively planned since the outer side is recurring + (SELECT * FROM ref_1 t4 LEFT JOIN dist_1 t5 USING (a)) q + USING(a) +) t6 +USING (a); + +SELECT COUNT(t1.a), t1.b FROM +ref_1 t1 +LEFT JOIN +-- 2) t6 subquery is distributed so needs to be recursively planned +-- because t1 is recurring +( + SELECT * FROM + (SELECT * FROM ref_1 t2 JOIN dist_3_partitioned t3 USING (a) WHERE t3.b IS NULL) p + JOIN + -- 1) t5 is recursively planned since the outer side is recurring + (SELECT * FROM ref_1 t4 LEFT JOIN dist_1 t5 USING (a)) q + USING(a) +) t6 +USING (a) +GROUP BY (t1.b) +HAVING t1.b > 200 +ORDER BY 1,2; + +SELECT COUNT(t1.a), t1.b FROM +ref_1 t1 +LEFT JOIN +-- 2) t6 subquery is distributed so needs to be recursively planned +-- because t1 is recurring +( + SELECT * FROM + (SELECT * FROM ref_1 t2 JOIN dist_3_partitioned t3 USING (a) WHERE t3.b IS NULL) p + JOIN + -- 1) t5 is recursively planned since the outer side is recurring + (SELECT * FROM ref_1 t4 LEFT JOIN dist_1 t5 USING (a)) q + USING(a) +) t6 +USING (a) +GROUP BY (t1.b) +HAVING ( + EXISTS ( + SELECT * FROM ref_1 t6 + LEFT JOIN dist_1 t7 USING (a) + WHERE t7.b > 10 + ) +) +ORDER BY 1,2; + +SELECT COUNT(*) FROM +citus_local_1 t1 +LEFT JOIN +-- 2) t6 subquery is distributed so needs to be recursively planned +-- because t1 is first recursively planned +( + SELECT * FROM + (SELECT * FROM ref_1 t2 JOIN dist_1 t3 USING (a) WHERE t3.b IS NULL) p + JOIN + -- 1) t5 is recursively planned since the outer side is recurring + (SELECT * FROM ref_1 t4 LEFT JOIN dist_1 t5 USING (a)) q + USING(a) +) t6 +USING (a); + 
+SELECT COUNT(*) FROM +-- 2) t1 is recursively planned because the outer side (t2) is +-- converted into a recurring rel +dist_2_columnar t1 +RIGHT JOIN +( + -- 1) t4 is recursively planned since the outer side is recurring + ref_1 t3 LEFT JOIN dist_1 t4 USING(a) +) t2 +USING (a); + +SELECT COUNT(*) FROM +-- 3) t1 is recursively planned because the outer side (t2) is +-- converted into a recurring rel +dist_1 t1 +RIGHT JOIN +( + -- 2) t6 is recursively planned because now it's part of a distributed + -- inner join node that is about to be outer joined with t3 + ref_1 t3 + LEFT JOIN + ( + -- 1-a) t4 is recursively planned since the outer side is recurring + (ref_1 t5 LEFT JOIN dist_1 t4 USING(a)) + JOIN + dist_1 t6 + USING(a) + JOIN + -- 1-b) t8 is recursively planned since the outer side is recurring + (ref_1 t7 LEFT JOIN dist_1 t8 USING(a)) + USING(a) + ) + USING(a) +) t2 +USING (a); + +SELECT COUNT(*) FROM +ref_1 t6 +LEFT JOIN +( + ref_1 t1 + LEFT JOIN + ( + -- t3 is a distributed join tree so needs to be recursively planned + -- because t2 is recurring + ref_1 t2 LEFT JOIN (dist_1 t7 JOIN dist_1 t8 USING (a)) t3 USING(a) + JOIN + ref_1 t5 + USING(a) + ) + USING(a) +) +USING(a); + +SELECT COUNT(*) FROM +ref_1 t6 +LEFT JOIN +( + ref_1 t1 + LEFT JOIN + ( + -- t4 subquery is distributed so needs to be recursively planned + -- because t2 is recurring + ref_1 t2 LEFT JOIN (SELECT * FROM dist_1 t3) t4 USING(a) + JOIN + ref_1 t5 + USING(a) + ) + USING(a) +) +USING(a); + +SELECT COUNT(*) FROM +ref_1 t6 +LEFT JOIN +( + ref_1 t1 + LEFT JOIN + ( + -- t4 subquery is distributed so needs to be recursively planned + -- because t2 is recurring + ref_1 t2 LEFT JOIN (SELECT * FROM dist_3_partitioned t3) t4 USING(a) + JOIN + ref_1 t5 + USING(a) + ) + USING(a) +) +USING(a); + +-- cannot recursively plan because t3 (inner - distributed) +-- references t1 (outer - recurring) +SELECT COUNT(*) FROM ref_1 t1 LEFT JOIN LATERAL (SELECT * FROM dist_1 t2 WHERE t1.b < t2.b) t3 USING (a); 
+SELECT COUNT(*) FROM (SELECT * FROM dist_1 OFFSET 100) t1 LEFT JOIN LATERAL (SELECT * FROM dist_1 t2 WHERE t1.b < t2.b) t3 USING (a); +SELECT COUNT(*) FROM local_1 t1 LEFT JOIN LATERAL (SELECT * FROM dist_1 t2 WHERE t1.b < t2.b) t3 USING (a); +SELECT COUNT(*) FROM (SELECT 1 a, generate_series(1,2) b) t1 LEFT JOIN LATERAL (SELECT * FROM dist_1 t2 WHERE t1.b < t2.b) t3 USING (a); +SELECT COUNT(*) FROM (ref_1 t10 JOIN ref_1 t11 USING(a,b)) t1 LEFT JOIN LATERAL (SELECT * FROM dist_1 t2 WHERE t1.b < t2.b) t3 USING (a); + +-- cannot plan because the query in the WHERE clause of t3 +-- (inner - distributed) references t1 (outer - recurring) +SELECT COUNT(*) FROM ref_1 t1 +LEFT JOIN LATERAL +( + SELECT * FROM dist_1 t2 WHERE EXISTS ( + SELECT * FROM dist_1 t4 + WHERE t4.a = t2.a AND t4.b > t1.b + ) +) t3 +USING (a); + +-- can recursively plan after dropping (t4.b > t1.b) qual from t3 +SELECT COUNT(*) FROM ref_1 t1 +LEFT JOIN +( + SELECT * FROM dist_1 t2 WHERE EXISTS ( + SELECT * FROM dist_1 t4 + WHERE t4.a = t2.a + ) +) t3 +USING (a); + +-- same test using a view, can be recursively planned +CREATE VIEW my_view_1 AS +SELECT * FROM dist_1 t2 WHERE EXISTS ( + SELECT * FROM dist_1 t4 + WHERE t4.a = t2.a +); + +SELECT COUNT(*) FROM ref_1 t1 +LEFT JOIN +my_view_1 t3 +USING (a); + +SELECT COUNT(*) FROM +ref_1 t6 +LEFT JOIN +( + ref_1 t1 + LEFT JOIN + ( + -- t4 subquery is distributed so needs to be recursively planned + -- because t2 is recurring. + -- However, we fail to recursively plan t4 because it references + -- t6. + ref_1 t2 LEFT JOIN LATERAL (SELECT * FROM dist_2_columnar t3 WHERE t3.a > t6.a) t4 USING(a) + JOIN + ref_1 t5 + USING(a) + ) + USING(a) +) +USING(a); + +SELECT COUNT(*) FROM +ref_1 t6 +LEFT JOIN +( + ref_1 t1 + LEFT JOIN + ( + -- t4 subquery is distributed so needs to be recursively planned + -- because t2 is recurring. + -- Even if the query says t2 is lateral joined with t4, t4 doesn't + -- reference anywhere else and hence can be planned recursively. 
+ ref_1 t2 LEFT JOIN LATERAL (SELECT * FROM dist_1 t3) t4 USING(a) + JOIN + ref_1 t5 + USING(a) + ) + USING(a) +) +USING(a); + +-- since t1 is recurring and t6 is distributed, all the distributed +-- tables in t6 will be recursively planned +SELECT COUNT(*) FROM ref_1 t1 +LEFT JOIN +( + ((SELECT * FROM ref_1 WHERE a > 1) t2 JOIN dist_1 t3 USING (a)) + JOIN + (dist_1 t4 JOIN dist_1 t5 USING (a)) + USING(a) +) t6 +USING (a); + +BEGIN; + -- same test but this time should fail due to + -- citus.max_intermediate_result_size + SET LOCAL citus.max_intermediate_result_size TO "0.5kB"; + SELECT COUNT(*) FROM ref_1 t1 + LEFT JOIN + ( + ((SELECT * FROM ref_1 WHERE a > 1) t2 JOIN dist_1 t3 USING (a)) + JOIN + (dist_1 t4 JOIN dist_1 t5 USING (a)) + USING(a) + ) t6 + USING (a); +ROLLBACK; + +-- Same test using some views, can be recursively planned too. +-- Since t1 is recurring and t6 is distributed, all the distributed +-- tables in t6 will be recursively planned. +CREATE VIEW my_view_2 AS +(SELECT * FROM ref_1 WHERE a > 1); + +CREATE VIEW my_view_3 AS +(SELECT * FROM ref_1); + +SELECT COUNT(*) FROM my_view_3 t1 +LEFT JOIN +( + (my_view_2 t2 JOIN dist_1 t3 USING (a)) + JOIN + (dist_1 t4 JOIN dist_1 t5 USING (a)) + USING(a) +) t6 +USING (a); + +SELECT COUNT(*) FROM ref_1 t1 +-- 2) Since t8 is distributed and t1 is recurring, t8 needs be converted +-- to a recurring rel too. For this reason, subquery t8 is recursively +-- planned because t7 is recurring already. +LEFT JOIN +( + SELECT * FROM (SELECT * FROM ref_1 t2 RIGHT JOIN dist_1 t3 USING (a)) AS t4 + JOIN + -- 1) subquery t6 is recursively planned because t5 is recurring + (SELECT * FROM ref_1 t5 LEFT JOIN (SELECT * FROM dist_2_columnar WHERE b < 150) t6 USING (a)) as t7 + USING(a) +) t8 +USING (a); + +-- same test using a prepared statement +PREPARE recurring_outer_join_p1 AS +SELECT COUNT(*) FROM ref_1 t1 +-- 2) Since t8 is distributed and t1 is recurring, t8 needs be converted +-- to a recurring rel too. 
For this reason, subquery t8 is recursively +-- planned because t7 is recurring already. +LEFT JOIN +( + SELECT * FROM (SELECT * FROM ref_1 t2 RIGHT JOIN dist_1 t3 USING (a)) AS t4 + JOIN + -- 1) subquery t6 is recursively planned because t5 is recurring + (SELECT * FROM ref_1 t5 LEFT JOIN (SELECT * FROM dist_2_columnar WHERE b < $1) t6 USING (a)) as t7 + USING(a) +) t8 +USING (a); + +EXECUTE recurring_outer_join_p1(0); +EXECUTE recurring_outer_join_p1(100); +EXECUTE recurring_outer_join_p1(100); +EXECUTE recurring_outer_join_p1(10); +EXECUTE recurring_outer_join_p1(10); +EXECUTE recurring_outer_join_p1(1000); +EXECUTE recurring_outer_join_p1(1000); + +-- t5 is recursively planned because the outer side of the final +-- left join is recurring +SELECT * FROM ref_1 t1 +JOIN ref_1 t2 USING (a) +LEFT JOIN ref_1 t3 USING (a) +LEFT JOIN ref_1 t4 USING (a) +LEFT JOIN dist_1 t5 USING (a) +ORDER BY 1,2,3,4,5,6 DESC +LIMIT 5; + +-- t6 is recursively planned because the outer side of the final +-- left join is recurring +SELECT * FROM (SELECT * FROM ref_1 ORDER BY 1,2 LIMIT 7) t1 +JOIN ref_1 t2 USING (a) +LEFT JOIN (SELECT *, random() > 1 FROM dist_1 t3) t4 USING (a) +LEFT JOIN ref_1 t5 USING (a) +LEFT JOIN dist_1 t6 USING (a) +ORDER BY 1,2,3,4,5,6,7 DESC +LIMIT 10; + +-- +-- Such join rels can recursively appear anywhere in the query instead +-- of simple relation rtes. 
+-- + +SELECT COUNT(*) FROM + (SELECT ref_1.a, t10.b FROM ref_1 LEFT JOIN dist_1 t10 USING(b)) AS t1, + (SELECT ref_1.a, t20.b FROM ref_1 LEFT JOIN dist_1 t20 USING(b)) AS t2, + (SELECT ref_1.a, t30.b FROM ref_1 LEFT JOIN dist_1 t30 USING(b)) AS t3, + (SELECT ref_1.a, t40.b FROM ref_1 LEFT JOIN dist_1 t40 USING(b)) AS t4, + (SELECT ref_1.a, t50.b FROM ref_1 LEFT JOIN dist_1 t50 USING(b)) AS t5 +WHERE + t1.a = t5.a AND + t1.a = t4.a AND + t1.a = t3.a AND + t1.a = t2.a AND + t1.a = t1.a; + +-- subqueries in the target list + +SELECT t1.b, (SELECT b FROM ref_1 WHERE t1.a = a ORDER BY a,b LIMIT 1), (SELECT t2.a) +FROM ref_1 +LEFT JOIN dist_1 t1 USING (a,b) +JOIN dist_1 t2 USING (a,b) +ORDER BY 1,2,3 LIMIT 5; + +WITH +outer_cte_1 AS ( + SELECT + t1.b, + -- 9) t3 is recursively planned since t2 is recurring + (SELECT a FROM ref_1 t2 LEFT JOIN dist_1 t3 USING(a,b) WHERE t2.a=t1.a ORDER BY 1 LIMIT 1) + FROM dist_1 t1 + ORDER BY 1,2 LIMIT 10 +), +outer_cte_2 AS ( + SELECT * FROM ( + SELECT * FROM ( + SELECT * FROM ( + SELECT * FROM ( + SELECT * FROM ( + -- 10) t5 is recursively planned since t4 is recurring + SELECT * FROM ref_1 t4 + LEFT JOIN dist_1 t5 + USING(a,b) + ) AS t6 + ) AS t7 + ) AS t8 + ) AS t9 + OFFSET 0 + )AS t10 + -- 11) t11 is recursively planned since lhs of the join tree became recurring + LEFT JOIN dist_1 t11 USING (b) +) +SELECT * FROM ref_1 t36 WHERE (b,100,a) IN ( + WITH + cte_1 AS ( + WITH cte_1_inner_cte AS ( + -- 3) t12 is recursively planned because t11 is recurring + SELECT * FROM ref_1 t11 + LEFT JOIN dist_1 t12 + USING (a,b) + ) + -- 4) t14 is recursively planned because t13 is recurring + SELECT * FROM ref_1 t13 + LEFT JOIN dist_1 t14 USING (a,b) + JOIN cte_1_inner_cte t15 + USING (a,b) + OFFSET 0 + ) + -- 6) t31 is recursively planned since t35 is recurring + -- 7) t34 is recursively planned since lhs of the join tree is now recurring + SELECT + DISTINCT t31.b, + -- 1) we first search for such joins in the target list and recursively plan t33 + 
-- because t32 is recurring + (SELECT max(b) FROM ref_1 t32 LEFT JOIN dist_1 t33 USING(a,b) WHERE t31.a = t32.a), + (SELECT t34.a) + FROM ref_1 t35 + LEFT JOIN dist_1 t31 USING (a,b) + LEFT JOIN dist_1 t34 USING (a,b) + -- 2) cte_1 was inlided, so we then recursively check for such joins there. + -- When doing so, we first check for cte_1_inner_cte was since it was + -- also inlined. + LEFT JOIN cte_1 USING (a,b) + -- 5) Since rhs of below join is a subquery too, we recursively search + -- for such joins there and plan distributed side of all those 10 + -- joins. + LEFT JOIN ( + SELECT COUNT(DISTINCT t20.a) AS a + FROM + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t20, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t21, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t22, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t23, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t24, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t25, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t26, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t27, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t28, + (SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t29 + WHERE + t20.a = t29.a AND + t20.a = t28.a AND + t20.a = t27.a AND + t20.a = t26.a AND + t20.a = t25.a AND + t20.a = t24.a AND + t20.a = t23.a AND + t20.a = t21.a AND + t20.a = t21.a AND + t20.a = t20.a + ) AS t30 + ON (t30.a = cte_1.a) + ORDER BY 1,2,3 +) AND +-- 8) Then we search for such joins in the next (and final) qual of the WHERE clause. 
+-- Since both outer_cte_1 and outer_cte_2 were inlined, we will first +-- recursively check for such joins in them. +a NOT IN (SELECT outer_cte_1.b FROM outer_cte_1 LEFT JOIN outer_cte_2 USING (b)); + +WITH +cte_1 AS ( + SELECT COUNT(*) FROM dist_1 t1 + JOIN + ( + ( + dist_1 t2 JOIN dist_1 t3 USING (a) + ) + JOIN + ( + dist_1 t4 JOIN ( + dist_1 t5 JOIN ( + dist_1 t6 JOIN ( + ref_1 t7 LEFT JOIN dist_1 t8 USING (a) + ) USING(a) + ) USING(a) + ) USING (a) + ) USING(a) + ) USING (a) +), +cte_2 AS ( + SELECT COUNT(*) FROM dist_1 t9 + JOIN + ( + ( + dist_1 t10 JOIN dist_1 t11 USING (a) + ) + JOIN + ( + dist_1 t12 JOIN ( + dist_1 t13 JOIN ( + dist_1 t14 JOIN ( + ref_1 t15 LEFT JOIN dist_1 t16 USING (a) + ) USING(a) + ) USING(a) + ) USING (a) + ) USING(a) + ) USING (a) +) +SELECT * FROM cte_1, cte_2; + +-- such joins can appear within SET operations too +SELECT COUNT(*) FROM +-- 2) given that the rhs of the right join is recurring due to set +-- operation, t1 is recursively planned too +dist_1 t1 +RIGHT JOIN +( + SELECT * FROM dist_1 t2 + UNION + ( + -- 1) t3 is recursively planned because t4 is recurring + SELECT t3.a, t3.b FROM dist_1 t3 + FULL JOIN + ref_1 t4 + USING (a) + ) +) t5 +USING(a); + +-- simple modification queries + +CREATE TABLE dist_5 (LIKE dist_1); +INSERT INTO dist_5 SELECT * FROM dist_1 WHERE a < 5; +SELECT create_distributed_table('dist_5', 'a'); + +BEGIN; + DELETE FROM dist_5 + USING ( + SELECT t1.a, t1.b FROM ref_1 t1 + LEFT JOIN + ( + SELECT * FROM dist_1 t2 WHERE EXISTS ( + SELECT * FROM dist_1 t4 + WHERE t4.a = t2.a + ) + ) t3 + USING (a) + ) q + WHERE dist_5.a = q.a + RETURNING *; +ROLLBACK; + +BEGIN; + UPDATE dist_5 + SET b = 10 + WHERE a IN ( + SELECT t1.a FROM ref_1 t1 + LEFT JOIN + ( + SELECT * FROM dist_1 t2 WHERE EXISTS ( + SELECT * FROM dist_1 t4 + WHERE t4.a = t2.a + ) + ) t3 + USING (a) + ) + RETURNING *; +ROLLBACK; + +-- INSERT .. 
SELECT: pull to coordinator +BEGIN; + DELETE FROM ref_1 WHERE a IS NULL; + + INSERT INTO dist_1 + SELECT t1.* + FROM ref_1 t1 + LEFT JOIN dist_1 t2 + ON (t1.a = t2.a); +ROLLBACK; + +-- INSERT .. SELECT: repartitioned (due to "t1.a*3") +BEGIN; + INSERT INTO dist_1 + SELECT t1.a*3, t1.b + FROM dist_1 t1 + JOIN + (ref_1 t2 LEFT JOIN dist_1 t3 USING(a)) t4 + ON (t1.a = t4.a); +ROLLBACK; + +-- INSERT .. SELECT: repartitioned +-- should be able to push-down once https://github.com/citusdata/citus/issues/6544 is fixed +BEGIN; + INSERT INTO dist_1 + SELECT t1.* + FROM dist_1 t1 + JOIN + (ref_1 t2 LEFT JOIN dist_1 t3 USING(a)) t4 + ON (t1.a = t4.a); +ROLLBACK; + +SET client_min_messages TO ERROR; +DROP SCHEMA recurring_outer_join CASCADE; + +SELECT master_remove_node('localhost', :master_port); diff --git a/src/test/regress/sql_schedule b/src/test/regress/sql_schedule index 413cfb784..ca7f32f1a 100644 --- a/src/test/regress/sql_schedule +++ b/src/test/regress/sql_schedule @@ -4,7 +4,7 @@ test: ch_benchmarks_1 ch_benchmarks_2 ch_benchmarks_3 test: ch_benchmarks_4 ch_benchmarks_5 ch_benchmarks_6 test: intermediate_result_pruning_queries_1 intermediate_result_pruning_queries_2 test: dropped_columns_1 distributed_planning -test: local_dist_join nested_execution +test: local_dist_join nested_execution arbitrary_configs_recurring_outer_join test: connectivity_checks citus_run_command test: schemas test: views