diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 91cdd76bb..a2cf53a03 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -158,7 +158,6 @@ static MultiNode * SubqueryMultiNodeTree(Query *originalQuery, Query *queryTree, PlannerRestrictionContext * plannerRestrictionContext); -static List * SublinkList(Query *originalQuery); static bool ExtractSublinkWalker(Node *node, List **sublinkList); static MultiNode * SubqueryPushdownMultiNodeTree(Query *queryTree); @@ -316,7 +315,7 @@ FindNodeCheck(Node *node, bool (*check)(Node *)) * that the function should be called on the original query given that postgres * standard_planner() may convert the subqueries in WHERE clause to joins. */ -static List * +List * SublinkList(Query *originalQuery) { FromExpr *joinTree = originalQuery->jointree; diff --git a/src/backend/distributed/planner/recursive_planning.c b/src/backend/distributed/planner/recursive_planning.c index f1afe8e54..acb818d1b 100644 --- a/src/backend/distributed/planner/recursive_planning.c +++ b/src/backend/distributed/planner/recursive_planning.c @@ -116,6 +116,12 @@ typedef struct VarLevelsUpWalkerContext static DeferredErrorMessage * RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext * context); + +static bool ShouldRecursivelyPlanNonColocatedSubqueries(Query *subquery, + RecursivePlanningContext * + context); +static void RecursivelyPlanNonColocatedSubqueries(Query *subquery, + RecursivePlanningContext *context); static bool ShouldRecursivelyPlanAllSubqueriesInWhere(Query *query); static bool RecursivelyPlanAllSubqueries(Node *node, RecursivePlanningContext *planningContext); @@ -272,10 +278,77 @@ RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext *context RecursivelyPlanAllSubqueries((Node *) query->jointree->quals, context); } + if 
(ShouldRecursivelyPlanNonColocatedSubqueries(query, context)) + { + RecursivelyPlanNonColocatedSubqueries(query, context); + } + return NULL; } +/* + * ShouldRecursivelyPlanNonColocatedSubqueries returns true if the input query contains joins + * that are not on the distribution key. + * + * Note that at the point that this function is called, we've already recursively planned all + * the leaf subqueries. Thus, we're actually checking whether the joins among the subqueries + * are on the distribution key or not. + */ +static bool +ShouldRecursivelyPlanNonColocatedSubqueries(Query *subquery, + RecursivePlanningContext *context) +{ + /* if the input query already contains the equality, simply return */ + if (context->queryContainsDistributionKeyEquality) + { + return false; + } + + /* + * This check helps us in two ways: + * (i) We're not targeting queries that don't include subqueries at all, + * they should go through regular planning. + * (ii) Lower level subqueries are already recursively planned, so we should + * only bother with non-colocated subquery joins, which only happens when + * there are subqueries. + */ + if (SubqueryEntryList(subquery) == NIL && SublinkList(subquery) == NIL) + { + return false; + } + + /* + * At this point, we might be recursively planning a subquery which will be pulled + * by PostgreSQL standard_planner (e.g., tpch_7_nested). However, checking for those + * cases is pretty complicated and does not seem very useful to implement. + */ + + + /* direct joins with local tables are not supported by any of Citus planners */ + if (FindNodeCheckInRangeTableList(subquery->rtable, IsLocalTableRTE)) + { + return false; + } + + /* + * Finally, check whether this subquery contains distribution key equality or not.
+ */ + if (!SubqueryContainsDistributionKeyEquality(subquery, + context->plannerRestrictionContext)) + { + return true; + } + + return false; +} + + +static void +RecursivelyPlanNonColocatedSubqueries(Query *query, RecursivePlanningContext *context) +{ } + + /* * ShouldRecursivelyPlanAllSubqueriesInWhere returns true if the query has * a WHERE clause and a recurring FROM clause (does not contain a distributed diff --git a/src/include/distributed/multi_logical_planner.h b/src/include/distributed/multi_logical_planner.h index f5748a374..73ece0cda 100644 --- a/src/include/distributed/multi_logical_planner.h +++ b/src/include/distributed/multi_logical_planner.h @@ -187,6 +187,7 @@ extern bool SubqueryPushdown; extern MultiTreeRoot * MultiLogicalPlanCreate(Query *originalQuery, Query *queryTree, PlannerRestrictionContext * plannerRestrictionContext); +extern List * SublinkList(Query *originalQuery); extern bool SingleRelationRepartitionSubquery(Query *queryTree); extern DeferredErrorMessage * DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool