From e58d730e64c850b00c1133bffff2348eb02d45f1 Mon Sep 17 00:00:00 2001 From: eaydingol Date: Thu, 7 Aug 2025 11:06:58 +0300 Subject: [PATCH] Extend the logic to right joins --- .../distributed/planner/deparse_shard_query.c | 2 +- .../distributed/planner/recursive_planning.c | 48 +++++++++++++------ src/include/distributed/recursive_planning.h | 4 +- .../expected/multi_outer_join_reference.out | 6 +-- ...lti_subquery_in_where_reference_clause.out | 10 +++- .../expected/recurring_join_pushdown.out | 46 ++++++++++++++++++ .../sql/multi_outer_join_reference.sql | 4 +- ...lti_subquery_in_where_reference_clause.sql | 2 +- .../regress/sql/recurring_join_pushdown.sql | 9 ++++ 9 files changed, 105 insertions(+), 26 deletions(-) diff --git a/src/backend/distributed/planner/deparse_shard_query.c b/src/backend/distributed/planner/deparse_shard_query.c index 7cb194738..f726fab65 100644 --- a/src/backend/distributed/planner/deparse_shard_query.c +++ b/src/backend/distributed/planner/deparse_shard_query.c @@ -310,7 +310,7 @@ DefineQualsForShardInterval(RelationShard *relationShard, int attnum, int rtinde /* null test for the first shard */ NullTest *nullTest = makeNode(NullTest); nullTest->nulltesttype = IS_NULL; /* Check for IS NULL */ - nullTest->arg = (Expr *) partitionColumnVar; /* The variable to check */ + nullTest->arg = (Expr *) outerTablePartitionColumnVar; /* The variable to check */ nullTest->argisrow = false; shardIntervalBoundQuals = (Node *) make_orclause(list_make2(nullTest, shardIntervalBoundQuals)); diff --git a/src/backend/distributed/planner/recursive_planning.c b/src/backend/distributed/planner/recursive_planning.c index eb2aaf732..a6737353c 100644 --- a/src/backend/distributed/planner/recursive_planning.c +++ b/src/backend/distributed/planner/recursive_planning.c @@ -756,7 +756,8 @@ RecursivelyPlanRecurringTupleOuterJoinWalker(Node *node, Query *query, /* left join */ if (leftNodeRecurs && !rightNodeRecurs) { - if (chainedJoin || !CheckPushDownFeasibilityLeftJoin(joinExpr, query)) + if (chainedJoin || !CheckPushDownFeasibilityOuterJoin(joinExpr, + query)) { ereport(DEBUG1, (errmsg("recursively planning right side of " "the left join since the outer side " @@ -786,11 +787,21 @@ RecursivelyPlanRecurringTupleOuterJoinWalker(Node *node, Query *query, /* right join */ if (!leftNodeRecurs && rightNodeRecurs) { - ereport(DEBUG1, (errmsg("recursively planning left side of " - "the right join since the outer side " - "is a recurring rel"))); - RecursivelyPlanDistributedJoinNode(leftNode, query, - recursivePlanningContext); + if (chainedJoin || !CheckPushDownFeasibilityOuterJoin(joinExpr, + query)) + { + ereport(DEBUG1, (errmsg("recursively planning left side of " + "the right join since the outer side " + "is a recurring rel"))); + RecursivelyPlanDistributedJoinNode(leftNode, query, + recursivePlanningContext); + } + else + { + ereport(DEBUG3, (errmsg( + "a push down safe right join with recurring left side"))); + rightNodeRecurs = false; /* right node will be pushed down */ + } } /* @@ -2665,11 +2676,13 @@ hasPseudoconstantQuals(RelationRestrictionContext *relationRestrictionContext) /* - * IsPushdownSafeForRTEInLeftJoin returns true if the given range table entry - * is safe for pushdown. Currently, we only allow reference tables. + * IsPushdownSafeForOuterRTEInOuterJoin returns true if the given range table entry + * is safe for pushdown when it is the outer relation of a outer join when the + * inner relation is not recurring. + * Currently, we only allow reference tables. */ bool -IsPushdownSafeForRTEInLeftJoin(RangeTblEntry *rte) +IsPushdownSafeForOuterRTEInOuterJoin(RangeTblEntry *rte) { if (IsCitusTable(rte->relid) && IsCitusTableType(rte->relid, REFERENCE_TABLE)) { @@ -2778,8 +2791,7 @@ CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query, return false; } - /* TODO: generalize to right joins */ - if (joinExpr->jointype != JOIN_LEFT) + if (joinExpr->jointype != JOIN_LEFT && joinExpr->jointype != JOIN_RIGHT) { return false; } @@ -2799,10 +2811,18 @@ CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query, return false; } - *outerRtIndex = (((RangeTblRef *) joinExpr->larg)->rtindex); + if (joinExpr->jointype == JOIN_LEFT) + { + *outerRtIndex = (((RangeTblRef *) joinExpr->larg)->rtindex); + } + else /* JOIN_RIGHT */ + { + *outerRtIndex = (((RangeTblRef *) joinExpr->rarg)->rtindex); + } + *outerRte = rt_fetch(*outerRtIndex, query->rtable); - if (!IsPushdownSafeForRTEInLeftJoin(*outerRte)) + if (!IsPushdownSafeForOuterRTEInOuterJoin(*outerRte)) { return false; } @@ -2902,7 +2922,7 @@ CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query, * See CheckPushDownFeasibilityAndComputeIndexes for more details. */ bool -CheckPushDownFeasibilityLeftJoin(JoinExpr *joinExpr, Query *query) +CheckPushDownFeasibilityOuterJoin(JoinExpr *joinExpr, Query *query) { int outerRtIndex; RangeTblEntry *outerRte = NULL; diff --git a/src/include/distributed/recursive_planning.h b/src/include/distributed/recursive_planning.h index 7bdfef18a..feb6dadd2 100644 --- a/src/include/distributed/recursive_planning.h +++ b/src/include/distributed/recursive_planning.h @@ -52,13 +52,13 @@ extern bool IsRecursivelyPlannableRelation(RangeTblEntry *rangeTableEntry); extern bool IsRelationLocalTableOrMatView(Oid relationId); extern bool ContainsReferencesToOuterQuery(Query *query); extern void UpdateVarNosInNode(Node *node, Index newVarNo); -extern bool IsPushdownSafeForRTEInLeftJoin(RangeTblEntry *rte); +extern bool IsPushdownSafeForOuterRTEInOuterJoin(RangeTblEntry *rte); extern bool CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query, int *outerRtIndex, RangeTblEntry **outerRte, RangeTblEntry **distRte, int *attnum); -extern bool CheckPushDownFeasibilityLeftJoin(JoinExpr *joinExpr, Query *query); +extern bool CheckPushDownFeasibilityOuterJoin(JoinExpr *joinExpr, Query *query); bool ResolveBaseVarFromSubquery(Var *var, Query *query, Var **baseVar, RangeTblEntry **baseRte); bool CheckPushDownConditionOnInnerVar(Var *innervar, RangeTblEntry *rte); diff --git a/src/test/regress/expected/multi_outer_join_reference.out b/src/test/regress/expected/multi_outer_join_reference.out index aca91bda7..0a474f9d9 100644 --- a/src/test/regress/expected/multi_outer_join_reference.out +++ b/src/test/regress/expected/multi_outer_join_reference.out @@ -208,12 +208,11 @@ FROM 20 | 0 (1 row) --- Right join is allowed as we recursively plan the distributed table (multi_outer_join_left_hash) +-- Right join is allowed (multi_outer_join_left_hash) SELECT min(r_custkey), max(r_custkey) FROM multi_outer_join_left_hash a RIGHT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey); -LOG: join order: [ "multi_outer_join_left_hash" ] min | max --------------------------------------------------------------------- 1 | 15 @@ -323,12 +322,11 @@ FROM 25 | 1 (1 row) --- Right join should be allowed in this case as we recursively plan the distributed table (multi_outer_join_left_hash +-- Right join should be allowed in this case (multi_outer_join_left_hash) SELECT min(r_custkey), max(r_custkey) FROM multi_outer_join_left_hash a RIGHT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey); -LOG: join order: [ "multi_outer_join_left_hash" ] min | max --------------------------------------------------------------------- 11 | 30 diff --git a/src/test/regress/expected/multi_subquery_in_where_reference_clause.out b/src/test/regress/expected/multi_subquery_in_where_reference_clause.out index 4f9f215bc..d2f9f2986 100644 --- a/src/test/regress/expected/multi_subquery_in_where_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_in_where_reference_clause.out @@ -146,7 +146,7 @@ LIMIT 3; 3 (3 rows) --- outer join could still recur +-- outer join pushed down SELECT DISTINCT user_id FROM @@ -162,7 +162,13 @@ WHERE ) ORDER BY user_id LIMIT 3; -ERROR: correlated subqueries are not supported when the FROM clause contains a CTE or subquery + user_id +--------------------------------------------------------------------- + 1 + 2 + 3 +(3 rows) + -- subqueries in WHERE with IN operator without equality SELECT users_table.user_id, count(*) diff --git a/src/test/regress/expected/recurring_join_pushdown.out b/src/test/regress/expected/recurring_join_pushdown.out index b7ded4253..efb4c1d71 100644 --- a/src/test/regress/expected/recurring_join_pushdown.out +++ b/src/test/regress/expected/recurring_join_pushdown.out @@ -967,3 +967,49 @@ DEBUG: assigned task to node localhost:xxxxx Filter: ((a IS NULL) OR ((btint4cmp('-2147483648'::integer, hashint4(a)) < 0) AND (btint4cmp(hashint4(a), '-1073741825'::integer) <= 0))) (26 rows) + -- Basic cases with RIGHT JOIN +SET client_min_messages TO DEBUG3; +SELECT count(*) FROM d1 RIGHT JOIN r1 USING (a); +DEBUG: no shard pruning constraints on d1 found +DEBUG: shard count after pruning for d1: 4 +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: a push down safe right join with recurring left side +DEBUG: no shard pruning constraints on d1 found +DEBUG: shard count after pruning for d1: 4 +DEBUG: assigned task to node localhost:xxxxx +DEBUG: assigned task to node localhost:xxxxx +DEBUG: assigned task to node localhost:xxxxx +DEBUG: assigned task to node localhost:xxxxx + count +--------------------------------------------------------------------- + 21 +(1 row) + +SELECT count(*) FROM d1_local RIGHT JOIN r1_local USING (a); + count +--------------------------------------------------------------------- + 21 +(1 row) + +SELECT count(*) FROM (SELECT * FROM d1) RIGHT JOIN r1 USING (a); +DEBUG: no shard pruning constraints on d1 found +DEBUG: shard count after pruning for d1: 4 +DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: a push down safe right join with recurring left side +DEBUG: no shard pruning constraints on d1 found +DEBUG: shard count after pruning for d1: 4 +DEBUG: assigned task to node localhost:xxxxx +DEBUG: assigned task to node localhost:xxxxx +DEBUG: assigned task to node localhost:xxxxx +DEBUG: assigned task to node localhost:xxxxx + count +--------------------------------------------------------------------- + 21 +(1 row) + +SELECT count(*) FROM (SELECT * FROM d1_local) RIGHT JOIN r1_local USING (a); + count +--------------------------------------------------------------------- + 21 +(1 row) + diff --git a/src/test/regress/sql/multi_outer_join_reference.sql b/src/test/regress/sql/multi_outer_join_reference.sql index 04a9c23e2..4cede154f 100644 --- a/src/test/regress/sql/multi_outer_join_reference.sql +++ b/src/test/regress/sql/multi_outer_join_reference.sql @@ -159,7 +159,7 @@ FROM multi_outer_join_left_hash a LEFT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey AND l_custkey = -1 /* nonexistant */); --- Right join is allowed as we recursively plan the distributed table (multi_outer_join_left_hash) +-- Right join is allowed (multi_outer_join_left_hash) SELECT min(r_custkey), max(r_custkey) FROM @@ -259,7 +259,7 @@ FROM ON (l_custkey = r_custkey AND r_custkey = 21); --- Right join should be allowed in this case as we recursively plan the distributed table (multi_outer_join_left_hash +-- Right join should be allowed in this case (multi_outer_join_left_hash) SELECT min(r_custkey), max(r_custkey) FROM diff --git a/src/test/regress/sql/multi_subquery_in_where_reference_clause.sql b/src/test/regress/sql/multi_subquery_in_where_reference_clause.sql index c99da5ee6..ab9f4a0cc 100644 --- a/src/test/regress/sql/multi_subquery_in_where_reference_clause.sql +++ b/src/test/regress/sql/multi_subquery_in_where_reference_clause.sql @@ -126,7 +126,7 @@ WHERE ORDER BY user_id LIMIT 3; --- outer join could still recur +-- outer join pushed down SELECT DISTINCT user_id FROM diff --git a/src/test/regress/sql/recurring_join_pushdown.sql b/src/test/regress/sql/recurring_join_pushdown.sql index 874f6e532..42036cd40 100644 --- a/src/test/regress/sql/recurring_join_pushdown.sql +++ b/src/test/regress/sql/recurring_join_pushdown.sql @@ -140,3 +140,12 @@ SELECT count(*) FROM (SELECT d1_1.a, r1_local.b FROM r1_local LEFT JOIN d1_local (SELECT d2_local.a, d2_local.c, r1_local.b FROM r1_local LEFT JOIN d2_local ON r1_local.a = d2_local.a) AS t2_local ON t1_local.a = t2_local.a; EXPLAIN (COSTS OFF) SELECT count(*) FROM (SELECT d1_1.a, r1.b FROM r1 LEFT JOIN d1 as d1_1 ON r1.a = d1_1.a) AS t1 LEFT JOIN (SELECT d2.a, d2.c, r1.b FROM r1 LEFT JOIN d2 ON r1.a = d2.a) AS t2 ON t1.a = t2.a; + + + -- Basic cases with RIGHT JOIN +SET client_min_messages TO DEBUG3; +SELECT count(*) FROM d1 RIGHT JOIN r1 USING (a); +SELECT count(*) FROM d1_local RIGHT JOIN r1_local USING (a); + +SELECT count(*) FROM (SELECT * FROM d1) RIGHT JOIN r1 USING (a); +SELECT count(*) FROM (SELECT * FROM d1_local) RIGHT JOIN r1_local USING (a);