Extend the logic to right joins

onur-leftjoin_push-improvements
eaydingol 2025-08-07 11:06:58 +03:00
parent 49d163fb63
commit e58d730e64
9 changed files with 105 additions and 26 deletions

View File

@ -310,7 +310,7 @@ DefineQualsForShardInterval(RelationShard *relationShard, int attnum, int rtinde
/* null test for the first shard */ /* null test for the first shard */
NullTest *nullTest = makeNode(NullTest); NullTest *nullTest = makeNode(NullTest);
nullTest->nulltesttype = IS_NULL; /* Check for IS NULL */ nullTest->nulltesttype = IS_NULL; /* Check for IS NULL */
nullTest->arg = (Expr *) partitionColumnVar; /* The variable to check */ nullTest->arg = (Expr *) outerTablePartitionColumnVar; /* The variable to check */
nullTest->argisrow = false; nullTest->argisrow = false;
shardIntervalBoundQuals = (Node *) make_orclause(list_make2(nullTest, shardIntervalBoundQuals = (Node *) make_orclause(list_make2(nullTest,
shardIntervalBoundQuals)); shardIntervalBoundQuals));

View File

@ -756,7 +756,8 @@ RecursivelyPlanRecurringTupleOuterJoinWalker(Node *node, Query *query,
/* <recurring> left join <distributed> */ /* <recurring> left join <distributed> */
if (leftNodeRecurs && !rightNodeRecurs) if (leftNodeRecurs && !rightNodeRecurs)
{ {
if (chainedJoin || !CheckPushDownFeasibilityLeftJoin(joinExpr, query)) if (chainedJoin || !CheckPushDownFeasibilityOuterJoin(joinExpr,
query))
{ {
ereport(DEBUG1, (errmsg("recursively planning right side of " ereport(DEBUG1, (errmsg("recursively planning right side of "
"the left join since the outer side " "the left join since the outer side "
@ -786,11 +787,21 @@ RecursivelyPlanRecurringTupleOuterJoinWalker(Node *node, Query *query,
/* <distributed> right join <recurring> */ /* <distributed> right join <recurring> */
if (!leftNodeRecurs && rightNodeRecurs) if (!leftNodeRecurs && rightNodeRecurs)
{ {
ereport(DEBUG1, (errmsg("recursively planning left side of " if (chainedJoin || !CheckPushDownFeasibilityOuterJoin(joinExpr,
"the right join since the outer side " query))
"is a recurring rel"))); {
RecursivelyPlanDistributedJoinNode(leftNode, query, ereport(DEBUG1, (errmsg("recursively planning left side of "
recursivePlanningContext); "the right join since the outer side "
"is a recurring rel")));
RecursivelyPlanDistributedJoinNode(leftNode, query,
recursivePlanningContext);
}
else
{
ereport(DEBUG3, (errmsg(
"a push down safe right join with recurring left side")));
rightNodeRecurs = false; /* right node will be pushed down */
}
} }
/* /*
@ -2665,11 +2676,13 @@ hasPseudoconstantQuals(RelationRestrictionContext *relationRestrictionContext)
/* /*
* IsPushdownSafeForRTEInLeftJoin returns true if the given range table entry * IsPushdownSafeForOuterRTEInOuterJoin returns true if the given range table entry
* is safe for pushdown. Currently, we only allow reference tables. * is safe for pushdown when it is the outer relation of a outer join when the
* inner relation is not recurring.
* Currently, we only allow reference tables.
*/ */
bool bool
IsPushdownSafeForRTEInLeftJoin(RangeTblEntry *rte) IsPushdownSafeForOuterRTEInOuterJoin(RangeTblEntry *rte)
{ {
if (IsCitusTable(rte->relid) && IsCitusTableType(rte->relid, REFERENCE_TABLE)) if (IsCitusTable(rte->relid) && IsCitusTableType(rte->relid, REFERENCE_TABLE))
{ {
@ -2778,8 +2791,7 @@ CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
return false; return false;
} }
/* TODO: generalize to right joins */ if (joinExpr->jointype != JOIN_LEFT && joinExpr->jointype != JOIN_RIGHT)
if (joinExpr->jointype != JOIN_LEFT)
{ {
return false; return false;
} }
@ -2799,10 +2811,18 @@ CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
return false; return false;
} }
*outerRtIndex = (((RangeTblRef *) joinExpr->larg)->rtindex); if (joinExpr->jointype == JOIN_LEFT)
{
*outerRtIndex = (((RangeTblRef *) joinExpr->larg)->rtindex);
}
else /* JOIN_RIGHT */
{
*outerRtIndex = (((RangeTblRef *) joinExpr->rarg)->rtindex);
}
*outerRte = rt_fetch(*outerRtIndex, query->rtable); *outerRte = rt_fetch(*outerRtIndex, query->rtable);
if (!IsPushdownSafeForRTEInLeftJoin(*outerRte)) if (!IsPushdownSafeForOuterRTEInOuterJoin(*outerRte))
{ {
return false; return false;
} }
@ -2902,7 +2922,7 @@ CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
* See CheckPushDownFeasibilityAndComputeIndexes for more details. * See CheckPushDownFeasibilityAndComputeIndexes for more details.
*/ */
bool bool
CheckPushDownFeasibilityLeftJoin(JoinExpr *joinExpr, Query *query) CheckPushDownFeasibilityOuterJoin(JoinExpr *joinExpr, Query *query)
{ {
int outerRtIndex; int outerRtIndex;
RangeTblEntry *outerRte = NULL; RangeTblEntry *outerRte = NULL;

View File

@ -52,13 +52,13 @@ extern bool IsRecursivelyPlannableRelation(RangeTblEntry *rangeTableEntry);
extern bool IsRelationLocalTableOrMatView(Oid relationId); extern bool IsRelationLocalTableOrMatView(Oid relationId);
extern bool ContainsReferencesToOuterQuery(Query *query); extern bool ContainsReferencesToOuterQuery(Query *query);
extern void UpdateVarNosInNode(Node *node, Index newVarNo); extern void UpdateVarNosInNode(Node *node, Index newVarNo);
extern bool IsPushdownSafeForRTEInLeftJoin(RangeTblEntry *rte); extern bool IsPushdownSafeForOuterRTEInOuterJoin(RangeTblEntry *rte);
extern bool CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query, extern bool CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
int *outerRtIndex, int *outerRtIndex,
RangeTblEntry **outerRte, RangeTblEntry **outerRte,
RangeTblEntry **distRte, RangeTblEntry **distRte,
int *attnum); int *attnum);
extern bool CheckPushDownFeasibilityLeftJoin(JoinExpr *joinExpr, Query *query); extern bool CheckPushDownFeasibilityOuterJoin(JoinExpr *joinExpr, Query *query);
bool ResolveBaseVarFromSubquery(Var *var, Query *query, Var **baseVar, bool ResolveBaseVarFromSubquery(Var *var, Query *query, Var **baseVar,
RangeTblEntry **baseRte); RangeTblEntry **baseRte);
bool CheckPushDownConditionOnInnerVar(Var *innervar, RangeTblEntry *rte); bool CheckPushDownConditionOnInnerVar(Var *innervar, RangeTblEntry *rte);

View File

@ -208,12 +208,11 @@ FROM
20 | 0 20 | 0
(1 row) (1 row)
-- Right join is allowed as we recursively plan the distributed table (multi_outer_join_left_hash) -- Right join is allowed (multi_outer_join_left_hash)
SELECT SELECT
min(r_custkey), max(r_custkey) min(r_custkey), max(r_custkey)
FROM FROM
multi_outer_join_left_hash a RIGHT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey); multi_outer_join_left_hash a RIGHT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey);
LOG: join order: [ "multi_outer_join_left_hash" ]
min | max min | max
--------------------------------------------------------------------- ---------------------------------------------------------------------
1 | 15 1 | 15
@ -323,12 +322,11 @@ FROM
25 | 1 25 | 1
(1 row) (1 row)
-- Right join should be allowed in this case as we recursively plan the distributed table (multi_outer_join_left_hash -- Right join should be allowed in this case (multi_outer_join_left_hash)
SELECT SELECT
min(r_custkey), max(r_custkey) min(r_custkey), max(r_custkey)
FROM FROM
multi_outer_join_left_hash a RIGHT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey); multi_outer_join_left_hash a RIGHT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey);
LOG: join order: [ "multi_outer_join_left_hash" ]
min | max min | max
--------------------------------------------------------------------- ---------------------------------------------------------------------
11 | 30 11 | 30

View File

@ -146,7 +146,7 @@ LIMIT 3;
3 3
(3 rows) (3 rows)
-- outer join could still recur -- outer join pushed down
SELECT SELECT
DISTINCT user_id DISTINCT user_id
FROM FROM
@ -162,7 +162,13 @@ WHERE
) )
ORDER BY user_id ORDER BY user_id
LIMIT 3; LIMIT 3;
ERROR: correlated subqueries are not supported when the FROM clause contains a CTE or subquery user_id
---------------------------------------------------------------------
1
2
3
(3 rows)
-- subqueries in WHERE with IN operator without equality -- subqueries in WHERE with IN operator without equality
SELECT SELECT
users_table.user_id, count(*) users_table.user_id, count(*)

View File

@ -967,3 +967,49 @@ DEBUG: assigned task to node localhost:xxxxx
Filter: ((a IS NULL) OR ((btint4cmp('-2147483648'::integer, hashint4(a)) < 0) AND (btint4cmp(hashint4(a), '-1073741825'::integer) <= 0))) Filter: ((a IS NULL) OR ((btint4cmp('-2147483648'::integer, hashint4(a)) < 0) AND (btint4cmp(hashint4(a), '-1073741825'::integer) <= 0)))
(26 rows) (26 rows)
-- Basic cases with RIGHT JOIN
SET client_min_messages TO DEBUG3;
SELECT count(*) FROM d1 RIGHT JOIN r1 USING (a);
DEBUG: no shard pruning constraints on d1 found
DEBUG: shard count after pruning for d1: 4
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: a push down safe right join with recurring left side
DEBUG: no shard pruning constraints on d1 found
DEBUG: shard count after pruning for d1: 4
DEBUG: assigned task to node localhost:xxxxx
DEBUG: assigned task to node localhost:xxxxx
DEBUG: assigned task to node localhost:xxxxx
DEBUG: assigned task to node localhost:xxxxx
count
---------------------------------------------------------------------
21
(1 row)
SELECT count(*) FROM d1_local RIGHT JOIN r1_local USING (a);
count
---------------------------------------------------------------------
21
(1 row)
SELECT count(*) FROM (SELECT * FROM d1) RIGHT JOIN r1 USING (a);
DEBUG: no shard pruning constraints on d1 found
DEBUG: shard count after pruning for d1: 4
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: a push down safe right join with recurring left side
DEBUG: no shard pruning constraints on d1 found
DEBUG: shard count after pruning for d1: 4
DEBUG: assigned task to node localhost:xxxxx
DEBUG: assigned task to node localhost:xxxxx
DEBUG: assigned task to node localhost:xxxxx
DEBUG: assigned task to node localhost:xxxxx
count
---------------------------------------------------------------------
21
(1 row)
SELECT count(*) FROM (SELECT * FROM d1_local) RIGHT JOIN r1_local USING (a);
count
---------------------------------------------------------------------
21
(1 row)

View File

@ -159,7 +159,7 @@ FROM
multi_outer_join_left_hash a LEFT JOIN multi_outer_join_right_reference b multi_outer_join_left_hash a LEFT JOIN multi_outer_join_right_reference b
ON (l_custkey = r_custkey AND l_custkey = -1 /* nonexistant */); ON (l_custkey = r_custkey AND l_custkey = -1 /* nonexistant */);
-- Right join is allowed as we recursively plan the distributed table (multi_outer_join_left_hash) -- Right join is allowed (multi_outer_join_left_hash)
SELECT SELECT
min(r_custkey), max(r_custkey) min(r_custkey), max(r_custkey)
FROM FROM
@ -259,7 +259,7 @@ FROM
ON (l_custkey = r_custkey AND r_custkey = 21); ON (l_custkey = r_custkey AND r_custkey = 21);
-- Right join should be allowed in this case as we recursively plan the distributed table (multi_outer_join_left_hash -- Right join should be allowed in this case (multi_outer_join_left_hash)
SELECT SELECT
min(r_custkey), max(r_custkey) min(r_custkey), max(r_custkey)
FROM FROM

View File

@ -126,7 +126,7 @@ WHERE
ORDER BY user_id ORDER BY user_id
LIMIT 3; LIMIT 3;
-- outer join could still recur -- outer join pushed down
SELECT SELECT
DISTINCT user_id DISTINCT user_id
FROM FROM

View File

@ -140,3 +140,12 @@ SELECT count(*) FROM (SELECT d1_1.a, r1_local.b FROM r1_local LEFT JOIN d1_local
(SELECT d2_local.a, d2_local.c, r1_local.b FROM r1_local LEFT JOIN d2_local ON r1_local.a = d2_local.a) AS t2_local ON t1_local.a = t2_local.a; (SELECT d2_local.a, d2_local.c, r1_local.b FROM r1_local LEFT JOIN d2_local ON r1_local.a = d2_local.a) AS t2_local ON t1_local.a = t2_local.a;
EXPLAIN (COSTS OFF) SELECT count(*) FROM (SELECT d1_1.a, r1.b FROM r1 LEFT JOIN d1 as d1_1 ON r1.a = d1_1.a) AS t1 LEFT JOIN EXPLAIN (COSTS OFF) SELECT count(*) FROM (SELECT d1_1.a, r1.b FROM r1 LEFT JOIN d1 as d1_1 ON r1.a = d1_1.a) AS t1 LEFT JOIN
(SELECT d2.a, d2.c, r1.b FROM r1 LEFT JOIN d2 ON r1.a = d2.a) AS t2 ON t1.a = t2.a; (SELECT d2.a, d2.c, r1.b FROM r1 LEFT JOIN d2 ON r1.a = d2.a) AS t2 ON t1.a = t2.a;
-- Basic cases with RIGHT JOIN
SET client_min_messages TO DEBUG3;
SELECT count(*) FROM d1 RIGHT JOIN r1 USING (a);
SELECT count(*) FROM d1_local RIGHT JOIN r1_local USING (a);
SELECT count(*) FROM (SELECT * FROM d1) RIGHT JOIN r1 USING (a);
SELECT count(*) FROM (SELECT * FROM d1_local) RIGHT JOIN r1_local USING (a);