Extend the logic to right joins

onur-leftjoin_push-improvements
eaydingol 2025-08-07 11:06:58 +03:00
parent 49d163fb63
commit e58d730e64
9 changed files with 105 additions and 26 deletions

View File

@ -310,7 +310,7 @@ DefineQualsForShardInterval(RelationShard *relationShard, int attnum, int rtinde
/* null test for the first shard */
NullTest *nullTest = makeNode(NullTest);
nullTest->nulltesttype = IS_NULL; /* Check for IS NULL */
nullTest->arg = (Expr *) partitionColumnVar; /* The variable to check */
nullTest->arg = (Expr *) outerTablePartitionColumnVar; /* The variable to check */
nullTest->argisrow = false;
shardIntervalBoundQuals = (Node *) make_orclause(list_make2(nullTest,
shardIntervalBoundQuals));

View File

@ -756,7 +756,8 @@ RecursivelyPlanRecurringTupleOuterJoinWalker(Node *node, Query *query,
/* <recurring> left join <distributed> */
if (leftNodeRecurs && !rightNodeRecurs)
{
if (chainedJoin || !CheckPushDownFeasibilityLeftJoin(joinExpr, query))
if (chainedJoin || !CheckPushDownFeasibilityOuterJoin(joinExpr,
query))
{
ereport(DEBUG1, (errmsg("recursively planning right side of "
"the left join since the outer side "
@ -786,11 +787,21 @@ RecursivelyPlanRecurringTupleOuterJoinWalker(Node *node, Query *query,
/* <distributed> right join <recurring> */
if (!leftNodeRecurs && rightNodeRecurs)
{
ereport(DEBUG1, (errmsg("recursively planning left side of "
"the right join since the outer side "
"is a recurring rel")));
RecursivelyPlanDistributedJoinNode(leftNode, query,
recursivePlanningContext);
if (chainedJoin || !CheckPushDownFeasibilityOuterJoin(joinExpr,
query))
{
ereport(DEBUG1, (errmsg("recursively planning left side of "
"the right join since the outer side "
"is a recurring rel")));
RecursivelyPlanDistributedJoinNode(leftNode, query,
recursivePlanningContext);
}
else
{
ereport(DEBUG3, (errmsg(
"a push down safe right join with recurring left side")));
rightNodeRecurs = false; /* right node will be pushed down */
}
}
/*
@ -2665,11 +2676,13 @@ hasPseudoconstantQuals(RelationRestrictionContext *relationRestrictionContext)
/*
* IsPushdownSafeForRTEInLeftJoin returns true if the given range table entry
* is safe for pushdown. Currently, we only allow reference tables.
* IsPushdownSafeForOuterRTEInOuterJoin returns true if the given range table entry
* is safe for pushdown when it is the outer relation of a outer join when the
* inner relation is not recurring.
* Currently, we only allow reference tables.
*/
bool
IsPushdownSafeForRTEInLeftJoin(RangeTblEntry *rte)
IsPushdownSafeForOuterRTEInOuterJoin(RangeTblEntry *rte)
{
if (IsCitusTable(rte->relid) && IsCitusTableType(rte->relid, REFERENCE_TABLE))
{
@ -2778,8 +2791,7 @@ CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
return false;
}
/* TODO: generalize to right joins */
if (joinExpr->jointype != JOIN_LEFT)
if (joinExpr->jointype != JOIN_LEFT && joinExpr->jointype != JOIN_RIGHT)
{
return false;
}
@ -2799,10 +2811,18 @@ CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
return false;
}
*outerRtIndex = (((RangeTblRef *) joinExpr->larg)->rtindex);
if (joinExpr->jointype == JOIN_LEFT)
{
*outerRtIndex = (((RangeTblRef *) joinExpr->larg)->rtindex);
}
else /* JOIN_RIGHT */
{
*outerRtIndex = (((RangeTblRef *) joinExpr->rarg)->rtindex);
}
*outerRte = rt_fetch(*outerRtIndex, query->rtable);
if (!IsPushdownSafeForRTEInLeftJoin(*outerRte))
if (!IsPushdownSafeForOuterRTEInOuterJoin(*outerRte))
{
return false;
}
@ -2902,7 +2922,7 @@ CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
* See CheckPushDownFeasibilityAndComputeIndexes for more details.
*/
bool
CheckPushDownFeasibilityLeftJoin(JoinExpr *joinExpr, Query *query)
CheckPushDownFeasibilityOuterJoin(JoinExpr *joinExpr, Query *query)
{
int outerRtIndex;
RangeTblEntry *outerRte = NULL;

View File

@ -52,13 +52,13 @@ extern bool IsRecursivelyPlannableRelation(RangeTblEntry *rangeTableEntry);
extern bool IsRelationLocalTableOrMatView(Oid relationId);
extern bool ContainsReferencesToOuterQuery(Query *query);
extern void UpdateVarNosInNode(Node *node, Index newVarNo);
extern bool IsPushdownSafeForRTEInLeftJoin(RangeTblEntry *rte);
extern bool IsPushdownSafeForOuterRTEInOuterJoin(RangeTblEntry *rte);
extern bool CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
int *outerRtIndex,
RangeTblEntry **outerRte,
RangeTblEntry **distRte,
int *attnum);
extern bool CheckPushDownFeasibilityLeftJoin(JoinExpr *joinExpr, Query *query);
extern bool CheckPushDownFeasibilityOuterJoin(JoinExpr *joinExpr, Query *query);
bool ResolveBaseVarFromSubquery(Var *var, Query *query, Var **baseVar,
RangeTblEntry **baseRte);
bool CheckPushDownConditionOnInnerVar(Var *innervar, RangeTblEntry *rte);

View File

@ -208,12 +208,11 @@ FROM
20 | 0
(1 row)
-- Right join is allowed as we recursively plan the distributed table (multi_outer_join_left_hash)
-- Right join is allowed (multi_outer_join_left_hash)
SELECT
min(r_custkey), max(r_custkey)
FROM
multi_outer_join_left_hash a RIGHT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey);
LOG: join order: [ "multi_outer_join_left_hash" ]
min | max
---------------------------------------------------------------------
1 | 15
@ -323,12 +322,11 @@ FROM
25 | 1
(1 row)
-- Right join should be allowed in this case as we recursively plan the distributed table (multi_outer_join_left_hash
-- Right join should be allowed in this case (multi_outer_join_left_hash)
SELECT
min(r_custkey), max(r_custkey)
FROM
multi_outer_join_left_hash a RIGHT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey);
LOG: join order: [ "multi_outer_join_left_hash" ]
min | max
---------------------------------------------------------------------
11 | 30

View File

@ -146,7 +146,7 @@ LIMIT 3;
3
(3 rows)
-- outer join could still recur
-- outer join pushed down
SELECT
DISTINCT user_id
FROM
@ -162,7 +162,13 @@ WHERE
)
ORDER BY user_id
LIMIT 3;
ERROR: correlated subqueries are not supported when the FROM clause contains a CTE or subquery
user_id
---------------------------------------------------------------------
1
2
3
(3 rows)
-- subqueries in WHERE with IN operator without equality
SELECT
users_table.user_id, count(*)

View File

@ -967,3 +967,49 @@ DEBUG: assigned task to node localhost:xxxxx
Filter: ((a IS NULL) OR ((btint4cmp('-2147483648'::integer, hashint4(a)) < 0) AND (btint4cmp(hashint4(a), '-1073741825'::integer) <= 0)))
(26 rows)
-- Basic cases with RIGHT JOIN
SET client_min_messages TO DEBUG3;
SELECT count(*) FROM d1 RIGHT JOIN r1 USING (a);
DEBUG: no shard pruning constraints on d1 found
DEBUG: shard count after pruning for d1: 4
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: a push down safe right join with recurring left side
DEBUG: no shard pruning constraints on d1 found
DEBUG: shard count after pruning for d1: 4
DEBUG: assigned task to node localhost:xxxxx
DEBUG: assigned task to node localhost:xxxxx
DEBUG: assigned task to node localhost:xxxxx
DEBUG: assigned task to node localhost:xxxxx
count
---------------------------------------------------------------------
21
(1 row)
SELECT count(*) FROM d1_local RIGHT JOIN r1_local USING (a);
count
---------------------------------------------------------------------
21
(1 row)
SELECT count(*) FROM (SELECT * FROM d1) RIGHT JOIN r1 USING (a);
DEBUG: no shard pruning constraints on d1 found
DEBUG: shard count after pruning for d1: 4
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: a push down safe right join with recurring left side
DEBUG: no shard pruning constraints on d1 found
DEBUG: shard count after pruning for d1: 4
DEBUG: assigned task to node localhost:xxxxx
DEBUG: assigned task to node localhost:xxxxx
DEBUG: assigned task to node localhost:xxxxx
DEBUG: assigned task to node localhost:xxxxx
count
---------------------------------------------------------------------
21
(1 row)
SELECT count(*) FROM (SELECT * FROM d1_local) RIGHT JOIN r1_local USING (a);
count
---------------------------------------------------------------------
21
(1 row)

View File

@ -159,7 +159,7 @@ FROM
multi_outer_join_left_hash a LEFT JOIN multi_outer_join_right_reference b
ON (l_custkey = r_custkey AND l_custkey = -1 /* nonexistant */);
-- Right join is allowed as we recursively plan the distributed table (multi_outer_join_left_hash)
-- Right join is allowed (multi_outer_join_left_hash)
SELECT
min(r_custkey), max(r_custkey)
FROM
@ -259,7 +259,7 @@ FROM
ON (l_custkey = r_custkey AND r_custkey = 21);
-- Right join should be allowed in this case as we recursively plan the distributed table (multi_outer_join_left_hash
-- Right join should be allowed in this case (multi_outer_join_left_hash)
SELECT
min(r_custkey), max(r_custkey)
FROM

View File

@ -126,7 +126,7 @@ WHERE
ORDER BY user_id
LIMIT 3;
-- outer join could still recur
-- outer join pushed down
SELECT
DISTINCT user_id
FROM

View File

@ -140,3 +140,12 @@ SELECT count(*) FROM (SELECT d1_1.a, r1_local.b FROM r1_local LEFT JOIN d1_local
(SELECT d2_local.a, d2_local.c, r1_local.b FROM r1_local LEFT JOIN d2_local ON r1_local.a = d2_local.a) AS t2_local ON t1_local.a = t2_local.a;
EXPLAIN (COSTS OFF) SELECT count(*) FROM (SELECT d1_1.a, r1.b FROM r1 LEFT JOIN d1 as d1_1 ON r1.a = d1_1.a) AS t1 LEFT JOIN
(SELECT d2.a, d2.c, r1.b FROM r1 LEFT JOIN d2 ON r1.a = d2.a) AS t2 ON t1.a = t2.a;
-- Basic cases with RIGHT JOIN
SET client_min_messages TO DEBUG3;
SELECT count(*) FROM d1 RIGHT JOIN r1 USING (a);
SELECT count(*) FROM d1_local RIGHT JOIN r1_local USING (a);
SELECT count(*) FROM (SELECT * FROM d1) RIGHT JOIN r1 USING (a);
SELECT count(*) FROM (SELECT * FROM d1_local) RIGHT JOIN r1_local USING (a);