From a79b4b31c9cd60d1b46882ec8fc7c0c97237682c Mon Sep 17 00:00:00 2001
From: eaydingol
Date: Thu, 14 Aug 2025 15:12:42 +0300
Subject: [PATCH] Improve lateral check and add test cases

---
 .../distributed/planner/recursive_planning.c  | 74 ++++++++++++++++++-
 .../expected/recurring_join_pushdown.out      | 31 +++++++++
 .../regress/sql/recurring_join_pushdown.sql   | 10 +++
 3 files changed, 112 insertions(+), 3 deletions(-)

diff --git a/src/backend/distributed/planner/recursive_planning.c b/src/backend/distributed/planner/recursive_planning.c
index 1ed22be64..5fcb91eab 100644
--- a/src/backend/distributed/planner/recursive_planning.c
+++ b/src/backend/distributed/planner/recursive_planning.c
@@ -201,6 +201,7 @@ static Query * CreateOuterSubquery(RangeTblEntry *rangeTableEntry,
 								   List *outerSubqueryTargetList);
 static List * GenerateRequiredColNamesFromTargetList(List *targetList);
 static char * GetRelationNameAndAliasName(RangeTblEntry *rangeTablentry);
+static bool JoinTreeContainsLateral(Node *node, List *rtable);
 #if PG_VERSION_NUM < PG_VERSION_17
 static bool hasPseudoconstantQuals(
 	RelationRestrictionContext *relationRestrictionContext);
@@ -2784,6 +2785,69 @@ CheckPushDownConditionOnInnerVar(Var *innerVar, RangeTblEntry *rte)
 }
 
 
+/*
+ * JoinTreeContainsLateral checks if the given join tree node contains a
+ * lateral range table entry. It returns true if it does, otherwise false.
+ *
+ * The node can be a RangeTblRef, a JoinExpr or a FromExpr; NULL and any other
+ * node type yield false. RangeTblRefs are looked up in the given rtable, and
+ * subquery entries are searched recursively with the subquery's own jointree
+ * and rtable.
+ */
+static bool
+JoinTreeContainsLateral(Node *node, List *rtable)
+{
+	if (node == NULL)
+	{
+		return false;
+	}
+
+	if (IsA(node, RangeTblRef))
+	{
+		RangeTblEntry *rte = rt_fetch(((RangeTblRef *) node)->rtindex, rtable);
+		if (rte == NULL)
+		{
+			return false;
+		}
+
+		if (rte->lateral)
+		{
+			return true;
+		}
+
+		/* descend into the subquery, resolving its references in its own rtable */
+		if (rte->rtekind == RTE_SUBQUERY)
+		{
+			if (rte->subquery)
+			{
+				return JoinTreeContainsLateral((Node *) rte->subquery->jointree,
+											   rte->subquery->rtable);
+			}
+		}
+		return false;
+	}
+	else if (IsA(node, JoinExpr))
+	{
+		JoinExpr *join = (JoinExpr *) node;
+		return JoinTreeContainsLateral(join->larg, rtable) ||
+			   JoinTreeContainsLateral(join->rarg, rtable);
+	}
+	else if (IsA(node, FromExpr))
+	{
+		FromExpr *fromExpr = (FromExpr *) node;
+		ListCell *lc = NULL;
+		foreach(lc, fromExpr->fromlist)
+		{
+			if (JoinTreeContainsLateral((Node *) lfirst(lc), rtable))
+			{
+				return true;
+			}
+		}
+	}
+	return false;
+}
+
+
 /*
  * CheckPushDownFeasibilityAndComputeIndexes checks if the given join expression
  * is a left outer join and if it is feasible to push down the join. If feasible,
@@ -2842,9 +2906,13 @@ CheckPushDownFeasibilityAndComputeIndexes(JoinExpr *joinExpr, Query *query,
 		return false;
 	}
 
-	RangeTblEntry *rRte = rt_fetch((((RangeTblRef *) joinExpr->rarg)->rtindex),
-								   query->rtable);
-	if (rRte && rRte->lateral)
+	/*
+	 * For now if we see any lateral join in the join tree, we return false.
+	 * This check can be improved to support the cases where the lateral reference
+	 * does not cause an error in the final planner checks.
+	 */
+	if (JoinTreeContainsLateral(joinExpr->rarg, query->rtable) ||
+		JoinTreeContainsLateral(joinExpr->larg, query->rtable))
 	{
 		ereport(DEBUG5, (errmsg(
 							 "Lateral join is not supported for pushdown in this path.")));
diff --git a/src/test/regress/expected/recurring_join_pushdown.out b/src/test/regress/expected/recurring_join_pushdown.out
index e37acccc6..696861a17 100644
--- a/src/test/regress/expected/recurring_join_pushdown.out
+++ b/src/test/regress/expected/recurring_join_pushdown.out
@@ -941,4 +941,35 @@ EXPLAIN (COSTS OFF) SELECT count(*) FROM r1 LEFT JOIN (d1 INNER JOIN d2 on d1.a
                                  ->  Function Scan on read_intermediate_result intermediate_result_1
 (34 rows)
 
+SET client_min_messages TO DEBUG3;
+-- The following query is recursively computed due to the lateral join in the subquery.
+-- Leaving the inner side of the query to query push down causes an error in deferred error
+-- checks even if it is possible to push down the whole query.
+SELECT DISTINCT sq.a FROM (
+    SELECT d1.a FROM d1 JOIN LATERAL ( SELECT * FROM r1 WHERE r1.a = d1.a ) sq2 ON true ) AS sq RIGHT JOIN r1 USING (a)
+ORDER BY sq.a
+LIMIT 1;
+DEBUG:  no shard pruning constraints on d1 found
+DEBUG:  shard count after pruning for d1: 4
+DEBUG:  Router planner cannot handle multi-shard select queries
+DEBUG:  recursively planning left side of the right join since the outer side is a recurring rel
+DEBUG:  recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel
+DEBUG:  no shard pruning constraints on d1 found
+DEBUG:  shard count after pruning for d1: 4
+DEBUG:  Router planner cannot handle multi-shard select queries
+DEBUG:  no shard pruning constraints on d1 found
+DEBUG:  shard count after pruning for d1: 4
+DEBUG:  assigned task to node localhost:xxxxx
+DEBUG:  assigned task to node localhost:xxxxx
+DEBUG:  assigned task to node localhost:xxxxx
+DEBUG:  assigned task to node localhost:xxxxx
+DEBUG:  generating subplan XXX_1 for subquery SELECT d1.a FROM (recurring_join_pushdown.d1 JOIN LATERAL (SELECT r1.a, r1.b FROM recurring_join_pushdown.r1 WHERE (r1.a OPERATOR(pg_catalog.=) d1.a)) sq2 ON (true))
+DEBUG:  Plan XXX query after replacing subqueries and CTEs: SELECT DISTINCT sq.a FROM ((SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) sq RIGHT JOIN recurring_join_pushdown.r1 USING (a)) ORDER BY sq.a LIMIT 1
+DEBUG:  Creating router plan
+ a
+---------------------------------------------------------------------
+ 1
+(1 row)
+
+SET client_min_messages TO ERROR;
 DROP SCHEMA recurring_join_pushdown CASCADE;
diff --git a/src/test/regress/sql/recurring_join_pushdown.sql b/src/test/regress/sql/recurring_join_pushdown.sql
index 20cb08899..8dec836f7 100644
--- a/src/test/regress/sql/recurring_join_pushdown.sql
+++ b/src/test/regress/sql/recurring_join_pushdown.sql
@@ -124,4 +124,14 @@ EXPLAIN (COSTS OFF) SELECT count(*) FROM r1_local LEFT JOIN d1 ON r1_local.a = d
 EXPLAIN (COSTS OFF) SELECT count(*) FROM (SELECT * FROM r1) sq LEFT JOIN d1 ON sq.a = d1.a;
 EXPLAIN (COSTS OFF) SELECT count(*) FROM r1 LEFT JOIN (d1 INNER JOIN d2 on d1.a = d2.a) on r1.a = d2.a;
 
+SET client_min_messages TO DEBUG3;
+-- The following query is recursively computed due to the lateral join in the subquery.
+-- Leaving the inner side of the query to query push down causes an error in deferred error
+-- checks even if it is possible to push down the whole query.
+SELECT DISTINCT sq.a FROM (
+    SELECT d1.a FROM d1 JOIN LATERAL ( SELECT * FROM r1 WHERE r1.a = d1.a ) sq2 ON true ) AS sq RIGHT JOIN r1 USING (a)
+ORDER BY sq.a
+LIMIT 1;
+
+SET client_min_messages TO ERROR;
 DROP SCHEMA recurring_join_pushdown CASCADE;