From 458299035bd2e492cebad12136c2fa868748e416 Mon Sep 17 00:00:00 2001 From: Colm Date: Thu, 30 Oct 2025 11:49:28 +0000 Subject: [PATCH] PG18: fix regress test failures in subquery_in_targetlist. The failing queries all have a GROUP BY, and the fix teaches the Citus recursive planner how to handle a PG18 GROUP range table in the outer query: - In recursive query planning, don't recurse into subquery expressions in a GROUP BY clause - Flatten references to a GROUP rte before creating the worker subquery in pushdown planning - If a PARAM node points to a GROUP rte then tunnel through to the underlying expression Fixes #8296. --- .../planner/insert_select_planner.c | 23 ++++--------- .../planner/multi_logical_planner.c | 13 ++++++- .../planner/query_pushdown_planning.c | 33 +++++++++++++++++- .../distributed/planner/recursive_planning.c | 13 +++---- .../relation_restriction_equivalence.c | 34 +++++++++++++++++++ .../distributed/query_pushdown_planning.h | 2 +- src/test/regress/citus_tests/run_test.py | 3 ++ 7 files changed, 92 insertions(+), 29 deletions(-) diff --git a/src/backend/distributed/planner/insert_select_planner.c b/src/backend/distributed/planner/insert_select_planner.c index 1cd4161b6..554ac631e 100644 --- a/src/backend/distributed/planner/insert_select_planner.c +++ b/src/backend/distributed/planner/insert_select_planner.c @@ -512,23 +512,12 @@ PrepareInsertSelectForCitusPlanner(Query *insertSelectQuery) bool isWrapped = false; -#if PG_VERSION_NUM >= PG_VERSION_18 - -/* - * PG18 is stricter about GroupRTE/GroupVar. For INSERT … SELECT with a GROUP BY, - * flatten the SELECT’s targetList and havingQual so Vars point to base RTEs and - * avoid Unrecognized range table id. - */ - if (selectRte->subquery->hasGroupRTE) - { - Query *selectQuery = selectRte->subquery; - selectQuery->targetList = (List *) - flatten_group_exprs(NULL, selectQuery, - (Node *) selectQuery->targetList); - selectQuery->havingQual = - flatten_group_exprs(NULL, selectQuery, selectQuery->havingQual); - } -#endif + /* + * PG18 is stricter about GroupRTE/GroupVar. For INSERT … SELECT with a GROUP BY, + * flatten the SELECT’s targetList and havingQual so Vars point to base RTEs and + * avoid Unrecognized range table id. + */ + FlattenGroupExprs(selectRte->subquery); if (selectRte->subquery->setOperations != NULL) { diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index e4141644f..8b50efc1e 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -297,8 +297,19 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList) bool FindNodeMatchingCheckFunctionInRangeTableList(List *rtable, CheckNodeFunc checker) { + int rtWalkFlags = QTW_EXAMINE_RTES_BEFORE; + +#if PG_VERSION_NUM >= PG_VERSION_18 + + /* + * PG18+: Do not descend into GROUP BY expressions subqueries, they + * have already been visited as recursive planning is depth-first. + */ + rtWalkFlags |= QTW_IGNORE_GROUPEXPRS; +#endif + return range_table_walker(rtable, FindNodeMatchingCheckFunction, checker, - QTW_EXAMINE_RTES_BEFORE); + rtWalkFlags); } diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index f7232e2bb..b94412f2b 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -333,7 +333,9 @@ WhereOrHavingClauseContainsSubquery(Query *query) bool TargetListContainsSubquery(List *targetList) { - return FindNodeMatchingCheckFunction((Node *) targetList, IsNodeSubquery); + bool hasSubquery = FindNodeMatchingCheckFunction((Node *) targetList, IsNodeSubquery); + + return hasSubquery; } @@ -1093,6 +1095,28 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi } +/* + * FlattenGroupExprs flattens the GROUP BY expressions in the query tree + * by replacing VAR nodes referencing the GROUP range table with the actual + * GROUP BY expression. This is used by Citus planning to ensure correctness + * when analysing and building the distributed plan. + */ +void +FlattenGroupExprs(Query *queryTree) +{ +#if PG_VERSION_NUM >= PG_VERSION_18 + if (queryTree->hasGroupRTE) + { + queryTree->targetList = (List *) + flatten_group_exprs(NULL, queryTree, + (Node *) queryTree->targetList); + queryTree->havingQual = + flatten_group_exprs(NULL, queryTree, queryTree->havingQual); + } +#endif +} + + /* * DeferErrorIfSubqueryRequiresMerge returns a deferred error if the subquery * requires a merge step on the coordinator (e.g. limit, group by non-distribution @@ -1953,6 +1977,13 @@ static MultiNode * SubqueryPushdownMultiNodeTree(Query *originalQuery) { Query *queryTree = copyObject(originalQuery); + + /* + * PG18+ need to flatten GROUP BY expressions to ensure correct processing + * later on, such as identification of partition columns in GROUP BY. + */ + FlattenGroupExprs(queryTree); + List *targetEntryList = queryTree->targetList; MultiCollect *subqueryCollectNode = CitusMakeNode(MultiCollect); diff --git a/src/backend/distributed/planner/recursive_planning.c b/src/backend/distributed/planner/recursive_planning.c index 07aa442b0..139b30231 100644 --- a/src/backend/distributed/planner/recursive_planning.c +++ b/src/backend/distributed/planner/recursive_planning.c @@ -261,7 +261,6 @@ GenerateSubplansForSubqueriesAndCTEs(uint64 planId, Query *originalQuery, */ context.allDistributionKeysInQueryAreEqual = AllDistributionKeysInQueryAreEqual(originalQuery, plannerRestrictionContext); - DeferredErrorMessage *error = RecursivelyPlanSubqueriesAndCTEs(originalQuery, &context); if (error != NULL) @@ -1123,14 +1122,10 @@ ExtractSublinkWalker(Node *node, List **sublinkList) static bool ShouldRecursivelyPlanSublinks(Query *query) { - if (FindNodeMatchingCheckFunctionInRangeTableList(query->rtable, - IsDistributedTableRTE)) - { - /* there is a distributed table in the FROM clause */ - return false; - } - - return true; + bool hasDistributedTable = (FindNodeMatchingCheckFunctionInRangeTableList( + query->rtable, + IsDistributedTableRTE)); + return !hasDistributedTable; } diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index c657c7c03..5a63503f0 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -971,6 +971,40 @@ GetVarFromAssignedParam(List *outerPlanParamsList, Param *plannerParam, } } +#if PG_VERSION_NUM >= PG_VERSION_18 + + /* + * In PG18+, the dereferenced PARAM node could be a GroupVar if the + * query has a GROUP BY. In that case, we need to make an extra + * hop to get the underlying Var from the grouping expressions. + */ + if (assignedVar != NULL) + { + Query *parse = (*rootContainingVar)->parse; + if (parse->hasGroupRTE) + { + RangeTblEntry *rte = rt_fetch(assignedVar->varno, parse->rtable); + if (rte->rtekind == RTE_GROUP) + { + Assert(assignedVar->varattno >= 1 && + assignedVar->varattno <= list_length(rte->groupexprs)); + Node *groupVar = list_nth(rte->groupexprs, assignedVar->varattno - 1); + if (IsA(groupVar, Var)) + { + assignedVar = (Var *) groupVar; + } + else + { + /* todo: handle PlaceHolderVar case if needed */ + ereport(DEBUG2, (errmsg( + "GroupVar maps to non-Var group expr; bailing out"))); + assignedVar = NULL; + } + } + } + } +#endif + return assignedVar; } diff --git a/src/include/distributed/query_pushdown_planning.h b/src/include/distributed/query_pushdown_planning.h index ba92a0462..287c8da62 100644 --- a/src/include/distributed/query_pushdown_planning.h +++ b/src/include/distributed/query_pushdown_planning.h @@ -49,6 +49,6 @@ extern DeferredErrorMessage * DeferErrorIfCannotPushdownSubquery(Query *subquery extern DeferredErrorMessage * DeferErrorIfUnsupportedUnionQuery(Query *queryTree); extern bool IsJsonTableRTE(RangeTblEntry *rte); extern bool IsOuterJoinExpr(Node *node); - +extern void FlattenGroupExprs(Query *query); #endif /* QUERY_PUSHDOWN_PLANNING_H */ diff --git a/src/test/regress/citus_tests/run_test.py b/src/test/regress/citus_tests/run_test.py index e5a567af8..03384ac28 100755 --- a/src/test/regress/citus_tests/run_test.py +++ b/src/test/regress/citus_tests/run_test.py @@ -265,6 +265,9 @@ DEPS = { "subquery_in_where": TestDeps( "minimal_schedule", ["multi_behavioral_analytics_create_table"] ), + "subquery_in_targetlist": TestDeps( + "minimal_schedule", ["multi_behavioral_analytics_create_table"] + ), }