PG18: fix regress test failures in subquery_in_targetlist.

The failing queries all have a GROUP BY. The fix teaches the Citus recursive planner how to handle the PG18 GROUP range table entry (RTE) in the outer query:
- In recursive query planning, don't recurse into subquery expressions in a GROUP BY clause.
- In pushdown planning, flatten references to the GROUP RTE before creating the worker subquery.
- If a PARAM node points to a GROUP RTE, tunnel through to the underlying expression.
    
Fixes #8296.
pull/8301/head^2
Colm 2025-10-30 11:49:28 +00:00 committed by GitHub
parent 188c182be4
commit 458299035b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 92 additions and 29 deletions

View File

@ -512,23 +512,12 @@ PrepareInsertSelectForCitusPlanner(Query *insertSelectQuery)
bool isWrapped = false;
#if PG_VERSION_NUM >= PG_VERSION_18
/*
* PG18 is stricter about GroupRTE/GroupVar. For INSERT SELECT with a GROUP BY,
* flatten the SELECTs targetList and havingQual so Vars point to base RTEs and
* avoid Unrecognized range table id.
*/
if (selectRte->subquery->hasGroupRTE)
{
Query *selectQuery = selectRte->subquery;
selectQuery->targetList = (List *)
flatten_group_exprs(NULL, selectQuery,
(Node *) selectQuery->targetList);
selectQuery->havingQual =
flatten_group_exprs(NULL, selectQuery, selectQuery->havingQual);
}
#endif
FlattenGroupExprs(selectRte->subquery);
if (selectRte->subquery->setOperations != NULL)
{

View File

@ -297,8 +297,19 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList)
/*
 * FindNodeMatchingCheckFunctionInRangeTableList walks the given range table
 * list with FindNodeMatchingCheckFunction, passing checker as the walker
 * context, and returns the walker's result.
 */
bool
FindNodeMatchingCheckFunctionInRangeTableList(List *rtable, CheckNodeFunc checker)
{
	int rtWalkFlags = QTW_EXAMINE_RTES_BEFORE;

#if PG_VERSION_NUM >= PG_VERSION_18

	/*
	 * PG18+: do not descend into subqueries inside GROUP BY expressions;
	 * they have already been visited, as recursive planning is depth-first.
	 */
	rtWalkFlags |= QTW_IGNORE_GROUPEXPRS;
#endif

	/* pass the computed flags, so the PG18 adjustment above takes effect */
	return range_table_walker(rtable, FindNodeMatchingCheckFunction, checker,
							  rtWalkFlags);
}

View File

@ -333,7 +333,9 @@ WhereOrHavingClauseContainsSubquery(Query *query)
/*
 * TargetListContainsSubquery returns the result of running
 * FindNodeMatchingCheckFunction over the target list with the
 * IsNodeSubquery checker.
 */
bool
TargetListContainsSubquery(List *targetList)
{
	return FindNodeMatchingCheckFunction((Node *) targetList, IsNodeSubquery);
}
@ -1093,6 +1095,28 @@ DeferErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerMostQueryHasLi
}
/*
 * FlattenGroupExprs rewrites the target list and HAVING qualifier of the
 * given query so that Vars referencing the GROUP range table entry are
 * replaced by the GROUP BY expressions they stand for. Citus planning
 * relies on this when analysing the query and building the distributed
 * plan.
 *
 * The query is modified in place. The function does nothing when the query
 * has no GROUP RTE, and compiles to an empty body before PG18.
 */
void
FlattenGroupExprs(Query *queryTree)
{
#if PG_VERSION_NUM >= PG_VERSION_18
	if (!queryTree->hasGroupRTE)
	{
		/* nothing references a GROUP RTE, leave the query untouched */
		return;
	}

	Node *flattenedTargetList =
		flatten_group_exprs(NULL, queryTree, (Node *) queryTree->targetList);
	queryTree->targetList = (List *) flattenedTargetList;

	queryTree->havingQual =
		flatten_group_exprs(NULL, queryTree, queryTree->havingQual);
#endif
}
/*
* DeferErrorIfSubqueryRequiresMerge returns a deferred error if the subquery
* requires a merge step on the coordinator (e.g. limit, group by non-distribution
@ -1953,6 +1977,13 @@ static MultiNode *
SubqueryPushdownMultiNodeTree(Query *originalQuery)
{
Query *queryTree = copyObject(originalQuery);
/*
* On PG18+ we need to flatten GROUP BY expressions to ensure correct
* processing later on, such as identifying partition columns in GROUP BY.
*/
FlattenGroupExprs(queryTree);
List *targetEntryList = queryTree->targetList;
MultiCollect *subqueryCollectNode = CitusMakeNode(MultiCollect);

View File

@ -261,7 +261,6 @@ GenerateSubplansForSubqueriesAndCTEs(uint64 planId, Query *originalQuery,
*/
context.allDistributionKeysInQueryAreEqual =
AllDistributionKeysInQueryAreEqual(originalQuery, plannerRestrictionContext);
DeferredErrorMessage *error = RecursivelyPlanSubqueriesAndCTEs(originalQuery,
&context);
if (error != NULL)
@ -1123,14 +1122,10 @@ ExtractSublinkWalker(Node *node, List **sublinkList)
/*
 * ShouldRecursivelyPlanSublinks returns false when the range table of the
 * query contains a distributed table (i.e. there is a distributed table in
 * the FROM clause), and true otherwise.
 */
static bool
ShouldRecursivelyPlanSublinks(Query *query)
{
	bool hasDistributedTable =
		FindNodeMatchingCheckFunctionInRangeTableList(query->rtable,
													  IsDistributedTableRTE);

	return !hasDistributedTable;
}

View File

@ -971,6 +971,40 @@ GetVarFromAssignedParam(List *outerPlanParamsList, Param *plannerParam,
}
}
#if PG_VERSION_NUM >= PG_VERSION_18
/*
* In PG18+, the dereferenced PARAM node could be a GroupVar if the
* query has a GROUP BY. In that case, we need to make an extra
* hop to get the underlying Var from the grouping expressions.
*/
if (assignedVar != NULL)
{
Query *parse = (*rootContainingVar)->parse;
if (parse->hasGroupRTE)
{
RangeTblEntry *rte = rt_fetch(assignedVar->varno, parse->rtable);
if (rte->rtekind == RTE_GROUP)
{
Assert(assignedVar->varattno >= 1 &&
assignedVar->varattno <= list_length(rte->groupexprs));
Node *groupVar = list_nth(rte->groupexprs, assignedVar->varattno - 1);
if (IsA(groupVar, Var))
{
assignedVar = (Var *) groupVar;
}
else
{
/* todo: handle PlaceHolderVar case if needed */
ereport(DEBUG2, (errmsg(
"GroupVar maps to non-Var group expr; bailing out")));
assignedVar = NULL;
}
}
}
}
#endif
return assignedVar;
}

View File

@ -49,6 +49,6 @@ extern DeferredErrorMessage * DeferErrorIfCannotPushdownSubquery(Query *subquery
extern DeferredErrorMessage * DeferErrorIfUnsupportedUnionQuery(Query *queryTree);
extern bool IsJsonTableRTE(RangeTblEntry *rte);
extern bool IsOuterJoinExpr(Node *node);
extern void FlattenGroupExprs(Query *query);
#endif /* QUERY_PUSHDOWN_PLANNING_H */

View File

@ -265,6 +265,9 @@ DEPS = {
"subquery_in_where": TestDeps(
"minimal_schedule", ["multi_behavioral_analytics_create_table"]
),
"subquery_in_targetlist": TestDeps(
"minimal_schedule", ["multi_behavioral_analytics_create_table"]
),
}