From e02d49c22077a79f1260b657740c5ba7a560d16d Mon Sep 17 00:00:00 2001 From: Mehmet Yilmaz Date: Tue, 25 Mar 2025 10:50:49 +0000 Subject: [PATCH] Refactor CTE inlining logic to improve clarity and remove unused functions --- citus-tools | 1 + src/backend/distributed/planner/cte_inline.c | 93 ++++++------------- .../planner/multi_router_planner.c | 2 - src/include/distributed/distributed_planner.h | 1 - 4 files changed, 29 insertions(+), 68 deletions(-) create mode 160000 citus-tools diff --git a/citus-tools b/citus-tools new file mode 160000 index 000000000..3376bd684 --- /dev/null +++ b/citus-tools @@ -0,0 +1 @@ +Subproject commit 3376bd6845f0614908ed304f5033bd644c82d3bf diff --git a/src/backend/distributed/planner/cte_inline.c b/src/backend/distributed/planner/cte_inline.c index ff2190aa7..63be41723 100644 --- a/src/backend/distributed/planner/cte_inline.c +++ b/src/backend/distributed/planner/cte_inline.c @@ -16,7 +16,6 @@ #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" #include "rewrite/rewriteManip.h" -#include "distributed/citus_ruleutils.h" /* Ensure this header declares contain_nextval_expression_walker */ #include "pg_version_compat.h" #include "pg_version_constants.h" @@ -49,9 +48,6 @@ static bool RecursivelyInlineCteWalker(Node *node, void *context); static void InlineCTEsInQueryTree(Query *query); static bool QueryTreeContainsInlinableCteWalker(Node *node, void *context); -static bool -QueryContainsNextval2(Query *q); - /* * RecursivelyInlineCtesInQueryTree gets a query and recursively traverses the @@ -97,77 +93,44 @@ RecursivelyInlineCteWalker(Node *node, void *context) } +/* + * InlineCTEsInQueryTree gets a query tree and tries to inline CTEs as subqueries + * in the query tree. + * + * Most of the code is coming from PostgreSQL's CTE inlining logic, there are very + * few additions that Citus added, which are already commented in the code. + */ void InlineCTEsInQueryTree(Query *query) { - ListCell *cteCell = NULL; - List *copyOfCteList = list_copy(query->cteList); + ListCell *cteCell = NULL; - foreach(cteCell, copyOfCteList) - { - CommonTableExpr *cte = (CommonTableExpr *) lfirst(cteCell); + /* iterate on the copy of the list because we'll be modifying query->cteList */ + List *copyOfCteList = list_copy(query->cteList); + foreach(cteCell, copyOfCteList) + { + CommonTableExpr *cte = (CommonTableExpr *) lfirst(cteCell); - /* Extra check: if the CTE query contains nextval, do not inline it */ - if (QueryContainsNextval2((Query *) cte->ctequery)) - { - elog(DEBUG1, "CTE \"%s\" contains nextval; materializing it instead of inlining", cte->ctename); - continue; - } + /* + * First, make sure that Postgres is OK to inline the CTE. Later, check for + * distributed query planning constraints that might prevent inlining. + */ + if (PostgreSQLCTEInlineCondition(cte, query->commandType)) + { + elog(DEBUG1, "CTE %s is going to be inlined via " + "distributed planning", cte->ctename); - /* Standard PostgreSQL inline condition */ - if (PostgreSQLCTEInlineCondition(cte, query->commandType)) - { - elog(DEBUG1, "CTE \"%s\" is going to be inlined via distributed planning", cte->ctename); - inline_cte(query, cte); + /* do the hard work of cte inlining */ + inline_cte(query, cte); - /* Reset reference count and remove from cteList */ - cte->cterefcount = 0; - query->cteList = list_delete_ptr(query->cteList, cte); - } - } + /* clean-up the necessary fields for distributed planning */ + cte->cterefcount = 0; + query->cteList = list_delete_ptr(query->cteList, cte); + } + } } -static bool -QueryContainsNextval2(Query *q) -{ - ListCell *lc = NULL; - - /* Check the target list */ - foreach(lc, q->targetList) - { - TargetEntry *tle = (TargetEntry *) lfirst(lc); - if (contain_nextval_expression_walker((Node *) tle->expr, NULL)) - return true; - } - - /* Check the WHERE clause (jointree quals) */ - if (q->jointree && q->jointree->quals && - contain_nextval_expression_walker((Node *) q->jointree->quals, NULL)) - return true; - - /* Recursively check subqueries in the range table */ - foreach(lc, q->rtable) - { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); - if (rte->rtekind == RTE_SUBQUERY && rte->subquery != NULL) - { - if (QueryContainsNextval2(rte->subquery)) - return true; - } - } - - /* Optionally, check havingQual if needed */ - if (q->havingQual && - contain_nextval_expression_walker((Node *) q->havingQual, NULL)) - return true; - - return false; -} - - - - /* * QueryTreeContainsInlinableCTE recursively traverses the queryTree, and returns true * if any of the (sub)queries in the queryTree contains at least one CTE. diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c index ba6d63285..a6815fbcd 100644 --- a/src/backend/distributed/planner/multi_router_planner.c +++ b/src/backend/distributed/planner/multi_router_planner.c @@ -3888,8 +3888,6 @@ QueryContainsNextval(Query *query) } } - /* 5) Possibly check setOperations, windowClause offsets, etc. in a more robust approach. */ - /* If none found, return false */ return false; } diff --git a/src/include/distributed/distributed_planner.h b/src/include/distributed/distributed_planner.h index 5832fb7a2..633ae0435 100644 --- a/src/include/distributed/distributed_planner.h +++ b/src/include/distributed/distributed_planner.h @@ -251,7 +251,6 @@ extern PlannedStmt * FinalizePlan(PlannedStmt *localPlan, struct DistributedPlan *distributedPlan); extern bool ContainsSingleShardTable(Query *query); extern RTEListProperties * GetRTEListPropertiesForQuery(Query *query); -extern List *ExtractRangeTableEntryList(Query *query); extern bool ListContainsDistributedTableRTE(List *rangeTableList, bool *maybeHasForeignDistributedTable);