From b63572d72ff66bafb8171118a3120310feb22e7d Mon Sep 17 00:00:00 2001 From: Mehmet YILMAZ Date: Wed, 5 Nov 2025 14:08:58 +0300 Subject: [PATCH] PG18 deparser: map Vars through JOIN aliases (fixes whole-row join column names) (#8300) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fixes #8278 Please check issue: https://github.com/citusdata/citus/issues/8278#issuecomment-3431707484 https://github.com/postgres/postgres/commit/f4e7756ef9a437f30a4dc5ded41b8824a9d291b9 ### What PG18 changed SELECT creates diff has a **named join**: ```sql (...) AS unsupported_join (x,y,z,t,e,f,q) ``` On PG17, `COUNT(unsupported_join.*)` stayed as a single whole-row Var that referenced the **join alias**. On PG18, the parser expands that whole-row Var **early** into a `ROW(...)` of **base** columns: ``` ROW(a.user_id, a.item_id, a.buy_count, b.id, b.it_name, b.k_no, c.id, c.it_name, c.k_no) ``` But since the join is *named*, inner aliases `a/b/c` are hidden. Referencing them later blows up with “invalid reference to FROM-clause entry for table ‘a’”. ### What this PR changes 1. **Retarget at `RowExpr` deparse (not in `get_variable`)** * In `get_rule_expr()`’s `T_RowExpr` branch, each element `e` of `ROW(...)` is examined. * If `e` unwraps to a simple, same-level `Var` (`varlevelsup == 0`, `varattno > 0`) and there is a **named `RTE_JOIN`** with `joinaliasvars`, we **do not** change `varno/varattno`. * Instead, we build a copy of the Var and set **`varnosyn/varattnosyn`** to the matching join alias column (from `joinaliasvars`). * Then we deparse that Var via `get_rule_expr_toplevel(...)`, which naturally prints `join_alias.colname`. * Scope is limited to **query deparsing** (`dpns->plan == NULL`), exactly where PG18 expands whole-row vars into `ROW(...)` of base Vars. 2. **Helpers (PG18-only file)** * `unwrap_simple_var(Node*)`: strips trivial wrappers (`RelabelType`, `CoerceToDomain`, `CollateExpr`) to reveal a `Var`. * `var_matches_base(const Var*, int varno, AttrNumber attno)`: matches canonical or synonym identity. * `dpns_has_named_join(const deparse_namespace*)`: fast precheck for any named join with `joinaliasvars`. * `map_var_through_join_alias(...)`: scans `joinaliasvars` to locate the **JOIN RTE index + attno** for a 1:1 alias; the caller uses these to set `varnosyn/varattnosyn`. 3. **Safety and non-goals** * **No effect on plan deparsing** (`dpns->plan != NULL`). * **No change to semantic identity**: we leave `varno/varattno` untouched; only set `varnosyn/varattnosyn`. * Skip whole-row/system columns (`attno <= 0`) and non-simple join columns (computed expressions). * Works with named joins **with or without** an explicit column list (we rely on `joinaliasvars`, not the alias collist). ### Reproducer ```sql CREATE TABLE distributed_table(user_id int, item_id int, buy_count int); CREATE TABLE reference_table(id int, it_name varchar(25), k_no int); SELECT create_distributed_table('distributed_table', 'user_id'); SELECT COUNT(unsupported_join.*) FROM (distributed_table a LEFT JOIN reference_table b ON true RIGHT JOIN reference_table c ON true) AS unsupported_join (x,y,z,t,e,f,q) JOIN (reference_table d JOIN reference_table e ON true) ON true; ``` **Before (PG18):** deparser emitted `ROW(a.user_id, …)` → `ERROR: invalid reference to FROM-clause entry for table "a"` **After:** deparser emits `ROW(unsupported_join.x, ..., unsupported_join.k_no)` → runs successfully. Now maps to `unsupported_join.` and runs. --- .../distributed/deparser/ruleutils_18.c | 119 ++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/src/backend/distributed/deparser/ruleutils_18.c b/src/backend/distributed/deparser/ruleutils_18.c index 44e4c8d38..79318fa63 100644 --- a/src/backend/distributed/deparser/ruleutils_18.c +++ b/src/backend/distributed/deparser/ruleutils_18.c @@ -510,6 +510,12 @@ static void get_json_table_nested_columns(TableFunc *tf, JsonTablePlan *plan, deparse_context *context, bool showimplicit, bool needcomma); +static void +map_var_through_join_alias(deparse_namespace *dpns, Var *v); +static Var *unwrap_simple_var(Node *expr); +static bool dpns_has_named_join(const deparse_namespace *dpns); +static inline bool +var_matches_base(const Var *v, Index want_varno, AttrNumber want_attno); #define only_marker(rte) ((rte)->inh ? "" : "ONLY ") @@ -4576,6 +4582,103 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context) return attname; } + +/* Any named join with joinaliasvars hides its inner aliases. */ +static inline bool +dpns_has_named_join(const deparse_namespace *dpns) +{ + if (!dpns || dpns->rtable == NIL) + return false; + + ListCell *lc; + foreach (lc, dpns->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + if (rte && rte->rtekind == RTE_JOIN && + rte->alias != NULL && rte->joinaliasvars != NIL) + return true; + } + return false; +} + + +/* Unwrap trivial wrappers around a Var; return Var* or NULL. */ +static Var * +unwrap_simple_var(Node *expr) +{ + for (;;) + { + if (expr == NULL) + return NULL; + if (IsA(expr, Var)) + return (Var *) expr; + if (IsA(expr, RelabelType)) + { expr = (Node *) ((RelabelType *) expr)->arg; continue; } + if (IsA(expr, CoerceToDomain)) + { expr = (Node *) ((CoerceToDomain *) expr)->arg; continue; } + if (IsA(expr, CollateExpr)) + { expr = (Node *) ((CollateExpr *) expr)->arg; continue; } + /* Not a simple Var */ + return NULL; + } +} + + +/* Base identity (canonical/synonym) match against a wanted (varno,attno) pair. */ +static inline bool +var_matches_base(const Var *v, Index want_varno, AttrNumber want_attno) +{ + if (v->varlevelsup != 0) + return false; + if (v->varno == want_varno && v->varattno == want_attno) + return true; + if (v->varnosyn > 0 && v->varattnosyn > 0 && + v->varnosyn == want_varno && v->varattnosyn == want_attno) + return true; + return false; +} + + + +/* Mutate v in place: if v maps to a named JOIN's column, set varnosyn/varattnosyn. + * Returns true iff SYN fields were set. Query deparse only (dpns->plan == NULL). */ +static void +map_var_through_join_alias(deparse_namespace *dpns, Var *v) +{ + if (!dpns || dpns->plan != NULL || !v || + v->varlevelsup != 0 || v->varattno <= 0) + return; + + int rti = 0; + ListCell *lc; + foreach (lc, dpns->rtable) + { + rti++; + RangeTblEntry *jrte = (RangeTblEntry *) lfirst(lc); + if (!jrte || jrte->rtekind != RTE_JOIN || + jrte->alias == NULL || jrte->joinaliasvars == NIL) + continue; + + AttrNumber jattno = 0; + ListCell *vlc; + foreach (vlc, jrte->joinaliasvars) + { + jattno++; + Var *aliasVar = unwrap_simple_var((Node *) lfirst(vlc)); + if (!aliasVar) + continue; + + if (var_matches_base(aliasVar, v->varno, v->varattno)) + { + v->varnosyn = (Index) rti; + v->varattnosyn = jattno; + return; + } + } + } +} + + /* * Deparse a Var which references OUTER_VAR, INNER_VAR, or INDEX_VAR. This * routine is actually a callback for get_special_varno, which handles finding @@ -6590,6 +6693,11 @@ get_rule_expr(Node *node, deparse_context *context, Assert(list_length(rowexpr->args) <= tupdesc->natts); } + /* Precompute deparse ns and whether we even need to try mapping */ + deparse_namespace *dpns = (context->namespaces != NIL) + ? (deparse_namespace *) linitial(context->namespaces) : NULL; + bool try_map = (dpns && dpns->plan == NULL && dpns_has_named_join(dpns)); + /* * SQL99 allows "ROW" to be omitted when there is more than * one column, but for simplicity we always print it. @@ -6605,6 +6713,17 @@ get_rule_expr(Node *node, deparse_context *context, !TupleDescAttr(tupdesc, i)->attisdropped) { appendStringInfoString(buf, sep); + + /* PG18: if element is a simple base Var, set its SYN to the JOIN alias */ + if (try_map) + { + Var *v = unwrap_simple_var(e); + if (v) + { + map_var_through_join_alias(dpns, v); + } + } + /* Whole-row Vars need special treatment here */ get_rule_expr_toplevel(e, context, true); sep = ", ";