PG18 deparser: map Vars through JOIN aliases (fixes whole-row join column names) (#8300)

fixes #8278

Please check issue:
https://github.com/citusdata/citus/issues/8278#issuecomment-3431707484


f4e7756ef9

### What PG18 changed

SELECT creates diff has a **named join**:

```sql
(...) AS unsupported_join (x,y,z,t,e,f,q)
```

On PG17, `COUNT(unsupported_join.*)` stayed as a single whole-row Var
that referenced the **join alias**.

On PG18, the parser expands that whole-row Var **early** into a
`ROW(...)` of **base** columns:

```
ROW(a.user_id, a.item_id, a.buy_count,
    b.id, b.it_name, b.k_no,
    c.id, c.it_name, c.k_no)
```

But since the join is *named*, inner aliases `a/b/c` are hidden.
Referencing them later blows up with
“invalid reference to FROM-clause entry for table ‘a’”.

### What this PR changes

1. **Retarget at `RowExpr` deparse (not in `get_variable`)**

* In `get_rule_expr()`’s `T_RowExpr` branch, each element `e` of
`ROW(...)` is examined.
* If `e` unwraps to a simple, same-level `Var` (`varlevelsup == 0`,
`varattno > 0`) and there is a **named `RTE_JOIN`** with
`joinaliasvars`, we **do not** change `varno/varattno`.
* Instead, we build a copy of the Var and set **`varnosyn/varattnosyn`**
to the matching join alias column (from `joinaliasvars`).
* Then we deparse that Var via `get_rule_expr_toplevel(...)`, which
naturally prints `join_alias.colname`.
* Scope is limited to **query deparsing** (`dpns->plan == NULL`),
exactly where PG18 expands whole-row vars into `ROW(...)` of base Vars.

2. **Helpers (PG18-only file)**

* `unwrap_simple_var(Node*)`: strips trivial wrappers (`RelabelType`,
`CoerceToDomain`, `CollateExpr`) to reveal a `Var`.
* `var_matches_base(const Var*, int varno, AttrNumber attno)`: matches
canonical or synonym identity.
* `dpns_has_named_join(const deparse_namespace*)`: fast precheck for any
named join with `joinaliasvars`.
* `map_var_through_join_alias(...)`: scans `joinaliasvars` to locate the
**JOIN RTE index + attno** for a 1:1 alias; the caller uses these to set
`varnosyn/varattnosyn`.

3. **Safety and non-goals**

   * **No effect on plan deparsing** (`dpns->plan != NULL`).
* **No change to semantic identity**: we leave `varno/varattno`
untouched; only set `varnosyn/varattnosyn`.
* Skip whole-row/system columns (`attno <= 0`) and non-simple join
columns (computed expressions).
* Works with named joins **with or without** an explicit column list (we
rely on `joinaliasvars`, not the alias collist).

### Reproducer

```sql
CREATE TABLE distributed_table(user_id int, item_id int, buy_count int);
CREATE TABLE reference_table(id int, it_name varchar(25), k_no int);
SELECT create_distributed_table('distributed_table', 'user_id');

SELECT COUNT(unsupported_join.*)
FROM (distributed_table a
      LEFT JOIN reference_table b ON true
      RIGHT JOIN reference_table c ON true)
     AS unsupported_join (x,y,z,t,e,f,q)
JOIN (reference_table d JOIN reference_table e ON true) ON true;
```

**Before (PG18):** deparser emitted `ROW(a.user_id, …)` → `ERROR:
invalid reference to FROM-clause entry for table "a"`
**After:** deparser emits
`ROW(unsupported_join.x, ..., unsupported_join.k_no)` → runs
successfully.

Now maps to `unsupported_join.<auto_col_names>` and runs.
pull/8322/head
Mehmet YILMAZ 2025-11-05 14:08:58 +03:00 committed by GitHub
parent 7a7a0ba9c7
commit b63572d72f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 119 additions and 0 deletions

View File

@ -510,6 +510,12 @@ static void get_json_table_nested_columns(TableFunc *tf, JsonTablePlan *plan,
deparse_context *context, deparse_context *context,
bool showimplicit, bool showimplicit,
bool needcomma); bool needcomma);
static void
map_var_through_join_alias(deparse_namespace *dpns, Var *v);
static Var *unwrap_simple_var(Node *expr);
static bool dpns_has_named_join(const deparse_namespace *dpns);
static inline bool
var_matches_base(const Var *v, Index want_varno, AttrNumber want_attno);
#define only_marker(rte) ((rte)->inh ? "" : "ONLY ") #define only_marker(rte) ((rte)->inh ? "" : "ONLY ")
@ -4576,6 +4582,103 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context)
return attname; return attname;
} }
/* Any named join with joinaliasvars hides its inner aliases. */
static inline bool
dpns_has_named_join(const deparse_namespace *dpns)
{
if (!dpns || dpns->rtable == NIL)
return false;
ListCell *lc;
foreach (lc, dpns->rtable)
{
RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
if (rte && rte->rtekind == RTE_JOIN &&
rte->alias != NULL && rte->joinaliasvars != NIL)
return true;
}
return false;
}
/* Unwrap trivial wrappers around a Var; return Var* or NULL. */
static Var *
unwrap_simple_var(Node *expr)
{
for (;;)
{
if (expr == NULL)
return NULL;
if (IsA(expr, Var))
return (Var *) expr;
if (IsA(expr, RelabelType))
{ expr = (Node *) ((RelabelType *) expr)->arg; continue; }
if (IsA(expr, CoerceToDomain))
{ expr = (Node *) ((CoerceToDomain *) expr)->arg; continue; }
if (IsA(expr, CollateExpr))
{ expr = (Node *) ((CollateExpr *) expr)->arg; continue; }
/* Not a simple Var */
return NULL;
}
}
/* Base identity (canonical/synonym) match against a wanted (varno,attno) pair. */
static inline bool
var_matches_base(const Var *v, Index want_varno, AttrNumber want_attno)
{
if (v->varlevelsup != 0)
return false;
if (v->varno == want_varno && v->varattno == want_attno)
return true;
if (v->varnosyn > 0 && v->varattnosyn > 0 &&
v->varnosyn == want_varno && v->varattnosyn == want_attno)
return true;
return false;
}
/* Mutate v in place: if v maps to a named JOIN's column, set varnosyn/varattnosyn.
* Returns true iff SYN fields were set. Query deparse only (dpns->plan == NULL). */
static void
map_var_through_join_alias(deparse_namespace *dpns, Var *v)
{
if (!dpns || dpns->plan != NULL || !v ||
v->varlevelsup != 0 || v->varattno <= 0)
return;
int rti = 0;
ListCell *lc;
foreach (lc, dpns->rtable)
{
rti++;
RangeTblEntry *jrte = (RangeTblEntry *) lfirst(lc);
if (!jrte || jrte->rtekind != RTE_JOIN ||
jrte->alias == NULL || jrte->joinaliasvars == NIL)
continue;
AttrNumber jattno = 0;
ListCell *vlc;
foreach (vlc, jrte->joinaliasvars)
{
jattno++;
Var *aliasVar = unwrap_simple_var((Node *) lfirst(vlc));
if (!aliasVar)
continue;
if (var_matches_base(aliasVar, v->varno, v->varattno))
{
v->varnosyn = (Index) rti;
v->varattnosyn = jattno;
return;
}
}
}
}
/* /*
* Deparse a Var which references OUTER_VAR, INNER_VAR, or INDEX_VAR. This * Deparse a Var which references OUTER_VAR, INNER_VAR, or INDEX_VAR. This
* routine is actually a callback for get_special_varno, which handles finding * routine is actually a callback for get_special_varno, which handles finding
@ -6590,6 +6693,11 @@ get_rule_expr(Node *node, deparse_context *context,
Assert(list_length(rowexpr->args) <= tupdesc->natts); Assert(list_length(rowexpr->args) <= tupdesc->natts);
} }
/* Precompute deparse ns and whether we even need to try mapping */
deparse_namespace *dpns = (context->namespaces != NIL)
? (deparse_namespace *) linitial(context->namespaces) : NULL;
bool try_map = (dpns && dpns->plan == NULL && dpns_has_named_join(dpns));
/* /*
* SQL99 allows "ROW" to be omitted when there is more than * SQL99 allows "ROW" to be omitted when there is more than
* one column, but for simplicity we always print it. * one column, but for simplicity we always print it.
@ -6605,6 +6713,17 @@ get_rule_expr(Node *node, deparse_context *context,
!TupleDescAttr(tupdesc, i)->attisdropped) !TupleDescAttr(tupdesc, i)->attisdropped)
{ {
appendStringInfoString(buf, sep); appendStringInfoString(buf, sep);
/* PG18: if element is a simple base Var, set its SYN to the JOIN alias */
if (try_map)
{
Var *v = unwrap_simple_var(e);
if (v)
{
map_var_through_join_alias(dpns, v);
}
}
/* Whole-row Vars need special treatment here */ /* Whole-row Vars need special treatment here */
get_rule_expr_toplevel(e, context, true); get_rule_expr_toplevel(e, context, true);
sep = ", "; sep = ", ";