mirror of https://github.com/citusdata/citus.git
Fix CTE traversal for outer Vars in FindReferencedTableColumn (remove assert; correct parentQueryList handling) (#8106)
fixes #8105 This change lets `FindReferencedTableColumn()` correctly resolve columns through a CTE even when the expression comes from an outer query level (`varlevelsup > 0`, `skipOuterVars = false`). Before, we hit an `Assert(skipOuterVars)` in this path. **Problem** * Hitting a CTE after walking outer Vars triggered `Assert(skipOuterVars)`. * Cause: we modified `parentQueryList` in place and didn’t rebuild the correct parent chain before recursing into the CTE, so the path was considered unsafe. **Fix** * Remove the `Assert(skipOuterVars)` in the `RTE_CTE` branch. * Find the CTE’s owning level via `ctelevelsup` and compute `cteParentListIndex`. * Rebuild a private parent list for recursion: `list_copy` → `list_truncate` → `lappend(current query)`. * Add a bounds check before indexing the CTE’s `targetList`. **Why it works** ```diff -parentQueryList = lappend(parentQueryList, query); -FindReferencedTableColumn(targetEntry->expr, parentQueryList, - cteQuery, column, rteContainingReferencedColumn, - skipOuterVars); + /* hand a private, bounded parent list to the recursion */ + List *newParent = list_copy(parentQueryList); + newParent = list_truncate(newParent, cteParentListIndex + 1); + newParent = lappend(newParent, query); + + FindReferencedTableColumn(targetEntry->expr, + newParent, + cteQuery, + column, + rteContainingReferencedColumn, + skipOuterVars); +} ``` **Before:** We changed `parentQueryList` in place (`parentQueryList = lappend(...)`) and didn’t trim it to the CTE’s owner level. **After:** We copy the list, trim it to the CTE’s owner level, then append the current query. This keeps the parent list accurate for the current recursion and safe when following outer Vars. **Example: Nested subquery referencing the CTE (two levels down)** ``` WITH c AS MATERIALIZED (SELECT user_id FROM raw_events_first) SELECT 1 FROM raw_events_first t WHERE EXISTS ( SELECT 1 FROM (SELECT user_id FROM c) c2 WHERE c2.user_id = t.user_id ); ``` Levels: Q0 = top SELECT Q1 = EXISTS subquery Q2 = inner (SELECT user_id FROM c) When resolving c2.user_id inside Q2: - parentQueryList is [Q0, Q1, Q2]. - `ctelevelsup`: 2 `cteParentListIndex = length(parentQueryList) - ctelevelsup - 1` - Recurse into the CTE’s query with [Q0, Q2]. **Tests (added in `multi_insert_select`)** * **T1:** Correlated subquery that references a CTE (one level down) Verifies that resolving through `RTE_CTE` after following an outer `Var` succeeds, row count matches source table. * **T2:** Nested subquery that references a CTE (two levels down) Exercises deeper recursion and confirms identical to T1. * **T3:** Scalar subquery in a target list that reads from the outer CTE Checks expected row count and that no NULLs are inserted. These tests cover the cases that previously hit `Assert(skipOuterVars)` and confirm CTE references while following outer Vars.pull/8108/head^2
parent
71d6328378
commit
a6161f5a21
|
|
@ -4583,11 +4583,10 @@ FindReferencedTableColumn(Expr *columnExpression, List *parentQueryList, Query *
|
|||
else if (rangeTableEntry->rtekind == RTE_CTE)
|
||||
{
|
||||
/*
|
||||
* When outerVars are considered, we modify parentQueryList, so this
|
||||
* logic might need to change when we support outervars in CTEs.
|
||||
* Resolve through a CTE even when skipOuterVars == false.
|
||||
* Maintain the invariant that each recursion level owns a private,
|
||||
* correctly-bounded copy of parentQueryList.
|
||||
*/
|
||||
Assert(skipOuterVars);
|
||||
|
||||
int cteParentListIndex = list_length(parentQueryList) -
|
||||
rangeTableEntry->ctelevelsup - 1;
|
||||
Query *cteParentQuery = NULL;
|
||||
|
|
@ -4618,16 +4617,36 @@ FindReferencedTableColumn(Expr *columnExpression, List *parentQueryList, Query *
|
|||
if (cte != NULL)
|
||||
{
|
||||
Query *cteQuery = (Query *) cte->ctequery;
|
||||
List *targetEntryList = cteQuery->targetList;
|
||||
AttrNumber targetEntryIndex = candidateColumn->varattno - 1;
|
||||
TargetEntry *targetEntry = list_nth(targetEntryList, targetEntryIndex);
|
||||
|
||||
parentQueryList = lappend(parentQueryList, query);
|
||||
FindReferencedTableColumn(targetEntry->expr, parentQueryList,
|
||||
cteQuery, column, rteContainingReferencedColumn,
|
||||
if (targetEntryIndex >= 0 &&
|
||||
targetEntryIndex < list_length(cteQuery->targetList))
|
||||
{
|
||||
TargetEntry *targetEntry =
|
||||
list_nth(cteQuery->targetList, targetEntryIndex);
|
||||
|
||||
/* Build a private, bounded parentQueryList before recursing into the CTE.
|
||||
* Invariant: list is [top … current], owned by this call (no aliasing).
|
||||
* For RTE_CTE:
|
||||
* owner_idx = list_length(parentQueryList) - rangeTableEntry->ctelevelsup - 1;
|
||||
* newParent = lappend(list_truncate(list_copy(parentQueryList), owner_idx + 1), query);
|
||||
* Example (Q0 owns CTE; we’re in Q2 via nested subquery):
|
||||
* parent=[Q0,Q1,Q2], ctelevelsup=2 ⇒ owner_idx=0 ⇒ newParent=[Q0,Q2].
|
||||
* Keeps outer-Var level math correct without mutating the caller’s list.
|
||||
*/
|
||||
List *newParent = list_copy(parentQueryList);
|
||||
newParent = list_truncate(newParent, cteParentListIndex + 1);
|
||||
newParent = lappend(newParent, query);
|
||||
|
||||
FindReferencedTableColumn(targetEntry->expr,
|
||||
newParent,
|
||||
cteQuery,
|
||||
column,
|
||||
rteContainingReferencedColumn,
|
||||
skipOuterVars);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -3637,5 +3637,157 @@ SELECT id, val FROM version_dist_union ORDER BY id;
|
|||
(6 rows)
|
||||
|
||||
-- End of Issue #7784
|
||||
-- PR #8106 — CTE traversal works when following outer Vars
|
||||
-- This script exercises three shapes:
|
||||
-- T1) CTE referenced inside a correlated subquery (one level down)
|
||||
-- T2) CTE referenced inside a nested subquery (two levels down)
|
||||
-- T3) Subquery targetlist uses a scalar sublink into the outer CTE
|
||||
CREATE SCHEMA pr8106_cte_outervar;
|
||||
SET search_path = pr8106_cte_outervar, public;
|
||||
-- Base tables for the tests
|
||||
DROP TABLE IF EXISTS raw_events_first CASCADE;
|
||||
NOTICE: table "raw_events_first" does not exist, skipping
|
||||
DROP TABLE IF EXISTS agg_events CASCADE;
|
||||
NOTICE: table "agg_events" does not exist, skipping
|
||||
CREATE TABLE raw_events_first(
|
||||
user_id int,
|
||||
value_1 int
|
||||
);
|
||||
CREATE TABLE agg_events(
|
||||
user_id int,
|
||||
value_1_agg int
|
||||
);
|
||||
-- Distribute and colocate (distribution key = user_id)
|
||||
SELECT create_distributed_table('raw_events_first', 'user_id');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_distributed_table('agg_events', 'user_id');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- Seed data (duplicates on some user_ids; some NULL value_1’s)
|
||||
INSERT INTO raw_events_first(user_id, value_1) VALUES
|
||||
(1, 10), (1, 20), (1, NULL),
|
||||
(2, NULL),
|
||||
(3, 30),
|
||||
(4, NULL),
|
||||
(5, 50), (5, NULL),
|
||||
(6, NULL);
|
||||
---------------------------------------------------------------------
|
||||
-- T1) CTE referenced inside a correlated subquery (one level down)
|
||||
---------------------------------------------------------------------
|
||||
TRUNCATE agg_events;
|
||||
WITH c AS MATERIALIZED (
|
||||
SELECT user_id FROM raw_events_first
|
||||
)
|
||||
INSERT INTO agg_events (user_id)
|
||||
SELECT t.user_id
|
||||
FROM raw_events_first t
|
||||
WHERE EXISTS (SELECT 1 FROM c WHERE c.user_id = t.user_id);
|
||||
-- Expect one insert per row in raw_events_first (EXISTS always true per user_id)
|
||||
SELECT 't1_count_matches' AS test,
|
||||
(SELECT count(*) FROM agg_events) =
|
||||
(SELECT count(*) FROM raw_events_first) AS ok;
|
||||
test | ok
|
||||
---------------------------------------------------------------------
|
||||
t1_count_matches | t
|
||||
(1 row)
|
||||
|
||||
-- Spot-check: how many rows were inserted
|
||||
SELECT 't1_rows' AS test, count(*) AS rows FROM agg_events;
|
||||
test | rows
|
||||
---------------------------------------------------------------------
|
||||
t1_rows | 9
|
||||
(1 row)
|
||||
|
||||
---------------------------------------------------------------------
|
||||
-- T2) CTE referenced inside a nested subquery (two levels down)
|
||||
---------------------------------------------------------------------
|
||||
TRUNCATE agg_events;
|
||||
WITH c AS MATERIALIZED (
|
||||
SELECT user_id FROM raw_events_first
|
||||
)
|
||||
INSERT INTO agg_events (user_id)
|
||||
SELECT t.user_id
|
||||
FROM raw_events_first t
|
||||
WHERE EXISTS (
|
||||
SELECT 1
|
||||
FROM (SELECT user_id FROM c) c2
|
||||
WHERE c2.user_id = t.user_id
|
||||
);
|
||||
-- Same cardinality expectation as T1
|
||||
SELECT 't2_count_matches' AS test,
|
||||
(SELECT count(*) FROM agg_events) =
|
||||
(SELECT count(*) FROM raw_events_first) AS ok;
|
||||
test | ok
|
||||
---------------------------------------------------------------------
|
||||
t2_count_matches | t
|
||||
(1 row)
|
||||
|
||||
SELECT 't2_rows' AS test, count(*) AS rows FROM agg_events;
|
||||
test | rows
|
||||
---------------------------------------------------------------------
|
||||
t2_rows | 9
|
||||
(1 row)
|
||||
|
||||
---------------------------------------------------------------------
|
||||
-- T3) Subquery targetlist uses a scalar sublink into the outer CTE
|
||||
-- (use MAX() to keep scalar subquery single-row)
|
||||
---------------------------------------------------------------------
|
||||
TRUNCATE agg_events;
|
||||
WITH c AS (SELECT user_id, value_1 FROM raw_events_first)
|
||||
INSERT INTO agg_events (user_id, value_1_agg)
|
||||
SELECT d.user_id, d.value_1_agg
|
||||
FROM (
|
||||
SELECT t.user_id,
|
||||
(SELECT max(c.value_1) FROM c WHERE c.user_id = t.user_id) AS value_1_agg
|
||||
FROM raw_events_first t
|
||||
) AS d
|
||||
WHERE d.value_1_agg IS NOT NULL;
|
||||
-- Expect one insert per row in raw_events_first whose user_id has at least one non-NULL value_1
|
||||
SELECT 't3_count_matches' AS test,
|
||||
(SELECT count(*) FROM agg_events) =
|
||||
(
|
||||
SELECT count(*)
|
||||
FROM raw_events_first t
|
||||
WHERE EXISTS (
|
||||
SELECT 1 FROM raw_events_first c
|
||||
WHERE c.user_id = t.user_id AND c.value_1 IS NOT NULL
|
||||
)
|
||||
) AS ok;
|
||||
test | ok
|
||||
---------------------------------------------------------------------
|
||||
t3_count_matches | t
|
||||
(1 row)
|
||||
|
||||
-- Also verify no NULLs were inserted into value_1_agg
|
||||
SELECT 't3_no_null_value_1_agg' AS test,
|
||||
NOT EXISTS (SELECT 1 FROM agg_events WHERE value_1_agg IS NULL) AS ok;
|
||||
test | ok
|
||||
---------------------------------------------------------------------
|
||||
t3_no_null_value_1_agg | t
|
||||
(1 row)
|
||||
|
||||
-- Deterministic sample of results
|
||||
SELECT 't3_sample' AS test, user_id, value_1_agg
|
||||
FROM agg_events
|
||||
ORDER BY user_id
|
||||
LIMIT 5;
|
||||
test | user_id | value_1_agg
|
||||
---------------------------------------------------------------------
|
||||
t3_sample | 1 | 20
|
||||
t3_sample | 1 | 20
|
||||
t3_sample | 1 | 20
|
||||
t3_sample | 3 | 30
|
||||
t3_sample | 5 | 50
|
||||
(5 rows)
|
||||
|
||||
-- End of PR #8106 — CTE traversal works when following outer Vars
|
||||
SET client_min_messages TO ERROR;
|
||||
DROP SCHEMA pr8106_cte_outervar CASCADE;
|
||||
DROP SCHEMA multi_insert_select CASCADE;
|
||||
|
|
|
|||
|
|
@ -2583,5 +2583,127 @@ SELECT id, val FROM version_dist_union ORDER BY id;
|
|||
|
||||
-- End of Issue #7784
|
||||
|
||||
-- PR #8106 — CTE traversal works when following outer Vars
|
||||
-- This script exercises three shapes:
|
||||
-- T1) CTE referenced inside a correlated subquery (one level down)
|
||||
-- T2) CTE referenced inside a nested subquery (two levels down)
|
||||
-- T3) Subquery targetlist uses a scalar sublink into the outer CTE
|
||||
|
||||
CREATE SCHEMA pr8106_cte_outervar;
|
||||
SET search_path = pr8106_cte_outervar, public;
|
||||
|
||||
-- Base tables for the tests
|
||||
DROP TABLE IF EXISTS raw_events_first CASCADE;
|
||||
DROP TABLE IF EXISTS agg_events CASCADE;
|
||||
|
||||
CREATE TABLE raw_events_first(
|
||||
user_id int,
|
||||
value_1 int
|
||||
);
|
||||
|
||||
CREATE TABLE agg_events(
|
||||
user_id int,
|
||||
value_1_agg int
|
||||
);
|
||||
|
||||
-- Distribute and colocate (distribution key = user_id)
|
||||
SELECT create_distributed_table('raw_events_first', 'user_id');
|
||||
SELECT create_distributed_table('agg_events', 'user_id');
|
||||
|
||||
-- Seed data (duplicates on some user_ids; some NULL value_1’s)
|
||||
INSERT INTO raw_events_first(user_id, value_1) VALUES
|
||||
(1, 10), (1, 20), (1, NULL),
|
||||
(2, NULL),
|
||||
(3, 30),
|
||||
(4, NULL),
|
||||
(5, 50), (5, NULL),
|
||||
(6, NULL);
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- T1) CTE referenced inside a correlated subquery (one level down)
|
||||
----------------------------------------------------------------------
|
||||
TRUNCATE agg_events;
|
||||
|
||||
WITH c AS MATERIALIZED (
|
||||
SELECT user_id FROM raw_events_first
|
||||
)
|
||||
INSERT INTO agg_events (user_id)
|
||||
SELECT t.user_id
|
||||
FROM raw_events_first t
|
||||
WHERE EXISTS (SELECT 1 FROM c WHERE c.user_id = t.user_id);
|
||||
|
||||
-- Expect one insert per row in raw_events_first (EXISTS always true per user_id)
|
||||
SELECT 't1_count_matches' AS test,
|
||||
(SELECT count(*) FROM agg_events) =
|
||||
(SELECT count(*) FROM raw_events_first) AS ok;
|
||||
|
||||
-- Spot-check: how many rows were inserted
|
||||
SELECT 't1_rows' AS test, count(*) AS rows FROM agg_events;
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- T2) CTE referenced inside a nested subquery (two levels down)
|
||||
----------------------------------------------------------------------
|
||||
TRUNCATE agg_events;
|
||||
|
||||
WITH c AS MATERIALIZED (
|
||||
SELECT user_id FROM raw_events_first
|
||||
)
|
||||
INSERT INTO agg_events (user_id)
|
||||
SELECT t.user_id
|
||||
FROM raw_events_first t
|
||||
WHERE EXISTS (
|
||||
SELECT 1
|
||||
FROM (SELECT user_id FROM c) c2
|
||||
WHERE c2.user_id = t.user_id
|
||||
);
|
||||
|
||||
-- Same cardinality expectation as T1
|
||||
SELECT 't2_count_matches' AS test,
|
||||
(SELECT count(*) FROM agg_events) =
|
||||
(SELECT count(*) FROM raw_events_first) AS ok;
|
||||
|
||||
SELECT 't2_rows' AS test, count(*) AS rows FROM agg_events;
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- T3) Subquery targetlist uses a scalar sublink into the outer CTE
|
||||
-- (use MAX() to keep scalar subquery single-row)
|
||||
----------------------------------------------------------------------
|
||||
TRUNCATE agg_events;
|
||||
|
||||
WITH c AS (SELECT user_id, value_1 FROM raw_events_first)
|
||||
INSERT INTO agg_events (user_id, value_1_agg)
|
||||
SELECT d.user_id, d.value_1_agg
|
||||
FROM (
|
||||
SELECT t.user_id,
|
||||
(SELECT max(c.value_1) FROM c WHERE c.user_id = t.user_id) AS value_1_agg
|
||||
FROM raw_events_first t
|
||||
) AS d
|
||||
WHERE d.value_1_agg IS NOT NULL;
|
||||
|
||||
-- Expect one insert per row in raw_events_first whose user_id has at least one non-NULL value_1
|
||||
SELECT 't3_count_matches' AS test,
|
||||
(SELECT count(*) FROM agg_events) =
|
||||
(
|
||||
SELECT count(*)
|
||||
FROM raw_events_first t
|
||||
WHERE EXISTS (
|
||||
SELECT 1 FROM raw_events_first c
|
||||
WHERE c.user_id = t.user_id AND c.value_1 IS NOT NULL
|
||||
)
|
||||
) AS ok;
|
||||
|
||||
-- Also verify no NULLs were inserted into value_1_agg
|
||||
SELECT 't3_no_null_value_1_agg' AS test,
|
||||
NOT EXISTS (SELECT 1 FROM agg_events WHERE value_1_agg IS NULL) AS ok;
|
||||
|
||||
-- Deterministic sample of results
|
||||
SELECT 't3_sample' AS test, user_id, value_1_agg
|
||||
FROM agg_events
|
||||
ORDER BY user_id
|
||||
LIMIT 5;
|
||||
|
||||
-- End of PR #8106 — CTE traversal works when following outer Vars
|
||||
|
||||
SET client_min_messages TO ERROR;
|
||||
DROP SCHEMA pr8106_cte_outervar CASCADE;
|
||||
DROP SCHEMA multi_insert_select CASCADE;
|
||||
|
|
|
|||
Loading…
Reference in New Issue