[Bug Fix] SEGV on query with Left Outer Join (#7787) (#7901)

DESCRIPTION: Fixes a crash in left outer joins that can happen when
there is an an aggregate on a column from the inner side of the join.

Fix the SEGV seen in #7787 and #7899; it occurs because a column in the
targetlist of a worker subquery can contain a non-empty varnullingrels
field if the column is from the inner side of a left outer join. The
issue can also occur with the columns in the HAVING clause, and this is
also tested in the fix. The issue was triggered by the introduction of
the varnullingrels to Vars in Postgres 16 (2489d76c)

There is a related issue, #7705, where a non-empty varnullingrels was
incorrectly copied into the query tree for the combine query. Here, a
non-empty varnullingrels field of a var is incorrectly copied into the
query tree for a worker subquery.

The regress file from #7705 is used (and renamed) to also test this
(#7787). An alternative test output file is required for Postgres 15
because of an optimization to DISTINCT in Postgres 16 (1349d2790bf).
pull/7922/head
Colm 2025-02-18 12:41:34 +00:00 committed by naisila
parent 2b5dfbbd08
commit 89674d9630
5 changed files with 677 additions and 34 deletions

View File

@ -2097,6 +2097,16 @@ CreateSubqueryTargetListAndAdjustVars(List *columnList)
*/ */
column->varno = 1; column->varno = 1;
column->varattno = resNo; column->varattno = resNo;
/*
* 1 subquery means there is one range table entry so with Postgres 16+ we need
* to ensure that column's varnullingrels - the set of join rels that can null
* the var - is empty. Otherwise, when given the query, the Postgres planner
* may attempt to access a non-existent range table and segfault, as in #7787.
*/
#if PG_VERSION_NUM >= PG_VERSION_16
column->varnullingrels = NULL;
#endif
} }
return subqueryTargetEntryList; return subqueryTargetEntryList;

View File

@ -0,0 +1,426 @@
--- Test for verifying that column references (var nodes) in targets that cannot be pushed down
--- do not cause issues for the postgres planner, in particular postgres versions 16+, where the
--- varnullingrels field of a VAR node may contain relids of join relations that can make the var
--- NULL; in a rewritten distributed query without a join such relids do not have a meaning.
-- This test has an alternative goldfile because of the following feature in Postgres 16:
-- https://github.com/postgres/postgres/commit/1349d2790bf48a4de072931c722f39337e72055e
--
SHOW server_version \gset
SELECT substring(:'server_version', '\d+')::int >= 16 AS server_version_ge_16;
server_version_ge_16
---------------------------------------------------------------------
t
(1 row)
CREATE SCHEMA outer_join_columns_testing;
SET search_path to 'outer_join_columns_testing';
SET citus.next_shard_id TO 30070000;
SET citus.shard_replication_factor TO 1;
SET citus.enable_local_execution TO ON;
CREATE TABLE t1 (id INT PRIMARY KEY);
INSERT INTO t1 VALUES (1), (2);
CREATE TABLE t2 (id INT, account_id INT, a2 INT, PRIMARY KEY(id, account_id));
INSERT INTO t2 VALUES (3, 1, 10), (4, 2, 20), (5, 1, NULL);
SELECT create_distributed_table('t1', 'id');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$outer_join_columns_testing.t1$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('t2', 'account_id');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$outer_join_columns_testing.t2$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Test the issue seen in #7705; a target expression with
-- a window function that cannot be pushed down because the
-- partion by is not on the distribution column also includes
-- a column from the inner side of a left outer join, which
-- produces a non-empty varnullingrels set in PG 16 (and higher)
SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id;
id | max
---------------------------------------------------------------------
1 | 10
2 | 20
1 |
(3 rows)
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id;
QUERY PLAN
---------------------------------------------------------------------
WindowAgg
Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3
-> Sort
Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max
Sort Key: remote_scan.worker_column_3
-> Custom Scan (Citus Adaptive)
Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max
Task Count: 4
Tasks Shown: One of 4
-> Task
Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery
Node: host=localhost port=xxxxx dbname=regression
-> Hash Right Join
Output: t1.id, t2.a2, t2.id
Inner Unique: true
Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2
-> Hash
Output: t1.id
-> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id
(22 rows)
SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id)
FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id;
id | max
---------------------------------------------------------------------
1 | 10
2 | 20
1 |
(3 rows)
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id)
FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id;
QUERY PLAN
---------------------------------------------------------------------
WindowAgg
Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3
-> Sort
Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max
Sort Key: remote_scan.worker_column_3
-> Custom Scan (Citus Adaptive)
Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max
Task Count: 4
Tasks Shown: One of 4
-> Task
Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (outer_join_columns_testing.t2_30070004 t2 RIGHT JOIN outer_join_columns_testing.t1_30070000 t1 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery
Node: host=localhost port=xxxxx dbname=regression
-> Hash Right Join
Output: t1.id, t2.a2, t2.id
Inner Unique: true
Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2
-> Hash
Output: t1.id
-> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id
(22 rows)
SELECT DISTINCT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id;
id | max
---------------------------------------------------------------------
1 |
1 | 10
2 | 20
(3 rows)
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT DISTINCT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id;
QUERY PLAN
---------------------------------------------------------------------
HashAggregate
Output: remote_scan.id, (max(remote_scan.max) OVER (?)), remote_scan.worker_column_3
Group Key: remote_scan.id, max(remote_scan.max) OVER (?)
-> WindowAgg
Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3
-> Sort
Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max
Sort Key: remote_scan.worker_column_3
-> Custom Scan (Citus Adaptive)
Output: remote_scan.worker_column_3, remote_scan.id, remote_scan.max
Task Count: 4
Tasks Shown: One of 4
-> Task
Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery
Node: host=localhost port=xxxxx dbname=regression
-> Hash Right Join
Output: t1.id, t2.a2, t2.id
Inner Unique: true
Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2
-> Hash
Output: t1.id
-> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id
(25 rows)
CREATE SEQUENCE test_seq START 101;
CREATE OR REPLACE FUNCTION TEST_F(int) returns INT language sql stable as $$ select $1 + 42; $$ ;
-- Issue #7705 also occurs if a target expression includes a column
-- of a distributed table that is on the inner side of a left outer
-- join and a call to nextval(), because nextval() cannot be pushed
-- down, and must be run on the coordinator
SELECT t1.id, TEST_F(t2.a2 + nextval('test_seq') :: int)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
ORDER BY t1.id;
id | test_f
---------------------------------------------------------------------
1 | 153
1 |
2 | 165
(3 rows)
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT t1.id, TEST_F(t2.a2 + nextval('test_seq') :: int)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
ORDER BY t1.id;
QUERY PLAN
---------------------------------------------------------------------
Result
Output: remote_scan.id, ((remote_scan.test_f + (nextval('test_seq'::regclass))::integer) + 42)
-> Sort
Output: remote_scan.id, remote_scan.test_f
Sort Key: remote_scan.id
-> Custom Scan (Citus Adaptive)
Output: remote_scan.id, remote_scan.test_f
Task Count: 4
Tasks Shown: One of 4
-> Task
Query: SELECT worker_column_1 AS id, worker_column_2 AS test_f FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery
Node: host=localhost port=xxxxx dbname=regression
-> Hash Right Join
Output: t1.id, t2.a2
Inner Unique: true
Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2
-> Hash
Output: t1.id
-> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id
(22 rows)
SELECT t1.id, CASE nextval('test_seq') % 2 = 0 WHEN true THEN t2.a2 ELSE 1 END
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
ORDER BY t1.id;
id | case
---------------------------------------------------------------------
1 | 10
1 | 1
2 | 20
(3 rows)
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT t1.id, CASE nextval('test_seq') %2 = 0 WHEN true THEN t2.a2 ELSE 1 END
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
ORDER BY t1.id;
QUERY PLAN
---------------------------------------------------------------------
Result
Output: remote_scan.id, CASE ((nextval('test_seq'::regclass) % '2'::bigint) = 0) WHEN CASE_TEST_EXPR THEN remote_scan."case" ELSE 1 END
-> Sort
Output: remote_scan.id, remote_scan."case"
Sort Key: remote_scan.id
-> Custom Scan (Citus Adaptive)
Output: remote_scan.id, remote_scan."case"
Task Count: 4
Tasks Shown: One of 4
-> Task
Query: SELECT worker_column_1 AS id, worker_column_2 AS "case" FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery
Node: host=localhost port=xxxxx dbname=regression
-> Hash Right Join
Output: t1.id, t2.a2
Inner Unique: true
Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2
-> Hash
Output: t1.id
-> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id
(22 rows)
-- Issue #7787: count distinct of a column from the inner side of a
-- left outer join will have a non-empty varnullingrels in the query
-- tree returned by Postgres 16+, so ensure this is not reflected in
-- the worker subquery constructed by Citus; it has just one relation,
-- for the pushed down subquery.
SELECT COUNT(DISTINCT a2)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id;
count
---------------------------------------------------------------------
2
(1 row)
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT COUNT(DISTINCT a2)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
Output: count(DISTINCT remote_scan.count)
-> Sort
Output: remote_scan.count
Sort Key: remote_scan.count
-> Custom Scan (Citus Adaptive)
Output: remote_scan.count
Task Count: 4
Tasks Shown: One of 4
-> Task
Query: SELECT worker_column_1 AS count FROM (SELECT t2.a2 AS worker_column_1 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery GROUP BY worker_column_1
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Output: t2.a2
Group Key: t2.a2
-> Hash Right Join
Output: t2.a2
Inner Unique: true
Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2
-> Hash
Output: t1.id
-> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id
(25 rows)
-- Issue #7787 also occurs with a HAVING clause
SELECT 1
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
HAVING COUNT(DISTINCT a2) > 1;
?column?
---------------------------------------------------------------------
1
(1 row)
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT 1
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
HAVING COUNT(DISTINCT a2) > 1;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
Output: remote_scan."?column?"
Filter: (count(DISTINCT remote_scan.worker_column_2) > 1)
-> Sort
Output: remote_scan."?column?", remote_scan.worker_column_2
Sort Key: remote_scan.worker_column_2
-> Custom Scan (Citus Adaptive)
Output: remote_scan."?column?", remote_scan.worker_column_2
Task Count: 4
Tasks Shown: One of 4
-> Task
Query: SELECT 1, worker_column_1 AS worker_column_2 FROM (SELECT t2.a2 AS worker_column_1 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery GROUP BY worker_column_1
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Output: 1, t2.a2
Group Key: t2.a2
-> Hash Right Join
Output: t2.a2
Inner Unique: true
Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2
-> Hash
Output: t1.id
-> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id
(26 rows)
-- Check right outer join
SELECT COUNT(DISTINCT a2)
FROM t2 RIGHT OUTER JOIN t1 ON t2.account_id = t1.id;
count
---------------------------------------------------------------------
2
(1 row)
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT COUNT(DISTINCT a2)
FROM t2 RIGHT OUTER JOIN t1 ON t2.account_id = t1.id;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
Output: count(DISTINCT remote_scan.count)
-> Sort
Output: remote_scan.count
Sort Key: remote_scan.count
-> Custom Scan (Citus Adaptive)
Output: remote_scan.count
Task Count: 4
Tasks Shown: One of 4
-> Task
Query: SELECT worker_column_1 AS count FROM (SELECT t2.a2 AS worker_column_1 FROM (outer_join_columns_testing.t2_30070004 t2 RIGHT JOIN outer_join_columns_testing.t1_30070000 t1 ON ((t2.account_id OPERATOR(pg_catalog.=) t1.id)))) worker_subquery GROUP BY worker_column_1
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Output: t2.a2
Group Key: t2.a2
-> Hash Right Join
Output: t2.a2
Inner Unique: true
Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2
-> Hash
Output: t1.id
-> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id
(25 rows)
-- Check both count distinct and having clause
SELECT COUNT(DISTINCT a2)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
HAVING COUNT(DISTINCT t2.id) > 1;
count
---------------------------------------------------------------------
2
(1 row)
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT COUNT(DISTINCT a2)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
HAVING COUNT(DISTINCT t2.id) > 1;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
Output: count(DISTINCT remote_scan.count)
Filter: (count(DISTINCT remote_scan.worker_column_2) > 1)
-> Sort
Output: remote_scan.count, remote_scan.worker_column_2
Sort Key: remote_scan.count
-> Custom Scan (Citus Adaptive)
Output: remote_scan.count, remote_scan.worker_column_2
Task Count: 4
Tasks Shown: One of 4
-> Task
Query: SELECT worker_column_1 AS count, worker_column_2 FROM (SELECT t2.a2 AS worker_column_1, t2.id AS worker_column_2 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery GROUP BY worker_column_1, worker_column_2
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Output: t2.a2, t2.id
Group Key: t2.a2, t2.id
-> Hash Right Join
Output: t2.a2, t2.id
Inner Unique: true
Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2
-> Hash
Output: t1.id
-> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id
(26 rows)
--- cleanup
\set VERBOSITY TERSE
DROP SCHEMA outer_join_columns_testing CASCADE;
NOTICE: drop cascades to 4 other objects
RESET all;

View File

@ -2,10 +2,18 @@
--- do not cause issues for the postgres planner, in particular postgres versions 16+, where the --- do not cause issues for the postgres planner, in particular postgres versions 16+, where the
--- varnullingrels field of a VAR node may contain relids of join relations that can make the var --- varnullingrels field of a VAR node may contain relids of join relations that can make the var
--- NULL; in a rewritten distributed query without a join such relids do not have a meaning. --- NULL; in a rewritten distributed query without a join such relids do not have a meaning.
--- Issue #7705: [SEGFAULT] Querying distributed tables with window partition causes segmentation fault -- This test has an alternative goldfile because of the following feature in Postgres 16:
--- https://github.com/citusdata/citus/issues/7705 -- https://github.com/postgres/postgres/commit/1349d2790bf48a4de072931c722f39337e72055e
CREATE SCHEMA issue_7705; --
SET search_path to 'issue_7705'; SHOW server_version \gset
SELECT substring(:'server_version', '\d+')::int >= 16 AS server_version_ge_16;
server_version_ge_16
---------------------------------------------------------------------
f
(1 row)
CREATE SCHEMA outer_join_columns_testing;
SET search_path to 'outer_join_columns_testing';
SET citus.next_shard_id TO 30070000; SET citus.next_shard_id TO 30070000;
SET citus.shard_replication_factor TO 1; SET citus.shard_replication_factor TO 1;
SET citus.enable_local_execution TO ON; SET citus.enable_local_execution TO ON;
@ -17,7 +25,7 @@ SELECT create_distributed_table('t1', 'id');
NOTICE: Copying data from local table... NOTICE: Copying data from local table...
NOTICE: copying the data has completed NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk. DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$issue_7705.t1$$) HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$outer_join_columns_testing.t1$$)
create_distributed_table create_distributed_table
--------------------------------------------------------------------- ---------------------------------------------------------------------
@ -27,7 +35,7 @@ SELECT create_distributed_table('t2', 'account_id');
NOTICE: Copying data from local table... NOTICE: Copying data from local table...
NOTICE: copying the data has completed NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk. DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$issue_7705.t2$$) HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$outer_join_columns_testing.t2$$)
create_distributed_table create_distributed_table
--------------------------------------------------------------------- ---------------------------------------------------------------------
@ -50,7 +58,7 @@ FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id;
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id;
QUERY PLAN QUERY PLAN
--------------------------------------------------------------------- ---------------------------------------------------------------------
WindowAgg WindowAgg
Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3 Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3
@ -62,17 +70,17 @@ FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id;
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
-> Task -> Task
Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (issue_7705.t1_30070000 t1 LEFT JOIN issue_7705.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery
Node: host=localhost port=xxxxx dbname=regression Node: host=localhost port=xxxxx dbname=regression
-> Hash Right Join -> Hash Right Join
Output: t1.id, t2.a2, t2.id Output: t1.id, t2.a2, t2.id
Inner Unique: true Inner Unique: true
Hash Cond: (t2.account_id = t1.id) Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on issue_7705.t2_30070004 t2 -> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2 Output: t2.id, t2.account_id, t2.a2
-> Hash -> Hash
Output: t1.id Output: t1.id
-> Seq Scan on issue_7705.t1_30070000 t1 -> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id Output: t1.id
(22 rows) (22 rows)
@ -88,7 +96,7 @@ FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id;
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) SELECT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id)
FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id; FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id;
QUERY PLAN QUERY PLAN
--------------------------------------------------------------------- ---------------------------------------------------------------------
WindowAgg WindowAgg
Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3 Output: remote_scan.id, max(remote_scan.max) OVER (?), remote_scan.worker_column_3
@ -100,17 +108,17 @@ FROM t2 RIGHT OUTER JOIN t1 ON t1.id = t2.account_id;
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
-> Task -> Task
Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (issue_7705.t2_30070004 t2 RIGHT JOIN issue_7705.t1_30070000 t1 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (outer_join_columns_testing.t2_30070004 t2 RIGHT JOIN outer_join_columns_testing.t1_30070000 t1 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery
Node: host=localhost port=xxxxx dbname=regression Node: host=localhost port=xxxxx dbname=regression
-> Hash Right Join -> Hash Right Join
Output: t1.id, t2.a2, t2.id Output: t1.id, t2.a2, t2.id
Inner Unique: true Inner Unique: true
Hash Cond: (t2.account_id = t1.id) Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on issue_7705.t2_30070004 t2 -> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2 Output: t2.id, t2.account_id, t2.a2
-> Hash -> Hash
Output: t1.id Output: t1.id
-> Seq Scan on issue_7705.t1_30070000 t1 -> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id Output: t1.id
(22 rows) (22 rows)
@ -126,7 +134,7 @@ FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id;
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF) EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT DISTINCT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id) SELECT DISTINCT t1.id, MAX(t2.a2) OVER (PARTITION BY t2.id)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id; FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id;
QUERY PLAN QUERY PLAN
--------------------------------------------------------------------- ---------------------------------------------------------------------
HashAggregate HashAggregate
Output: remote_scan.id, (max(remote_scan.max) OVER (?)), remote_scan.worker_column_3 Output: remote_scan.id, (max(remote_scan.max) OVER (?)), remote_scan.worker_column_3
@ -141,17 +149,17 @@ FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id;
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
-> Task -> Task
Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (issue_7705.t1_30070000 t1 LEFT JOIN issue_7705.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery Query: SELECT worker_column_1 AS id, worker_column_2 AS max, worker_column_3 FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2, t2.id AS worker_column_3 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery
Node: host=localhost port=xxxxx dbname=regression Node: host=localhost port=xxxxx dbname=regression
-> Hash Right Join -> Hash Right Join
Output: t1.id, t2.a2, t2.id Output: t1.id, t2.a2, t2.id
Inner Unique: true Inner Unique: true
Hash Cond: (t2.account_id = t1.id) Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on issue_7705.t2_30070004 t2 -> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2 Output: t2.id, t2.account_id, t2.a2
-> Hash -> Hash
Output: t1.id Output: t1.id
-> Seq Scan on issue_7705.t1_30070000 t1 -> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id Output: t1.id
(25 rows) (25 rows)
@ -175,7 +183,7 @@ EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT t1.id, TEST_F(t2.a2 + nextval('test_seq') :: int) SELECT t1.id, TEST_F(t2.a2 + nextval('test_seq') :: int)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
ORDER BY t1.id; ORDER BY t1.id;
QUERY PLAN QUERY PLAN
--------------------------------------------------------------------- ---------------------------------------------------------------------
Result Result
Output: remote_scan.id, ((remote_scan.test_f + (nextval('test_seq'::regclass))::integer) + 42) Output: remote_scan.id, ((remote_scan.test_f + (nextval('test_seq'::regclass))::integer) + 42)
@ -187,17 +195,17 @@ ORDER BY t1.id;
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
-> Task -> Task
Query: SELECT worker_column_1 AS id, worker_column_2 AS test_f FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2 FROM (issue_7705.t1_30070000 t1 LEFT JOIN issue_7705.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery Query: SELECT worker_column_1 AS id, worker_column_2 AS test_f FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery
Node: host=localhost port=xxxxx dbname=regression Node: host=localhost port=xxxxx dbname=regression
-> Hash Right Join -> Hash Right Join
Output: t1.id, t2.a2 Output: t1.id, t2.a2
Inner Unique: true Inner Unique: true
Hash Cond: (t2.account_id = t1.id) Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on issue_7705.t2_30070004 t2 -> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2 Output: t2.id, t2.account_id, t2.a2
-> Hash -> Hash
Output: t1.id Output: t1.id
-> Seq Scan on issue_7705.t1_30070000 t1 -> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id Output: t1.id
(22 rows) (22 rows)
@ -215,7 +223,7 @@ EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT t1.id, CASE nextval('test_seq') %2 = 0 WHEN true THEN t2.a2 ELSE 1 END SELECT t1.id, CASE nextval('test_seq') %2 = 0 WHEN true THEN t2.a2 ELSE 1 END
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
ORDER BY t1.id; ORDER BY t1.id;
QUERY PLAN QUERY PLAN
--------------------------------------------------------------------- ---------------------------------------------------------------------
Result Result
Output: remote_scan.id, CASE ((nextval('test_seq'::regclass) % '2'::bigint) = 0) WHEN CASE_TEST_EXPR THEN remote_scan."case" ELSE 1 END Output: remote_scan.id, CASE ((nextval('test_seq'::regclass) % '2'::bigint) = 0) WHEN CASE_TEST_EXPR THEN remote_scan."case" ELSE 1 END
@ -227,22 +235,180 @@ ORDER BY t1.id;
Task Count: 4 Task Count: 4
Tasks Shown: One of 4 Tasks Shown: One of 4
-> Task -> Task
Query: SELECT worker_column_1 AS id, worker_column_2 AS "case" FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2 FROM (issue_7705.t1_30070000 t1 LEFT JOIN issue_7705.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery Query: SELECT worker_column_1 AS id, worker_column_2 AS "case" FROM (SELECT t1.id AS worker_column_1, t2.a2 AS worker_column_2 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery
Node: host=localhost port=xxxxx dbname=regression Node: host=localhost port=xxxxx dbname=regression
-> Hash Right Join -> Hash Right Join
Output: t1.id, t2.a2 Output: t1.id, t2.a2
Inner Unique: true Inner Unique: true
Hash Cond: (t2.account_id = t1.id) Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on issue_7705.t2_30070004 t2 -> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2 Output: t2.id, t2.account_id, t2.a2
-> Hash -> Hash
Output: t1.id Output: t1.id
-> Seq Scan on issue_7705.t1_30070000 t1 -> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id Output: t1.id
(22 rows) (22 rows)
-- Issue #7787: count distinct of a column from the inner side of a
-- left outer join will have a non-empty varnullingrels in the query
-- tree returned by Postgres 16+, so ensure this is not reflected in
-- the worker subquery constructed by Citus; it has just one relation,
-- for the pushed down subquery.
SELECT COUNT(DISTINCT a2)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id;
count
---------------------------------------------------------------------
2
(1 row)
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT COUNT(DISTINCT a2)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
Output: count(DISTINCT remote_scan.count)
-> Custom Scan (Citus Adaptive)
Output: remote_scan.count
Task Count: 4
Tasks Shown: One of 4
-> Task
Query: SELECT worker_column_1 AS count FROM (SELECT t2.a2 AS worker_column_1 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery GROUP BY worker_column_1
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Output: t2.a2
Group Key: t2.a2
-> Hash Right Join
Output: t2.a2
Inner Unique: true
Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2
-> Hash
Output: t1.id
-> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id
(22 rows)
-- Issue #7787 also occurs with a HAVING clause
SELECT 1
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
HAVING COUNT(DISTINCT a2) > 1;
?column?
---------------------------------------------------------------------
1
(1 row)
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT 1
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
HAVING COUNT(DISTINCT a2) > 1;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
Output: remote_scan."?column?"
Filter: (count(DISTINCT remote_scan.worker_column_2) > 1)
-> Custom Scan (Citus Adaptive)
Output: remote_scan."?column?", remote_scan.worker_column_2
Task Count: 4
Tasks Shown: One of 4
-> Task
Query: SELECT 1, worker_column_1 AS worker_column_2 FROM (SELECT t2.a2 AS worker_column_1 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery GROUP BY worker_column_1
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Output: 1, t2.a2
Group Key: t2.a2
-> Hash Right Join
Output: t2.a2
Inner Unique: true
Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2
-> Hash
Output: t1.id
-> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id
(23 rows)
-- Check right outer join
SELECT COUNT(DISTINCT a2)
FROM t2 RIGHT OUTER JOIN t1 ON t2.account_id = t1.id;
count
---------------------------------------------------------------------
2
(1 row)
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT COUNT(DISTINCT a2)
FROM t2 RIGHT OUTER JOIN t1 ON t2.account_id = t1.id;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
Output: count(DISTINCT remote_scan.count)
-> Custom Scan (Citus Adaptive)
Output: remote_scan.count
Task Count: 4
Tasks Shown: One of 4
-> Task
Query: SELECT worker_column_1 AS count FROM (SELECT t2.a2 AS worker_column_1 FROM (outer_join_columns_testing.t2_30070004 t2 RIGHT JOIN outer_join_columns_testing.t1_30070000 t1 ON ((t2.account_id OPERATOR(pg_catalog.=) t1.id)))) worker_subquery GROUP BY worker_column_1
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Output: t2.a2
Group Key: t2.a2
-> Hash Right Join
Output: t2.a2
Inner Unique: true
Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2
-> Hash
Output: t1.id
-> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id
(22 rows)
-- Check both count distinct and having clause
SELECT COUNT(DISTINCT a2)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
HAVING COUNT(DISTINCT t2.id) > 1;
count
---------------------------------------------------------------------
2
(1 row)
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT COUNT(DISTINCT a2)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
HAVING COUNT(DISTINCT t2.id) > 1;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
Output: count(DISTINCT remote_scan.count)
Filter: (count(DISTINCT remote_scan.worker_column_2) > 1)
-> Custom Scan (Citus Adaptive)
Output: remote_scan.count, remote_scan.worker_column_2
Task Count: 4
Tasks Shown: One of 4
-> Task
Query: SELECT worker_column_1 AS count, worker_column_2 FROM (SELECT t2.a2 AS worker_column_1, t2.id AS worker_column_2 FROM (outer_join_columns_testing.t1_30070000 t1 LEFT JOIN outer_join_columns_testing.t2_30070004 t2 ON ((t1.id OPERATOR(pg_catalog.=) t2.account_id)))) worker_subquery GROUP BY worker_column_1, worker_column_2
Node: host=localhost port=xxxxx dbname=regression
-> HashAggregate
Output: t2.a2, t2.id
Group Key: t2.a2, t2.id
-> Hash Right Join
Output: t2.a2, t2.id
Inner Unique: true
Hash Cond: (t2.account_id = t1.id)
-> Seq Scan on outer_join_columns_testing.t2_30070004 t2
Output: t2.id, t2.account_id, t2.a2
-> Hash
Output: t1.id
-> Seq Scan on outer_join_columns_testing.t1_30070000 t1
Output: t1.id
(23 rows)
--- cleanup --- cleanup
\set VERBOSITY TERSE \set VERBOSITY TERSE
DROP SCHEMA issue_7705 CASCADE; DROP SCHEMA outer_join_columns_testing CASCADE;
NOTICE: drop cascades to 4 other objects NOTICE: drop cascades to 4 other objects
RESET all; RESET all;

View File

@ -85,7 +85,7 @@ test: forcedelegation_functions system_queries
test: join_pushdown test: join_pushdown
test: multi_subquery_union multi_subquery_in_where_clause multi_subquery_misc statement_cancel_error_message test: multi_subquery_union multi_subquery_in_where_clause multi_subquery_misc statement_cancel_error_message
test: multi_agg_distinct test: multi_agg_distinct
test: multi_limit_clause_approximate multi_outer_join_reference multi_single_relation_subquery multi_prepare_plsql set_role_in_transaction test: multi_limit_clause_approximate multi_outer_join_reference multi_outer_join_columns multi_single_relation_subquery multi_prepare_plsql set_role_in_transaction
test: multi_reference_table multi_select_for_update relation_access_tracking pg13_with_ties test: multi_reference_table multi_select_for_update relation_access_tracking pg13_with_ties
test: custom_aggregate_support aggregate_support tdigest_aggregate_support test: custom_aggregate_support aggregate_support tdigest_aggregate_support
test: multi_average_expression multi_working_columns multi_having_pushdown having_subquery test: multi_average_expression multi_working_columns multi_having_pushdown having_subquery
@ -104,7 +104,7 @@ test: multi_dropped_column_aliases foreign_key_restriction_enforcement
test: binary_protocol test: binary_protocol
test: alter_table_set_access_method test: alter_table_set_access_method
test: alter_distributed_table test: alter_distributed_table
test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758 issue_7477 issue_7705 test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758 issue_7477
test: object_propagation_debug test: object_propagation_debug
test: undistribute_table test: undistribute_table
test: run_command_on_all_nodes test: run_command_on_all_nodes

View File

@ -2,11 +2,16 @@
--- do not cause issues for the postgres planner, in particular postgres versions 16+, where the --- do not cause issues for the postgres planner, in particular postgres versions 16+, where the
--- varnullingrels field of a VAR node may contain relids of join relations that can make the var --- varnullingrels field of a VAR node may contain relids of join relations that can make the var
--- NULL; in a rewritten distributed query without a join such relids do not have a meaning. --- NULL; in a rewritten distributed query without a join such relids do not have a meaning.
--- Issue #7705: [SEGFAULT] Querying distributed tables with window partition causes segmentation fault
--- https://github.com/citusdata/citus/issues/7705
CREATE SCHEMA issue_7705; -- This test has an alternative goldfile because of the following feature in Postgres 16:
SET search_path to 'issue_7705'; -- https://github.com/postgres/postgres/commit/1349d2790bf48a4de072931c722f39337e72055e
--
SHOW server_version \gset
SELECT substring(:'server_version', '\d+')::int >= 16 AS server_version_ge_16;
CREATE SCHEMA outer_join_columns_testing;
SET search_path to 'outer_join_columns_testing';
SET citus.next_shard_id TO 30070000; SET citus.next_shard_id TO 30070000;
SET citus.shard_replication_factor TO 1; SET citus.shard_replication_factor TO 1;
SET citus.enable_local_execution TO ON; SET citus.enable_local_execution TO ON;
@ -66,7 +71,43 @@ SELECT t1.id, CASE nextval('test_seq') %2 = 0 WHEN true THEN t2.a2 ELSE 1 END
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
ORDER BY t1.id; ORDER BY t1.id;
-- Issue #7787: count distinct of a column from the inner side of a
-- left outer join will have a non-empty varnullingrels in the query
-- tree returned by Postgres 16+, so ensure this is not reflected in
-- the worker subquery constructed by Citus; it has just one relation,
-- for the pushed down subquery.
SELECT COUNT(DISTINCT a2)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id;
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT COUNT(DISTINCT a2)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id;
-- Issue #7787 also occurs with a HAVING clause
SELECT 1
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
HAVING COUNT(DISTINCT a2) > 1;
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT 1
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
HAVING COUNT(DISTINCT a2) > 1;
-- Check right outer join
SELECT COUNT(DISTINCT a2)
FROM t2 RIGHT OUTER JOIN t1 ON t2.account_id = t1.id;
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT COUNT(DISTINCT a2)
FROM t2 RIGHT OUTER JOIN t1 ON t2.account_id = t1.id;
-- Check both count distinct and having clause
SELECT COUNT(DISTINCT a2)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
HAVING COUNT(DISTINCT t2.id) > 1;
EXPLAIN (VERBOSE, COSTS OFF, TIMING OFF)
SELECT COUNT(DISTINCT a2)
FROM t1 LEFT OUTER JOIN t2 ON t1.id = t2.account_id
HAVING COUNT(DISTINCT t2.id) > 1;
--- cleanup --- cleanup
\set VERBOSITY TERSE \set VERBOSITY TERSE
DROP SCHEMA issue_7705 CASCADE; DROP SCHEMA outer_join_columns_testing CASCADE;
RESET all; RESET all;