Add more tests

onur-leftjoin_push-improvements
ebruaydingol 2025-08-14 11:01:50 +00:00 committed by eaydingol
parent a964d45323
commit 7a5f5c1c08
5 changed files with 202 additions and 3 deletions

View File

@ -336,13 +336,46 @@ DEBUG: Router planner cannot handle multi-shard select queries
4 | 130
(2 rows)
SET client_min_messages TO DEBUG3;
CREATE TABLE users_ref(user_id int, dept int);
SELECT create_reference_table('users_ref');
create_reference_table
---------------------------------------------------------------------
(1 row)
INSERT INTO users_ref VALUES (1, 3), (2, 4), (3, 3), (4, 4);
DEBUG: Creating router plan
DEBUG: assigned task to node localhost:xxxxx
-- In PG17, the planner can pull up a correlated ANY subquery to a join, resulting
-- in a different query plan compared to PG16. Specifically, for the following query
-- the rewritten query has a lateral recurring outer join, which requires recursive
-- computation of the inner part. However, this join is not analyzed during the recursive
-- planning step, as it is performed on the original query structure. As a result,
-- the lateral join is not recursively planned, and a lateral join error is raised
-- at a later stage.
SELECT user_id FROM
users RIGHT JOIN users_ref USING (user_id)
WHERE users_ref.dept IN
(
SELECT events.event_type FROM events WHERE events.user_id = users.user_id
) ORDER BY 1 LIMIT 1;
DEBUG: no shard pruning constraints on users found
DEBUG: shard count after pruning for users: 2
DEBUG: no shard pruning constraints on events found
DEBUG: shard count after pruning for events: 2
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: a push down safe right join with recurring left side
ERROR: cannot perform a lateral outer join when a distributed subquery references a reference table
RESET client_min_messages;
RESET search_path;
DROP SCHEMA pg17_corr_subq_folding CASCADE;
NOTICE: drop cascades to 3 other objects
NOTICE: drop cascades to 5 other objects
DETAIL: drop cascades to table pg17_corr_subq_folding.test
drop cascades to table pg17_corr_subq_folding.users
drop cascades to table pg17_corr_subq_folding.events
drop cascades to table pg17_corr_subq_folding.users_ref
drop cascades to table pg17_corr_subq_folding.users_ref_20240023
-- Queries with outer joins with pseudoconstant quals work only in PG17
-- Relevant PG17 commit:
-- https://github.com/postgres/postgres/commit/9e9931d2b

View File

@ -280,13 +280,57 @@ DEBUG: Router planner cannot handle multi-shard select queries
4 | 130
(2 rows)
SET client_min_messages TO DEBUG3;
CREATE TABLE users_ref(user_id int, dept int);
SELECT create_reference_table('users_ref');
create_reference_table
---------------------------------------------------------------------
(1 row)
INSERT INTO users_ref VALUES (1, 3), (2, 4), (3, 3), (4, 4);
DEBUG: Creating router plan
DEBUG: assigned task to node localhost:xxxxx
-- In PG17, the planner can pull up a correlated ANY subquery to a join, resulting
-- in a different query plan compared to PG16. Specifically, for the following query
-- the rewritten query has a lateral recurring outer join, which requires recursive
-- computation of the inner part. However, this join is not analyzed during the recursive
-- planning step, as it is performed on the original query structure. As a result,
-- the lateral join is not recursively planned, and a lateral join error is raised
-- at a later stage.
SELECT user_id FROM
users RIGHT JOIN users_ref USING (user_id)
WHERE users_ref.dept IN
(
SELECT events.event_type FROM events WHERE events.user_id = users.user_id
) ORDER BY 1 LIMIT 1;
DEBUG: no shard pruning constraints on events found
DEBUG: shard count after pruning for events: 2
DEBUG: no shard pruning constraints on users found
DEBUG: shard count after pruning for users: 2
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: a push down safe right join with recurring left side
DEBUG: push down of limit count: 1
DEBUG: no shard pruning constraints on events found
DEBUG: shard count after pruning for events: 2
DEBUG: no shard pruning constraints on users found
DEBUG: shard count after pruning for users: 2
DEBUG: assigned task to node localhost:xxxxx
DEBUG: assigned task to node localhost:xxxxx
user_id
---------------------------------------------------------------------
1
(1 row)
RESET client_min_messages;
RESET search_path;
DROP SCHEMA pg17_corr_subq_folding CASCADE;
NOTICE: drop cascades to 3 other objects
NOTICE: drop cascades to 5 other objects
DETAIL: drop cascades to table pg17_corr_subq_folding.test
drop cascades to table pg17_corr_subq_folding.users
drop cascades to table pg17_corr_subq_folding.events
drop cascades to table pg17_corr_subq_folding.users_ref
drop cascades to table pg17_corr_subq_folding.users_ref_20240023
-- Queries with outer joins with pseudoconstant quals work only in PG17
-- Relevant PG17 commit:
-- https://github.com/postgres/postgres/commit/9e9931d2b

View File

@ -845,4 +845,100 @@ DEBUG: assigned task to node localhost:xxxxx
(1 row)
SET client_min_messages TO ERROR;
-- The following queries trigger recursive computing, recurring outer-join push down
-- methods introduced in #7973 can be enhanced to cover these cases in the future.
CREATE TABLE r1_local AS SELECT * FROM r1;
EXPLAIN (COSTS OFF) SELECT count(*) FROM r1_local LEFT JOIN d1 ON r1_local.a = d1.a;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Seq Scan on r1_local
-> Distributed Subplan XXX_2
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on d1_1520001 d1
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Aggregate
-> Merge Left Join
Merge Cond: (intermediate_result.a = intermediate_result_1.a)
-> Sort
Sort Key: intermediate_result.a
-> Function Scan on read_intermediate_result intermediate_result
-> Sort
Sort Key: intermediate_result_1.a
-> Function Scan on read_intermediate_result intermediate_result_1
(23 rows)
EXPLAIN (COSTS OFF) SELECT count(*) FROM (SELECT * FROM r1) sq LEFT JOIN d1 ON sq.a = d1.a;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on d1_1520001 d1
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Aggregate
-> Merge Right Join
Merge Cond: (intermediate_result.a = r1.a)
-> Sort
Sort Key: intermediate_result.a
-> Function Scan on read_intermediate_result intermediate_result
-> Sort
Sort Key: r1.a
-> Seq Scan on r1_1520000 r1
(21 rows)
EXPLAIN (COSTS OFF) SELECT count(*) FROM r1 LEFT JOIN (d1 INNER JOIN d2 on d1.a = d2.a) on r1.a = d2.a;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on d1_1520001 d1
-> Distributed Subplan XXX_2
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on d2_1520005 d2
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Aggregate
-> Merge Left Join
Merge Cond: (r1.a = intermediate_result_1.a)
-> Sort
Sort Key: r1.a
-> Seq Scan on r1_1520000 r1
-> Materialize
-> Merge Join
Merge Cond: (intermediate_result.a = intermediate_result_1.a)
-> Sort
Sort Key: intermediate_result.a
-> Function Scan on read_intermediate_result intermediate_result
-> Sort
Sort Key: intermediate_result_1.a
-> Function Scan on read_intermediate_result intermediate_result_1
(34 rows)
DROP SCHEMA recurring_join_pushdown CASCADE;

View File

@ -165,8 +165,26 @@ WHERE d1.user_id = users.user_id
AND users.dept IN (3,4)
AND users.user_id = d2.user_id) dt
GROUP BY dept;
RESET client_min_messages;
-- Send DEBUG-level messages to the client for the statements below.
SET client_min_messages TO DEBUG3;
-- Reference table holding (user_id, dept) pairs; it is the right-hand input of
-- the RIGHT JOIN in the query below.
CREATE TABLE users_ref(user_id int, dept int);
SELECT create_reference_table('users_ref');
INSERT INTO users_ref VALUES (1, 3), (2, 4), (3, 3), (4, 4);
-- In PG17, the planner can pull up a correlated ANY subquery to a join, resulting
-- in a different query plan compared to PG16. Specifically, for the following query
-- the rewritten query has a lateral recurring outer join, which requires recursive
-- computation of the inner part. However, this join is not analyzed during the recursive
-- planning step, as it is performed on the original query structure. As a result,
-- the lateral join is not recursively planned, and a lateral join error is raised
-- at a later stage.
SELECT user_id FROM
users RIGHT JOIN users_ref USING (user_id)
WHERE users_ref.dept IN
(
SELECT events.event_type FROM events WHERE events.user_id = users.user_id
) ORDER BY 1 LIMIT 1;
-- Restore the session settings and drop the schema together with its tables.
RESET client_min_messages;
RESET search_path;
DROP SCHEMA pg17_corr_subq_folding CASCADE;

View File

@ -116,4 +116,12 @@ SELECT count(*) FROM d1 RIGHT JOIN r1 USING (a);
SELECT count(*) FROM (SELECT * FROM d1) AS t1 RIGHT JOIN r1 USING (a);
SET client_min_messages TO ERROR;
-- The following queries trigger recursive computing, recurring outer-join push down
-- methods introduced in #7973 can be enhanced to cover these cases in the future.
-- Case 1: the outer side is r1_local, a copy of r1 created with CREATE TABLE AS.
CREATE TABLE r1_local AS SELECT * FROM r1;
EXPLAIN (COSTS OFF) SELECT count(*) FROM r1_local LEFT JOIN d1 ON r1_local.a = d1.a;
-- Case 2: the outer side is a subquery over r1 instead of the table itself.
EXPLAIN (COSTS OFF) SELECT count(*) FROM (SELECT * FROM r1) sq LEFT JOIN d1 ON sq.a = d1.a;
-- Case 3: the inner side of the LEFT JOIN is itself an inner join of d1 and d2.
EXPLAIN (COSTS OFF) SELECT count(*) FROM r1 LEFT JOIN (d1 INNER JOIN d2 on d1.a = d2.a) on r1.a = d2.a;
DROP SCHEMA recurring_join_pushdown CASCADE;