update tests

onur-leftjoin_push-improvements
eaydingol 2025-07-25 14:31:11 +03:00
parent cd9f6ae313
commit 96a7d70fa9
5 changed files with 39 additions and 39 deletions

View File

@ -754,8 +754,7 @@ RecursivelyPlanRecurringTupleOuterJoinWalker(Node *node, Query *query,
{
ereport(DEBUG1, (errmsg("recursively planning right side of "
"the left join since the outer side "
"is a recurring rel and it is not "
"feasible to push down")));
"is a recurring rel")));
RecursivelyPlanDistributedJoinNode(rightNode, query,
recursivePlanningContext);
}

View File

@ -765,16 +765,10 @@ DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_133000
raw_events_second.user_id
FROM
reference_table LEFT JOIN raw_events_second ON reference_table.user_id = raw_events_second.user_id;
DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
DEBUG: recursively planning distributed relation "raw_events_second" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "raw_events_second" to a subquery
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM multi_insert_select.raw_events_second WHERE true
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table LEFT JOIN (SELECT raw_events_second_1.user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) raw_events_second_1) raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id)))
DEBUG: Creating router plan
DEBUG: Collecting INSERT ... SELECT results on coordinator
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL)
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_second.user_id IS NOT NULL)
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
@ -3378,23 +3372,31 @@ $$);
Task Count: 1
(4 rows)
-- verify that insert select cannot be pushed down when we have reference table in outside of outer join.
-- verify that insert select can be pushed down when we have reference table in outside of outer join.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT a.id FROM dist_table_5 a LEFT JOIN ref_table_1 b ON (true) RIGHT JOIN ref_table_1 c ON (true);
$$);
coordinator_plan
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: pull to coordinator
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Custom Scan (Citus Adaptive)
Task Count: 4
(6 rows)
Custom Scan (Citus Adaptive)
Task Count: 4
(2 rows)
-- verify that insert select cannot be pushed down when it has a recurring outer join in a subquery.
-- verify that insert select can be pushed down when we have reference table in outside of left join.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM ref_table_1 LEFT JOIN dist_table_5 USING(id);
$$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus INSERT ... SELECT)
INSERT/SELECT method: repartition
-> Custom Scan (Citus Adaptive)
Task Count: 4
(4 rows)
-- verify that insert select cannot be pushed down when we have reference table in outside of left join and joined on non-distribution column.
SELECT coordinator_plan($$
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT ref_table_1.id FROM ref_table_1 LEFT JOIN dist_table_5 ON ref_table_1.id = dist_table_5.id2;
$$);
coordinator_plan
---------------------------------------------------------------------

View File

@ -62,12 +62,9 @@ RIGHT JOIN (
RIGHT JOIN tbl_dist1 USING (id)
) AS table_4 USING (id);
DEBUG: CTE cte_0 is going to be inlined via distributed planning
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
DEBUG: recursively planning distributed relation "tbl_dist1" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "tbl_dist1" to a subquery
DEBUG: generating subplan XXX_1 for subquery SELECT id FROM multi_recursive.tbl_dist1 WHERE true
DEBUG: generating subplan XXX_1 for subquery SELECT tbl_ref1.id FROM (multi_recursive.tbl_ref1 LEFT JOIN multi_recursive.tbl_dist1 USING (id))
DEBUG: push down of limit count: 0
DEBUG: generating subplan XXX_2 for subquery SELECT id FROM (SELECT tbl_dist1.id FROM multi_recursive.tbl_dist1 WHERE (tbl_dist1.id OPERATOR(pg_catalog.=) ANY (SELECT tbl_ref1.id FROM (multi_recursive.tbl_ref1 LEFT JOIN (SELECT tbl_dist1_2.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) tbl_dist1_2) tbl_dist1_1 USING (id))))) cte_0 LIMIT 0
DEBUG: generating subplan XXX_2 for subquery SELECT id FROM (SELECT tbl_dist1.id FROM multi_recursive.tbl_dist1 WHERE (tbl_dist1.id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)))) cte_0 LIMIT 0
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(table_4.id) AS count FROM (multi_recursive.tbl_dist1 RIGHT JOIN (SELECT table_5.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_5 RIGHT JOIN multi_recursive.tbl_dist1 tbl_dist1_1 USING (id))) table_4 USING (id))
DEBUG: generating subplan XXX_1 for subquery SELECT table_5.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_5 RIGHT JOIN multi_recursive.tbl_dist1 USING (id))
DEBUG: recursively planning left side of the right join since the outer side is a recurring rel
@ -76,11 +73,8 @@ DEBUG: Wrapping relation "tbl_dist1" to a subquery
DEBUG: generating subplan XXX_2 for subquery SELECT id FROM multi_recursive.tbl_dist1 WHERE true
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(table_4.id) AS count FROM ((SELECT tbl_dist1_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) tbl_dist1_1) tbl_dist1 RIGHT JOIN (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 USING (id))
DEBUG: generating subplan XXX_1 for CTE cte_0: SELECT id FROM multi_recursive.tbl_dist1 WHERE (id OPERATOR(pg_catalog.=) ANY (SELECT tbl_ref1.id FROM (multi_recursive.tbl_ref1 LEFT JOIN multi_recursive.tbl_dist1 tbl_dist1_1 USING (id))))
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
DEBUG: recursively planning distributed relation "tbl_dist1" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "tbl_dist1" to a subquery
DEBUG: generating subplan XXX_1 for subquery SELECT id FROM multi_recursive.tbl_dist1 WHERE true
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id FROM multi_recursive.tbl_dist1 WHERE (id OPERATOR(pg_catalog.=) ANY (SELECT tbl_ref1.id FROM (multi_recursive.tbl_ref1 LEFT JOIN (SELECT tbl_dist1_2.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) tbl_dist1_2) tbl_dist1_1 USING (id))))
DEBUG: generating subplan XXX_1 for subquery SELECT tbl_ref1.id FROM (multi_recursive.tbl_ref1 LEFT JOIN multi_recursive.tbl_dist1 USING (id))
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id FROM multi_recursive.tbl_dist1 WHERE (id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)))
DEBUG: generating subplan XXX_2 for subquery SELECT id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) cte_0 LIMIT 0
DEBUG: generating subplan XXX_3 for subquery SELECT table_5.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_5 RIGHT JOIN multi_recursive.tbl_dist1 USING (id))
DEBUG: recursively planning left side of the right join since the outer side is a recurring rel

View File

@ -300,7 +300,7 @@ ORDER BY 1, 2;
5 | 5
(2 rows)
-- reference table LEFT JOIN distributed table in WHERE is still not ok
-- reference table LEFT JOIN distributed table in WHERE is ok
SELECT user_id, value_2 FROM users_table WHERE
value_1 > 1 AND value_1 < 3
AND value_2 >= 5
@ -361,7 +361,12 @@ SELECT user_id, value_2 FROM users_table WHERE
HAVING sum(submit_card_info) > 0
)
ORDER BY 1, 2;
ERROR: cannot perform a lateral outer join when a distributed subquery references a reference table
user_id | value_2
---------------------------------------------------------------------
5 | 5
5 | 5
(2 rows)
-- non-partition key equality with reference table
SELECT
user_id, count(*)

View File

@ -90,7 +90,7 @@ SELECT * FROM r1_local LEFT JOIN d1_local using (a, b) ORDER BY 1, 2;
SET client_min_messages TO DEBUG1;
-- Test that the join is not pushed down when joined on a non-distributed column
SELECT count(*) FROM r1 LEFT JOIN d1 USING (b);
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel and it is not feasible to push down
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
DEBUG: recursively planning distributed relation "d1" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "d1" to a subquery
DEBUG: generating subplan XXX_1 for subquery SELECT b FROM recurring_join_pushdown.d1 WHERE true
@ -196,7 +196,7 @@ SELECT count(*) FROM r1 LEFT JOIN d1 ON r1.b = d1.b;
DEBUG: no shard pruning constraints on d1 found
DEBUG: shard count after pruning for d1: 4
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel and it is not feasible to push down
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
DEBUG: recursively planning distributed relation "d1" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "d1" to a subquery
DEBUG: no shard pruning constraints on d1 found
@ -226,7 +226,7 @@ SELECT count(*) FROM r1 LEFT JOIN d1 ON r1.a = d1.b;
DEBUG: no shard pruning constraints on d1 found
DEBUG: shard count after pruning for d1: 4
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel and it is not feasible to push down
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
DEBUG: recursively planning distributed relation "d1" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "d1" to a subquery
DEBUG: no shard pruning constraints on d1 found
@ -255,7 +255,7 @@ SELECT count(*) FROM r1_local LEFT JOIN d1_local ON r1_local.a = d1_local.b;
SET client_min_messages TO DEBUG1;
-- Test that the join is not pushed down when joined on a distributed column with disjunctive conditions
SELECT count(*) FROM r1 LEFT JOIN d1 ON r1.a = d1.a OR r1.b = d1.b;
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel and it is not feasible to push down
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
DEBUG: recursively planning distributed relation "d1" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "d1" to a subquery
DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_join_pushdown.d1 WHERE true
@ -372,7 +372,7 @@ DEBUG: shard count after pruning for d1: 4
DEBUG: no shard pruning constraints on d1 found
DEBUG: shard count after pruning for d1: 4
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel and it is not feasible to push down
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
DEBUG: recursively planning distributed relation "d1" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "d1" to a subquery
DEBUG: no shard pruning constraints on d1 found