diff --git a/src/test/regress/citus_tests/run_test.py b/src/test/regress/citus_tests/run_test.py index eed25fa4f..e01525df0 100755 --- a/src/test/regress/citus_tests/run_test.py +++ b/src/test/regress/citus_tests/run_test.py @@ -243,6 +243,9 @@ DEPS = { "minimal_schedule", ["multi_create_table", "multi_create_users", "multi_multiuser_load_data"], ), + "multi_subquery_complex_reference_clause": TestDeps( + "minimal_schedule", ["multi_behavioral_analytics_create_table"] + ), } diff --git a/src/test/regress/expected/multi_subquery_complex_reference_clause.out b/src/test/regress/expected/multi_subquery_complex_reference_clause.out index 3d10c53ec..09efe8d89 100644 --- a/src/test/regress/expected/multi_subquery_complex_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_complex_reference_clause.out @@ -1,7 +1,7 @@ -- -- multi subquery complex queries aims to expand existing subquery pushdown -- regression tests to cover more caeses --- the tables that are used depends to multi_insert_select_behavioral_analytics_create_table.sql +-- the tables that are used depends to multi_behavioral_analytics_create_table.sql -- -- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests -- SET citus.next_shard_id TO 1400000; @@ -2444,5 +2444,4 @@ SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN us (1 row) DROP TABLE user_buy_test_table; -DROP TABLE users_ref_test_table; DROP TABLE users_return_test_table; diff --git a/src/test/regress/expected/query_single_shard_table.out b/src/test/regress/expected/query_single_shard_table.out index fa1641a2b..6eefd21b0 100644 --- a/src/test/regress/expected/query_single_shard_table.out +++ b/src/test/regress/expected/query_single_shard_table.out @@ -1985,11 +1985,8 @@ INSERT INTO raw_events_second (user_id, value_1) SELECT (a+5)*-1, b FROM cte; DEBUG: distributed INSERT ... SELECT can only select from distributed tables DEBUG: CTE cte is going to be inlined via distributed planning -DEBUG: recursively planning left side of the right join since the outer side is a recurring rel -DEBUG: recursively planning distributed relation "distributed_table" since it is part of a distributed join node that is outer joined with a recurring rel -DEBUG: Wrapping relation "distributed_table" to a subquery -DEBUG: generating subplan XXX_1 for subquery SELECT a FROM query_single_shard_table.distributed_table WHERE true -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT "?column?" AS user_id, b AS value_1 FROM (SELECT ((cte.a OPERATOR(pg_catalog.+) 5) OPERATOR(pg_catalog.*) '-1'::integer), cte.b FROM (SELECT DISTINCT reference_table.a, 1 AS b FROM ((SELECT distributed_table_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) distributed_table_1) distributed_table RIGHT JOIN query_single_shard_table.reference_table USING (a))) cte) citus_insert_select_subquery("?column?", b) +DEBUG: generating subplan XXX_1 for subquery SELECT DISTINCT reference_table.a, 1 AS b FROM (query_single_shard_table.distributed_table RIGHT JOIN query_single_shard_table.reference_table USING (a)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT "?column?" AS user_id, b AS value_1 FROM (SELECT ((cte.a OPERATOR(pg_catalog.+) 5) OPERATOR(pg_catalog.*) '-1'::integer), cte.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) cte) citus_insert_select_subquery("?column?", b) DEBUG: Collecting INSERT ... SELECT results on coordinator -- .. and via SELECT's cte list too INSERT INTO raw_events_second (user_id, value_1) @@ -1999,12 +1996,10 @@ WITH cte AS ( ) SELECT (a+5)*2, b FROM cte; DEBUG: CTE cte is going to be inlined via distributed planning -DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table -DEBUG: recursively planning left side of the right join since the outer side is a recurring rel -DEBUG: recursively planning distributed relation "distributed_table" since it is part of a distributed join node that is outer joined with a recurring rel -DEBUG: Wrapping relation "distributed_table" to a subquery -DEBUG: generating subplan XXX_1 for subquery SELECT a FROM query_single_shard_table.distributed_table WHERE true -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT ((a OPERATOR(pg_catalog.+) 5) OPERATOR(pg_catalog.*) 2) AS user_id, b AS value_1 FROM (SELECT DISTINCT reference_table.a, 1 AS b FROM ((SELECT distributed_table_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) distributed_table_1) distributed_table RIGHT JOIN query_single_shard_table.reference_table USING (a))) cte +DEBUG: cannot push down this subquery +DETAIL: Distinct on columns without partition column is currently unsupported +DEBUG: generating subplan XXX_1 for subquery SELECT DISTINCT reference_table.a, 1 AS b FROM (query_single_shard_table.distributed_table RIGHT JOIN query_single_shard_table.reference_table USING (a)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT ((a OPERATOR(pg_catalog.+) 5) OPERATOR(pg_catalog.*) 2) AS user_id, b AS value_1 FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) cte DEBUG: Collecting INSERT ... SELECT results on coordinator -- using set operations INSERT INTO @@ -2241,13 +2236,6 @@ UPDATE nullkey_c1_t1 SET b = 5 WHERE nullkey_c1_t1.b IN (SELECT b FROM cte); DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for CTE cte: SELECT reference_table.a, 1 AS b FROM (query_single_shard_table.distributed_table RIGHT JOIN query_single_shard_table.reference_table USING (a)) DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: recursively planning left side of the right join since the outer side is a recurring rel -DEBUG: recursively planning distributed relation "distributed_table" since it is part of a distributed join node that is outer joined with a recurring rel -DEBUG: Wrapping relation "distributed_table" to a subquery -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for subquery SELECT a FROM query_single_shard_table.distributed_table WHERE true -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT reference_table.a, 1 AS b FROM ((SELECT distributed_table_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) distributed_table_1) distributed_table RIGHT JOIN query_single_shard_table.reference_table USING (a)) -DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE query_single_shard_table.nullkey_c1_t1 SET b = 5 WHERE (b OPERATOR(pg_catalog.=) ANY (SELECT cte.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) cte)) DEBUG: Creating router plan UPDATE nullkey_c1_t1 SET b = 5 FROM reference_table WHERE EXISTS ( @@ -2351,13 +2339,6 @@ DELETE FROM nullkey_c1_t1 WHERE nullkey_c1_t1.b IN (SELECT b FROM cte); DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns DEBUG: generating subplan XXX_1 for CTE cte: SELECT reference_table.a, 1 AS b FROM (query_single_shard_table.distributed_table RIGHT JOIN query_single_shard_table.reference_table USING (a)) DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: recursively planning left side of the right join since the outer side is a recurring rel -DEBUG: recursively planning distributed relation "distributed_table" since it is part of a distributed join node that is outer joined with a recurring rel -DEBUG: Wrapping relation "distributed_table" to a subquery -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for subquery SELECT a FROM query_single_shard_table.distributed_table WHERE true -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT reference_table.a, 1 AS b FROM ((SELECT distributed_table_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) distributed_table_1) distributed_table RIGHT JOIN query_single_shard_table.reference_table USING (a)) -DEBUG: Creating router plan DEBUG: Plan XXX query after replacing subqueries and CTEs: DELETE FROM query_single_shard_table.nullkey_c1_t1 WHERE (b OPERATOR(pg_catalog.=) ANY (SELECT cte.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) cte)) DEBUG: Creating router plan DELETE FROM nullkey_c1_t1 USING reference_table WHERE EXISTS ( @@ -3314,7 +3295,8 @@ DEBUG: Creating router plan DEBUG: generating subplan XXX_1 for subquery SELECT a FROM query_single_shard_table.nullkey_c2_t1 table_3 ORDER BY a LIMIT 0 DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count, avg(a) AS avg FROM (SELECT table_0.a FROM (query_single_shard_table.nullkey_c1_t1 table_0 RIGHT JOIN (SELECT table_2.a FROM ((SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) table_2 JOIN query_single_shard_table.nullkey_c2_t1 table_4 USING (a)) WHERE (table_4.a OPERATOR(pg_catalog.<) 8)) table_1 USING (a))) avgsub DEBUG: router planner does not support queries that reference non-colocated distributed tables -ERROR: cannot perform a lateral outer join when a distributed subquery references complex subqueries, CTEs or local tables +ERROR: cannot push down this subquery +DETAIL: nullkey_c1_t1 and nullkey_c2_t1 are not colocated -- test nested exec CREATE FUNCTION dist_query_single_shard(p_key int) RETURNS bigint diff --git a/src/test/regress/expected/recurring_join_pushdown.out b/src/test/regress/expected/recurring_join_pushdown.out index efb4c1d71..1f102921e 100644 --- a/src/test/regress/expected/recurring_join_pushdown.out +++ b/src/test/regress/expected/recurring_join_pushdown.out @@ -1013,3 +1013,5 @@ SELECT count(*) FROM (SELECT * FROM d1_local) RIGHT JOIN r1_local USING (a); 21 (1 row) +SET client_min_messages TO ERROR; +DROP SCHEMA recurring_join_pushdown CASCADE; diff --git a/src/test/regress/expected/recurring_outer_join.out b/src/test/regress/expected/recurring_outer_join.out index 143e3c96b..f2137ffb4 100644 --- a/src/test/regress/expected/recurring_outer_join.out +++ b/src/test/regress/expected/recurring_outer_join.out @@ -151,11 +151,6 @@ SELECT COUNT(*) FROM ref_1 LEFT JOIN dist_1 USING (a,b); (1 row) SELECT COUNT(*) FROM dist_1 RIGHT JOIN ref_1 USING (a); -DEBUG: recursively planning left side of the right join since the outer side is a recurring rel -DEBUG: recursively planning distributed relation "dist_1" since it is part of a distributed join node that is outer joined with a recurring rel -DEBUG: Wrapping relation "dist_1" to a subquery -DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 WHERE true -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT dist_1_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) dist_1_1) dist_1 RIGHT JOIN recurring_outer_join.ref_1 USING (a)) count --------------------------------------------------------------------- 28 @@ -532,12 +527,7 @@ ERROR: complex joins are only supported when all distributed tables are co-loca SELECT COUNT(*) FROM dist_1 t1 RIGHT JOIN ref_1 t2 USING (a,b) WHERE EXISTS (SELECT * FROM dist_1 t3 WHERE t2.a = t3.a); -DEBUG: recursively planning left side of the right join since the outer side is a recurring rel -DEBUG: recursively planning distributed relation "dist_1" "t1" since it is part of a distributed join node that is outer joined with a recurring rel -DEBUG: Wrapping relation "dist_1" "t1" to a subquery -DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t1 WHERE true -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT t1_1.a, t1_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t1_1) t1 RIGHT JOIN recurring_outer_join.ref_1 t2 USING (a, b)) WHERE (EXISTS (SELECT t3.a, t3.b FROM recurring_outer_join.dist_1 t3 WHERE (t2.a OPERATOR(pg_catalog.=) t3.a))) -ERROR: correlated subqueries are not supported when the FROM clause contains a CTE or subquery +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- "dist_1 t2" can't contribute to result set of the right join with -- a tuple having "(t2.a) a = NULL" because t2 is in the inner side of -- right join. For this reason, Postgres knows that can @@ -571,18 +561,13 @@ LATERAL (0 rows) -- Qual is the same but top-level join is an anti-join. Right join --- stays as is and hence requires recursive planning. +-- is pushed down. SELECT COUNT(*) FROM dist_1 t1 WHERE NOT EXISTS ( SELECT * FROM dist_1 t2 RIGHT JOIN ref_1 t3 USING (a) WHERE t2.a = t1.a ); -DEBUG: recursively planning left side of the right join since the outer side is a recurring rel -DEBUG: recursively planning distributed relation "dist_1" "t2" since it is part of a distributed join node that is outer joined with a recurring rel -DEBUG: Wrapping relation "dist_1" "t2" to a subquery -DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t2 WHERE true -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM recurring_outer_join.dist_1 t1 WHERE (NOT (EXISTS (SELECT t3.a, t2.b, t3.b FROM ((SELECT t2_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t2_1) t2 RIGHT JOIN recurring_outer_join.ref_1 t3 USING (a)) WHERE (t2.a OPERATOR(pg_catalog.=) t1.a)))) count --------------------------------------------------------------------- 8 @@ -663,7 +648,8 @@ LEFT JOIN ( dist_1 t4 JOIN - -- 1) t6 is recursively planned since the outer side is recurring + -- 1) t6 is not recursively planned since it is + -- safe to push down the recurring outer side with constraints (SELECT t6.a FROM dist_1 t6 RIGHT JOIN ref_1 t7 USING(a)) t5 USING(a) ) q @@ -674,19 +660,17 @@ LEFT JOIN dist_1 t8 USING (a) WHERE t8.b IS NULL; -DEBUG: recursively planning left side of the right join since the outer side is a recurring rel -DEBUG: recursively planning distributed relation "dist_1" "t6" since it is part of a distributed join node that is outer joined with a recurring rel -DEBUG: Wrapping relation "dist_1" "t6" to a subquery -DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t6 WHERE true DEBUG: recursively planning right side of the left join since the outer side is a recurring rel DEBUG: recursively planning distributed relation "dist_1" "t4" since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Wrapping relation "dist_1" "t4" to a subquery -DEBUG: generating subplan XXX_2 for subquery SELECT a FROM recurring_outer_join.dist_1 t4 WHERE true +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM recurring_outer_join.dist_1 t4 WHERE true +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT t6.a FROM (recurring_outer_join.dist_1 t6 RIGHT JOIN recurring_outer_join.ref_1 t7 USING (a)) DEBUG: recursively planning right side of the left join since the outer side is a recurring rel DEBUG: recursively planning distributed relation "dist_1" "t8" since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Wrapping relation "dist_1" "t8" to a subquery DEBUG: generating subplan XXX_3 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t8 WHERE true -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((recurring_outer_join.ref_1 t1 LEFT JOIN ((SELECT t4_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t4_1) t4 JOIN (SELECT t6.a FROM ((SELECT t6_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t6_1) t6 RIGHT JOIN recurring_outer_join.ref_1 t7 USING (a))) t5 USING (a)) q USING (a)) LEFT JOIN (SELECT t8_1.a, t8_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t8_1) t8 USING (a)) WHERE (t8.b IS NULL) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((recurring_outer_join.ref_1 t1 LEFT JOIN ((SELECT t4_1.a, NULL::integer AS b FROM (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t4_1) t4 JOIN (SELECT intermediate_result.a FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer)) t5 USING (a)) q USING (a)) LEFT JOIN (SELECT t8_1.a, t8_1.b FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t8_1) t8 USING (a)) WHERE (t8.b IS NULL) count --------------------------------------------------------------------- 10 diff --git a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql index 48dbcf988..7d1aef052 100644 --- a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql +++ b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql @@ -1,7 +1,7 @@ -- -- multi subquery complex queries aims to expand existing subquery pushdown -- regression tests to cover more caeses --- the tables that are used depends to multi_insert_select_behavioral_analytics_create_table.sql +-- the tables that are used depends to multi_behavioral_analytics_create_table.sql -- -- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests @@ -1532,5 +1532,4 @@ SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LIMIT 5) as foo FULL JOIN user_buy_test_table ON true; DROP TABLE user_buy_test_table; -DROP TABLE users_ref_test_table; DROP TABLE users_return_test_table; diff --git a/src/test/regress/sql/recurring_join_pushdown.sql b/src/test/regress/sql/recurring_join_pushdown.sql index 42036cd40..457d0742b 100644 --- a/src/test/regress/sql/recurring_join_pushdown.sql +++ b/src/test/regress/sql/recurring_join_pushdown.sql @@ -149,3 +149,6 @@ SELECT count(*) FROM d1_local RIGHT JOIN r1_local USING (a); SELECT count(*) FROM (SELECT * FROM d1) RIGHT JOIN r1 USING (a); SELECT count(*) FROM (SELECT * FROM d1_local) RIGHT JOIN r1_local USING (a); + +SET client_min_messages TO ERROR; +DROP SCHEMA recurring_join_pushdown CASCADE; diff --git a/src/test/regress/sql/recurring_outer_join.sql b/src/test/regress/sql/recurring_outer_join.sql index e12911442..6851d5cf0 100644 --- a/src/test/regress/sql/recurring_outer_join.sql +++ b/src/test/regress/sql/recurring_outer_join.sql @@ -293,7 +293,7 @@ LATERAL ) as foo; -- Qual is the same but top-level join is an anti-join. Right join --- stays as is and hence requires recursive planning. +-- is pushed down. SELECT COUNT(*) FROM dist_1 t1 WHERE NOT EXISTS ( SELECT * FROM dist_1 t2 @@ -346,7 +346,8 @@ LEFT JOIN ( dist_1 t4 JOIN - -- 1) t6 is recursively planned since the outer side is recurring + -- 1) t6 is not recursively planned since it is + -- safe to push down the recurring outer side with constraints (SELECT t6.a FROM dist_1 t6 RIGHT JOIN ref_1 t7 USING(a)) t5 USING(a) ) q