From 83ef600f27aa0666cbd201aeedc17f5765042887 Mon Sep 17 00:00:00 2001 From: aykut-bozkurt <51649454+aykut-bozkurt@users.noreply.github.com> Date: Wed, 30 Nov 2022 14:17:56 +0300 Subject: [PATCH] fix false full join pushdown error check (#6523) **Problem**: Currently, we error out if we detect recurring tuples in one side without checking the other side of the join. **Solution**: When one side of the full join consists recurring tuples and the other side consists nonrecurring tuples, we should not pushdown to prevent duplicate results. Otherwise, safe to pushdown. --- .../planner/query_pushdown_planning.c | 14 +- .../regress/expected/multi_outer_join.out | 18 +++ ...ulti_subquery_complex_reference_clause.out | 132 ++++++++++++++++++ src/test/regress/expected/with_join.out | 23 +++ src/test/regress/sql/multi_outer_join.sql | 10 ++ ...ulti_subquery_complex_reference_clause.sql | 65 +++++++++ src/test/regress/sql/with_join.sql | 10 ++ 7 files changed, 270 insertions(+), 2 deletions(-) diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index be8e33ba8..7dca6579f 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -816,7 +816,17 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin( } else if (joinType == JOIN_FULL) { - if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, innerrelRelids)) + /* + * If one of the outer or inner side contains recurring tuples and the other side + * contains nonrecurring tuples, then duplicate results can exist in the result. + * Thus, Citus should not pushdown the query. + */ + bool innerContainOnlyRecurring = + RelationInfoContainsOnlyRecurringTuples(plannerInfo, innerrelRelids); + bool outerContainOnlyRecurring = + RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrelRelids); + + if (innerContainOnlyRecurring && !outerContainOnlyRecurring) { /* * Find the first (or only) recurring RTE to give a meaningful @@ -827,7 +837,7 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin( break; } - if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrelRelids)) + if (!innerContainOnlyRecurring && outerContainOnlyRecurring) { /* * Find the first (or only) recurring RTE to give a meaningful diff --git a/src/test/regress/expected/multi_outer_join.out b/src/test/regress/expected/multi_outer_join.out index 1748a0dfc..8884c876f 100644 --- a/src/test/regress/expected/multi_outer_join.out +++ b/src/test/regress/expected/multi_outer_join.out @@ -491,6 +491,24 @@ ORDER BY 1,2,3; | | 29 (17 rows) +-- full outer join should work between reference tables +SELECT + t_custkey, r_custkey +FROM + (SELECT * FROM multi_outer_join_third_reference r1 + FULL JOIN multi_outer_join_right_reference r2 ON (r1.t_custkey = r2.r_custkey) + ) AS foo + INNER JOIN multi_outer_join_right USING (r_custkey) +ORDER BY 1,2; + t_custkey | r_custkey +--------------------------------------------------------------------- + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 +(5 rows) + -- full outer join should work with 1-1 matched shards SELECT l_custkey, r_custkey diff --git a/src/test/regress/expected/multi_subquery_complex_reference_clause.out b/src/test/regress/expected/multi_subquery_complex_reference_clause.out index b9579ae7a..65ff161ca 100644 --- a/src/test/regress/expected/multi_subquery_complex_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_complex_reference_clause.out @@ -2059,6 +2059,138 @@ SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table r 4 (1 row) +-- left outer part of the FULL JOIN consists only reference tables, so we cannot push down +-- we have different combinations for ON condition, true/false/two column join/single column filter +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON true; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON true; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON true; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON false; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON false; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (ref1.id > 5); +ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (user_buy_test_table.user_id > 5); +ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +-- right outer part of the FULL JOIN consists only reference tables, so we cannot push down +-- we have different combinations for ON condition, true/false/two column join/single column filter +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON true; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON true; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON true; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON false; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON false; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (ref1.id > 5); +ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (user_buy_test_table.user_id > 5); +ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (ref1.id = user_buy_test_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (ref2.id = user_buy_test_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (ref1.id = user_buy_test_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (ref2.id = user_buy_test_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +-- left outer part of the FULL JOIN consists only reference tables within a subquery, so we cannot push down +-- we have different combinations for ON condition, true/false/two column join/single column filter +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON true; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON true; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON false; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON false; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON (foo.id > 5); +ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON (user_buy_test_table.user_id > 19); +ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +-- right outer part of the FULL JOIN consists only reference tables within a subquery, so we cannot push down +-- we have different combinations for ON condition, true/false/two column join/single column filter +SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON true; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON true; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON false; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON false; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON (foo.id > 5); +ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions +SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON (user_buy_test_table.user_id > 19); +ERROR: FULL JOIN is only supported with merge-joinable or hash-joinable join conditions +SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON (foo.id = user_buy_test_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +-- one example where unsupported outer join is deep inside a subquery +SELECT *, random() FROM ( +SELECT *,random() FROM user_buy_test_table WHERE user_id > ( +SELECT count(*) FROM (SELECT *,random() FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as bar) as foo FULL JOIN (SELECT *, random() FROM (SELECT *,random() FROM user_buy_test_table d1 JOIN user_buy_test_table d2 USING (user_id)) as bar_inner ) as bar ON true)) as boo; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +-- In theory, we should be able to pushdown this query +-- however, as the FULL JOIN condition is between a reference table and the distributed table +-- Postgres generates a FULL JOIN alternative among those tables +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo FULL JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table +-- same as the above query, but this time FULL JOIN condition is between distributed tables +-- so Postgres doesn't generate join restriction between reference and distributed tables +SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo FULL JOIN user_buy_test_table ON (foo.user_id = user_buy_test_table.user_id); + count +--------------------------------------------------------------------- + 4 +(1 row) + +-- left outer part of the FULL JOIN consists only intermediate result due to LIMIT, so we cannot push down +SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LIMIT 5) as foo FULL JOIN user_buy_test_table ON true; +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table DROP TABLE user_buy_test_table; DROP TABLE users_ref_test_table; DROP TABLE users_return_test_table; diff --git a/src/test/regress/expected/with_join.out b/src/test/regress/expected/with_join.out index f20334403..dd985fbb1 100644 --- a/src/test/regress/expected/with_join.out +++ b/src/test/regress/expected/with_join.out @@ -266,6 +266,29 @@ LIMIT 1 | (2 rows) +-- full outer join should work between a reference table and a cte +WITH cte AS ( + SELECT max(user_id) AS max_user_id FROM users_table +) +SELECT user_id, time FROM + (SELECT r1.user_id AS user_id FROM cte FULL JOIN reference_table r1 ON (cte.max_user_id = r1.user_id)) AS foo + INNER JOIN users_table USING (user_id) +ORDER BY 1,2; + user_id | time +--------------------------------------------------------------------- + 6 | Wed Nov 22 20:15:53.317797 2017 + 6 | Wed Nov 22 23:01:24.82289 2017 + 6 | Thu Nov 23 00:07:11.068353 2017 + 6 | Thu Nov 23 00:09:44.19812 2017 + 6 | Thu Nov 23 01:13:50.526322 2017 + 6 | Thu Nov 23 01:14:55.769581 2017 + 6 | Thu Nov 23 10:22:11.02918 2017 + 6 | Thu Nov 23 11:08:04.244582 2017 + 6 | Thu Nov 23 13:51:16.92838 2017 + 6 | Thu Nov 23 14:43:18.024104 2017 + 6 | +(11 rows) + -- some more tests for more complex outer-joins -- with reference tables CREATE TABLE distributed_1 (col1 int, col2 int, distrib_col int); diff --git a/src/test/regress/sql/multi_outer_join.sql b/src/test/regress/sql/multi_outer_join.sql index 280f4e211..549e0ae2c 100644 --- a/src/test/regress/sql/multi_outer_join.sql +++ b/src/test/regress/sql/multi_outer_join.sql @@ -340,6 +340,16 @@ FROM RIGHT JOIN multi_outer_join_left l1 ON (r1.r_custkey = l1.l_custkey) ORDER BY 1,2,3; +-- full outer join should work between reference tables +SELECT + t_custkey, r_custkey +FROM + (SELECT * FROM multi_outer_join_third_reference r1 + FULL JOIN multi_outer_join_right_reference r2 ON (r1.t_custkey = r2.r_custkey) + ) AS foo + INNER JOIN multi_outer_join_right USING (r_custkey) +ORDER BY 1,2; + -- full outer join should work with 1-1 matched shards SELECT l_custkey, r_custkey diff --git a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql index b8451f559..56cc4455d 100644 --- a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql +++ b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql @@ -1471,6 +1471,71 @@ SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table r SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON false; SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id RIGHT JOIN user_buy_test_table ON (ref1.id = user_id); +-- left outer part of the FULL JOIN consists only reference tables, so we cannot push down +-- we have different combinations for ON condition, true/false/two column join/single column filter +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON true; +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON true; +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON true; +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON false; +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON false; +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (ref1.id > 5); +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (user_buy_test_table.user_id > 5); +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id); +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id); +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (ref1.id = user_buy_test_table.user_id); +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id FULL JOIN user_buy_test_table ON (ref2.id = user_buy_test_table.user_id); + +-- right outer part of the FULL JOIN consists only reference tables, so we cannot push down +-- we have different combinations for ON condition, true/false/two column join/single column filter +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON true; +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON true; +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON true; +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON false; +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON false; +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (ref1.id > 5); +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (user_buy_test_table.user_id > 5); +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (ref1.id = user_buy_test_table.user_id); +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (ref2.id = user_buy_test_table.user_id); +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (ref1.id = user_buy_test_table.user_id); +SELECT count(*) FROM user_buy_test_table FULL JOIN users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id ON (ref2.id = user_buy_test_table.user_id); + +-- left outer part of the FULL JOIN consists only reference tables within a subquery, so we cannot push down +-- we have different combinations for ON condition, true/false/two column join/single column filter +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON true; +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON true; +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON false; +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON false; +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON (foo.id > 5); +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON (user_buy_test_table.user_id > 19); +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo FULL JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id); + +-- right outer part of the FULL JOIN consists only reference tables within a subquery, so we cannot push down +-- we have different combinations for ON condition, true/false/two column join/single column filter +SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON true; +SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON true; +SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON false; +SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON false; +SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON (foo.id > 5); +SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON (user_buy_test_table.user_id > 19); +SELECT count(*) FROM user_buy_test_table FULL JOIN (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo ON (foo.id = user_buy_test_table.user_id); + +-- one example where unsupported outer join is deep inside a subquery +SELECT *, random() FROM ( +SELECT *,random() FROM user_buy_test_table WHERE user_id > ( +SELECT count(*) FROM (SELECT *,random() FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as bar) as foo FULL JOIN (SELECT *, random() FROM (SELECT *,random() FROM user_buy_test_table d1 JOIN user_buy_test_table d2 USING (user_id)) as bar_inner ) as bar ON true)) as boo; + +-- In theory, we should be able to pushdown this query +-- however, as the FULL JOIN condition is between a reference table and the distributed table +-- Postgres generates a FULL JOIN alternative among those tables +SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo FULL JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id); + +-- same as the above query, but this time FULL JOIN condition is between distributed tables +-- so Postgres doesn't generate join restriction between reference and distributed tables +SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo FULL JOIN user_buy_test_table ON (foo.user_id = user_buy_test_table.user_id); + +-- left outer part of the FULL JOIN consists only intermediate result due to LIMIT, so we cannot push down +SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LIMIT 5) as foo FULL JOIN user_buy_test_table ON true; + DROP TABLE user_buy_test_table; DROP TABLE users_ref_test_table; DROP TABLE users_return_test_table; diff --git a/src/test/regress/sql/with_join.sql b/src/test/regress/sql/with_join.sql index 54669d75f..2f6268c8e 100644 --- a/src/test/regress/sql/with_join.sql +++ b/src/test/regress/sql/with_join.sql @@ -220,6 +220,16 @@ LIMIT 5; +-- full outer join should work between a reference table and a cte +WITH cte AS ( + SELECT max(user_id) AS max_user_id FROM users_table +) +SELECT user_id, time FROM + (SELECT r1.user_id AS user_id FROM cte FULL JOIN reference_table r1 ON (cte.max_user_id = r1.user_id)) AS foo + INNER JOIN users_table USING (user_id) +ORDER BY 1,2; + + -- some more tests for more complex outer-joins -- with reference tables CREATE TABLE distributed_1 (col1 int, col2 int, distrib_col int);