From 15e724c073b3990aee43a2c2d9e3e81ee4819dec Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Thu, 8 Oct 2020 14:45:47 +0200 Subject: [PATCH] Add regression tests for outer/cross JOINs --- .../planner/query_pushdown_planning.c | 12 + src/test/regress/expected/cross_join.out | 253 ++++++++++++++++++ ...ulti_subquery_complex_reference_clause.out | 132 ++++++++- .../regress/expected/sqlancer_failures.out | 43 +++ src/test/regress/multi_schedule | 2 +- src/test/regress/sql/cross_join.sql | 131 +++++++++ ...ulti_subquery_complex_reference_clause.sql | 101 ++++++- src/test/regress/sql/sqlancer_failures.sql | 19 ++ 8 files changed, 681 insertions(+), 12 deletions(-) create mode 100644 src/test/regress/expected/cross_join.out create mode 100644 src/test/regress/sql/cross_join.sql diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index 4bc5589f8..863933759 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -805,6 +805,10 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin( */ if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrel)) { + /* + * Find the first (or only) recurring RTE to give a meaningful + * error to the user. + */ recurType = FetchFirstRecurType(plannerInfo, outerrel); break; @@ -814,6 +818,10 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin( { if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, innerrel)) { + /* + * Find the first (or only) recurring RTE to give a meaningful + * error to the user. + */ recurType = FetchFirstRecurType(plannerInfo, innerrel); break; @@ -821,6 +829,10 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin( if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrel)) { + /* + * Find the first (or only) recurring RTE to give a meaningful + * error to the user. + */ recurType = FetchFirstRecurType(plannerInfo, outerrel); break; diff --git a/src/test/regress/expected/cross_join.out b/src/test/regress/expected/cross_join.out new file mode 100644 index 000000000..8ced8541d --- /dev/null +++ b/src/test/regress/expected/cross_join.out @@ -0,0 +1,253 @@ +-- this test file relies on multi_behavioral_analytics_create_table +-- and aims to have variety of tests covering CROSS JOINs +-- "t1 CROSS JOIN t2" is equivalent of "t1 JOIN t2 ON true" +-- a distributed table can be cross joined with a reference table +-- and the CROSS JOIN can be in the outer part of an outer JOIN +SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 LEFT JOIN users_table u ON (e2.user_id = u.user_id); + count +--------------------------------------------------------------------- + 176649 +(1 row) + +-- two distributed tables cannot be cross joined +-- as it lacks distribution key equality +SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u; +ERROR: cannot perform distributed planning on this query +DETAIL: Cartesian products are currently unsupported +SELECT count(*) FROM events_reference_table e1, events_table e2, users_table u; +ERROR: cannot perform distributed planning on this query +DETAIL: Cartesian products are currently unsupported +-- we can provide the distribution key equality via WHERE clause +SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u WHERE u.user_id = e2.user_id; + count +--------------------------------------------------------------------- + 176649 +(1 row) + +-- two reference tables are JOINed, and later CROSS JOINed with a distributed table +-- it is safe to pushdown +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table; + count +--------------------------------------------------------------------- + 606 +(1 row) + +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table; + count +--------------------------------------------------------------------- + 606 +(1 row) + +SELECT count(*) FROM users_ref_test_table ref1 RIGHT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table; + count +--------------------------------------------------------------------- + 606 +(1 row) + +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 CROSS JOIN users_table; + count +--------------------------------------------------------------------- + 3636 +(1 row) + +-- two reference tables CROSS JOINNed, and later JOINED with distributed tables +-- it is safe to pushdown +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON false; + count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON false; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON true; + count +--------------------------------------------------------------------- + 3636 +(1 row) + +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON true; + count +--------------------------------------------------------------------- + 3636 +(1 row) + +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON (ref1.id = users_table.user_id); + count +--------------------------------------------------------------------- + 606 +(1 row) + +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON (ref1.id = users_table.user_id); + count +--------------------------------------------------------------------- + 606 +(1 row) + +-- two reference tables CROSS JOINNed, and later JOINED with distributed tables +-- but the reference table CROSS JOIN is in the outer side of the JOIN with the distributed table +-- so we cannot pushdown +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table ON (ref1.id = users_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id != users_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id > 0); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (users_table.user_id > 0); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON true; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON false; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +-- a reference tables CROSS JOINed with a distribted table, and later JOINED with distributed tables on distribution keys +-- so safe to pushdown +SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (u1.user_id = u2.user_id); + count +--------------------------------------------------------------------- + 11802 +(1 row) + +SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (u1.user_id = u2.user_id); + count +--------------------------------------------------------------------- + 11802 +(1 row) + +SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (u1.user_id = u2.user_id); + count +--------------------------------------------------------------------- + 11802 +(1 row) + +SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (u1.user_id = u2.user_id); + count +--------------------------------------------------------------------- + 11802 +(1 row) + +-- a reference tables CROSS JOINed with a distribted table, and later JOINED with distributed tables on reference table column +-- so not safe to pushdown +SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (ref2.id = u2.user_id); +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (ref2.id = u2.user_id); +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (ref2.id = u2.user_id); +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +-- via repartitioning, Citus can handle this query as the result of "u1 CROSS JOIN ref2" +-- can be repartitioned on ref2.id +Set citus.enable_repartition_joins to on; +SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (ref2.id = u2.user_id); + count +--------------------------------------------------------------------- + 10201 +(1 row) + +reset citus.enable_repartition_joins; +-- although the following has the "ref LEFT JOIN dist" type of query, the LEFT JOIN is eliminated by Postgres +-- because the INNER JOIN eliminates the LEFT JOIN +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) JOIN users_table u2 ON (u2.user_id = users_table.user_id); + count +--------------------------------------------------------------------- + 11802 +(1 row) + +-- this is the same query as the above, but this time the outer query is also LEFT JOIN, meaning that Postgres +-- cannot eliminate the outer join +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) LEFT JOIN users_table u2 ON (u2.user_id = users_table.user_id); +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +-- cross join that goes through non-colocated subquery logic +-- for the "events_table" subquery as both distributed tables +-- do not have JOIN on the distribution key +SELECT max(events_all.cnt), + events_all.usr_id + FROM +(SELECT *, random() FROM + (SELECT *, random() + FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt + FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table + INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) + GROUP BY users_table.user_id) AS events_all_inner + ) AS events_all + ) AS events_all + CROSS JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table +GROUP BY 2 +ORDER BY 1 DESC, + 2 DESC +LIMIT 5; + max | usr_id +--------------------------------------------------------------------- + 432 | 2 + 391 | 4 + 364 | 5 + 357 | 3 + 105 | 1 +(5 rows) + +-- cross join that goes through non-colocated subquery logic +-- for the "events_all" subquery as both distributed tables +-- do not have JOIN on the distribution key +SELECT max(events_all.cnt), + events_all.usr_id + FROM events_table + CROSS JOIN (SELECT *, random() + FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt + FROM events_reference_table + INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) + GROUP BY users_table.user_id) AS events_all_inner + ) AS events_all +GROUP BY 2 +ORDER BY 1 DESC, + 2 DESC +LIMIT 5; + max | usr_id +--------------------------------------------------------------------- + 432 | 2 + 391 | 4 + 364 | 5 + 357 | 3 + 105 | 1 +(5 rows) + +-- cross join is between a reference table and distributed table, and +-- deep inside a subquery. The subquery can be in the outer part of the LEFT JOIN +SELECT + users_table.* +FROM + (SELECT + events_all.*, random() + FROM + events_reference_table JOIN users_table USING(user_id) + JOIN + (SELECT *, random() + FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt + FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table + CROSS JOIN users_table + GROUP BY users_table.user_id) AS events_all_inner + ) AS events_all ON (user_id = usr_id) + ) AS events_all + LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id) + LEFT JOIN users_table USING (user_id) +ORDER BY 1,2,3,4 LIMIT 5; + user_id | time | value_1 | value_2 | value_3 | value_4 +--------------------------------------------------------------------- + 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 | + 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 | + 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 | + 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 | + 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 | +(5 rows) + diff --git a/src/test/regress/expected/multi_subquery_complex_reference_clause.out b/src/test/regress/expected/multi_subquery_complex_reference_clause.out index a40a6221c..9329e2df4 100644 --- a/src/test/regress/expected/multi_subquery_complex_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_complex_reference_clause.out @@ -660,6 +660,132 @@ LIMIT 5; 105 | 1 (5 rows) +-- should be fine even if the tables are deep inside subqueries +SELECT max(events_all.cnt), + events_all.usr_id + FROM +(SELECT *, random() FROM + (SELECT *, random() + FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt + FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table + INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) + GROUP BY users_table.user_id) AS events_all_inner + ) AS events_all + ) AS events_all + LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id) +GROUP BY 2 +ORDER BY 1 DESC, + 2 DESC +LIMIT 5; + max | usr_id +--------------------------------------------------------------------- + 432 | 2 + 391 | 4 + 364 | 5 + 357 | 3 + 105 | 1 +(5 rows) + +-- should be fine with FULL join as well +SELECT max(events_all.cnt), + events_all.usr_id + FROM events_table + FULL JOIN (SELECT *, random() + FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt + FROM events_reference_table + INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) + GROUP BY users_table.user_id) AS events_all_inner + ) AS events_all ON (events_all.usr_id = events_table.user_id) +GROUP BY 2 +ORDER BY 1 DESC, + 2 DESC +LIMIT 5; + max | usr_id +--------------------------------------------------------------------- + 432 | 2 + 391 | 4 + 364 | 5 + 357 | 3 + 105 | 1 +(5 rows) + +-- two levels of "(ref_table JOIN dist_table) LEFT JOIN" +-- should be fine as well +SELECT + users_table.* +FROM + (SELECT + events_all.*, random() + FROM + events_reference_table JOIN users_table USING(user_id) + JOIN + (SELECT *, random() + FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt + FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table + INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) + GROUP BY users_table.user_id) AS events_all_inner + ) AS events_all ON (user_id = usr_id) + ) AS events_all + LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id) + LEFT JOIN users_table USING (user_id) +ORDER BY 1,2,3,4 LIMIT 5; + user_id | time | value_1 | value_2 | value_3 | value_4 +--------------------------------------------------------------------- + 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 | + 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 | + 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 | + 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 | + 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 | +(5 rows) + +-- we should be able to support OUTER joins involving +-- reference tables even if the subquery is in WHERE clause +SELECT count(*) +FROM events_table +WHERE user_id IN + (SELECT subquery_1.user_id + FROM + (SELECT tt1.user_id, + random() + FROM users_table AS tt1 + JOIN events_table AS tt2 ON tt1.user_id = tt2.user_id) subquery_1 + RIGHT JOIN + (SELECT *, + random() + FROM + (SELECT tt1.user_id, + random() + FROM users_table AS tt1 + JOIN events_reference_table AS REF ON tt1.user_id = ref.user_id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id); + count +--------------------------------------------------------------------- + 101 +(1 row) + +-- we should be able to support OUTER joins involving +-- reference tables even if the subquery is in the outer part of a JOIN +SELECT count(*) +FROM users_table +RIGHT JOIN + (SELECT subquery_1.user_id + FROM + (SELECT tt1.user_id, + random() + FROM users_table AS tt1 + JOIN events_table AS tt2 ON tt1.user_id = tt2.user_id) subquery_1 + RIGHT JOIN + (SELECT *, + random() + FROM + (SELECT tt1.user_id, + random() + FROM users_table AS tt1 + JOIN events_reference_table AS REF ON tt1.user_id = ref.user_id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id) AS foo USING (user_id); + count +--------------------------------------------------------------------- + 12664199 +(1 row) + -- LATERAL JOINs used with INNER JOINs with reference tables SET citus.subquery_pushdown to ON; NOTICE: Setting citus.subquery_pushdown flag is discouraged becuase it forces the planner to pushdown certain queries, skipping relevant correctness checks. @@ -1910,12 +2036,6 @@ SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER 3 (1 row) --- again, in theory should be OK to pushdown but --- Postgres generates join restriction between reference and distributed tables --- in one of the cases -SELECT count(*) FROM user_buy_test_table a LEFT JOIN users_ref_test_table b ON (true) RIGHT JOIN users_ref_test_table c ON (b.id = c.id); -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join -- outer part of the LEFT JOIN consists only intermediate result due to LIMIT, so we cannot push down SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LIMIT 5) as foo LEFT JOIN user_buy_test_table ON true; ERROR: cannot pushdown the subquery diff --git a/src/test/regress/expected/sqlancer_failures.out b/src/test/regress/expected/sqlancer_failures.out index 84d9ecf10..2cb3de1cd 100644 --- a/src/test/regress/expected/sqlancer_failures.out +++ b/src/test/regress/expected/sqlancer_failures.out @@ -108,5 +108,48 @@ EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = CASE WHEN 2 BETWEEN 1 AND 3 THEN Filter: (c0 = 2) (7 rows) +CREATE TABLE IF NOT EXISTS t6(c0 TEXT CHECK (TRUE), c1 money ) WITH (autovacuum_vacuum_threshold=1180014707, autovacuum_freeze_table_age=13771154, autovacuum_vacuum_cost_delay=23, autovacuum_analyze_threshold=1935153914, autovacuum_freeze_min_age=721733768, autovacuum_enabled=0, autovacuum_vacuum_cost_limit=9983); +CREATE UNLOGGED TABLE IF NOT EXISTS t7(LIKE t6); +CREATE TABLE t8(LIKE t6 INCLUDING INDEXES); +CREATE UNLOGGED TABLE t9(LIKE t6 EXCLUDING STATISTICS); +CREATE TABLE t10(LIKE t7); +SELECT create_distributed_table('t6', 'c0'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE t6 ALTER COLUMN c0 SET NOT NULL; +SELECT create_reference_table('t7'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('t8', 'c0'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE t8 ALTER COLUMN c0 SET NOT NULL; +SELECT create_distributed_table('t9', 'c0'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE t9 ALTER COLUMN c0 SET NOT NULL; +SELECT create_reference_table('t10'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM ( +SELECT ALL t7.c1, t7.c0, t8.c1, t10.c1, t8.c0 FROM t7 CROSS JOIN t10 FULL OUTER JOIN t8 ON (((((((('[832125354,1134163512)'::int4range)*('(0,2106623281)'::int4range)))-('(-600267905,509840582]'::int4range)))*('(-365203965,1662828182)'::int4range)))&<((((((('(-1286467417,697584012]'::int4range)*('[-1691485781,1341103963)'::int4range)))*((('(-1768368435,1719707648)'::int4range)*('(139536997,1275813540]'::int4range)))))*((((('[-2103910157,-1961746758)'::int4range)*('[-834534078,533073939)'::int4range)))*((('[-1030552151,552856781]'::int4range)*('[-1109419376,1205173697]'::int4range)))))))) +) AS foo; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join SET client_min_messages TO WARNING; DROP SCHEMA sqlancer_failures CASCADE; diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 4a772fe0a..a20731e03 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -88,7 +88,7 @@ test: subquery_prepared_statements pg12 cte_inline pg13 # ---------- test: multi_deparse_shard_query multi_distributed_transaction_id intermediate_results limit_intermediate_size rollback_to_savepoint test: multi_explain hyperscale_tutorial partitioned_intermediate_results distributed_intermediate_results multi_real_time_transaction -test: multi_basic_queries multi_complex_expressions multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics +test: multi_basic_queries cross_join multi_complex_expressions multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics test: multi_subquery_complex_reference_clause multi_subquery_window_functions multi_view multi_sql_function multi_prepare_sql test: sql_procedure multi_function_in_join row_types materialized_view undistribute_table test: multi_subquery_in_where_reference_clause full_join adaptive_executor propagate_set_commands diff --git a/src/test/regress/sql/cross_join.sql b/src/test/regress/sql/cross_join.sql new file mode 100644 index 000000000..c301e753b --- /dev/null +++ b/src/test/regress/sql/cross_join.sql @@ -0,0 +1,131 @@ +-- this test file relies on multi_behavioral_analytics_create_table +-- and aims to have variety of tests covering CROSS JOINs +-- "t1 CROSS JOIN t2" is equivalent of "t1 JOIN t2 ON true" + +-- a distributed table can be cross joined with a reference table +-- and the CROSS JOIN can be in the outer part of an outer JOIN +SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 LEFT JOIN users_table u ON (e2.user_id = u.user_id); + +-- two distributed tables cannot be cross joined +-- as it lacks distribution key equality +SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u; +SELECT count(*) FROM events_reference_table e1, events_table e2, users_table u; + +-- we can provide the distribution key equality via WHERE clause +SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u WHERE u.user_id = e2.user_id; + + +-- two reference tables are JOINed, and later CROSS JOINed with a distributed table +-- it is safe to pushdown +SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table; +SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table; +SELECT count(*) FROM users_ref_test_table ref1 RIGHT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table; +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 CROSS JOIN users_table; + +-- two reference tables CROSS JOINNed, and later JOINED with distributed tables +-- it is safe to pushdown +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON false; +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON false; +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON true; +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON true; +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON (ref1.id = users_table.user_id); +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON (ref1.id = users_table.user_id); + +-- two reference tables CROSS JOINNed, and later JOINED with distributed tables +-- but the reference table CROSS JOIN is in the outer side of the JOIN with the distributed table +-- so we cannot pushdown +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id); +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table ON (ref1.id = users_table.user_id); +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id != users_table.user_id); +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id > 0); +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (users_table.user_id > 0); +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON true; +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON false; + +-- a reference tables CROSS JOINed with a distribted table, and later JOINED with distributed tables on distribution keys +-- so safe to pushdown +SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (u1.user_id = u2.user_id); +SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (u1.user_id = u2.user_id); +SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (u1.user_id = u2.user_id); +SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (u1.user_id = u2.user_id); + +-- a reference tables CROSS JOINed with a distribted table, and later JOINED with distributed tables on reference table column +-- so not safe to pushdown +SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (ref2.id = u2.user_id); +SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (ref2.id = u2.user_id); +SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (ref2.id = u2.user_id); + +-- via repartitioning, Citus can handle this query as the result of "u1 CROSS JOIN ref2" +-- can be repartitioned on ref2.id +Set citus.enable_repartition_joins to on; +SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (ref2.id = u2.user_id); +reset citus.enable_repartition_joins; + +-- although the following has the "ref LEFT JOIN dist" type of query, the LEFT JOIN is eliminated by Postgres +-- because the INNER JOIN eliminates the LEFT JOIN +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) JOIN users_table u2 ON (u2.user_id = users_table.user_id); + +-- this is the same query as the above, but this time the outer query is also LEFT JOIN, meaning that Postgres +-- cannot eliminate the outer join +SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) LEFT JOIN users_table u2 ON (u2.user_id = users_table.user_id); + +-- cross join that goes through non-colocated subquery logic +-- for the "events_table" subquery as both distributed tables +-- do not have JOIN on the distribution key +SELECT max(events_all.cnt), + events_all.usr_id + FROM +(SELECT *, random() FROM + (SELECT *, random() + FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt + FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table + INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) + GROUP BY users_table.user_id) AS events_all_inner + ) AS events_all + ) AS events_all + CROSS JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table +GROUP BY 2 +ORDER BY 1 DESC, + 2 DESC +LIMIT 5; + +-- cross join that goes through non-colocated subquery logic +-- for the "events_all" subquery as both distributed tables +-- do not have JOIN on the distribution key +SELECT max(events_all.cnt), + events_all.usr_id + FROM events_table + CROSS JOIN (SELECT *, random() + FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt + FROM events_reference_table + INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) + GROUP BY users_table.user_id) AS events_all_inner + ) AS events_all +GROUP BY 2 +ORDER BY 1 DESC, + 2 DESC +LIMIT 5; + + +-- cross join is between a reference table and distributed table, and +-- deep inside a subquery. The subquery can be in the outer part of the LEFT JOIN +SELECT + users_table.* +FROM + (SELECT + events_all.*, random() + FROM + events_reference_table JOIN users_table USING(user_id) + JOIN + (SELECT *, random() + FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt + FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table + CROSS JOIN users_table + GROUP BY users_table.user_id) AS events_all_inner + ) AS events_all ON (user_id = usr_id) + ) AS events_all + LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id) + LEFT JOIN users_table USING (user_id) +ORDER BY 1,2,3,4 LIMIT 5; + + diff --git a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql index 2704e37db..b8451f559 100644 --- a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql +++ b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql @@ -401,6 +401,102 @@ ORDER BY 1 DESC, 2 DESC LIMIT 5; +-- should be fine even if the tables are deep inside subqueries +SELECT max(events_all.cnt), + events_all.usr_id + FROM +(SELECT *, random() FROM + (SELECT *, random() + FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt + FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table + INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) + GROUP BY users_table.user_id) AS events_all_inner + ) AS events_all + ) AS events_all + LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id) +GROUP BY 2 +ORDER BY 1 DESC, + 2 DESC +LIMIT 5; + +-- should be fine with FULL join as well +SELECT max(events_all.cnt), + events_all.usr_id + FROM events_table + FULL JOIN (SELECT *, random() + FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt + FROM events_reference_table + INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) + GROUP BY users_table.user_id) AS events_all_inner + ) AS events_all ON (events_all.usr_id = events_table.user_id) +GROUP BY 2 +ORDER BY 1 DESC, + 2 DESC +LIMIT 5; + +-- two levels of "(ref_table JOIN dist_table) LEFT JOIN" +-- should be fine as well +SELECT + users_table.* +FROM + (SELECT + events_all.*, random() + FROM + events_reference_table JOIN users_table USING(user_id) + JOIN + (SELECT *, random() + FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt + FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table + INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id) + GROUP BY users_table.user_id) AS events_all_inner + ) AS events_all ON (user_id = usr_id) + ) AS events_all + LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id) + LEFT JOIN users_table USING (user_id) +ORDER BY 1,2,3,4 LIMIT 5; + + +-- we should be able to support OUTER joins involving +-- reference tables even if the subquery is in WHERE clause +SELECT count(*) +FROM events_table +WHERE user_id IN + (SELECT subquery_1.user_id + FROM + (SELECT tt1.user_id, + random() + FROM users_table AS tt1 + JOIN events_table AS tt2 ON tt1.user_id = tt2.user_id) subquery_1 + RIGHT JOIN + (SELECT *, + random() + FROM + (SELECT tt1.user_id, + random() + FROM users_table AS tt1 + JOIN events_reference_table AS REF ON tt1.user_id = ref.user_id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id); + +-- we should be able to support OUTER joins involving +-- reference tables even if the subquery is in the outer part of a JOIN + +SELECT count(*) +FROM users_table +RIGHT JOIN + (SELECT subquery_1.user_id + FROM + (SELECT tt1.user_id, + random() + FROM users_table AS tt1 + JOIN events_table AS tt2 ON tt1.user_id = tt2.user_id) subquery_1 + RIGHT JOIN + (SELECT *, + random() + FROM + (SELECT tt1.user_id, + random() + FROM users_table AS tt1 + JOIN events_reference_table AS REF ON tt1.user_id = ref.user_id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id) AS foo USING (user_id); + -- LATERAL JOINs used with INNER JOINs with reference tables SET citus.subquery_pushdown to ON; SELECT user_id, lastseen @@ -1367,11 +1463,6 @@ SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INN -- so Postgres doesn't generate join restriction between reference and distributed tables SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.user_id = user_buy_test_table.user_id); --- again, in theory should be OK to pushdown but --- Postgres generates join restriction between reference and distributed tables --- in one of the cases -SELECT count(*) FROM user_buy_test_table a LEFT JOIN users_ref_test_table b ON (true) RIGHT JOIN users_ref_test_table c ON (b.id = c.id); - -- outer part of the LEFT JOIN consists only intermediate result due to LIMIT, so we cannot push down SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LIMIT 5) as foo LEFT JOIN user_buy_test_table ON true; diff --git a/src/test/regress/sql/sqlancer_failures.sql b/src/test/regress/sql/sqlancer_failures.sql index 0555af383..57531ea84 100644 --- a/src/test/regress/sql/sqlancer_failures.sql +++ b/src/test/regress/sql/sqlancer_failures.sql @@ -39,5 +39,24 @@ INSERT INTO t5 VALUES (CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END); EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = 2; EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END; +CREATE TABLE IF NOT EXISTS t6(c0 TEXT CHECK (TRUE), c1 money ) WITH (autovacuum_vacuum_threshold=1180014707, autovacuum_freeze_table_age=13771154, autovacuum_vacuum_cost_delay=23, autovacuum_analyze_threshold=1935153914, autovacuum_freeze_min_age=721733768, autovacuum_enabled=0, autovacuum_vacuum_cost_limit=9983); +CREATE UNLOGGED TABLE IF NOT EXISTS t7(LIKE t6); +CREATE TABLE t8(LIKE t6 INCLUDING INDEXES); +CREATE UNLOGGED TABLE t9(LIKE t6 EXCLUDING STATISTICS); +CREATE TABLE t10(LIKE t7); + +SELECT create_distributed_table('t6', 'c0'); +ALTER TABLE t6 ALTER COLUMN c0 SET NOT NULL; +SELECT create_reference_table('t7'); +SELECT create_distributed_table('t8', 'c0'); +ALTER TABLE t8 ALTER COLUMN c0 SET NOT NULL; +SELECT create_distributed_table('t9', 'c0'); +ALTER TABLE t9 ALTER COLUMN c0 SET NOT NULL; +SELECT create_reference_table('t10'); + +SELECT count(*) FROM ( +SELECT ALL t7.c1, t7.c0, t8.c1, t10.c1, t8.c0 FROM t7 CROSS JOIN t10 FULL OUTER JOIN t8 ON (((((((('[832125354,1134163512)'::int4range)*('(0,2106623281)'::int4range)))-('(-600267905,509840582]'::int4range)))*('(-365203965,1662828182)'::int4range)))&<((((((('(-1286467417,697584012]'::int4range)*('[-1691485781,1341103963)'::int4range)))*((('(-1768368435,1719707648)'::int4range)*('(139536997,1275813540]'::int4range)))))*((((('[-2103910157,-1961746758)'::int4range)*('[-834534078,533073939)'::int4range)))*((('[-1030552151,552856781]'::int4range)*('[-1109419376,1205173697]'::int4range)))))))) +) AS foo; + SET client_min_messages TO WARNING; DROP SCHEMA sqlancer_failures CASCADE;