Add regression tests for outer/cross JOINs

pull/3597/head
Onder Kalaci 2020-10-08 14:45:47 +02:00
parent de33079065
commit 15e724c073
8 changed files with 681 additions and 12 deletions

View File

@ -805,6 +805,10 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin(
*/
if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrel))
{
/*
* Find the first (or only) recurring RTE to give a meaningful
* error to the user.
*/
recurType = FetchFirstRecurType(plannerInfo, outerrel);
break;
@ -814,6 +818,10 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin(
{
if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, innerrel))
{
/*
* Find the first (or only) recurring RTE to give a meaningful
* error to the user.
*/
recurType = FetchFirstRecurType(plannerInfo, innerrel);
break;
@ -821,6 +829,10 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin(
if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrel))
{
/*
* Find the first (or only) recurring RTE to give a meaningful
* error to the user.
*/
recurType = FetchFirstRecurType(plannerInfo, outerrel);
break;

View File

@ -0,0 +1,253 @@
-- this test file relies on multi_behavioral_analytics_create_table
-- and aims to have variety of tests covering CROSS JOINs
-- "t1 CROSS JOIN t2" is equivalent of "t1 JOIN t2 ON true"
-- a distributed table can be cross joined with a reference table
-- and the CROSS JOIN can be in the outer part of an outer JOIN
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 LEFT JOIN users_table u ON (e2.user_id = u.user_id);
count
---------------------------------------------------------------------
176649
(1 row)
-- two distributed tables cannot be cross joined
-- as it lacks distribution key equality
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u;
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
SELECT count(*) FROM events_reference_table e1, events_table e2, users_table u;
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- we can provide the distribution key equality via WHERE clause
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u WHERE u.user_id = e2.user_id;
count
---------------------------------------------------------------------
176649
(1 row)
-- two reference tables are JOINed, and later CROSS JOINed with a distributed table
-- it is safe to pushdown
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
count
---------------------------------------------------------------------
606
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
count
---------------------------------------------------------------------
606
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 RIGHT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
count
---------------------------------------------------------------------
606
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 CROSS JOIN users_table;
count
---------------------------------------------------------------------
3636
(1 row)
-- two reference tables CROSS JOINNed, and later JOINED with distributed tables
-- it is safe to pushdown
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON false;
count
---------------------------------------------------------------------
101
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON false;
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON true;
count
---------------------------------------------------------------------
3636
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON true;
count
---------------------------------------------------------------------
3636
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON (ref1.id = users_table.user_id);
count
---------------------------------------------------------------------
606
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON (ref1.id = users_table.user_id);
count
---------------------------------------------------------------------
606
(1 row)
-- two reference tables CROSS JOINNed, and later JOINED with distributed tables
-- but the reference table CROSS JOIN is in the outer side of the JOIN with the distributed table
-- so we cannot pushdown
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table ON (ref1.id = users_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id != users_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id > 0);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (users_table.user_id > 0);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON true;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON false;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- a reference tables CROSS JOINed with a distribted table, and later JOINED with distributed tables on distribution keys
-- so safe to pushdown
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (u1.user_id = u2.user_id);
count
---------------------------------------------------------------------
11802
(1 row)
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (u1.user_id = u2.user_id);
count
---------------------------------------------------------------------
11802
(1 row)
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (u1.user_id = u2.user_id);
count
---------------------------------------------------------------------
11802
(1 row)
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (u1.user_id = u2.user_id);
count
---------------------------------------------------------------------
11802
(1 row)
-- a reference tables CROSS JOINed with a distribted table, and later JOINED with distributed tables on reference table column
-- so not safe to pushdown
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (ref2.id = u2.user_id);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (ref2.id = u2.user_id);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (ref2.id = u2.user_id);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- via repartitioning, Citus can handle this query as the result of "u1 CROSS JOIN ref2"
-- can be repartitioned on ref2.id
Set citus.enable_repartition_joins to on;
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (ref2.id = u2.user_id);
count
---------------------------------------------------------------------
10201
(1 row)
reset citus.enable_repartition_joins;
-- although the following has the "ref LEFT JOIN dist" type of query, the LEFT JOIN is eliminated by Postgres
-- because the INNER JOIN eliminates the LEFT JOIN
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) JOIN users_table u2 ON (u2.user_id = users_table.user_id);
count
---------------------------------------------------------------------
11802
(1 row)
-- this is the same query as the above, but this time the outer query is also LEFT JOIN, meaning that Postgres
-- cannot eliminate the outer join
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) LEFT JOIN users_table u2 ON (u2.user_id = users_table.user_id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- cross join that goes through non-colocated subquery logic
-- for the "events_table" subquery as both distributed tables
-- do not have JOIN on the distribution key
SELECT max(events_all.cnt),
events_all.usr_id
FROM
(SELECT *, random() FROM
(SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all
) AS events_all
CROSS JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
max | usr_id
---------------------------------------------------------------------
432 | 2
391 | 4
364 | 5
357 | 3
105 | 1
(5 rows)
-- cross join that goes through non-colocated subquery logic
-- for the "events_all" subquery as both distributed tables
-- do not have JOIN on the distribution key
SELECT max(events_all.cnt),
events_all.usr_id
FROM events_table
CROSS JOIN (SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
max | usr_id
---------------------------------------------------------------------
432 | 2
391 | 4
364 | 5
357 | 3
105 | 1
(5 rows)
-- cross join is between a reference table and distributed table, and
-- deep inside a subquery. The subquery can be in the outer part of the LEFT JOIN
SELECT
users_table.*
FROM
(SELECT
events_all.*, random()
FROM
events_reference_table JOIN users_table USING(user_id)
JOIN
(SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
CROSS JOIN users_table
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all ON (user_id = usr_id)
) AS events_all
LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id)
LEFT JOIN users_table USING (user_id)
ORDER BY 1,2,3,4 LIMIT 5;
user_id | time | value_1 | value_2 | value_3 | value_4
---------------------------------------------------------------------
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
(5 rows)

View File

@ -660,6 +660,132 @@ LIMIT 5;
105 | 1
(5 rows)
-- should be fine even if the tables are deep inside subqueries
SELECT max(events_all.cnt),
events_all.usr_id
FROM
(SELECT *, random() FROM
(SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all
) AS events_all
LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id)
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
max | usr_id
---------------------------------------------------------------------
432 | 2
391 | 4
364 | 5
357 | 3
105 | 1
(5 rows)
-- should be fine with FULL join as well
SELECT max(events_all.cnt),
events_all.usr_id
FROM events_table
FULL JOIN (SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all ON (events_all.usr_id = events_table.user_id)
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
max | usr_id
---------------------------------------------------------------------
432 | 2
391 | 4
364 | 5
357 | 3
105 | 1
(5 rows)
-- two levels of "(ref_table JOIN dist_table) LEFT JOIN"
-- should be fine as well
SELECT
users_table.*
FROM
(SELECT
events_all.*, random()
FROM
events_reference_table JOIN users_table USING(user_id)
JOIN
(SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all ON (user_id = usr_id)
) AS events_all
LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id)
LEFT JOIN users_table USING (user_id)
ORDER BY 1,2,3,4 LIMIT 5;
user_id | time | value_1 | value_2 | value_3 | value_4
---------------------------------------------------------------------
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
(5 rows)
-- we should be able to support OUTER joins involving
-- reference tables even if the subquery is in WHERE clause
SELECT count(*)
FROM events_table
WHERE user_id IN
(SELECT subquery_1.user_id
FROM
(SELECT tt1.user_id,
random()
FROM users_table AS tt1
JOIN events_table AS tt2 ON tt1.user_id = tt2.user_id) subquery_1
RIGHT JOIN
(SELECT *,
random()
FROM
(SELECT tt1.user_id,
random()
FROM users_table AS tt1
JOIN events_reference_table AS REF ON tt1.user_id = ref.user_id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id);
count
---------------------------------------------------------------------
101
(1 row)
-- we should be able to support OUTER joins involving
-- reference tables even if the subquery is in the outer part of a JOIN
SELECT count(*)
FROM users_table
RIGHT JOIN
(SELECT subquery_1.user_id
FROM
(SELECT tt1.user_id,
random()
FROM users_table AS tt1
JOIN events_table AS tt2 ON tt1.user_id = tt2.user_id) subquery_1
RIGHT JOIN
(SELECT *,
random()
FROM
(SELECT tt1.user_id,
random()
FROM users_table AS tt1
JOIN events_reference_table AS REF ON tt1.user_id = ref.user_id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id) AS foo USING (user_id);
count
---------------------------------------------------------------------
12664199
(1 row)
-- LATERAL JOINs used with INNER JOINs with reference tables
SET citus.subquery_pushdown to ON;
NOTICE: Setting citus.subquery_pushdown flag is discouraged becuase it forces the planner to pushdown certain queries, skipping relevant correctness checks.
@ -1910,12 +2036,6 @@ SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER
3
(1 row)
-- again, in theory should be OK to pushdown but
-- Postgres generates join restriction between reference and distributed tables
-- in one of the cases
SELECT count(*) FROM user_buy_test_table a LEFT JOIN users_ref_test_table b ON (true) RIGHT JOIN users_ref_test_table c ON (b.id = c.id);
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
-- outer part of the LEFT JOIN consists only intermediate result due to LIMIT, so we cannot push down
SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LIMIT 5) as foo LEFT JOIN user_buy_test_table ON true;
ERROR: cannot pushdown the subquery

View File

@ -108,5 +108,48 @@ EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = CASE WHEN 2 BETWEEN 1 AND 3 THEN
Filter: (c0 = 2)
(7 rows)
CREATE TABLE IF NOT EXISTS t6(c0 TEXT CHECK (TRUE), c1 money ) WITH (autovacuum_vacuum_threshold=1180014707, autovacuum_freeze_table_age=13771154, autovacuum_vacuum_cost_delay=23, autovacuum_analyze_threshold=1935153914, autovacuum_freeze_min_age=721733768, autovacuum_enabled=0, autovacuum_vacuum_cost_limit=9983);
CREATE UNLOGGED TABLE IF NOT EXISTS t7(LIKE t6);
CREATE TABLE t8(LIKE t6 INCLUDING INDEXES);
CREATE UNLOGGED TABLE t9(LIKE t6 EXCLUDING STATISTICS);
CREATE TABLE t10(LIKE t7);
SELECT create_distributed_table('t6', 'c0');
create_distributed_table
---------------------------------------------------------------------
(1 row)
ALTER TABLE t6 ALTER COLUMN c0 SET NOT NULL;
SELECT create_reference_table('t7');
create_reference_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('t8', 'c0');
create_distributed_table
---------------------------------------------------------------------
(1 row)
ALTER TABLE t8 ALTER COLUMN c0 SET NOT NULL;
SELECT create_distributed_table('t9', 'c0');
create_distributed_table
---------------------------------------------------------------------
(1 row)
ALTER TABLE t9 ALTER COLUMN c0 SET NOT NULL;
SELECT create_reference_table('t10');
create_reference_table
---------------------------------------------------------------------
(1 row)
SELECT count(*) FROM (
SELECT ALL t7.c1, t7.c0, t8.c1, t10.c1, t8.c0 FROM t7 CROSS JOIN t10 FULL OUTER JOIN t8 ON (((((((('[832125354,1134163512)'::int4range)*('(0,2106623281)'::int4range)))-('(-600267905,509840582]'::int4range)))*('(-365203965,1662828182)'::int4range)))&<((((((('(-1286467417,697584012]'::int4range)*('[-1691485781,1341103963)'::int4range)))*((('(-1768368435,1719707648)'::int4range)*('(139536997,1275813540]'::int4range)))))*((((('[-2103910157,-1961746758)'::int4range)*('[-834534078,533073939)'::int4range)))*((('[-1030552151,552856781]'::int4range)*('[-1109419376,1205173697]'::int4range))))))))
) AS foo;
ERROR: cannot pushdown the subquery
DETAIL: There exist a reference table in the outer part of the outer join
SET client_min_messages TO WARNING;
DROP SCHEMA sqlancer_failures CASCADE;

View File

@ -88,7 +88,7 @@ test: subquery_prepared_statements pg12 cte_inline pg13
# ----------
test: multi_deparse_shard_query multi_distributed_transaction_id intermediate_results limit_intermediate_size rollback_to_savepoint
test: multi_explain hyperscale_tutorial partitioned_intermediate_results distributed_intermediate_results multi_real_time_transaction
test: multi_basic_queries multi_complex_expressions multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics
test: multi_basic_queries cross_join multi_complex_expressions multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics
test: multi_subquery_complex_reference_clause multi_subquery_window_functions multi_view multi_sql_function multi_prepare_sql
test: sql_procedure multi_function_in_join row_types materialized_view undistribute_table
test: multi_subquery_in_where_reference_clause full_join adaptive_executor propagate_set_commands

View File

@ -0,0 +1,131 @@
-- this test file relies on multi_behavioral_analytics_create_table
-- and aims to have variety of tests covering CROSS JOINs
-- "t1 CROSS JOIN t2" is equivalent of "t1 JOIN t2 ON true"
-- a distributed table can be cross joined with a reference table
-- and the CROSS JOIN can be in the outer part of an outer JOIN
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 LEFT JOIN users_table u ON (e2.user_id = u.user_id);
-- two distributed tables cannot be cross joined
-- as it lacks distribution key equality
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u;
SELECT count(*) FROM events_reference_table e1, events_table e2, users_table u;
-- we can provide the distribution key equality via WHERE clause
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u WHERE u.user_id = e2.user_id;
-- two reference tables are JOINed, and later CROSS JOINed with a distributed table
-- it is safe to pushdown
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
SELECT count(*) FROM users_ref_test_table ref1 RIGHT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 CROSS JOIN users_table;
-- two reference tables CROSS JOINNed, and later JOINED with distributed tables
-- it is safe to pushdown
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON false;
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON false;
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON true;
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON true;
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON (ref1.id = users_table.user_id);
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON (ref1.id = users_table.user_id);
-- two reference tables CROSS JOINNed, and later JOINED with distributed tables
-- but the reference table CROSS JOIN is in the outer side of the JOIN with the distributed table
-- so we cannot pushdown
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id);
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table ON (ref1.id = users_table.user_id);
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id != users_table.user_id);
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id > 0);
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (users_table.user_id > 0);
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON true;
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON false;
-- a reference tables CROSS JOINed with a distribted table, and later JOINED with distributed tables on distribution keys
-- so safe to pushdown
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (u1.user_id = u2.user_id);
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (u1.user_id = u2.user_id);
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (u1.user_id = u2.user_id);
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (u1.user_id = u2.user_id);
-- a reference tables CROSS JOINed with a distribted table, and later JOINED with distributed tables on reference table column
-- so not safe to pushdown
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (ref2.id = u2.user_id);
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (ref2.id = u2.user_id);
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (ref2.id = u2.user_id);
-- via repartitioning, Citus can handle this query as the result of "u1 CROSS JOIN ref2"
-- can be repartitioned on ref2.id
Set citus.enable_repartition_joins to on;
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (ref2.id = u2.user_id);
reset citus.enable_repartition_joins;
-- although the following has the "ref LEFT JOIN dist" type of query, the LEFT JOIN is eliminated by Postgres
-- because the INNER JOIN eliminates the LEFT JOIN
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) JOIN users_table u2 ON (u2.user_id = users_table.user_id);
-- this is the same query as the above, but this time the outer query is also LEFT JOIN, meaning that Postgres
-- cannot eliminate the outer join
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) LEFT JOIN users_table u2 ON (u2.user_id = users_table.user_id);
-- cross join that goes through non-colocated subquery logic
-- for the "events_table" subquery as both distributed tables
-- do not have JOIN on the distribution key
SELECT max(events_all.cnt),
events_all.usr_id
FROM
(SELECT *, random() FROM
(SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all
) AS events_all
CROSS JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
-- cross join that goes through non-colocated subquery logic
-- for the "events_all" subquery as both distributed tables
-- do not have JOIN on the distribution key
SELECT max(events_all.cnt),
events_all.usr_id
FROM events_table
CROSS JOIN (SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
-- cross join is between a reference table and distributed table, and
-- deep inside a subquery. The subquery can be in the outer part of the LEFT JOIN
SELECT
users_table.*
FROM
(SELECT
events_all.*, random()
FROM
events_reference_table JOIN users_table USING(user_id)
JOIN
(SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
CROSS JOIN users_table
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all ON (user_id = usr_id)
) AS events_all
LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id)
LEFT JOIN users_table USING (user_id)
ORDER BY 1,2,3,4 LIMIT 5;

View File

@ -401,6 +401,102 @@ ORDER BY 1 DESC,
2 DESC
LIMIT 5;
-- should be fine even if the tables are deep inside subqueries
SELECT max(events_all.cnt),
events_all.usr_id
FROM
(SELECT *, random() FROM
(SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all
) AS events_all
LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id)
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
-- should be fine with FULL join as well
SELECT max(events_all.cnt),
events_all.usr_id
FROM events_table
FULL JOIN (SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all ON (events_all.usr_id = events_table.user_id)
GROUP BY 2
ORDER BY 1 DESC,
2 DESC
LIMIT 5;
-- two levels of "(ref_table JOIN dist_table) LEFT JOIN"
-- should be fine as well
SELECT
users_table.*
FROM
(SELECT
events_all.*, random()
FROM
events_reference_table JOIN users_table USING(user_id)
JOIN
(SELECT *, random()
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
GROUP BY users_table.user_id) AS events_all_inner
) AS events_all ON (user_id = usr_id)
) AS events_all
LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id)
LEFT JOIN users_table USING (user_id)
ORDER BY 1,2,3,4 LIMIT 5;
-- we should be able to support OUTER joins involving
-- reference tables even if the subquery is in WHERE clause
SELECT count(*)
FROM events_table
WHERE user_id IN
(SELECT subquery_1.user_id
FROM
(SELECT tt1.user_id,
random()
FROM users_table AS tt1
JOIN events_table AS tt2 ON tt1.user_id = tt2.user_id) subquery_1
RIGHT JOIN
(SELECT *,
random()
FROM
(SELECT tt1.user_id,
random()
FROM users_table AS tt1
JOIN events_reference_table AS REF ON tt1.user_id = ref.user_id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id);
-- we should be able to support OUTER joins involving
-- reference tables even if the subquery is in the outer part of a JOIN
SELECT count(*)
FROM users_table
RIGHT JOIN
(SELECT subquery_1.user_id
FROM
(SELECT tt1.user_id,
random()
FROM users_table AS tt1
JOIN events_table AS tt2 ON tt1.user_id = tt2.user_id) subquery_1
RIGHT JOIN
(SELECT *,
random()
FROM
(SELECT tt1.user_id,
random()
FROM users_table AS tt1
JOIN events_reference_table AS REF ON tt1.user_id = ref.user_id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id) AS foo USING (user_id);
-- LATERAL JOINs used with INNER JOINs with reference tables
SET citus.subquery_pushdown to ON;
SELECT user_id, lastseen
@ -1367,11 +1463,6 @@ SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INN
-- so Postgres doesn't generate join restriction between reference and distributed tables
SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.user_id = user_buy_test_table.user_id);
-- again, in theory should be OK to pushdown but
-- Postgres generates join restriction between reference and distributed tables
-- in one of the cases
SELECT count(*) FROM user_buy_test_table a LEFT JOIN users_ref_test_table b ON (true) RIGHT JOIN users_ref_test_table c ON (b.id = c.id);
-- outer part of the LEFT JOIN consists only intermediate result due to LIMIT, so we cannot push down
SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LIMIT 5) as foo LEFT JOIN user_buy_test_table ON true;

View File

@ -39,5 +39,24 @@ INSERT INTO t5 VALUES (CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END);
EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = 2;
EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END;
CREATE TABLE IF NOT EXISTS t6(c0 TEXT CHECK (TRUE), c1 money ) WITH (autovacuum_vacuum_threshold=1180014707, autovacuum_freeze_table_age=13771154, autovacuum_vacuum_cost_delay=23, autovacuum_analyze_threshold=1935153914, autovacuum_freeze_min_age=721733768, autovacuum_enabled=0, autovacuum_vacuum_cost_limit=9983);
CREATE UNLOGGED TABLE IF NOT EXISTS t7(LIKE t6);
CREATE TABLE t8(LIKE t6 INCLUDING INDEXES);
CREATE UNLOGGED TABLE t9(LIKE t6 EXCLUDING STATISTICS);
CREATE TABLE t10(LIKE t7);
SELECT create_distributed_table('t6', 'c0');
ALTER TABLE t6 ALTER COLUMN c0 SET NOT NULL;
SELECT create_reference_table('t7');
SELECT create_distributed_table('t8', 'c0');
ALTER TABLE t8 ALTER COLUMN c0 SET NOT NULL;
SELECT create_distributed_table('t9', 'c0');
ALTER TABLE t9 ALTER COLUMN c0 SET NOT NULL;
SELECT create_reference_table('t10');
SELECT count(*) FROM (
SELECT ALL t7.c1, t7.c0, t8.c1, t10.c1, t8.c0 FROM t7 CROSS JOIN t10 FULL OUTER JOIN t8 ON (((((((('[832125354,1134163512)'::int4range)*('(0,2106623281)'::int4range)))-('(-600267905,509840582]'::int4range)))*('(-365203965,1662828182)'::int4range)))&<((((((('(-1286467417,697584012]'::int4range)*('[-1691485781,1341103963)'::int4range)))*((('(-1768368435,1719707648)'::int4range)*('(139536997,1275813540]'::int4range)))))*((((('[-2103910157,-1961746758)'::int4range)*('[-834534078,533073939)'::int4range)))*((('[-1030552151,552856781]'::int4range)*('[-1109419376,1205173697]'::int4range))))))))
) AS foo;
SET client_min_messages TO WARNING;
DROP SCHEMA sqlancer_failures CASCADE;