mirror of https://github.com/citusdata/citus.git
Add regression tests for outer/cross JOINs
parent
de33079065
commit
15e724c073
|
@ -805,6 +805,10 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin(
|
|||
*/
|
||||
if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrel))
|
||||
{
|
||||
/*
|
||||
* Find the first (or only) recurring RTE to give a meaningful
|
||||
* error to the user.
|
||||
*/
|
||||
recurType = FetchFirstRecurType(plannerInfo, outerrel);
|
||||
|
||||
break;
|
||||
|
@ -814,6 +818,10 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin(
|
|||
{
|
||||
if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, innerrel))
|
||||
{
|
||||
/*
|
||||
* Find the first (or only) recurring RTE to give a meaningful
|
||||
* error to the user.
|
||||
*/
|
||||
recurType = FetchFirstRecurType(plannerInfo, innerrel);
|
||||
|
||||
break;
|
||||
|
@ -821,6 +829,10 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin(
|
|||
|
||||
if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrel))
|
||||
{
|
||||
/*
|
||||
* Find the first (or only) recurring RTE to give a meaningful
|
||||
* error to the user.
|
||||
*/
|
||||
recurType = FetchFirstRecurType(plannerInfo, outerrel);
|
||||
|
||||
break;
|
||||
|
|
|
@ -0,0 +1,253 @@
|
|||
-- this test file relies on multi_behavioral_analytics_create_table
|
||||
-- and aims to have a variety of tests covering CROSS JOINs
|
||||
-- "t1 CROSS JOIN t2" is equivalent to "t1 JOIN t2 ON true"
|
||||
-- a distributed table can be cross joined with a reference table
|
||||
-- and the CROSS JOIN can be in the outer part of an outer JOIN
|
||||
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 LEFT JOIN users_table u ON (e2.user_id = u.user_id);
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
176649
|
||||
(1 row)
|
||||
|
||||
-- two distributed tables cannot be cross joined
|
||||
-- as it lacks distribution key equality
|
||||
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u;
|
||||
ERROR: cannot perform distributed planning on this query
|
||||
DETAIL: Cartesian products are currently unsupported
|
||||
SELECT count(*) FROM events_reference_table e1, events_table e2, users_table u;
|
||||
ERROR: cannot perform distributed planning on this query
|
||||
DETAIL: Cartesian products are currently unsupported
|
||||
-- we can provide the distribution key equality via WHERE clause
|
||||
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u WHERE u.user_id = e2.user_id;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
176649
|
||||
(1 row)
|
||||
|
||||
-- two reference tables are JOINed, and later CROSS JOINed with a distributed table
|
||||
-- it is safe to pushdown
|
||||
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
606
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
606
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM users_ref_test_table ref1 RIGHT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
606
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 CROSS JOIN users_table;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
3636
|
||||
(1 row)
|
||||
|
||||
-- two reference tables CROSS JOINed, and later JOINed with distributed tables
|
||||
-- it is safe to pushdown
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON false;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
101
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON false;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON true;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
3636
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON true;
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
3636
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON (ref1.id = users_table.user_id);
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
606
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON (ref1.id = users_table.user_id);
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
606
|
||||
(1 row)
|
||||
|
||||
-- two reference tables CROSS JOINed, and later JOINed with distributed tables
|
||||
-- but the reference table CROSS JOIN is in the outer side of the JOIN with the distributed table
|
||||
-- so we cannot pushdown
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table ON (ref1.id = users_table.user_id);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id != users_table.user_id);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id > 0);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (users_table.user_id > 0);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON true;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON false;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- a reference table CROSS JOINed with a distributed table, and later JOINed with distributed tables on distribution keys
|
||||
-- so safe to pushdown
|
||||
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (u1.user_id = u2.user_id);
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
11802
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (u1.user_id = u2.user_id);
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
11802
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (u1.user_id = u2.user_id);
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
11802
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (u1.user_id = u2.user_id);
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
11802
|
||||
(1 row)
|
||||
|
||||
-- a reference table CROSS JOINed with a distributed table, and later JOINed with distributed tables on a reference table column
|
||||
-- so not safe to pushdown
|
||||
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (ref2.id = u2.user_id);
|
||||
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
||||
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (ref2.id = u2.user_id);
|
||||
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
||||
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (ref2.id = u2.user_id);
|
||||
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
||||
-- via repartitioning, Citus can handle this query as the result of "u1 CROSS JOIN ref2"
|
||||
-- can be repartitioned on ref2.id
|
||||
Set citus.enable_repartition_joins to on;
|
||||
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (ref2.id = u2.user_id);
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
10201
|
||||
(1 row)
|
||||
|
||||
reset citus.enable_repartition_joins;
|
||||
-- although the following has the "ref LEFT JOIN dist" type of query, the LEFT JOIN is eliminated by Postgres
|
||||
-- because the later INNER JOIN on users_table.user_id discards the NULL-extended rows
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) JOIN users_table u2 ON (u2.user_id = users_table.user_id);
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
11802
|
||||
(1 row)
|
||||
|
||||
-- this is the same query as the above, but this time the outer query is also LEFT JOIN, meaning that Postgres
|
||||
-- cannot eliminate the outer join
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) LEFT JOIN users_table u2 ON (u2.user_id = users_table.user_id);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- cross join that goes through non-colocated subquery logic
|
||||
-- for the "events_table" subquery as both distributed tables
|
||||
-- do not have JOIN on the distribution key
|
||||
SELECT max(events_all.cnt),
|
||||
events_all.usr_id
|
||||
FROM
|
||||
(SELECT *, random() FROM
|
||||
(SELECT *, random()
|
||||
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
||||
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
|
||||
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
|
||||
GROUP BY users_table.user_id) AS events_all_inner
|
||||
) AS events_all
|
||||
) AS events_all
|
||||
CROSS JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table
|
||||
GROUP BY 2
|
||||
ORDER BY 1 DESC,
|
||||
2 DESC
|
||||
LIMIT 5;
|
||||
max | usr_id
|
||||
---------------------------------------------------------------------
|
||||
432 | 2
|
||||
391 | 4
|
||||
364 | 5
|
||||
357 | 3
|
||||
105 | 1
|
||||
(5 rows)
|
||||
|
||||
-- cross join that goes through non-colocated subquery logic
|
||||
-- for the "events_all" subquery as both distributed tables
|
||||
-- do not have JOIN on the distribution key
|
||||
SELECT max(events_all.cnt),
|
||||
events_all.usr_id
|
||||
FROM events_table
|
||||
CROSS JOIN (SELECT *, random()
|
||||
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
||||
FROM events_reference_table
|
||||
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
|
||||
GROUP BY users_table.user_id) AS events_all_inner
|
||||
) AS events_all
|
||||
GROUP BY 2
|
||||
ORDER BY 1 DESC,
|
||||
2 DESC
|
||||
LIMIT 5;
|
||||
max | usr_id
|
||||
---------------------------------------------------------------------
|
||||
432 | 2
|
||||
391 | 4
|
||||
364 | 5
|
||||
357 | 3
|
||||
105 | 1
|
||||
(5 rows)
|
||||
|
||||
-- cross join is between a reference table and distributed table, and
|
||||
-- deep inside a subquery. The subquery can be in the outer part of the LEFT JOIN
|
||||
SELECT
|
||||
users_table.*
|
||||
FROM
|
||||
(SELECT
|
||||
events_all.*, random()
|
||||
FROM
|
||||
events_reference_table JOIN users_table USING(user_id)
|
||||
JOIN
|
||||
(SELECT *, random()
|
||||
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
||||
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
|
||||
CROSS JOIN users_table
|
||||
GROUP BY users_table.user_id) AS events_all_inner
|
||||
) AS events_all ON (user_id = usr_id)
|
||||
) AS events_all
|
||||
LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id)
|
||||
LEFT JOIN users_table USING (user_id)
|
||||
ORDER BY 1,2,3,4 LIMIT 5;
|
||||
user_id | time | value_1 | value_2 | value_3 | value_4
|
||||
---------------------------------------------------------------------
|
||||
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
|
||||
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
|
||||
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
|
||||
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
|
||||
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
|
||||
(5 rows)
|
||||
|
|
@ -660,6 +660,132 @@ LIMIT 5;
|
|||
105 | 1
|
||||
(5 rows)
|
||||
|
||||
-- should be fine even if the tables are deep inside subqueries
|
||||
SELECT max(events_all.cnt),
|
||||
events_all.usr_id
|
||||
FROM
|
||||
(SELECT *, random() FROM
|
||||
(SELECT *, random()
|
||||
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
||||
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
|
||||
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
|
||||
GROUP BY users_table.user_id) AS events_all_inner
|
||||
) AS events_all
|
||||
) AS events_all
|
||||
LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id)
|
||||
GROUP BY 2
|
||||
ORDER BY 1 DESC,
|
||||
2 DESC
|
||||
LIMIT 5;
|
||||
max | usr_id
|
||||
---------------------------------------------------------------------
|
||||
432 | 2
|
||||
391 | 4
|
||||
364 | 5
|
||||
357 | 3
|
||||
105 | 1
|
||||
(5 rows)
|
||||
|
||||
-- should be fine with FULL join as well
|
||||
SELECT max(events_all.cnt),
|
||||
events_all.usr_id
|
||||
FROM events_table
|
||||
FULL JOIN (SELECT *, random()
|
||||
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
||||
FROM events_reference_table
|
||||
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
|
||||
GROUP BY users_table.user_id) AS events_all_inner
|
||||
) AS events_all ON (events_all.usr_id = events_table.user_id)
|
||||
GROUP BY 2
|
||||
ORDER BY 1 DESC,
|
||||
2 DESC
|
||||
LIMIT 5;
|
||||
max | usr_id
|
||||
---------------------------------------------------------------------
|
||||
432 | 2
|
||||
391 | 4
|
||||
364 | 5
|
||||
357 | 3
|
||||
105 | 1
|
||||
(5 rows)
|
||||
|
||||
-- two levels of "(ref_table JOIN dist_table) LEFT JOIN"
|
||||
-- should be fine as well
|
||||
SELECT
|
||||
users_table.*
|
||||
FROM
|
||||
(SELECT
|
||||
events_all.*, random()
|
||||
FROM
|
||||
events_reference_table JOIN users_table USING(user_id)
|
||||
JOIN
|
||||
(SELECT *, random()
|
||||
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
||||
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
|
||||
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
|
||||
GROUP BY users_table.user_id) AS events_all_inner
|
||||
) AS events_all ON (user_id = usr_id)
|
||||
) AS events_all
|
||||
LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id)
|
||||
LEFT JOIN users_table USING (user_id)
|
||||
ORDER BY 1,2,3,4 LIMIT 5;
|
||||
user_id | time | value_1 | value_2 | value_3 | value_4
|
||||
---------------------------------------------------------------------
|
||||
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
|
||||
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
|
||||
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
|
||||
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
|
||||
1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 |
|
||||
(5 rows)
|
||||
|
||||
-- we should be able to support OUTER joins involving
|
||||
-- reference tables even if the subquery is in WHERE clause
|
||||
SELECT count(*)
|
||||
FROM events_table
|
||||
WHERE user_id IN
|
||||
(SELECT subquery_1.user_id
|
||||
FROM
|
||||
(SELECT tt1.user_id,
|
||||
random()
|
||||
FROM users_table AS tt1
|
||||
JOIN events_table AS tt2 ON tt1.user_id = tt2.user_id) subquery_1
|
||||
RIGHT JOIN
|
||||
(SELECT *,
|
||||
random()
|
||||
FROM
|
||||
(SELECT tt1.user_id,
|
||||
random()
|
||||
FROM users_table AS tt1
|
||||
JOIN events_reference_table AS REF ON tt1.user_id = ref.user_id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id);
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
101
|
||||
(1 row)
|
||||
|
||||
-- we should be able to support OUTER joins involving
|
||||
-- reference tables even if the subquery is in the outer part of a JOIN
|
||||
SELECT count(*)
|
||||
FROM users_table
|
||||
RIGHT JOIN
|
||||
(SELECT subquery_1.user_id
|
||||
FROM
|
||||
(SELECT tt1.user_id,
|
||||
random()
|
||||
FROM users_table AS tt1
|
||||
JOIN events_table AS tt2 ON tt1.user_id = tt2.user_id) subquery_1
|
||||
RIGHT JOIN
|
||||
(SELECT *,
|
||||
random()
|
||||
FROM
|
||||
(SELECT tt1.user_id,
|
||||
random()
|
||||
FROM users_table AS tt1
|
||||
JOIN events_reference_table AS REF ON tt1.user_id = ref.user_id) subquery_2_inner) subquery_2 ON subquery_1.user_id = subquery_2.user_id) AS foo USING (user_id);
|
||||
count
|
||||
---------------------------------------------------------------------
|
||||
12664199
|
||||
(1 row)
|
||||
|
||||
-- LATERAL JOINs used with INNER JOINs with reference tables
|
||||
SET citus.subquery_pushdown to ON;
|
||||
NOTICE: Setting citus.subquery_pushdown flag is discouraged becuase it forces the planner to pushdown certain queries, skipping relevant correctness checks.
|
||||
|
@ -1910,12 +2036,6 @@ SELECT count(*) FROM (SELECT u1.*, random() FROM users_ref_test_table ref1 INNER
|
|||
3
|
||||
(1 row)
|
||||
|
||||
-- again, in theory should be OK to pushdown but
|
||||
-- Postgres generates join restriction between reference and distributed tables
|
||||
-- in one of the cases
|
||||
SELECT count(*) FROM user_buy_test_table a LEFT JOIN users_ref_test_table b ON (true) RIGHT JOIN users_ref_test_table c ON (b.id = c.id);
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
-- outer part of the LEFT JOIN consists only intermediate result due to LIMIT, so we cannot push down
|
||||
SELECT count(*) FROM (SELECT ref1.* FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id LIMIT 5) as foo LEFT JOIN user_buy_test_table ON true;
|
||||
ERROR: cannot pushdown the subquery
|
||||
|
|
|
@ -108,5 +108,48 @@ EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = CASE WHEN 2 BETWEEN 1 AND 3 THEN
|
|||
Filter: (c0 = 2)
|
||||
(7 rows)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS t6(c0 TEXT CHECK (TRUE), c1 money ) WITH (autovacuum_vacuum_threshold=1180014707, autovacuum_freeze_table_age=13771154, autovacuum_vacuum_cost_delay=23, autovacuum_analyze_threshold=1935153914, autovacuum_freeze_min_age=721733768, autovacuum_enabled=0, autovacuum_vacuum_cost_limit=9983);
|
||||
CREATE UNLOGGED TABLE IF NOT EXISTS t7(LIKE t6);
|
||||
CREATE TABLE t8(LIKE t6 INCLUDING INDEXES);
|
||||
CREATE UNLOGGED TABLE t9(LIKE t6 EXCLUDING STATISTICS);
|
||||
CREATE TABLE t10(LIKE t7);
|
||||
SELECT create_distributed_table('t6', 'c0');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
ALTER TABLE t6 ALTER COLUMN c0 SET NOT NULL;
|
||||
SELECT create_reference_table('t7');
|
||||
create_reference_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT create_distributed_table('t8', 'c0');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
ALTER TABLE t8 ALTER COLUMN c0 SET NOT NULL;
|
||||
SELECT create_distributed_table('t9', 'c0');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
ALTER TABLE t9 ALTER COLUMN c0 SET NOT NULL;
|
||||
SELECT create_reference_table('t10');
|
||||
create_reference_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM (
|
||||
SELECT ALL t7.c1, t7.c0, t8.c1, t10.c1, t8.c0 FROM t7 CROSS JOIN t10 FULL OUTER JOIN t8 ON (((((((('[832125354,1134163512)'::int4range)*('(0,2106623281)'::int4range)))-('(-600267905,509840582]'::int4range)))*('(-365203965,1662828182)'::int4range)))&<((((((('(-1286467417,697584012]'::int4range)*('[-1691485781,1341103963)'::int4range)))*((('(-1768368435,1719707648)'::int4range)*('(139536997,1275813540]'::int4range)))))*((((('[-2103910157,-1961746758)'::int4range)*('[-834534078,533073939)'::int4range)))*((('[-1030552151,552856781]'::int4range)*('[-1109419376,1205173697]'::int4range))))))))
|
||||
) AS foo;
|
||||
ERROR: cannot pushdown the subquery
|
||||
DETAIL: There exist a reference table in the outer part of the outer join
|
||||
SET client_min_messages TO WARNING;
|
||||
DROP SCHEMA sqlancer_failures CASCADE;
|
||||
|
|
|
@ -88,7 +88,7 @@ test: subquery_prepared_statements pg12 cte_inline pg13
|
|||
# ----------
|
||||
test: multi_deparse_shard_query multi_distributed_transaction_id intermediate_results limit_intermediate_size rollback_to_savepoint
|
||||
test: multi_explain hyperscale_tutorial partitioned_intermediate_results distributed_intermediate_results multi_real_time_transaction
|
||||
test: multi_basic_queries multi_complex_expressions multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics
|
||||
test: multi_basic_queries cross_join multi_complex_expressions multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics
|
||||
test: multi_subquery_complex_reference_clause multi_subquery_window_functions multi_view multi_sql_function multi_prepare_sql
|
||||
test: sql_procedure multi_function_in_join row_types materialized_view undistribute_table
|
||||
test: multi_subquery_in_where_reference_clause full_join adaptive_executor propagate_set_commands
|
||||
|
|
|
@ -0,0 +1,131 @@
|
|||
-- this test file relies on multi_behavioral_analytics_create_table
|
||||
-- and aims to have a variety of tests covering CROSS JOINs
|
||||
-- "t1 CROSS JOIN t2" is equivalent to "t1 JOIN t2 ON true"
|
||||
|
||||
-- a distributed table can be cross joined with a reference table
|
||||
-- and the CROSS JOIN can be in the outer part of an outer JOIN
|
||||
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 LEFT JOIN users_table u ON (e2.user_id = u.user_id);
|
||||
|
||||
-- two distributed tables cannot be cross joined
|
||||
-- as it lacks distribution key equality
|
||||
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u;
|
||||
SELECT count(*) FROM events_reference_table e1, events_table e2, users_table u;
|
||||
|
||||
-- we can provide the distribution key equality via WHERE clause
|
||||
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u WHERE u.user_id = e2.user_id;
|
||||
|
||||
|
||||
-- two reference tables are JOINed, and later CROSS JOINed with a distributed table
|
||||
-- it is safe to pushdown
|
||||
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
|
||||
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
|
||||
SELECT count(*) FROM users_ref_test_table ref1 RIGHT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 CROSS JOIN users_table;
|
||||
|
||||
-- two reference tables CROSS JOINed, and later JOINed with distributed tables
|
||||
-- it is safe to pushdown
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON false;
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON false;
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON true;
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON true;
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON (ref1.id = users_table.user_id);
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON (ref1.id = users_table.user_id);
|
||||
|
||||
-- two reference tables CROSS JOINed, and later JOINed with distributed tables
|
||||
-- but the reference table CROSS JOIN is in the outer side of the JOIN with the distributed table
|
||||
-- so we cannot pushdown
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id);
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table ON (ref1.id = users_table.user_id);
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id != users_table.user_id);
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id > 0);
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (users_table.user_id > 0);
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON true;
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON false;
|
||||
|
||||
-- a reference table CROSS JOINed with a distributed table, and later JOINed with distributed tables on distribution keys
|
||||
-- so safe to pushdown
|
||||
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (u1.user_id = u2.user_id);
|
||||
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (u1.user_id = u2.user_id);
|
||||
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (u1.user_id = u2.user_id);
|
||||
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (u1.user_id = u2.user_id);
|
||||
|
||||
-- a reference table CROSS JOINed with a distributed table, and later JOINed with distributed tables on a reference table column
|
||||
-- so not safe to pushdown
|
||||
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (ref2.id = u2.user_id);
|
||||
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (ref2.id = u2.user_id);
|
||||
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (ref2.id = u2.user_id);
|
||||
|
||||
-- via repartitioning, Citus can handle this query as the result of "u1 CROSS JOIN ref2"
|
||||
-- can be repartitioned on ref2.id
|
||||
Set citus.enable_repartition_joins to on;
|
||||
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (ref2.id = u2.user_id);
|
||||
reset citus.enable_repartition_joins;
|
||||
|
||||
-- although the following has the "ref LEFT JOIN dist" type of query, the LEFT JOIN is eliminated by Postgres
|
||||
-- because the later INNER JOIN on users_table.user_id discards the NULL-extended rows
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) JOIN users_table u2 ON (u2.user_id = users_table.user_id);
|
||||
|
||||
-- this is the same query as the above, but this time the outer query is also LEFT JOIN, meaning that Postgres
|
||||
-- cannot eliminate the outer join
|
||||
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) LEFT JOIN users_table u2 ON (u2.user_id = users_table.user_id);
|
||||
|
||||
-- cross join that goes through non-colocated subquery logic
|
||||
-- for the "events_table" subquery as both distributed tables
|
||||
-- do not have JOIN on the distribution key
|
||||
SELECT max(events_all.cnt),
|
||||
events_all.usr_id
|
||||
FROM
|
||||
(SELECT *, random() FROM
|
||||
(SELECT *, random()
|
||||
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
||||
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
|
||||
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
|
||||
GROUP BY users_table.user_id) AS events_all_inner
|
||||
) AS events_all
|
||||
) AS events_all
|
||||
CROSS JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table
|
||||
GROUP BY 2
|
||||
ORDER BY 1 DESC,
|
||||
2 DESC
|
||||
LIMIT 5;
|
||||
|
||||
-- cross join that goes through non-colocated subquery logic
|
||||
-- for the "events_all" subquery as both distributed tables
|
||||
-- do not have JOIN on the distribution key
|
||||
SELECT max(events_all.cnt),
|
||||
events_all.usr_id
|
||||
FROM events_table
|
||||
CROSS JOIN (SELECT *, random()
|
||||
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
||||
FROM events_reference_table
|
||||
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
|
||||
GROUP BY users_table.user_id) AS events_all_inner
|
||||
) AS events_all
|
||||
GROUP BY 2
|
||||
ORDER BY 1 DESC,
|
||||
2 DESC
|
||||
LIMIT 5;
|
||||
|
||||
|
||||
-- cross join is between a reference table and distributed table, and
|
||||
-- deep inside a subquery. The subquery can be in the outer part of the LEFT JOIN
|
||||
SELECT
|
||||
users_table.*
|
||||
FROM
|
||||
(SELECT
|
||||
events_all.*, random()
|
||||
FROM
|
||||
events_reference_table JOIN users_table USING(user_id)
|
||||
JOIN
|
||||
(SELECT *, random()
|
||||
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
||||
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
|
||||
CROSS JOIN users_table
|
||||
GROUP BY users_table.user_id) AS events_all_inner
|
||||
) AS events_all ON (user_id = usr_id)
|
||||
) AS events_all
|
||||
LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id)
|
||||
LEFT JOIN users_table USING (user_id)
|
||||
ORDER BY 1,2,3,4 LIMIT 5;
|
||||
|
||||
|
|
@ -401,6 +401,102 @@ ORDER BY 1 DESC,
|
|||
2 DESC
|
||||
LIMIT 5;
|
||||
|
||||
-- The reference/distributed join and the LEFT JOINed events_table are each
-- buried in nested subqueries; this should still be fine.
SELECT
    max(events_all.cnt),
    events_all.usr_id
FROM (
    SELECT
        *,
        random()
    FROM (
        SELECT
            *,
            random()
        FROM (
            SELECT
                users_table.user_id AS usr_id,
                count(*) AS cnt
            FROM (
                -- NOTE(review): this level used to select the inner "random"
                -- column instead of calling random(); it now calls random()
                -- like the events_table wrapper further down. The aggregated
                -- result does not depend on it -- confirm against the
                -- expected output.
                SELECT
                    *,
                    random()
                FROM (
                    SELECT *, random() FROM events_reference_table
                ) AS events_reference_table
            ) AS events_reference_table
            INNER JOIN users_table
                ON (users_table.user_id = events_reference_table.user_id)
            GROUP BY users_table.user_id
        ) AS events_all_inner
    ) AS events_all
) AS events_all
LEFT JOIN (
    SELECT
        *,
        random()
    FROM (
        SELECT *, random() FROM events_table
    ) AS events_table
) AS events_table ON (events_all.usr_id = events_table.user_id)
GROUP BY events_all.usr_id
ORDER BY
    max(events_all.cnt) DESC,
    events_all.usr_id DESC
LIMIT 5;
|
||||
|
||||
-- FULL JOIN variant: events_table is FULL JOINed, on the user id, to the
-- aggregated (events_reference_table INNER JOIN users_table) subquery.
-- This should be fine as well.
SELECT
    max(events_all.cnt),
    events_all.usr_id
FROM events_table
FULL JOIN (
    SELECT
        *,
        random()
    FROM (
        SELECT
            users_table.user_id AS usr_id,
            count(*) AS cnt
        FROM events_reference_table
        INNER JOIN users_table
            ON (users_table.user_id = events_reference_table.user_id)
        GROUP BY users_table.user_id
    ) AS events_all_inner
) AS events_all ON (events_all.usr_id = events_table.user_id)
GROUP BY events_all.usr_id
ORDER BY
    max(events_all.cnt) DESC,
    events_all.usr_id DESC
LIMIT 5;
|
||||
|
||||
-- Two levels of "(ref_table JOIN dist_table) LEFT JOIN": the inner subquery
-- joins the reference table with users_table, and the result is LEFT JOINed
-- twice at the top level. This should be fine as well.
SELECT
    users_table.*
FROM (
    SELECT
        events_all.*,
        random()
    FROM events_reference_table
    JOIN users_table USING (user_id)
    JOIN (
        SELECT
            *,
            random()
        FROM (
            SELECT
                users_table.user_id AS usr_id,
                count(*) AS cnt
            FROM (
                -- NOTE(review): this level used to select the inner "random"
                -- column instead of calling random(); it now calls random()
                -- like the events_table wrapper further down. Only the shape
                -- of the wrapper changes, the selected rows do not -- confirm
                -- against the expected output.
                SELECT
                    *,
                    random()
                FROM (
                    SELECT *, random() FROM events_reference_table
                ) AS events_reference_table
            ) AS events_reference_table
            INNER JOIN users_table
                ON (users_table.user_id = events_reference_table.user_id)
            GROUP BY users_table.user_id
        ) AS events_all_inner
    ) AS events_all ON (user_id = usr_id)
) AS events_all
LEFT JOIN (
    SELECT
        *,
        random()
    FROM (
        SELECT *, random() FROM events_table
    ) AS events_table
) AS events_table ON (events_all.usr_id = events_table.user_id)
LEFT JOIN users_table USING (user_id)
-- positional ordering over the first four users_table columns
ORDER BY 1, 2, 3, 4
LIMIT 5;
|
||||
|
||||
|
||||
-- OUTER joins involving reference tables should be supported even when the
-- subquery that contains them sits in the WHERE clause.
SELECT count(*)
FROM events_table
WHERE user_id IN (
    SELECT subquery_1.user_id
    FROM (
        -- users_table joined with the events_table
        SELECT
            tt1.user_id,
            random()
        FROM users_table AS tt1
        JOIN events_table AS tt2
            ON tt1.user_id = tt2.user_id
    ) AS subquery_1
    RIGHT JOIN (
        SELECT
            *,
            random()
        FROM (
            -- users_table joined with the reference table
            SELECT
                tt1.user_id,
                random()
            FROM users_table AS tt1
            JOIN events_reference_table AS ref
                ON tt1.user_id = ref.user_id
        ) AS subquery_2_inner
    ) AS subquery_2
        ON subquery_1.user_id = subquery_2.user_id
);
|
||||
|
||||
-- OUTER joins involving reference tables should be supported even when the
-- subquery that contains them is the outer part of a JOIN (here: the right
-- side of a RIGHT JOIN).
SELECT count(*)
FROM users_table
RIGHT JOIN (
    SELECT subquery_1.user_id
    FROM (
        -- users_table joined with the events_table
        SELECT
            tt1.user_id,
            random()
        FROM users_table AS tt1
        JOIN events_table AS tt2
            ON tt1.user_id = tt2.user_id
    ) AS subquery_1
    RIGHT JOIN (
        SELECT
            *,
            random()
        FROM (
            -- users_table joined with the reference table
            SELECT
                tt1.user_id,
                random()
            FROM users_table AS tt1
            JOIN events_reference_table AS ref
                ON tt1.user_id = ref.user_id
        ) AS subquery_2_inner
    ) AS subquery_2
        ON subquery_1.user_id = subquery_2.user_id
) AS foo USING (user_id);
|
||||
|
||||
-- LATERAL JOINs used with INNER JOINs with reference tables
|
||||
SET citus.subquery_pushdown to ON;
|
||||
SELECT user_id, lastseen
|
||||
|
@ -1367,11 +1463,6 @@ SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INN
|
|||
-- so Postgres doesn't generate join restriction between reference and distributed tables
|
||||
SELECT count(*)
FROM (
    -- foo: users_ref_test_table joined to user_buy_test_table, keeping only
    -- the user_buy_test_table columns plus a random() column
    SELECT
        u1.*,
        random()
    FROM users_ref_test_table AS ref1
    INNER JOIN user_buy_test_table AS u1
        ON ref1.id = u1.user_id
) AS foo
LEFT JOIN user_buy_test_table
    ON (foo.user_id = user_buy_test_table.user_id);
|
||||
|
||||
-- Again, in theory this should be OK to push down, but in one of the cases
-- Postgres generates a join restriction between the reference and the
-- distributed tables.
SELECT count(*)
FROM user_buy_test_table AS a
LEFT JOIN users_ref_test_table AS b
    ON (true)
RIGHT JOIN users_ref_test_table AS c
    ON (b.id = c.id);
|
||||
|
||||
-- The outer part of the LEFT JOIN consists only of an intermediate result
-- (because of the LIMIT), so we cannot push it down.
SELECT count(*)
FROM (
    SELECT ref1.*
    FROM users_ref_test_table AS ref1
    INNER JOIN users_ref_test_table AS ref2
        ON ref1.id = ref2.id
    LIMIT 5
) AS foo
LEFT JOIN user_buy_test_table
    ON true;
|
||||
|
||||
|
|
|
@ -39,5 +39,24 @@ INSERT INTO t5 VALUES (CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END);
|
|||
-- plan for a filter on c0 with a plain constant
EXPLAIN (COSTS OFF)
SELECT
FROM t5
WHERE c0 = 2;

-- plan for a filter on c0 with a CASE expression over constants
EXPLAIN (COSTS OFF)
SELECT
FROM t5
WHERE c0 = CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END;
|
||||
|
||||
-- t6 is the template table; t7, t8 and t9 copy its definition via LIKE and
-- t10 copies t7.
CREATE TABLE IF NOT EXISTS t6 (
    c0 TEXT CHECK (TRUE),
    c1 money
) WITH (
    autovacuum_vacuum_threshold = 1180014707,
    autovacuum_freeze_table_age = 13771154,
    autovacuum_vacuum_cost_delay = 23,
    autovacuum_analyze_threshold = 1935153914,
    autovacuum_freeze_min_age = 721733768,
    autovacuum_enabled = 0,
    autovacuum_vacuum_cost_limit = 9983
);
CREATE UNLOGGED TABLE IF NOT EXISTS t7 (LIKE t6);
CREATE TABLE t8 (LIKE t6 INCLUDING INDEXES);
CREATE UNLOGGED TABLE t9 (LIKE t6 EXCLUDING STATISTICS);
CREATE TABLE t10 (LIKE t7);

-- t6, t8 and t9 are distributed on c0 (which is then marked NOT NULL);
-- t7 and t10 become reference tables. Statement order is kept as-is.
SELECT create_distributed_table('t6', 'c0');
ALTER TABLE t6
    ALTER COLUMN c0 SET NOT NULL;
SELECT create_reference_table('t7');
SELECT create_distributed_table('t8', 'c0');
ALTER TABLE t8
    ALTER COLUMN c0 SET NOT NULL;
SELECT create_distributed_table('t9', 'c0');
ALTER TABLE t9
    ALTER COLUMN c0 SET NOT NULL;
SELECT create_reference_table('t10');
|
||||
|
||||
-- Count the rows of "t7 CROSS JOIN t10 FULL OUTER JOIN t8". The ON clause
-- references no columns: it is a comparison (&<) between two constant
-- int4range expressions, kept verbatim on a single line.
SELECT count(*)
FROM (
    SELECT ALL
        t7.c1,
        t7.c0,
        t8.c1,
        t10.c1,
        t8.c0
    FROM t7
    CROSS JOIN t10
    FULL OUTER JOIN t8
        ON (((((((('[832125354,1134163512)'::int4range)*('(0,2106623281)'::int4range)))-('(-600267905,509840582]'::int4range)))*('(-365203965,1662828182)'::int4range)))&<((((((('(-1286467417,697584012]'::int4range)*('[-1691485781,1341103963)'::int4range)))*((('(-1768368435,1719707648)'::int4range)*('(139536997,1275813540]'::int4range)))))*((((('[-2103910157,-1961746758)'::int4range)*('[-834534078,533073939)'::int4range)))*((('[-1030552151,552856781]'::int4range)*('[-1109419376,1205173697]'::int4range))))))))
) AS foo;
|
||||
|
||||
-- Cleanup: drop the test schema and everything in it. The message level is
-- raised to WARNING first, presumably to keep the cascade NOTICEs out of the
-- test output.
SET client_min_messages = WARNING;
DROP SCHEMA sqlancer_failures CASCADE;
|
||||
|
|
Loading…
Reference in New Issue