mirror of https://github.com/citusdata/citus.git
148 lines
8.6 KiB
SQL
148 lines
8.6 KiB
SQL
-- this test file relies on multi_behavioral_analytics_create_table
|
|
-- and aims to have variety of tests covering CROSS JOINs
|
|
-- "t1 CROSS JOIN t2" is equivalent of "t1 JOIN t2 ON true"
|
|
|
|
-- a distributed table can be cross joined with a reference table
|
|
-- and the CROSS JOIN can be in the outer part of an outer JOIN
|
|
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 LEFT JOIN users_table u ON (e2.user_id = u.user_id);
|
|
|
|
-- two distributed tables cannot be cross joined
|
|
-- as it lacks distribution key equality
|
|
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u;
|
|
SELECT count(*) FROM events_reference_table e1, events_table e2, users_table u;
|
|
|
|
-- we can provide the distribution key equality via WHERE clause
|
|
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u WHERE u.user_id = e2.user_id;
|
|
|
|
|
|
-- two reference tables are JOINed, and later CROSS JOINed with a distributed table
|
|
-- it is safe to pushdown
|
|
SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
|
|
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
|
|
SELECT count(*) FROM users_ref_test_table ref1 RIGHT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
|
|
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 CROSS JOIN users_table;
|
|
|
|
-- two reference tables CROSS JOINNed, and later JOINED with distributed tables
|
|
-- it is safe to pushdown
|
|
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON false;
|
|
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON false;
|
|
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON true;
|
|
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON true;
|
|
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table ON (ref1.id = users_table.user_id);
|
|
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 JOIN users_table ON (ref1.id = users_table.user_id);
|
|
|
|
-- two reference tables CROSS JOINNed, and later JOINED with distributed tables
|
|
-- but the reference table CROSS JOIN is in the outer side of the JOIN with the distributed table
|
|
-- so this is supported by recursively planning the distributed table (users_table)
|
|
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id);
|
|
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table ON (ref1.id = users_table.user_id);
|
|
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id != users_table.user_id);
|
|
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id > 0);
|
|
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (users_table.user_id > 0);
|
|
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON true;
|
|
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON false;
|
|
|
|
-- a reference tables CROSS JOINed with a distribted table, and later JOINED with distributed tables on distribution keys
|
|
-- so safe to pushdown
|
|
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (u1.user_id = u2.user_id);
|
|
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (u1.user_id = u2.user_id);
|
|
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (u1.user_id = u2.user_id);
|
|
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (u1.user_id = u2.user_id);
|
|
|
|
-- a reference tables CROSS JOINed with a distribted table, and later JOINED with distributed tables on reference table column
|
|
-- so not safe to pushdown
|
|
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table u2 ON (ref2.id = u2.user_id);
|
|
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 FULL JOIN users_table u2 ON (ref2.id = u2.user_id);
|
|
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 RIGHT JOIN users_table u2 ON (ref2.id = u2.user_id);
|
|
|
|
-- via repartitioning, Citus can handle this query as the result of "u1 CROSS JOIN ref2"
|
|
-- can be repartitioned on ref2.id
|
|
Set citus.enable_repartition_joins to on;
|
|
SELECT count(*) FROM users_table u1 CROSS JOIN users_ref_test_table ref2 JOIN users_table u2 ON (ref2.id = u2.user_id);
|
|
reset citus.enable_repartition_joins;
|
|
|
|
-- although the following has the "ref LEFT JOIN dist" type of query, the LEFT JOIN is eliminated by Postgres
|
|
-- because the INNER JOIN eliminates the LEFT JOIN
|
|
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) JOIN users_table u2 ON (u2.user_id = users_table.user_id);
|
|
|
|
-- this is the same query as the above, but this time the outer query is also LEFT JOIN, meaning that Postgres
|
|
-- cannot eliminate the outer join
|
|
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 LEFT JOIN users_table ON (ref1.id = users_table.user_id) LEFT JOIN users_table u2 ON (u2.user_id = users_table.user_id);
|
|
|
|
-- cross join that goes through non-colocated subquery logic
|
|
-- for the "events_table" subquery as both distributed tables
|
|
-- do not have JOIN on the distribution key
|
|
SELECT max(events_all.cnt),
|
|
events_all.usr_id
|
|
FROM
|
|
(SELECT *, random() FROM
|
|
(SELECT *, random()
|
|
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
|
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
|
|
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
|
|
GROUP BY users_table.user_id) AS events_all_inner
|
|
) AS events_all
|
|
) AS events_all
|
|
CROSS JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table
|
|
GROUP BY 2
|
|
ORDER BY 1 DESC,
|
|
2 DESC
|
|
LIMIT 5;
|
|
|
|
-- cross join that goes through non-colocated subquery logic
|
|
-- for the "events_all" subquery as both distributed tables
|
|
-- do not have JOIN on the distribution key
|
|
SELECT max(events_all.cnt),
|
|
events_all.usr_id
|
|
FROM events_table
|
|
CROSS JOIN (SELECT *, random()
|
|
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
|
FROM events_reference_table
|
|
INNER JOIN users_table ON (users_table.user_id = events_reference_table.user_id)
|
|
GROUP BY users_table.user_id) AS events_all_inner
|
|
) AS events_all
|
|
GROUP BY 2
|
|
ORDER BY 1 DESC,
|
|
2 DESC
|
|
LIMIT 5;
|
|
|
|
|
|
-- cross join is between a reference table and distributed table, and
|
|
-- deep inside a subquery. The subquery can be in the outer part of the LEFT JOIN
|
|
SELECT
|
|
users_table.*
|
|
FROM
|
|
(SELECT
|
|
events_all.*, random()
|
|
FROM
|
|
events_reference_table JOIN users_table USING(user_id)
|
|
JOIN
|
|
(SELECT *, random()
|
|
FROM (SELECT users_table.user_id AS usr_id, count(*) AS cnt
|
|
FROM (SELECT *,random FROM (SELECT *, random() FROM events_reference_table) as events_reference_table) as events_reference_table
|
|
CROSS JOIN users_table
|
|
GROUP BY users_table.user_id) AS events_all_inner
|
|
) AS events_all ON (user_id = usr_id)
|
|
) AS events_all
|
|
LEFT JOIN (SELECT *,random() FROM (SELECT *, random() FROM events_table) as events_table) as events_table ON (events_all.usr_id = events_table.user_id)
|
|
LEFT JOIN users_table USING (user_id)
|
|
ORDER BY 1,2,3,4 LIMIT 5;
|
|
|
|
-- we don't support cross JOINs between distributed tables
|
|
-- and without target list entries
|
|
CREATE TABLE dist1(c0 int);
|
|
CREATE TABLE dist2(c0 int);
|
|
CREATE TABLE dist3(c0 int , c1 int);
|
|
CREATE TABLE dist4(c0 int , c1 int);
|
|
|
|
SELECT create_distributed_table('dist1', 'c0');
|
|
SELECT create_distributed_table('dist2', 'c0');
|
|
SELECT create_distributed_table('dist3', 'c1');
|
|
SELECT create_distributed_table('dist4', 'c1');
|
|
|
|
SELECT dist2.c0 FROM dist1, dist3, dist4, dist2 WHERE (dist3.c0) IN (dist4.c0);
|
|
SELECT 1 FROM dist3, dist4, dist2 WHERE (dist3.c0) IN (dist4.c0);
|
|
SELECT FROM dist3, dist4, dist2 WHERE (dist3.c0) IN (dist4.c0);
|
|
SELECT dist2.c0 FROM dist3, dist4, dist2 WHERE (dist3.c0) IN (dist4.c0);
|
|
SELECT dist2.* FROM dist3, dist4, dist2 WHERE (dist3.c0) IN (dist4.c0);
|