mirror of https://github.com/citusdata/citus.git
265 lines
6.4 KiB
SQL
265 lines
6.4 KiB
SQL
CREATE SCHEMA with_join;
|
|
SET search_path TO with_join, public;
|
|
SET citus.next_shard_id TO 1501000;
|
|
|
|
CREATE TABLE with_join.reference_table(user_id int);
|
|
SELECT create_reference_table('with_join.reference_table');
|
|
INSERT INTO reference_table VALUES (6), (7);
|
|
|
|
SET citus.enable_repartition_joins TO on;
|
|
|
|
-- Two colocated CTE under a non-colocated join
|
|
WITH colocated_1 AS (
|
|
SELECT
|
|
users_table.user_id, events_table.value_2
|
|
FROM
|
|
users_table, events_table
|
|
WHERE
|
|
users_table.user_id = events_table.user_id AND event_type IN (1, 2, 3)
|
|
),
|
|
colocated_2 AS (
|
|
SELECT
|
|
users_table.user_id, events_table.value_2
|
|
FROM
|
|
users_table, events_table
|
|
WHERE
|
|
users_table.user_id = events_table.user_id AND event_type IN (4, 5, 6)
|
|
)
|
|
SELECT colocated_1.user_id, count(*)
|
|
FROM
|
|
colocated_1, colocated_2
|
|
WHERE
|
|
colocated_1.value_2 = colocated_2.value_2
|
|
GROUP BY
|
|
1
|
|
ORDER BY
|
|
2 DESC, 1;
|
|
|
|
-- Two non-colocated CTE under a co-located join
|
|
WITH non_colocated_1 AS (
|
|
SELECT
|
|
users_table.user_id
|
|
FROM
|
|
users_table, events_table
|
|
WHERE
|
|
users_table.user_id = events_table.value_2 AND event_type IN (1, 2, 3)
|
|
),
|
|
non_colocated_2 AS MATERIALIZED (
|
|
SELECT
|
|
users_table.user_id
|
|
FROM
|
|
users_table, events_table
|
|
WHERE
|
|
users_table.user_id = events_table.value_2 AND event_type IN (4, 5, 6)
|
|
)
|
|
|
|
SELECT non_colocated_1.user_id, count(*)
|
|
FROM
|
|
non_colocated_1, non_colocated_2
|
|
WHERE
|
|
non_colocated_1.user_id = non_colocated_2.user_id
|
|
GROUP BY
|
|
1
|
|
ORDER BY
|
|
2 DESC, 1;
|
|
|
|
|
|
-- Subqueries in WHERE and FROM are mixed
|
|
-- In this query, only subquery in WHERE is not a colocated join
|
|
-- but we're able to recursively plan that as well
|
|
WITH users_events AS MATERIALIZED (
|
|
WITH colocated_join AS MATERIALIZED (
|
|
SELECT
|
|
users_table.user_id as uid, event_type
|
|
FROM
|
|
users_table
|
|
join
|
|
events_table
|
|
on (users_table.user_id = events_table.user_id)
|
|
WHERE
|
|
events_table.event_type IN (1, 2, 3)
|
|
),
|
|
colocated_join_2 AS MATERIALIZED (
|
|
SELECT
|
|
users_table.user_id, event_type
|
|
FROM
|
|
users_table
|
|
join
|
|
events_table
|
|
on (users_table.user_id = events_table.user_id)
|
|
WHERE
|
|
events_table.event_type IN (4, 5, 6)
|
|
)
|
|
SELECT
|
|
uid, colocated_join.event_type
|
|
FROM
|
|
colocated_join,
|
|
colocated_join_2
|
|
WHERE
|
|
colocated_join.uid = colocated_join_2.user_id AND
|
|
colocated_join.event_type IN (
|
|
WITH some_events AS MATERIALIZED (
|
|
SELECT
|
|
event_type
|
|
FROM
|
|
events_table
|
|
WHERE
|
|
user_id < 100
|
|
GROUP BY
|
|
1
|
|
ORDER BY
|
|
1
|
|
LIMIT 10
|
|
)
|
|
SELECT
|
|
*
|
|
FROM
|
|
some_events
|
|
)
|
|
)
|
|
SELECT
|
|
DISTINCT uid
|
|
FROM
|
|
users_events
|
|
ORDER BY
|
|
1 DESC
|
|
LIMIT
|
|
5;
|
|
|
|
-- cte LEFT JOIN distributed_table should error out
|
|
-- as long as the CTE is recursively planned
|
|
WITH cte AS MATERIALIZED (
|
|
SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1
|
|
)
|
|
SELECT
|
|
cte.user_id, cte.time, events_table.event_type
|
|
FROM
|
|
cte
|
|
LEFT JOIN
|
|
events_table ON cte.user_id = events_table.user_id
|
|
ORDER BY
|
|
1,2,3
|
|
LIMIT
|
|
5;
|
|
|
|
|
|
-- cte RIGHT JOIN distributed_table should work
|
|
WITH cte AS MATERIALIZED (
|
|
SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1
|
|
)
|
|
SELECT
|
|
cte.user_id, cte.time, events_table.event_type
|
|
FROM
|
|
cte
|
|
RIGHT JOIN
|
|
events_table ON cte.user_id = events_table.user_id
|
|
ORDER BY
|
|
1,2,3
|
|
LIMIT
|
|
5;
|
|
|
|
-- distributed_table LEFT JOIN cte should work
|
|
WITH cte AS MATERIALIZED (
|
|
SELECT * FROM users_table WHERE value_1 = 1 ORDER BY value_1
|
|
)
|
|
SELECT
|
|
cte.user_id, cte.time, events_table.event_type
|
|
FROM
|
|
events_table
|
|
LEFT JOIN
|
|
cte ON cte.user_id = events_table.user_id
|
|
ORDER BY
|
|
1,2,3
|
|
LIMIT
|
|
5;
|
|
|
|
-- distributed_table RIGHT JOIN cte should error out
|
|
WITH cte AS MATERIALIZED (
|
|
SELECT * FROM users_table WHERE value_1 = 1 ORDER BY value_1
|
|
)
|
|
SELECT
|
|
cte.user_id, cte.time, events_table.event_type
|
|
FROM
|
|
events_table
|
|
RIGHT JOIN
|
|
cte ON cte.user_id = events_table.user_id
|
|
ORDER BY
|
|
1,2,3
|
|
LIMIT
|
|
5;
|
|
|
|
-- cte FULL JOIN distributed_table should error out
|
|
WITH cte AS MATERIALIZED (
|
|
SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1
|
|
)
|
|
SELECT
|
|
cte.user_id, cte.time, events_table.event_type
|
|
FROM
|
|
events_table
|
|
FULL JOIN
|
|
cte ON cte.user_id = events_table.user_id
|
|
ORDER BY
|
|
1,2,3
|
|
LIMIT
|
|
5;
|
|
|
|
|
|
-- Joins with reference tables are planned as router queries
|
|
WITH cte AS MATERIALIZED (
|
|
SELECT value_2, max(user_id) AS user_id FROM users_table WHERE value_2 = 1 GROUP BY value_2 HAVING count(*) > 1
|
|
)
|
|
SELECT
|
|
row_number() OVER(), cte.user_id
|
|
FROM
|
|
cte
|
|
FULL JOIN
|
|
reference_table ON cte.user_id + 1 = reference_table.user_id
|
|
ORDER BY
|
|
user_id
|
|
LIMIT
|
|
5;
|
|
|
|
|
|
-- some more tests for more complex outer-joins
|
|
-- with reference tables
|
|
CREATE TABLE distributed_1 (col1 int, col2 int, distrib_col int);
|
|
CREATE TABLE distributed_2 (col1 int, col2 int, distrib_col int);
|
|
CREATE TABLE reference_1 (col1 int, col2 int);
|
|
CREATE TABLE reference_2(col1 int, col2 int);
|
|
|
|
SELECT create_distributed_table('distributed_1','distrib_col');
|
|
SELECT create_distributed_table('distributed_2','distrib_col');
|
|
SELECT create_reference_table('reference_1');
|
|
SELECT create_reference_table('reference_2');
|
|
|
|
INSERT INTO distributed_1 SELECT i, i, i FROM generate_series(0,100) i;
|
|
INSERT INTO distributed_2 SELECT i%2, i%2, i%2 FROM generate_series(0,100) i;
|
|
INSERT INTO reference_1 SELECT i%3, i%3 FROM generate_series(0,100) i;
|
|
INSERT INTO reference_2 SELECT i%4, i%4 FROM generate_series(0,100) i;
|
|
|
|
select count(*) from distributed_1 AS d1
|
|
LEFT JOIN reference_1 AS r1 ON d1.col2=r1.col2
|
|
LEFT JOIN reference_2 AS r2 ON r2.col1 = r1.col1
|
|
join (select distrib_col,count(*) from distributed_2 group by distrib_col) d2 ON d2.distrib_col=d1.distrib_col;
|
|
|
|
with d2 AS MATERIALIZED (select distrib_col,count(*) from distributed_2 group by distrib_col)
|
|
select count(*) from distributed_1 AS d1
|
|
LEFT JOIN reference_1 AS r1 ON d1.col2=r1.col2
|
|
LEFT JOIN reference_2 AS r2 ON r2.col1 = r1.col1
|
|
join d2 ON d2.distrib_col=d1.distrib_col;
|
|
|
|
with d2 AS MATERIALIZED (select distrib_col,col1 from distributed_2)
|
|
select count(*) from distributed_1 AS d1
|
|
LEFT JOIN reference_1 AS r1 ON d1.col2=r1.col2
|
|
LEFT JOIN reference_2 AS r2 ON r2.col1 = r1.col1
|
|
join d2 ON d2.distrib_col=d1.distrib_col;
|
|
|
|
with cte_1 AS MATERIALIZED (select col1 from reference_1)
|
|
select count(*) from distributed_1 AS d1
|
|
LEFT JOIN reference_1 AS r1 ON d1.col2=r1.col2
|
|
LEFT JOIN reference_2 AS r2 ON r2.col1 = r1.col1
|
|
join cte_1 ON cte_1.col1=d1.distrib_col;
|
|
|
|
RESET client_min_messages;
|
|
DROP SCHEMA with_join CASCADE;
|