mirror of https://github.com/citusdata/citus.git
1023 lines
28 KiB
PL/PgSQL
1023 lines
28 KiB
PL/PgSQL
CREATE SCHEMA recurring_outer_join;
|
|
SET search_path TO recurring_outer_join;
|
|
|
|
SET citus.next_shard_id TO 1520000;
|
|
SET citus.shard_count TO 32;
|
|
|
|
SET client_min_messages TO DEBUG1;
|
|
CREATE TABLE dist_1 (a int, b int);
|
|
SELECT create_distributed_table('dist_1', 'a');
|
|
INSERT INTO dist_1 VALUES
|
|
(1, 10),
|
|
(1, 11),
|
|
(1, 12),
|
|
(2, 20),
|
|
(2, 21),
|
|
(2, 22),
|
|
(2, 23),
|
|
(3, 30),
|
|
(3, 31),
|
|
(3, 32),
|
|
(3, 33),
|
|
(3, 34),
|
|
(7, 40),
|
|
(7, 41),
|
|
(7, 42);
|
|
|
|
CREATE TABLE dist_2_columnar(LIKE dist_1) USING columnar;
|
|
INSERT INTO dist_2_columnar SELECT * FROM dist_1;
|
|
SELECT create_distributed_table('dist_2_columnar', 'a');
|
|
|
|
CREATE TABLE dist_3_partitioned(LIKE dist_1) PARTITION BY RANGE(a);
|
|
CREATE TABLE dist_3_partitioned_p1 PARTITION OF dist_3_partitioned FOR VALUES FROM (0) TO (2);
|
|
CREATE TABLE dist_3_partitioned_p2 PARTITION OF dist_3_partitioned FOR VALUES FROM (2) TO (4);
|
|
CREATE TABLE dist_3_partitioned_p3 PARTITION OF dist_3_partitioned FOR VALUES FROM (4) TO (100);
|
|
SELECT create_distributed_table('dist_3_partitioned', 'a');
|
|
INSERT INTO dist_3_partitioned SELECT * FROM dist_1;
|
|
|
|
CREATE TABLE ref_1 (a int, b int);
|
|
SELECT create_reference_table('ref_1');
|
|
INSERT INTO ref_1 VALUES
|
|
(1, 100),
|
|
(1, 11),
|
|
(null, 102),
|
|
(2, 200),
|
|
(2, 21),
|
|
(null, 202),
|
|
(2, 203),
|
|
(4, 300),
|
|
(4, 301),
|
|
(null, 302),
|
|
(4, 303),
|
|
(4, 304),
|
|
(null, 400),
|
|
(null, 401),
|
|
(null, 402);
|
|
|
|
CREATE TABLE local_1 (a int, b int);
|
|
INSERT INTO local_1 VALUES
|
|
(null, 1000),
|
|
(1, 11),
|
|
(1, 100),
|
|
(5, 2000),
|
|
(5, 2001),
|
|
(5, 2002),
|
|
(null, 2003),
|
|
(6, 3000),
|
|
(6, 3001),
|
|
(6, 3002),
|
|
(null, 3003),
|
|
(6, 3004),
|
|
(null, 4000),
|
|
(null, 4001),
|
|
(null, 4002);
|
|
|
|
CREATE TABLE citus_local_1(LIKE local_1);
|
|
INSERT INTO citus_local_1 SELECT * FROM local_1;
|
|
SELECT citus_add_local_table_to_metadata('citus_local_1');
|
|
|
|
CREATE TABLE dist_4_different_colocation_group(LIKE dist_1);
|
|
INSERT INTO dist_4_different_colocation_group SELECT * FROM local_1;
|
|
DELETE FROM dist_4_different_colocation_group WHERE a IS NULL;
|
|
SELECT create_distributed_table('dist_4_different_colocation_group', 'a', colocate_with=>'none');
|
|
|
|
CREATE TABLE dist_5_with_pkey(LIKE dist_1);
|
|
INSERT INTO dist_5_with_pkey VALUES
|
|
(1, 11),
|
|
(2, 22),
|
|
(3, 34),
|
|
(7, 40);
|
|
SELECT create_distributed_table('dist_5_with_pkey', 'a');
|
|
ALTER TABLE dist_5_with_pkey ADD CONSTRAINT pkey_1 PRIMARY KEY (a);
|
|
|
|
--
|
|
-- basic cases
|
|
--
|
|
|
|
SELECT COUNT(*) FROM ref_1 LEFT JOIN dist_1 USING (a);
|
|
|
|
SELECT COUNT(*) FROM ref_1 LEFT JOIN dist_1 USING (a,b);
|
|
|
|
SELECT COUNT(*) FROM dist_1 RIGHT JOIN ref_1 USING (a);
|
|
|
|
SELECT COUNT(*) FROM ref_1 FULL JOIN dist_1 USING (a);
|
|
SELECT COUNT(*) FROM dist_1 FULL JOIN ref_1 USING (a);
|
|
|
|
SELECT COUNT(*) FROM dist_1 FULL JOIN ref_1 USING (a,b);
|
|
|
|
-- distributed side is a subquery
|
|
SELECT COUNT(*) FROM ref_1 LEFT JOIN (SELECT * FROM dist_1) q USING (a);
|
|
|
|
-- distributed side is a join tree
|
|
SELECT COUNT(*) FROM ref_1 LEFT JOIN (dist_1 t1 JOIN dist_1 t2 USING (a)) q USING (a);
|
|
SELECT COUNT(*) FROM ref_1 LEFT JOIN (dist_1 t1 LEFT JOIN dist_1 t2 USING (a)) q USING (a);
|
|
|
|
-- use functions/VALUES clauses/intrermediate results as the recurring rel
|
|
|
|
-- values clause
|
|
SELECT COUNT(*) FROM (SELECT a, b FROM (VALUES (1, 'one'), (2, 'two'), (3, 'three')) as t(a,b)) recurring LEFT JOIN dist_1 USING (a);
|
|
|
|
-- generate_series()
|
|
SELECT COUNT(*) FROM dist_1 RIGHT JOIN (SELECT a FROM generate_series(1, 10) a) recurring USING (a);
|
|
|
|
-- materialized cte
|
|
WITH dist_1_materialized AS MATERIALIZED (
|
|
SELECT * FROM dist_1
|
|
)
|
|
SELECT COUNT(*) FROM dist_1 RIGHT JOIN dist_1_materialized USING (a);
|
|
|
|
-- offset in the subquery
|
|
SELECT COUNT(*) FROM dist_1 t1 RIGHT JOIN (SELECT * FROM dist_1 OFFSET 0) t2 USING (a);
|
|
|
|
-- limit in the subquery
|
|
SELECT COUNT(*) FROM dist_1 t1 RIGHT JOIN (SELECT * FROM dist_1 ORDER BY 1,2 LIMIT 2) t2 USING (a);
|
|
|
|
-- local-distributed join as the recurring rel
|
|
--
|
|
-- We plan local-distributed join by converting local_1 into an intermediate result
|
|
-- and hence it becomes a recurring rel. Then we convert distributed - inner side of
|
|
-- the right join (dist_1) into an intermediate result too and this makes rhs of the
|
|
-- full join a recurring rel. And finally, we convert lhs of the full join (t1) into
|
|
-- an intermediate result too.
|
|
SELECT COUNT(*) FROM dist_1 t1 FULL JOIN (dist_1 RIGHT JOIN local_1 USING(a)) t2 USING (a);
|
|
|
|
SELECT COUNT(*) FROM dist_1 t1 FULL JOIN (dist_1 RIGHT JOIN citus_local_1 USING(a)) t2 USING (a);
|
|
|
|
-- subqury without FROM
|
|
SELECT COUNT(*) FROM dist_1 t1 RIGHT JOIN (SELECT generate_series(1,10) AS a) t2 USING (a);
|
|
|
|
-- such semi joins / anti joins are supported too
|
|
|
|
-- reference table
|
|
SELECT COUNT(*) FROM
|
|
ref_1 t1
|
|
JOIN dist_1 t2
|
|
ON (t1.a = t2.a)
|
|
WHERE t1.a IN (SELECT a FROM dist_1 t3);
|
|
|
|
-- not supported because we join t3 (inner rel of the anti join) with a column
|
|
-- of reference table, not with the distribution column of the other distributed
|
|
-- table (t2)
|
|
SELECT COUNT(*) FROM
|
|
ref_1 t1
|
|
JOIN dist_1 t2
|
|
ON (t1.a = t2.a)
|
|
WHERE NOT EXISTS (SELECT * FROM dist_1 t3 WHERE t1.a = a);
|
|
|
|
-- supported because the semi join is performed based on distribution keys
|
|
-- of the distributed tables
|
|
SELECT COUNT(*) FROM
|
|
ref_1 t1
|
|
JOIN dist_1 t2
|
|
ON (t1.a = t2.a)
|
|
WHERE NOT EXISTS (SELECT * FROM dist_1 t3 WHERE t2.a = a);
|
|
|
|
-- values clause
|
|
SELECT COUNT(*) FROM
|
|
(SELECT a, b FROM (VALUES (1, 'one'), (2, 'two'), (3, 'three')) as t(a,b)) t1
|
|
JOIN dist_1 t2
|
|
ON (t1.a = t2.a)
|
|
WHERE EXISTS (SELECT * FROM dist_1 t3 WHERE t1.a = a);
|
|
|
|
-- offset in the subquery
|
|
SELECT COUNT(*) FROM
|
|
(SELECT * FROM dist_1 OFFSET 0) t1
|
|
JOIN dist_1 t2
|
|
ON (t1.a = t2.a)
|
|
WHERE t1.a IN (SELECT a FROM dist_1 t3);
|
|
|
|
-- local-distributed join as the recurring rel
|
|
SELECT COUNT(*) FROM
|
|
(dist_1 RIGHT JOIN local_1 USING(a)) t1
|
|
JOIN dist_1 t2
|
|
ON (t1.a = t2.a)
|
|
WHERE t1.a IN (SELECT a FROM dist_1 t3);
|
|
|
|
-- materialized cte
|
|
WITH dist_1_materialized AS MATERIALIZED (
|
|
SELECT * FROM dist_1
|
|
)
|
|
SELECT COUNT(*) FROM
|
|
dist_1_materialized t1
|
|
JOIN dist_1 t2
|
|
ON (t1.a = t2.a)
|
|
WHERE t1.a IN (SELECT a FROM dist_1 t3);
|
|
|
|
WITH dist_1_materialized AS MATERIALIZED (
|
|
SELECT * FROM dist_1
|
|
)
|
|
SELECT COUNT(*) FROM
|
|
dist_1_materialized t1
|
|
JOIN dist_1 t2
|
|
ON (t1.a = t2.a)
|
|
WHERE EXISTS (SELECT a FROM dist_1 t3 WHERE t3.a = t1.a);
|
|
|
|
-- not supported because we anti-join t3 --inner rel-- with a column
|
|
-- of t1 (intermediate result) --outer-rel--
|
|
WITH dist_1_materialized AS MATERIALIZED (
|
|
SELECT * FROM dist_1
|
|
)
|
|
SELECT COUNT(*) FROM
|
|
dist_1_materialized t1
|
|
JOIN dist_1 t2
|
|
ON (t1.a = t2.a)
|
|
WHERE NOT EXISTS (SELECT a FROM dist_1 t3 WHERE t3.a = t1.a);
|
|
|
|
-- so this is supported because now t3 is joined with t2, not t1
|
|
WITH dist_1_materialized AS MATERIALIZED (
|
|
SELECT a AS a_alias, b AS b_alias FROM dist_1
|
|
)
|
|
SELECT COUNT(*) FROM
|
|
dist_1_materialized t1
|
|
JOIN dist_1 t2
|
|
ON (t1.a_alias = t2.a)
|
|
WHERE NOT EXISTS (SELECT a FROM dist_1 t3 WHERE t3.a = t2.a);
|
|
|
|
WITH dist_1_materialized AS MATERIALIZED (
|
|
SELECT a AS a_alias, b AS b_alias FROM dist_1
|
|
)
|
|
SELECT COUNT(*) FROM
|
|
dist_1_materialized t1
|
|
JOIN dist_1 t2
|
|
ON (t1.a_alias = t2.a)
|
|
WHERE t1.a_alias NOT IN (SELECT a FROM dist_1 t3);
|
|
|
|
-- generate_series()
|
|
SELECT COUNT(*) FROM
|
|
(SELECT a FROM generate_series(1, 10) a) t1
|
|
JOIN dist_1 t2
|
|
ON (t1.a = t2.a)
|
|
WHERE t1.a IN (SELECT a FROM dist_1 t3);
|
|
|
|
-- subqury without FROM
|
|
SELECT COUNT(*) FROM
|
|
(SELECT generate_series(1,10) AS a) t1
|
|
JOIN dist_1 t2
|
|
ON (t1.a = t2.a)
|
|
WHERE t1.a IN (SELECT a FROM dist_1 t3);
|
|
|
|
-- together with correlated subqueries
|
|
|
|
SELECT COUNT(*) FROM ref_1 t1
|
|
LEFT JOIN dist_1 t2 USING (a,b)
|
|
WHERE EXISTS (SELECT * FROM dist_1 t3 WHERE t1.a = t3.a);
|
|
|
|
SELECT COUNT(*) FROM dist_1 t1
|
|
RIGHT JOIN ref_1 t2 USING (a,b)
|
|
WHERE EXISTS (SELECT * FROM dist_1 t3 WHERE t2.a = t3.a);
|
|
|
|
|
|
-- "dist_1 t2" can't contribute to result set of the right join with
|
|
-- a tuple having "(t2.a) a = NULL" because t2 is in the inner side of
|
|
-- right join. For this reason, Postgres knows that <t2.a = t1.a> can
|
|
-- never evaluate to true (because <NULL = NULL> never yields "true")
|
|
-- and replaces the right join with an inner join.
|
|
-- And as a result, we can push-down the query without having to go
|
|
-- through recursive planning.
|
|
SELECT COUNT(*) FROM dist_1 t1
|
|
WHERE EXISTS (
|
|
SELECT * FROM dist_1 t2
|
|
RIGHT JOIN ref_1 t3 USING (a)
|
|
WHERE t2.a = t1.a
|
|
);
|
|
|
|
-- same here, Postgres converts the left join into an inner one
|
|
SELECT foo.* FROM
|
|
ref_1 r1,
|
|
LATERAL
|
|
(
|
|
SELECT * FROM ref_1 r2
|
|
LEFT JOIN dist_1
|
|
USING (a)
|
|
WHERE r1.a > dist_1.b
|
|
) as foo;
|
|
|
|
-- Qual is the same but top-level join is an anti-join. Right join
|
|
-- stays as is and hence requires recursive planning.
|
|
SELECT COUNT(*) FROM dist_1 t1
|
|
WHERE NOT EXISTS (
|
|
SELECT * FROM dist_1 t2
|
|
RIGHT JOIN ref_1 t3 USING (a)
|
|
WHERE t2.a = t1.a
|
|
);
|
|
|
|
-- This time the semi-join qual is <t3.a = t1.a> (not <<t2.a = t1.a>)
|
|
-- where t3 is the outer rel of the right join. Hence Postgres can't
|
|
-- replace right join with an inner join and so we recursively plan
|
|
-- inner side of the right join since the outer side is a recurring
|
|
-- rel.
|
|
SELECT COUNT(*) FROM dist_1 t1
|
|
WHERE EXISTS (
|
|
SELECT * FROM dist_1 t2
|
|
RIGHT JOIN ref_1 t3 USING (a)
|
|
WHERE t3.a = t1.a
|
|
);
|
|
|
|
SELECT COUNT(*) FROM dist_1 t1
|
|
WHERE NOT EXISTS (
|
|
SELECT * FROM dist_1 t2
|
|
RIGHT JOIN ref_1 t3 USING (a)
|
|
WHERE t3.a = t1.a
|
|
);
|
|
|
|
--
|
|
-- more complex cases
|
|
--
|
|
|
|
SELECT COUNT(*) FROM
|
|
-- 1) right side is distributed but t1 is recurring, hence what
|
|
-- makes the right side distributed (t3) is recursively planned
|
|
ref_1 t1
|
|
LEFT JOIN
|
|
(ref_1 t2 RIGHT JOIN dist_1 t3(x,y) ON (t2.a=t3.x)) t5
|
|
USING(a)
|
|
-- 2) outer side of the join tree became recurring, hence t4 is
|
|
-- recursively planned too
|
|
LEFT JOIN
|
|
dist_1 t4
|
|
ON (t4.a = t5.a AND t4.b = t5.b)
|
|
WHERE t4.b IS NULL;
|
|
|
|
SELECT COUNT(*) FROM
|
|
-- 2) right side is distributed but t1 is recurring, hence what
|
|
-- makes the right side distributed (t4) is recursively planned
|
|
ref_1 t1
|
|
LEFT JOIN
|
|
(
|
|
dist_1 t4
|
|
JOIN
|
|
-- 1) t6 is recursively planned since the outer side is recurring
|
|
(SELECT t6.a FROM dist_1 t6 RIGHT JOIN ref_1 t7 USING(a)) t5
|
|
USING(a)
|
|
) q
|
|
USING(a)
|
|
-- 3) outer side of the join tree became recurring, hence t8 is
|
|
-- recursively planned too
|
|
LEFT JOIN
|
|
dist_1 t8
|
|
USING (a)
|
|
WHERE t8.b IS NULL;
|
|
|
|
SELECT COUNT(*) FROM
|
|
ref_1 t1
|
|
-- all distributed tables in the rhs will be recursively planned
|
|
-- in the order of t3, t4, t5
|
|
LEFT JOIN
|
|
(
|
|
ref_1 t2
|
|
JOIN
|
|
dist_1 t3
|
|
USING (a)
|
|
JOIN
|
|
(dist_1 t4 JOIN dist_1 t5 USING (a))
|
|
USING(a)
|
|
)
|
|
USING (a);
|
|
|
|
-- Even if dist_1 and dist_4_different_colocation_group belong to different
|
|
-- colocation groups, we can run query without doing a repartition join as
|
|
-- we first decide recursively planning lhs of the right join because rhs
|
|
-- (ref_1) is a recurring rel. And while doing so, we anyway recursively plan
|
|
-- the distributed tables in the subjoin tree individually hence the whole join
|
|
-- tree becomes:
|
|
-- RIGHT JOIN
|
|
-- / \
|
|
-- intermediate_result_for_dist_1 ref_1
|
|
-- JOIN
|
|
-- intermediate_result_for_dist_4_different_colocation_group
|
|
--
|
|
-- When we decide implementing the optimization noted in
|
|
-- RecursivelyPlanDistributedJoinNode in an XXX comment, then this query would
|
|
-- require enabling repartition joins.
|
|
SELECT COUNT(*) FROM
|
|
dist_1 JOIN dist_4_different_colocation_group USING(a)
|
|
RIGHT JOIN ref_1 USING(a);
|
|
|
|
SELECT COUNT(*) FROM
|
|
ref_1 t1
|
|
LEFT JOIN
|
|
-- 2) t6 subquery is distributed so needs to be recursively planned
|
|
-- because t1 is recurring
|
|
(
|
|
SELECT * FROM
|
|
(SELECT * FROM ref_1 t2 JOIN dist_1 t3 USING (a) WHERE t3.b IS NULL) p
|
|
JOIN
|
|
-- 1) t5 is recursively planned since the outer side is recurring
|
|
(SELECT * FROM ref_1 t4 LEFT JOIN dist_1 t5 USING (a)) q
|
|
USING(a)
|
|
) t6
|
|
USING (a);
|
|
|
|
-- No need to recursively plan dist_5_with_pkey thanks to
|
|
-- pkey optimizations done by Postgres.
|
|
SELECT COUNT(*) FROM ref_1 LEFT JOIN dist_5_with_pkey USING(a);
|
|
|
|
-- Similarly, <dist_1.a IN (1,4)> implies that "dist_1.a" cannot be NULL
|
|
-- and hence Postgres converts the LEFT JOIN into an INNER JOIN form.
|
|
-- For this reason, we don't need to recursively plan dist_1.
|
|
SELECT COUNT(*) FROM ref_1 LEFT JOIN dist_1 USING(a) WHERE dist_1.a IN (1,4);
|
|
|
|
SELECT COUNT(*) FROM
|
|
ref_1 t1
|
|
LEFT JOIN
|
|
-- 2) t6 subquery is distributed so needs to be recursively planned
|
|
-- because t1 is recurring
|
|
(
|
|
SELECT * FROM
|
|
(SELECT * FROM ref_1 t2 JOIN dist_3_partitioned t3 USING (a) WHERE t3.b IS NULL) p
|
|
JOIN
|
|
-- 1) t5 is recursively planned since the outer side is recurring
|
|
(SELECT * FROM ref_1 t4 LEFT JOIN dist_1 t5 USING (a)) q
|
|
USING(a)
|
|
) t6
|
|
USING (a);
|
|
|
|
SELECT COUNT(t1.a), t1.b FROM
|
|
ref_1 t1
|
|
LEFT JOIN
|
|
-- 2) t6 subquery is distributed so needs to be recursively planned
|
|
-- because t1 is recurring
|
|
(
|
|
SELECT * FROM
|
|
(SELECT * FROM ref_1 t2 JOIN dist_3_partitioned t3 USING (a) WHERE t3.b IS NULL) p
|
|
JOIN
|
|
-- 1) t5 is recursively planned since the outer side is recurring
|
|
(SELECT * FROM ref_1 t4 LEFT JOIN dist_1 t5 USING (a)) q
|
|
USING(a)
|
|
) t6
|
|
USING (a)
|
|
GROUP BY (t1.b)
|
|
HAVING t1.b > 200
|
|
ORDER BY 1,2;
|
|
|
|
SELECT COUNT(t1.a), t1.b FROM
|
|
ref_1 t1
|
|
LEFT JOIN
|
|
-- 2) t6 subquery is distributed so needs to be recursively planned
|
|
-- because t1 is recurring
|
|
(
|
|
SELECT * FROM
|
|
(SELECT * FROM ref_1 t2 JOIN dist_3_partitioned t3 USING (a) WHERE t3.b IS NULL) p
|
|
JOIN
|
|
-- 1) t5 is recursively planned since the outer side is recurring
|
|
(SELECT * FROM ref_1 t4 LEFT JOIN dist_1 t5 USING (a)) q
|
|
USING(a)
|
|
) t6
|
|
USING (a)
|
|
GROUP BY (t1.b)
|
|
HAVING (
|
|
EXISTS (
|
|
SELECT * FROM ref_1 t6
|
|
LEFT JOIN dist_1 t7 USING (a)
|
|
WHERE t7.b > 10
|
|
)
|
|
)
|
|
ORDER BY 1,2;
|
|
|
|
SELECT COUNT(*) FROM
|
|
citus_local_1 t1
|
|
LEFT JOIN
|
|
-- 2) t6 subquery is distributed so needs to be recursively planned
|
|
-- because t1 is first recursively planned
|
|
(
|
|
SELECT * FROM
|
|
(SELECT * FROM ref_1 t2 JOIN dist_1 t3 USING (a) WHERE t3.b IS NULL) p
|
|
JOIN
|
|
-- 1) t5 is recursively planned since the outer side is recurring
|
|
(SELECT * FROM ref_1 t4 LEFT JOIN dist_1 t5 USING (a)) q
|
|
USING(a)
|
|
) t6
|
|
USING (a);
|
|
|
|
SELECT COUNT(*) FROM
|
|
-- 2) t1 is recursively planned because the outer side (t2) is
|
|
-- converted into a recurring rel
|
|
dist_2_columnar t1
|
|
RIGHT JOIN
|
|
(
|
|
-- 1) t4 is recursively planned since the outer side is recurring
|
|
ref_1 t3 LEFT JOIN dist_1 t4 USING(a)
|
|
) t2
|
|
USING (a);
|
|
|
|
SELECT COUNT(*) FROM
|
|
-- 3) t1 is recursively planned because the outer side (t2) is
|
|
-- converted into a recurring rel
|
|
dist_1 t1
|
|
RIGHT JOIN
|
|
(
|
|
-- 2) t6 is recursively planned because now it's part of a distributed
|
|
-- inner join node that is about to be outer joined with t3
|
|
ref_1 t3
|
|
LEFT JOIN
|
|
(
|
|
-- 1-a) t4 is recursively planned since the outer side is recurring
|
|
(ref_1 t5 LEFT JOIN dist_1 t4 USING(a))
|
|
JOIN
|
|
dist_1 t6
|
|
USING(a)
|
|
JOIN
|
|
-- 1-b) t8 is recursively planned since the outer side is recurring
|
|
(ref_1 t7 LEFT JOIN dist_1 t8 USING(a))
|
|
USING(a)
|
|
)
|
|
USING(a)
|
|
) t2
|
|
USING (a);
|
|
|
|
SELECT COUNT(*) FROM
|
|
ref_1 t6
|
|
LEFT JOIN
|
|
(
|
|
ref_1 t1
|
|
LEFT JOIN
|
|
(
|
|
-- t3 is a distributed join tree so needs to be recursively planned
|
|
-- because t2 is recurring
|
|
ref_1 t2 LEFT JOIN (dist_1 t7 JOIN dist_1 t8 USING (a)) t3 USING(a)
|
|
JOIN
|
|
ref_1 t5
|
|
USING(a)
|
|
)
|
|
USING(a)
|
|
)
|
|
USING(a);
|
|
|
|
SELECT COUNT(*) FROM
|
|
ref_1 t6
|
|
LEFT JOIN
|
|
(
|
|
ref_1 t1
|
|
LEFT JOIN
|
|
(
|
|
-- t4 subquery is distributed so needs to be recursively planned
|
|
-- because t2 is recurring
|
|
ref_1 t2 LEFT JOIN (SELECT * FROM dist_1 t3) t4 USING(a)
|
|
JOIN
|
|
ref_1 t5
|
|
USING(a)
|
|
)
|
|
USING(a)
|
|
)
|
|
USING(a);
|
|
|
|
SELECT COUNT(*) FROM
|
|
ref_1 t6
|
|
LEFT JOIN
|
|
(
|
|
ref_1 t1
|
|
LEFT JOIN
|
|
(
|
|
-- t4 subquery is distributed so needs to be recursively planned
|
|
-- because t2 is recurring
|
|
ref_1 t2 LEFT JOIN (SELECT * FROM dist_3_partitioned t3) t4 USING(a)
|
|
JOIN
|
|
ref_1 t5
|
|
USING(a)
|
|
)
|
|
USING(a)
|
|
)
|
|
USING(a);
|
|
|
|
-- cannot recursively plan because t3 (inner - distributed)
|
|
-- references t1 (outer - recurring)
|
|
SELECT COUNT(*) FROM ref_1 t1 LEFT JOIN LATERAL (SELECT * FROM dist_1 t2 WHERE t1.b < t2.b) t3 USING (a);
|
|
SELECT COUNT(*) FROM (SELECT * FROM dist_1 OFFSET 100) t1 LEFT JOIN LATERAL (SELECT * FROM dist_1 t2 WHERE t1.b < t2.b) t3 USING (a);
|
|
SELECT COUNT(*) FROM local_1 t1 LEFT JOIN LATERAL (SELECT * FROM dist_1 t2 WHERE t1.b < t2.b) t3 USING (a);
|
|
SELECT COUNT(*) FROM (SELECT 1 a, generate_series(1,2) b) t1 LEFT JOIN LATERAL (SELECT * FROM dist_1 t2 WHERE t1.b < t2.b) t3 USING (a);
|
|
SELECT COUNT(*) FROM (ref_1 t10 JOIN ref_1 t11 USING(a,b)) t1 LEFT JOIN LATERAL (SELECT * FROM dist_1 t2 WHERE t1.b < t2.b) t3 USING (a);
|
|
|
|
-- cannot plan because the query in the WHERE clause of t3
|
|
-- (inner - distributed) references t1 (outer - recurring)
|
|
SELECT COUNT(*) FROM ref_1 t1
|
|
LEFT JOIN LATERAL
|
|
(
|
|
SELECT * FROM dist_1 t2 WHERE EXISTS (
|
|
SELECT * FROM dist_1 t4
|
|
WHERE t4.a = t2.a AND t4.b > t1.b
|
|
)
|
|
) t3
|
|
USING (a);
|
|
|
|
-- can recursively plan after dropping (t4.b > t1.b) qual from t3
|
|
SELECT COUNT(*) FROM ref_1 t1
|
|
LEFT JOIN
|
|
(
|
|
SELECT * FROM dist_1 t2 WHERE EXISTS (
|
|
SELECT * FROM dist_1 t4
|
|
WHERE t4.a = t2.a
|
|
)
|
|
) t3
|
|
USING (a);
|
|
|
|
-- same test using a view, can be recursively planned
|
|
CREATE VIEW my_view_1 AS
|
|
SELECT * FROM dist_1 table_name_for_view WHERE EXISTS (
|
|
SELECT * FROM dist_1 t4
|
|
WHERE t4.a = table_name_for_view.a);
|
|
|
|
SELECT COUNT(*) FROM ref_1 t1
|
|
LEFT JOIN
|
|
my_view_1 t3
|
|
USING (a);
|
|
|
|
SELECT COUNT(*) FROM
|
|
ref_1 t6
|
|
LEFT JOIN
|
|
(
|
|
ref_1 t1
|
|
LEFT JOIN
|
|
(
|
|
-- t4 subquery is distributed so needs to be recursively planned
|
|
-- because t2 is recurring.
|
|
-- However, we fail to recursively plan t4 because it references
|
|
-- t6.
|
|
ref_1 t2 LEFT JOIN LATERAL (SELECT * FROM dist_2_columnar t3 WHERE t3.a > t6.a) t4 USING(a)
|
|
JOIN
|
|
ref_1 t5
|
|
USING(a)
|
|
)
|
|
USING(a)
|
|
)
|
|
USING(a);
|
|
|
|
SELECT COUNT(*) FROM
|
|
ref_1 t6
|
|
LEFT JOIN
|
|
(
|
|
ref_1 t1
|
|
LEFT JOIN
|
|
(
|
|
-- t4 subquery is distributed so needs to be recursively planned
|
|
-- because t2 is recurring.
|
|
-- Even if the query says t2 is lateral joined with t4, t4 doesn't
|
|
-- reference anywhere else and hence can be planned recursively.
|
|
ref_1 t2 LEFT JOIN LATERAL (SELECT * FROM dist_1 t3) t4 USING(a)
|
|
JOIN
|
|
ref_1 t5
|
|
USING(a)
|
|
)
|
|
USING(a)
|
|
)
|
|
USING(a);
|
|
|
|
-- since t1 is recurring and t6 is distributed, all the distributed
|
|
-- tables in t6 will be recursively planned
|
|
SELECT COUNT(*) FROM ref_1 t1
|
|
LEFT JOIN
|
|
(
|
|
((SELECT * FROM ref_1 WHERE a > 1) t2 JOIN dist_1 t3 USING (a))
|
|
JOIN
|
|
(dist_1 t4 JOIN dist_1 t5 USING (a))
|
|
USING(a)
|
|
) t6
|
|
USING (a);
|
|
|
|
BEGIN;
|
|
-- same test but this time should fail due to
|
|
-- citus.max_intermediate_result_size
|
|
SET LOCAL citus.max_intermediate_result_size TO "0.5kB";
|
|
SELECT COUNT(*) FROM ref_1 t1
|
|
LEFT JOIN
|
|
(
|
|
((SELECT * FROM ref_1 WHERE a > 1) t2 JOIN dist_1 t3 USING (a))
|
|
JOIN
|
|
(dist_1 t4 JOIN dist_1 t5 USING (a))
|
|
USING(a)
|
|
) t6
|
|
USING (a);
|
|
ROLLBACK;
|
|
|
|
-- Same test using some views, can be recursively planned too.
|
|
-- Since t1 is recurring and t6 is distributed, all the distributed
|
|
-- tables in t6 will be recursively planned.
|
|
CREATE VIEW my_view_2 AS
|
|
(SELECT * FROM ref_1 WHERE a > 1);
|
|
|
|
CREATE VIEW my_view_3 AS
|
|
(SELECT * FROM ref_1);
|
|
|
|
SELECT COUNT(*) FROM my_view_3 t1
|
|
LEFT JOIN
|
|
(
|
|
(my_view_2 t2 JOIN dist_1 t3 USING (a))
|
|
JOIN
|
|
(dist_1 t4 JOIN dist_1 t5 USING (a))
|
|
USING(a)
|
|
) t6
|
|
USING (a);
|
|
|
|
SELECT COUNT(*) FROM ref_1 t1
|
|
-- 2) Since t8 is distributed and t1 is recurring, t8 needs be converted
|
|
-- to a recurring rel too. For this reason, subquery t8 is recursively
|
|
-- planned because t7 is recurring already.
|
|
LEFT JOIN
|
|
(
|
|
SELECT * FROM (SELECT * FROM ref_1 t2 RIGHT JOIN dist_1 t3 USING (a)) AS t4
|
|
JOIN
|
|
-- 1) subquery t6 is recursively planned because t5 is recurring
|
|
(SELECT * FROM ref_1 t5 LEFT JOIN (SELECT * FROM dist_2_columnar WHERE b < 150) t6 USING (a)) as t7
|
|
USING(a)
|
|
) t8
|
|
USING (a);
|
|
|
|
-- same test using a prepared statement
|
|
PREPARE recurring_outer_join_p1 AS
|
|
SELECT COUNT(*) FROM ref_1 t1
|
|
-- 2) Since t8 is distributed and t1 is recurring, t8 needs be converted
|
|
-- to a recurring rel too. For this reason, subquery t8 is recursively
|
|
-- planned because t7 is recurring already.
|
|
LEFT JOIN
|
|
(
|
|
SELECT * FROM (SELECT * FROM ref_1 t2 RIGHT JOIN dist_1 t3 USING (a)) AS t4
|
|
JOIN
|
|
-- 1) subquery t6 is recursively planned because t5 is recurring
|
|
(SELECT * FROM ref_1 t5 LEFT JOIN (SELECT * FROM dist_2_columnar WHERE b < $1) t6 USING (a)) as t7
|
|
USING(a)
|
|
) t8
|
|
USING (a);
|
|
|
|
EXECUTE recurring_outer_join_p1(0);
|
|
EXECUTE recurring_outer_join_p1(100);
|
|
EXECUTE recurring_outer_join_p1(100);
|
|
EXECUTE recurring_outer_join_p1(10);
|
|
EXECUTE recurring_outer_join_p1(10);
|
|
EXECUTE recurring_outer_join_p1(1000);
|
|
EXECUTE recurring_outer_join_p1(1000);
|
|
|
|
-- t5 is recursively planned because the outer side of the final
|
|
-- left join is recurring
|
|
SELECT * FROM ref_1 t1
|
|
JOIN ref_1 t2 USING (a)
|
|
LEFT JOIN ref_1 t3 USING (a)
|
|
LEFT JOIN ref_1 t4 USING (a)
|
|
LEFT JOIN dist_1 t5 USING (a)
|
|
ORDER BY 1,2,3,4,5,6 DESC
|
|
LIMIT 5;
|
|
|
|
-- t6 is recursively planned because the outer side of the final
|
|
-- left join is recurring
|
|
SELECT * FROM (SELECT * FROM ref_1 ORDER BY 1,2 LIMIT 7) t1
|
|
JOIN ref_1 t2 USING (a)
|
|
LEFT JOIN (SELECT *, random() > 1 FROM dist_1 t3) t4 USING (a)
|
|
LEFT JOIN ref_1 t5 USING (a)
|
|
LEFT JOIN dist_1 t6 USING (a)
|
|
ORDER BY 1,2,3,4,5,6,7 DESC
|
|
LIMIT 10;
|
|
|
|
--
|
|
-- Such join rels can recursively appear anywhere in the query instead
|
|
-- of simple relation rtes.
|
|
--
|
|
|
|
SELECT COUNT(*) FROM
|
|
(SELECT ref_1.a, t10.b FROM ref_1 LEFT JOIN dist_1 t10 USING(b)) AS t1,
|
|
(SELECT ref_1.a, t20.b FROM ref_1 LEFT JOIN dist_1 t20 USING(b)) AS t2,
|
|
(SELECT ref_1.a, t30.b FROM ref_1 LEFT JOIN dist_1 t30 USING(b)) AS t3,
|
|
(SELECT ref_1.a, t40.b FROM ref_1 LEFT JOIN dist_1 t40 USING(b)) AS t4,
|
|
(SELECT ref_1.a, t50.b FROM ref_1 LEFT JOIN dist_1 t50 USING(b)) AS t5
|
|
WHERE
|
|
t1.a = t5.a AND
|
|
t1.a = t4.a AND
|
|
t1.a = t3.a AND
|
|
t1.a = t2.a AND
|
|
t1.a = t1.a;
|
|
|
|
-- subqueries in the target list
|
|
|
|
SELECT t1.b, (SELECT b FROM ref_1 WHERE t1.a = a ORDER BY a,b LIMIT 1), (SELECT t2.a)
|
|
FROM ref_1
|
|
LEFT JOIN dist_1 t1 USING (a,b)
|
|
JOIN dist_1 t2 USING (a,b)
|
|
ORDER BY 1,2,3 LIMIT 5;
|
|
|
|
WITH
|
|
outer_cte_1 AS (
|
|
SELECT
|
|
t1.b,
|
|
-- 9) t3 is recursively planned since t2 is recurring
|
|
(SELECT a FROM ref_1 t2 LEFT JOIN dist_1 t3 USING(a,b) WHERE t2.a=t1.a ORDER BY 1 LIMIT 1)
|
|
FROM dist_1 t1
|
|
ORDER BY 1,2 LIMIT 10
|
|
),
|
|
outer_cte_2 AS (
|
|
SELECT * FROM (
|
|
SELECT * FROM (
|
|
SELECT * FROM (
|
|
SELECT * FROM (
|
|
SELECT * FROM (
|
|
-- 10) t5 is recursively planned since t4 is recurring
|
|
SELECT * FROM ref_1 t4
|
|
LEFT JOIN dist_1 t5
|
|
USING(a,b)
|
|
) AS t6
|
|
) AS t7
|
|
) AS t8
|
|
) AS t9
|
|
OFFSET 0
|
|
)AS t10
|
|
-- 11) t11 is recursively planned since lhs of the join tree became recurring
|
|
LEFT JOIN dist_1 t11 USING (b)
|
|
)
|
|
SELECT * FROM ref_1 t36 WHERE (b,100,a) IN (
|
|
WITH
|
|
cte_1 AS (
|
|
WITH cte_1_inner_cte AS (
|
|
-- 3) t12 is recursively planned because t11 is recurring
|
|
SELECT * FROM ref_1 t11
|
|
LEFT JOIN dist_1 t12
|
|
USING (a,b)
|
|
)
|
|
-- 4) t14 is recursively planned because t13 is recurring
|
|
SELECT * FROM ref_1 t13
|
|
LEFT JOIN dist_1 t14 USING (a,b)
|
|
JOIN cte_1_inner_cte t15
|
|
USING (a,b)
|
|
OFFSET 0
|
|
)
|
|
-- 6) t31 is recursively planned since t35 is recurring
|
|
-- 7) t34 is recursively planned since lhs of the join tree is now recurring
|
|
SELECT
|
|
DISTINCT t31.b,
|
|
-- 1) we first search for such joins in the target list and recursively plan t33
|
|
-- because t32 is recurring
|
|
(SELECT max(b) FROM ref_1 t32 LEFT JOIN dist_1 t33 USING(a,b) WHERE t31.a = t32.a),
|
|
(SELECT t34.a)
|
|
FROM ref_1 t35
|
|
LEFT JOIN dist_1 t31 USING (a,b)
|
|
LEFT JOIN dist_1 t34 USING (a,b)
|
|
-- 2) cte_1 was inlided, so we then recursively check for such joins there.
|
|
-- When doing so, we first check for cte_1_inner_cte was since it was
|
|
-- also inlined.
|
|
LEFT JOIN cte_1 USING (a,b)
|
|
-- 5) Since rhs of below join is a subquery too, we recursively search
|
|
-- for such joins there and plan distributed side of all those 10
|
|
-- joins.
|
|
LEFT JOIN (
|
|
SELECT COUNT(DISTINCT t20.a) AS a
|
|
FROM
|
|
(SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t20,
|
|
(SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t21,
|
|
(SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t22,
|
|
(SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t23,
|
|
(SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t24,
|
|
(SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t25,
|
|
(SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t26,
|
|
(SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t27,
|
|
(SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t28,
|
|
(SELECT r.a, d.b FROM ref_1 r LEFT JOIN dist_1 d USING(b) WHERE r.a IS NOT NULL) AS t29
|
|
WHERE
|
|
t20.a = t29.a AND
|
|
t20.a = t28.a AND
|
|
t20.a = t27.a AND
|
|
t20.a = t26.a AND
|
|
t20.a = t25.a AND
|
|
t20.a = t24.a AND
|
|
t20.a = t23.a AND
|
|
t20.a = t21.a AND
|
|
t20.a = t21.a AND
|
|
t20.a = t20.a
|
|
) AS t30
|
|
ON (t30.a = cte_1.a)
|
|
ORDER BY 1,2,3
|
|
) AND
|
|
-- 8) Then we search for such joins in the next (and final) qual of the WHERE clause.
|
|
-- Since both outer_cte_1 and outer_cte_2 were inlined, we will first
|
|
-- recursively check for such joins in them.
|
|
a NOT IN (SELECT outer_cte_1.b FROM outer_cte_1 LEFT JOIN outer_cte_2 USING (b));
|
|
|
|
WITH
|
|
cte_1 AS (
|
|
SELECT COUNT(*) FROM dist_1 t1
|
|
JOIN
|
|
(
|
|
(
|
|
dist_1 t2 JOIN dist_1 t3 USING (a)
|
|
)
|
|
JOIN
|
|
(
|
|
dist_1 t4 JOIN (
|
|
dist_1 t5 JOIN (
|
|
dist_1 t6 JOIN (
|
|
ref_1 t7 LEFT JOIN dist_1 t8 USING (a)
|
|
) USING(a)
|
|
) USING(a)
|
|
) USING (a)
|
|
) USING(a)
|
|
) USING (a)
|
|
),
|
|
cte_2 AS (
|
|
SELECT COUNT(*) FROM dist_1 t9
|
|
JOIN
|
|
(
|
|
(
|
|
dist_1 t10 JOIN dist_1 t11 USING (a)
|
|
)
|
|
JOIN
|
|
(
|
|
dist_1 t12 JOIN (
|
|
dist_1 t13 JOIN (
|
|
dist_1 t14 JOIN (
|
|
ref_1 t15 LEFT JOIN dist_1 t16 USING (a)
|
|
) USING(a)
|
|
) USING(a)
|
|
) USING (a)
|
|
) USING(a)
|
|
) USING (a)
|
|
)
|
|
SELECT * FROM cte_1, cte_2;
|
|
|
|
-- such joins can appear within SET operations too
|
|
SELECT COUNT(*) FROM
|
|
-- 2) given that the rhs of the right join is recurring due to set
|
|
-- operation, t1 is recursively planned too
|
|
dist_1 t1
|
|
RIGHT JOIN
|
|
(
|
|
SELECT * FROM dist_1 t2
|
|
UNION
|
|
(
|
|
-- 1) t3 is recursively planned because t4 is recurring
|
|
SELECT t3.a, t3.b FROM dist_1 t3
|
|
FULL JOIN
|
|
ref_1 t4
|
|
USING (a)
|
|
)
|
|
) t5
|
|
USING(a);
|
|
|
|
-- simple modification queries
|
|
|
|
CREATE TABLE dist_5 (LIKE dist_1);
|
|
INSERT INTO dist_5 SELECT * FROM dist_1 WHERE a < 5;
|
|
SELECT create_distributed_table('dist_5', 'a');
|
|
|
|
BEGIN;
|
|
DELETE FROM dist_5
|
|
USING (
|
|
SELECT t1.a, t1.b FROM ref_1 t1
|
|
LEFT JOIN
|
|
(
|
|
SELECT * FROM dist_1 t2 WHERE EXISTS (
|
|
SELECT * FROM dist_1 t4
|
|
WHERE t4.a = t2.a
|
|
)
|
|
) t3
|
|
USING (a)
|
|
) q
|
|
WHERE dist_5.a = q.a
|
|
RETURNING *;
|
|
ROLLBACK;
|
|
|
|
BEGIN;
|
|
UPDATE dist_5
|
|
SET b = 10
|
|
WHERE a IN (
|
|
SELECT t1.a FROM ref_1 t1
|
|
LEFT JOIN
|
|
(
|
|
SELECT * FROM dist_1 t2 WHERE EXISTS (
|
|
SELECT * FROM dist_1 t4
|
|
WHERE t4.a = t2.a
|
|
)
|
|
) t3
|
|
USING (a)
|
|
)
|
|
RETURNING *;
|
|
ROLLBACK;
|
|
|
|
-- INSERT .. SELECT: pull to coordinator
|
|
BEGIN;
|
|
DELETE FROM ref_1 WHERE a IS NULL;
|
|
|
|
INSERT INTO dist_1
|
|
SELECT t1.*
|
|
FROM ref_1 t1
|
|
LEFT JOIN dist_1 t2
|
|
ON (t1.a = t2.a);
|
|
ROLLBACK;
|
|
|
|
-- INSERT .. SELECT: repartitioned (due to <t1.a*3>)
|
|
BEGIN;
|
|
INSERT INTO dist_1
|
|
SELECT t1.a*3, t1.b
|
|
FROM dist_1 t1
|
|
JOIN
|
|
(ref_1 t2 LEFT JOIN dist_1 t3 USING(a)) t4
|
|
ON (t1.a = t4.a);
|
|
ROLLBACK;
|
|
|
|
-- INSERT .. SELECT: repartitioned
|
|
-- should be able to push-down once https://github.com/citusdata/citus/issues/6544 is fixed
|
|
BEGIN;
|
|
INSERT INTO dist_1
|
|
SELECT t1.*
|
|
FROM dist_1 t1
|
|
JOIN
|
|
(ref_1 t2 LEFT JOIN dist_1 t3 USING(a)) t4
|
|
ON (t1.a = t4.a);
|
|
ROLLBACK;
|
|
|
|
SET client_min_messages TO ERROR;
|
|
DROP SCHEMA recurring_outer_join CASCADE;
|