mirror of https://github.com/citusdata/citus.git
129 lines
3.6 KiB
PL/PgSQL
129 lines
3.6 KiB
PL/PgSQL
-- Resolve the unqualified table names used below against the
-- "intermediate result pruning" schema.
SET search_path = "intermediate result pruning";
|
|
|
|
-- sanity checks for modification queries
|
|
|
|
-- Expectations for this statement:
--   * select_data goes to a single node, because it is only used in
--     another subquery;
--   * raw_data is also the final router query, so it hits a single shard;
--   * the subquery in the WHERE clause of the DELETE is, however,
--     broadcast to all nodes.
BEGIN;
WITH select_data AS MATERIALIZED (
    SELECT *
    FROM table_1
),
raw_data AS MATERIALIZED (
    DELETE FROM table_2
    WHERE key >= (
        SELECT min(key)
        FROM select_data
        WHERE key > 1
    )
    RETURNING *
)
SELECT *
FROM raw_data
ORDER BY 1, 2;
ROLLBACK;
|
|
|
|
-- Here the DELETE filters on value::int and the scalar subquery's bound
-- includes random(). Expectations for this statement:
--   * select_data goes to a single node, because it is only used in
--     another subquery;
--   * raw_data is also the final router query, so it hits a single shard;
--   * the subquery in the WHERE clause of the DELETE is, however,
--     broadcast to all nodes.
BEGIN;
WITH select_data AS MATERIALIZED (
    SELECT *
    FROM table_1
),
raw_data AS MATERIALIZED (
    DELETE FROM table_2
    WHERE value::int >= (
        SELECT min(key)
        FROM select_data
        WHERE key > 1 + random()
    )
    RETURNING *
)
SELECT *
FROM raw_data
ORDER BY 1, 2;
ROLLBACK;
|
|
|
|
-- Only two intermediate results are needed now, because the subquery in
-- the WHERE clause is router plannable (it reads table_1 directly and the
-- DELETE is additionally restricted to key = 6).
BEGIN;
WITH select_data AS MATERIALIZED (
    SELECT *
    FROM table_1
),
raw_data AS MATERIALIZED (
    DELETE FROM table_2
    WHERE value::int >= (
            SELECT min(key)
            FROM table_1
            WHERE key > random()
        )
        AND key = 6
    RETURNING *
)
SELECT *
FROM raw_data
ORDER BY 1, 2;
ROLLBACK;
|
|
|
|
-- test with INSERT SELECT via coordinator
|
|
|
|
-- INSERT .. SELECT via the coordinator without any intermediate results.
-- OFFSET 1 forces the result to be pulled to the coordinator; OFFSET 0
-- would be optimized away.
BEGIN;
INSERT INTO table_1
SELECT *
FROM table_2
OFFSET 1;
ROLLBACK;
|
|
|
|
-- INSERT .. SELECT via the coordinator that does have an intermediate
-- result. It can be pruned to a single worker, because the filter on key
-- makes the final query a single-shard query.
BEGIN;
INSERT INTO table_1
SELECT *
FROM table_2
WHERE value IN (
        SELECT value
        FROM table_1
        WHERE random() > 1
    )
    AND key = 1;
ROLLBACK;
|
|
|
|
-- A similar INSERT .. SELECT with a more complex subquery: the IN list is
-- the UNION of two materialized CTEs, each of which is an INTERSECT of two
-- single-key lookups on table_1.
BEGIN;
INSERT INTO table_1
SELECT *
FROM table_2
WHERE key = 1
    AND value::int IN (
        WITH cte_1 AS MATERIALIZED (
            (SELECT key FROM table_1 WHERE key = 1)
            INTERSECT
            (SELECT key FROM table_1 WHERE key = 2)
        ),
        cte_2 AS MATERIALIZED (
            (SELECT key FROM table_1 WHERE key = 3)
            INTERSECT
            (SELECT key FROM table_1 WHERE key = 4)
        )
        SELECT * FROM cte_1
        UNION
        SELECT * FROM cte_2
    );
ROLLBACK;
|
|
|
|
-- The same query, except that the CTE-bearing subquery is in the FROM
-- clause. This time the final query (and the top-level intermediate
-- result) hits all the shards, because the filter is table_2.key != 1.
BEGIN;
INSERT INTO table_1
SELECT table_2.*
FROM table_2,
    (
        WITH cte_1 AS MATERIALIZED (
            (SELECT key FROM table_1 WHERE key = 1)
            INTERSECT
            (SELECT key FROM table_1 WHERE key = 2)
        ),
        cte_2 AS MATERIALIZED (
            (SELECT key FROM table_1 WHERE key = 3)
            INTERSECT
            (SELECT key FROM table_1 WHERE key = 4)
        )
        SELECT * FROM cte_1
        UNION
        SELECT * FROM cte_2
    ) AS foo
WHERE table_2.key != 1
    AND foo.key = table_2.value::int;
ROLLBACK;
|
|
|
|
|
|
BEGIN;
-- INSERT .. SELECT is planned differently from a plain SELECT, so make
-- sure the intermediate results are available everywhere they are needed.
-- The INSERT .. SELECT is wrapped in a CTE to prevent the stats CTE from
-- being moved into the SELECT, which would follow the regular SELECT code
-- path instead.
WITH stats AS MATERIALIZED (
    SELECT count(key) AS m
    FROM table_3
),
inserts AS MATERIALIZED (
    INSERT INTO table_2
    SELECT
        key,
        count(*)
    FROM table_1
    WHERE key >= (SELECT m FROM stats)
    GROUP BY key
    HAVING count(*) < (SELECT m FROM stats)
    LIMIT 1
    RETURNING *
)
SELECT count(*)
FROM inserts;
ROLLBACK;
|