-- citus/src/test/regress/sql/intermediate_result_pruning...

-- Resolve the unqualified table names used below (table_1, table_2, table_3)
-- through the "intermediate result pruning" schema.
SET search_path = "intermediate result pruning";

-- sanity checks for modification queries

-- Expected planning for this statement:
--   * select_data goes to a single node, because it is used in another
--     subquery
--   * raw_data is also the final router query, so it hits a single shard
--   * the subquery in the WHERE clause of the DELETE, however, is broadcast
--     to all nodes
-- The transaction is rolled back so the DELETE does not persist.
BEGIN;

WITH select_data AS MATERIALIZED (
	SELECT *
	FROM table_1
),
raw_data AS MATERIALIZED (
	DELETE FROM table_2
	WHERE key >= (
		SELECT min(key)
		FROM select_data
		WHERE key > 1
	)
	RETURNING *
)
SELECT *
FROM raw_data
ORDER BY 1, 2;

ROLLBACK;

-- Variant of the previous check: the DELETE compares value::int (instead of
-- key) with the scalar subquery, and the subquery's filter adds random().
-- Expected planning is described the same way:
--   * select_data goes to a single node, because it is used in another
--     subquery
--   * raw_data is also the final router query, so it hits a single shard
--   * the subquery in the WHERE clause of the DELETE, however, is broadcast
--     to all nodes
BEGIN;

WITH select_data AS MATERIALIZED (
	SELECT *
	FROM table_1
),
raw_data AS MATERIALIZED (
	DELETE FROM table_2
	WHERE value::int >= (
		SELECT min(key)
		FROM select_data
		WHERE key > 1 + random()
	)
	RETURNING *
)
SELECT *
FROM raw_data
ORDER BY 1, 2;

ROLLBACK;

-- Now only two intermediate results are needed, because the subquery in the
-- WHERE clause is router plannable.
-- NOTE(review): select_data is declared but never referenced in this
-- statement; the scalar subquery reads table_1 directly.
BEGIN;

WITH select_data AS MATERIALIZED (
	SELECT *
	FROM table_1
),
raw_data AS MATERIALIZED (
	DELETE FROM table_2
	WHERE value::int >= (
			SELECT min(key)
			FROM table_1
			WHERE key > random()
		)
		AND key = 6
	RETURNING *
)
SELECT *
FROM raw_data
ORDER BY 1, 2;

ROLLBACK;

-- test with INSERT SELECT via coordinator

-- INSERT .. SELECT via the coordinator without any intermediate results.
-- OFFSET 1 makes sure the result has to be pulled to the coordinator;
-- OFFSET 0 would be optimized away.
BEGIN;

INSERT INTO table_1
	SELECT *
	FROM table_2
	OFFSET 1;

ROLLBACK;

-- INSERT .. SELECT via the coordinator that does have an intermediate result.
-- The result can be pruned to a single worker, because the key = 1 filter
-- puts the final query on a single shard.
-- random() returns values below 1, so "random() > 1" never matches any row;
-- presumably it is only there to shape how the subquery is planned -- confirm.
BEGIN;

INSERT INTO table_1
	SELECT *
	FROM table_2
	WHERE value IN (
			SELECT value
			FROM table_1
			WHERE random() > 1
		)
		AND key = 1;

ROLLBACK;

-- A similar query with a more complex subquery: value::int is matched against
-- the UNION of two materialized CTEs, each of which is the INTERSECT of two
-- single-key lookups on table_1.
BEGIN;

INSERT INTO table_1
	SELECT *
	FROM table_2
	WHERE key = 1
		AND value::int IN (
			WITH cte_1 AS MATERIALIZED (
				(SELECT key FROM table_1 WHERE key = 1)
				INTERSECT
				(SELECT key FROM table_1 WHERE key = 2)
			),
			cte_2 AS MATERIALIZED (
				(SELECT key FROM table_1 WHERE key = 3)
				INTERSECT
				(SELECT key FROM table_1 WHERE key = 4)
			)
			SELECT * FROM cte_1
			UNION
			SELECT * FROM cte_2
		);

ROLLBACK;

-- Same query, but with the CTEs in the FROM clause. This time the final query
-- (and the top-level intermediate result) hits all the shards, because the
-- filter is table_2.key != 1.
BEGIN;

INSERT INTO table_1
	SELECT table_2.*
	FROM
		table_2,
		(
			WITH cte_1 AS MATERIALIZED (
				(SELECT key FROM table_1 WHERE key = 1)
				INTERSECT
				(SELECT key FROM table_1 WHERE key = 2)
			),
			cte_2 AS MATERIALIZED (
				(SELECT key FROM table_1 WHERE key = 3)
				INTERSECT
				(SELECT key FROM table_1 WHERE key = 4)
			)
			SELECT * FROM cte_1
			UNION
			SELECT * FROM cte_2
		) AS foo
	WHERE table_2.key != 1
		AND foo.key = table_2.value::int;

ROLLBACK;


 BEGIN;

-- INSERT .. SELECT is planned differently, so make sure we have results
-- everywhere. The INSERT .. SELECT is placed inside a CTE to prevent the CTE
-- from being moved into the SELECT, which would follow the regular code path
-- for SELECT.
WITH stats AS MATERIALIZED (
  SELECT count(key) AS m
  FROM table_3
),
inserts AS MATERIALIZED (
  INSERT INTO table_2
    SELECT
      key,
      count(*)
    FROM table_1
    WHERE key >= (SELECT m FROM stats)
    GROUP BY key
    HAVING count(*) < (SELECT m FROM stats)
    LIMIT 1
  RETURNING *
)
SELECT count(*)
FROM inserts;

ROLLBACK;