citus/src/test/regress/sql/merge_repartition1.sql

581 lines
16 KiB
PL/PgSQL

SHOW server_version \gset
SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15
\gset
\if :server_version_ge_15
\else
\q
\endif
-- We create two sets of source and target tables, one set in Postgres and
-- the other in Citus distributed. We run the _exact_ MERGE SQL on both sets
-- and compare the final results of the target tables in Postgres and Citus.
-- The results should match. This process is repeated for various combinations
-- of MERGE SQL.
DROP SCHEMA IF EXISTS merge_repartition1_schema CASCADE;
CREATE SCHEMA merge_repartition1_schema;
SET search_path TO merge_repartition1_schema;
SET citus.shard_count TO 4;
SET citus.next_shard_id TO 5000000;
SET citus.explain_all_tasks TO true;
SET citus.shard_replication_factor TO 1;
SET citus.max_adaptive_executor_pool_size TO 1;
SET client_min_messages = warning;
SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0);
RESET client_min_messages;
CREATE TABLE pg_target(id int, val int);
CREATE TABLE pg_source(id int, val int, const int);
CREATE TABLE citus_target(id int, val int);
CREATE TABLE citus_source(id int, val int, const int);
SELECT citus_add_local_table_to_metadata('citus_target');
SELECT citus_add_local_table_to_metadata('citus_source');
CREATE OR REPLACE FUNCTION cleanup_data() RETURNS VOID SET search_path TO merge_repartition1_schema AS $$
TRUNCATE pg_target;
TRUNCATE pg_source;
TRUNCATE citus_target;
TRUNCATE citus_source;
SELECT undistribute_table('citus_target');
SELECT undistribute_table('citus_source');
$$
LANGUAGE SQL;
--
-- Load same set of data to both Postgres and Citus tables
--
CREATE OR REPLACE FUNCTION setup_data() RETURNS VOID SET search_path TO merge_repartition1_schema AS $$
INSERT INTO pg_source SELECT i, i+1, 1 FROM generate_series(1, 10000) i;
INSERT INTO pg_target SELECT i, 1 FROM generate_series(5001, 10000) i;
INSERT INTO citus_source SELECT i, i+1, 1 FROM generate_series(1, 10000) i;
INSERT INTO citus_target SELECT i, 1 FROM generate_series(5001, 10000) i;
$$
LANGUAGE SQL;
--
-- Compares the final target tables, merge-modified data, of both Postgres and Citus tables
--
CREATE OR REPLACE FUNCTION check_data(table1_name text, column1_name text, table2_name text, column2_name text)
RETURNS VOID SET search_path TO merge_repartition1_schema AS $$
DECLARE
table1_avg numeric;
table2_avg numeric;
BEGIN
EXECUTE format('SELECT COALESCE(AVG(%I), 0) FROM %I', column1_name, table1_name) INTO table1_avg;
EXECUTE format('SELECT COALESCE(AVG(%I), 0) FROM %I', column2_name, table2_name) INTO table2_avg;
IF table1_avg > table2_avg THEN
RAISE EXCEPTION 'The average of %.% is greater than %.%', table1_name, column1_name, table2_name, column2_name;
ELSIF table1_avg < table2_avg THEN
RAISE EXCEPTION 'The average of %.% is less than %.%', table1_name, column1_name, table2_name, column2_name;
ELSE
RAISE NOTICE 'The average of %.% is equal to %.%', table1_name, column1_name, table2_name, column2_name;
END IF;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION compare_data() RETURNS VOID SET search_path TO merge_repartition1_schema AS $$
SELECT check_data('pg_target', 'id', 'citus_target', 'id');
SELECT check_data('pg_target', 'val', 'citus_target', 'val');
$$
LANGUAGE SQL;
--
-- Target and source are distributed, and non-colocated
--
SELECT cleanup_data();
SELECT setup_data();
SELECT create_distributed_table('citus_target', 'id');
SELECT create_distributed_table('citus_source', 'id', colocate_with=>'none');
MERGE INTO pg_target t
USING pg_source s
ON t.id = s.id
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
MERGE INTO citus_target t
USING citus_source s
ON t.id = s.id
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
SELECT compare_data();
--
-- Target and source are distributed, and colocated but not joined on distribution column
--
SELECT cleanup_data();
SELECT setup_data();
SELECT create_distributed_table('citus_target', 'id');
SELECT create_distributed_table('citus_source', 'id', colocate_with=>'citus_target');
MERGE INTO pg_target t
USING (SELECT * FROM pg_source) subq
ON (subq.val = t.id)
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = subq.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(subq.val, subq.id);
MERGE INTO citus_target t
USING (SELECT * FROM citus_source) subq
ON (subq.val = t.id)
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = subq.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(subq.val, subq.id);
SELECT compare_data();
--
-- Target and source are distributed, colocated, joined on distribution column
-- but with nondistribution values
--
SELECT cleanup_data();
SELECT setup_data();
SELECT create_distributed_table('citus_target', 'id');
SELECT create_distributed_table('citus_source', 'id', colocate_with=>'citus_target');
MERGE INTO pg_target t
USING (SELECT id,const FROM pg_source UNION SELECT const,id FROM pg_source ) AS s
ON t.id = s.id
WHEN MATCHED THEN
UPDATE SET val = s.const + 1
WHEN NOT MATCHED THEN
INSERT VALUES(id, const);
MERGE INTO citus_target t
USING (SELECT id,const FROM citus_source UNION SELECT const,id FROM citus_source) AS s
ON t.id = s.id
WHEN MATCHED THEN
UPDATE SET val = s.const + 1
WHEN NOT MATCHED THEN
INSERT VALUES(id, const);
SELECT compare_data();
--
-- Repartition with a predicate on target_table_name rows in ON clause
--
SELECT cleanup_data();
SELECT setup_data();
SELECT create_distributed_table('citus_target', 'id');
SELECT create_distributed_table('citus_source', 'id', colocate_with=>'none');
MERGE INTO pg_target t
USING (SELECT * FROM pg_source WHERE id < 9500) s
ON t.id = s.id AND t.id < 9000
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
MERGE INTO citus_target t
USING (SELECT * FROM citus_source WHERE id < 9500) s
ON t.id = s.id AND t.id < 9000
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
SELECT compare_data();
--
-- Test CTE and non-colocated tables
--
SELECT cleanup_data();
SELECT setup_data();
SELECT create_distributed_table('citus_target', 'id');
SELECT create_distributed_table('citus_source', 'id', colocate_with=>'none');
WITH cte AS (
SELECT * FROM pg_source
)
MERGE INTO pg_target t
USING cte s
ON s.id = t.id
WHEN MATCHED AND t.id > 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES (s.id, s.val);
WITH cte AS (
SELECT * FROM citus_source
)
MERGE INTO citus_target t
USING cte s
ON s.id = t.id
WHEN MATCHED AND t.id > 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES (s.id, s.val);
SELECT compare_data();
--
-- Test nested CTEs
--
SELECT cleanup_data();
SELECT setup_data();
SELECT create_distributed_table('citus_target', 'id');
SELECT create_distributed_table('citus_source', 'id', colocate_with=>'none');
WITH cte1 AS (
SELECT * FROM pg_source ORDER BY 1 LIMIT 9000
),
cte2 AS(
SELECT * FROM cte1
),
cte3 AS(
SELECT * FROM cte2
)
MERGE INTO pg_target t
USING cte3 s
ON (s.id=t.id)
WHEN MATCHED AND t.id > 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES (s.id, s.val);
WITH cte1 AS (
SELECT * FROM citus_source ORDER BY 1 LIMIT 9000
),
cte2 AS(
SELECT * FROM cte1
),
cte3 AS(
SELECT * FROM cte2
)
MERGE INTO citus_target t
USING cte3 s
ON (s.id=t.id)
WHEN MATCHED AND t.id > 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES (s.id, s.val);
SELECT compare_data();
--
-- Target and source are distributed and colocated
--
SELECT cleanup_data();
SELECT setup_data();
SELECT create_distributed_table('citus_target', 'id');
SELECT create_distributed_table('citus_source', 'id', colocate_with => 'citus_target');
MERGE INTO pg_target t
USING (SELECT 999 as newval, pg_source.* FROM (SELECT * FROM pg_source ORDER BY 1 LIMIT 6000) as src LEFT JOIN pg_source USING(id)) AS s
ON t.id = s.id
WHEN MATCHED AND t.id <= 5500 THEN
UPDATE SET val = newval
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(id, newval);
MERGE INTO citus_target t
USING (SELECT 999 as newval, citus_source.* FROM (SELECT * FROM citus_source ORDER BY 1 LIMIT 6000) as src LEFT JOIN citus_source USING(id)) AS s
ON t.id = s.id
WHEN MATCHED AND t.id <= 5500 THEN
UPDATE SET val = newval
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(id, newval);
SELECT compare_data();
--
-- Target is distributed and source is reference
--
SELECT cleanup_data();
SELECT setup_data();
SELECT create_distributed_table('citus_target', 'id');
SELECT create_reference_table('citus_source');
MERGE INTO pg_target t
USING pg_source s
ON t.id = s.id
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
MERGE INTO citus_target t
USING citus_source s
ON t.id = s.id
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
SELECT compare_data();
--
-- Target is distributed and reference as source in a sub-query
--
SELECT cleanup_data();
SELECT setup_data();
SELECT create_distributed_table('citus_target', 'id');
SELECT create_reference_table('citus_source');
MERGE INTO pg_target t
USING (SELECT * FROM pg_source UNION SELECT * FROM pg_source) AS s ON t.id = s.id
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = s.val + t.val
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
MERGE INTO citus_target t
USING (SELECT * FROM citus_source UNION SELECT * FROM citus_source) AS s ON t.id = s.id
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = s.val + t.val
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
SELECT compare_data();
--
-- Target is distributed and citus-local as source
--
SELECT cleanup_data();
SELECT setup_data();
SELECT create_distributed_table('citus_target', 'id');
SELECT citus_add_local_table_to_metadata('citus_source');
MERGE INTO pg_target t
USING pg_source s
ON t.id = s.id
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
MERGE INTO citus_target t
USING citus_source s
ON t.id = s.id
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
SELECT compare_data();
--
-- Target and source distributed and non-colocated. The source query requires evaluation
-- at the coordinator
--
SELECT cleanup_data();
SELECT setup_data();
SELECT create_distributed_table('citus_target', 'id');
SELECT create_distributed_table('citus_source', 'id', colocate_with=>'none');
MERGE INTO pg_target t
USING (SELECT 100 AS insval, MAX(const) AS updval, val, MAX(id) AS sid
FROM pg_source
GROUP BY val ORDER BY sid LIMIT 6000) AS s
ON t.id = s.sid
WHEN MATCHED AND t.id <= 5500 THEN
UPDATE SET val = updval + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(sid, insval);
MERGE INTO citus_target t
USING (SELECT 100 AS insval, MAX(const) AS updval, val, MAX(id) AS sid
FROM citus_source
GROUP BY val ORDER BY sid LIMIT 6000) AS s
ON t.id = s.sid
WHEN MATCHED AND t.id <= 5500 THEN
UPDATE SET val = updval + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(sid, insval);
SELECT compare_data();
-- Test source-query that requires repartitioning on top of MERGE repartitioning
SET client_min_messages TO WARNING;
SELECT cleanup_data();
RESET client_min_messages;
SELECT setup_data();
SELECT create_distributed_table('citus_target', 'id');
SELECT create_distributed_table('citus_source', 'id', colocate_with=>'none');
MERGE INTO pg_target t
USING (SELECT s1.val FROM pg_source s1 JOIN pg_source s2 USING (val)) AS s
ON t.id = s.val
WHEN MATCHED THEN
UPDATE SET val = t.val + 1;
SET citus.enable_repartition_joins TO true;
MERGE INTO citus_target t
USING (SELECT s1.val FROM citus_source s1 JOIN citus_source s2 USING (val)) AS s
ON t.id = s.val
WHEN MATCHED THEN
UPDATE SET val = t.val + 1;
SELECT compare_data();
--
-- Test columnar as source table
--
SET client_min_messages TO WARNING;
SELECT cleanup_data();
RESET client_min_messages;
SELECT setup_data();
SELECT create_distributed_table('citus_target', 'id');
SELECT create_distributed_table('citus_source', 'id', colocate_with=>'none');
SELECT alter_table_set_access_method('citus_source', 'columnar');
MERGE INTO pg_target t
USING pg_source s
ON t.id = s.id
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
MERGE INTO citus_target t
USING citus_source s
ON t.id = s.id
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
SELECT compare_data();
SELECT alter_table_set_access_method('citus_source', 'heap');
-- Test CTE/Subquery in merge-actions (works only for router query)
SET client_min_messages TO WARNING;
SELECT cleanup_data();
RESET client_min_messages;
SELECT setup_data();
SELECT create_distributed_table('citus_target', 'id');
SELECT create_distributed_table('citus_source', 'id', colocate_with=>'citus_target');
MERGE INTO pg_target
USING pg_source
ON (pg_target.id = pg_source.id)
WHEN MATCHED AND (SELECT max_a > 5001 FROM (SELECT max(id) as max_a, max(val) as b FROM pg_target WHERE id = pg_source.id) AS foo) THEN
DELETE
WHEN NOT MATCHED AND (SELECT max_a < 5001 FROM (SELECT max(id) as max_a, max(val) as b FROM pg_target WHERE id = pg_source.id) AS foo) THEN
INSERT VALUES (pg_source.id, 100);
MERGE INTO citus_target
USING citus_source
ON (citus_target.id = citus_source.id)
WHEN MATCHED AND (SELECT max_a > 5001 FROM (SELECT max(id) as max_a, max(val) as b FROM citus_target WHERE id = citus_source.id) AS foo) THEN
DELETE
WHEN NOT MATCHED AND (SELECT max_a < 5001 FROM (SELECT max(id) as max_a, max(val) as b FROM citus_target WHERE id = citus_source.id) AS foo) THEN
INSERT VALUES (citus_source.id, 100);
SELECT compare_data();
--
-- Test target with false clause
--
SET client_min_messages TO WARNING;
SELECT cleanup_data();
RESET client_min_messages;
SELECT setup_data();
SELECT create_distributed_table('citus_target', 'id');
SELECT create_distributed_table('citus_source', 'id', colocate_with => 'citus_target');
MERGE INTO pg_target t
USING (SELECT * FROM pg_source WHERE id > 2500) AS s
ON t.id = s.id AND t.id < 2500
WHEN MATCHED AND t.id <= 5500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
MERGE INTO citus_target t
USING (SELECT * FROM citus_source WHERE id > 2500) AS s
ON t.id = s.id AND t.id < 2500
WHEN MATCHED AND t.id <= 5500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
SELECT compare_data();
SET client_min_messages TO WARNING;
SELECT cleanup_data();
RESET client_min_messages;
SELECT setup_data();
SELECT create_distributed_table('citus_target', 'id');
SELECT create_distributed_table('citus_source', 'id', colocate_with => 'citus_target');
MERGE INTO pg_target t
USING (SELECT * FROM pg_source WHERE id = 2500) AS s
ON t.id = s.id AND t.id = 5000
WHEN MATCHED AND t.id <= 5500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
MERGE INTO citus_target t
USING (SELECT * FROM citus_source WHERE id = 2500) AS s
ON t.id = s.id AND t.id = 5000
WHEN MATCHED AND t.id <= 5500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
SELECT compare_data();
DROP SCHEMA merge_repartition1_schema CASCADE;