citus/src/test/regress/expected/merge.out

4368 lines
182 KiB
Plaintext

SHOW server_version \gset
SELECT substring(:'server_version', '\d+')::int >= 15 AS server_version_ge_15
\gset
\if :server_version_ge_15
\else
\q
\endif
-- MERGE command performs a join from data_source to target_table_name
DROP SCHEMA IF EXISTS merge_schema CASCADE;
NOTICE: schema "merge_schema" does not exist, skipping
--MERGE INTO target
--USING source
--WHEN NOT MATCHED
--WHEN MATCHED AND <condition>
--WHEN MATCHED
CREATE SCHEMA merge_schema;
SET search_path TO merge_schema;
SET citus.shard_count TO 4;
SET citus.next_shard_id TO 4000000;
SET citus.explain_all_tasks TO true;
SET citus.shard_replication_factor TO 1;
SET citus.max_adaptive_executor_pool_size TO 1;
SET client_min_messages = warning;
SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0);
?column?
---------------------------------------------------------------------
1
(1 row)
ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART 13000;
RESET client_min_messages;
CREATE TABLE source
(
order_id INT,
customer_id INT,
order_center VARCHAR,
order_time timestamp
);
CREATE TABLE target
(
customer_id INT,
last_order_id INT,
order_center VARCHAR,
order_count INT,
last_order timestamp
);
-- insert_data(): seeds the order fixtures used by the MERGE tests that follow.
--   * source gets five orders for customer_ids 30000 through 30004.
--   * target gets six customers: 40000 through 40002 have no source row, while
--     30002 through 30004 also exist in source. Only the 30004 target row has
--     order_center 'XX', the value the conditional DELETE action tests for.
--   * Every target row starts with order_count = -1.
-- Takes no arguments and returns VOID; callers TRUNCATE both tables before
-- re-running it to get a clean slate.
CREATE FUNCTION insert_data() RETURNS VOID AS $$
INSERT INTO source (order_id, customer_id, order_center, order_time)
VALUES (101, 30000, 'WX', '2022-01-01 00:00:00'); -- Do not match
INSERT INTO source (order_id, customer_id, order_center, order_time)
VALUES (102, 30001, 'CX', '2022-01-01 00:00:00'); -- Do not match
INSERT INTO source (order_id, customer_id, order_center, order_time)
VALUES (103, 30002, 'AX', '2022-01-01 00:00:00'); -- Does match
INSERT INTO source (order_id, customer_id, order_center, order_time)
VALUES (104, 30003, 'JX','2022-01-01 00:00:00' ); -- Does match
INSERT INTO source (order_id, customer_id, order_center, order_time)
VALUES (105, 30004, 'JX','2022-01-01 00:00:00' ); -- Does match
INSERT INTO target (customer_id, last_order_id, order_center, order_count, last_order)
VALUES (40000, 097, 'MK', -1, '2019-09-15 08:13:00');
INSERT INTO target (customer_id, last_order_id, order_center, order_count, last_order)
VALUES (40001, 098, 'NU', -1, '2020-07-12 01:05:00');
INSERT INTO target (customer_id, last_order_id, order_center, order_count, last_order)
VALUES (40002, 100, 'DS', -1, '2022-05-21 04:12:00');
INSERT INTO target (customer_id, last_order_id, order_center, order_count, last_order)
VALUES (30002, 103, 'AX', -1, '2021-01-17 19:53:00'); -- Matches the source
INSERT INTO target (customer_id, last_order_id, order_center, order_count, last_order)
VALUES (30003, 099, 'JX', -1, '2020-09-11 03:23:00'); -- Matches the source
INSERT INTO target (customer_id, last_order_id, order_center, order_count, last_order)
VALUES (30004, 099, 'XX', -1, '2020-09-11 03:23:00'); -- Matches the source id AND the condition.
$$
LANGUAGE SQL;
SELECT insert_data();
insert_data
---------------------------------------------------------------------
(1 row)
SELECT 'Testing PG tables';
?column?
---------------------------------------------------------------------
Testing PG tables
(1 row)
-- Reference run on plain PostgreSQL tables (nothing has been added to Citus
-- metadata yet): matched customers whose target order_center is 'XX' are
-- deleted, other matched customers get order_count incremented and
-- last_order_id refreshed, and unmatched source rows are inserted.
MERGE INTO target t
USING source s
ON (t.customer_id = s.customer_id)
WHEN MATCHED AND t.order_center = 'XX' THEN
DELETE
WHEN MATCHED THEN
UPDATE SET -- Existing customer, update the order count and last_order_id
order_count = t.order_count + 1,
last_order_id = s.order_id
WHEN NOT MATCHED THEN -- New entry, record it.
-- The unqualified customer_id in VALUES is the source row's column: a
-- NOT MATCHED action has no target row to read from.
INSERT (customer_id, last_order_id, order_center, order_count, last_order)
VALUES (customer_id, s.order_id, s.order_center, 123, s.order_time);
-- Our gold result to compare against
SELECT * INTO pg_result FROM target ORDER BY 1 ;
-- Clean the slate
TRUNCATE source;
TRUNCATE target;
SELECT insert_data();
insert_data
---------------------------------------------------------------------
(1 row)
-- Test with both target and source as Citus local
SELECT 'local - local';
?column?
---------------------------------------------------------------------
local - local
(1 row)
SELECT citus_add_local_table_to_metadata('target');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
SELECT citus_add_local_table_to_metadata('source');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
MERGE INTO target t
USING source s
ON (t.customer_id = s.customer_id)
WHEN MATCHED AND t.order_center = 'XX' THEN
DELETE
WHEN MATCHED THEN
UPDATE SET -- Existing customer, update the order count and last_order_id
order_count = t.order_count + 1,
last_order_id = s.order_id
WHEN NOT MATCHED THEN -- New entry, record it.
INSERT (customer_id, last_order_id, order_center, order_count, last_order)
VALUES (customer_id, s.order_id, s.order_center, 123, s.order_time);
SELECT * INTO local_local FROM target ORDER BY 1 ;
-- Should be equal
SELECT c.*, p.*
FROM local_local c, pg_result p
WHERE c.customer_id = p.customer_id
ORDER BY 1,2;
customer_id | last_order_id | order_center | order_count | last_order | customer_id | last_order_id | order_center | order_count | last_order
---------------------------------------------------------------------
30000 | 101 | WX | 123 | Sat Jan 01 00:00:00 2022 | 30000 | 101 | WX | 123 | Sat Jan 01 00:00:00 2022
30001 | 102 | CX | 123 | Sat Jan 01 00:00:00 2022 | 30001 | 102 | CX | 123 | Sat Jan 01 00:00:00 2022
30002 | 103 | AX | 0 | Sun Jan 17 19:53:00 2021 | 30002 | 103 | AX | 0 | Sun Jan 17 19:53:00 2021
30003 | 104 | JX | 0 | Fri Sep 11 03:23:00 2020 | 30003 | 104 | JX | 0 | Fri Sep 11 03:23:00 2020
40000 | 97 | MK | -1 | Sun Sep 15 08:13:00 2019 | 40000 | 97 | MK | -1 | Sun Sep 15 08:13:00 2019
40001 | 98 | NU | -1 | Sun Jul 12 01:05:00 2020 | 40001 | 98 | NU | -1 | Sun Jul 12 01:05:00 2020
40002 | 100 | DS | -1 | Sat May 21 04:12:00 2022 | 40002 | 100 | DS | -1 | Sat May 21 04:12:00 2022
(7 rows)
-- Must return zero rows
SELECT *
FROM pg_result p
WHERE NOT EXISTS (SELECT FROM local_local c WHERE c.customer_id = p.customer_id);
customer_id | last_order_id | order_center | order_count | last_order
---------------------------------------------------------------------
(0 rows)
SELECT 'Testing Dist - Dist';
?column?
---------------------------------------------------------------------
Testing Dist - Dist
(1 row)
-- Clean the slate
TRUNCATE source;
TRUNCATE target;
SELECT insert_data();
insert_data
---------------------------------------------------------------------
(1 row)
SELECT undistribute_table('target');
NOTICE: creating a new table for merge_schema.target
NOTICE: moving the data of merge_schema.target
NOTICE: dropping the old merge_schema.target
NOTICE: renaming the new table to merge_schema.target
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT undistribute_table('source');
NOTICE: creating a new table for merge_schema.source
NOTICE: moving the data of merge_schema.source
NOTICE: dropping the old merge_schema.source
NOTICE: renaming the new table to merge_schema.source
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('target', 'customer_id');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.target$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('source', 'customer_id', colocate_with=>'target');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Updates the row with customer_id = 30002
SELECT * from target t WHERE t.customer_id = 30002;
customer_id | last_order_id | order_center | order_count | last_order
---------------------------------------------------------------------
30002 | 103 | AX | -1 | Sun Jan 17 19:53:00 2021
(1 row)
-- Turn on notice to print tasks sent to nodes
SET citus.log_remote_commands to true;
MERGE INTO target t
USING source s
ON (t.customer_id = s.customer_id) AND t.customer_id = 30002
WHEN MATCHED AND t.order_center = 'XX' THEN
DELETE
WHEN MATCHED THEN
UPDATE SET -- Existing customer, update the order count and last_order_id
order_count = t.order_count + 1,
last_order_id = s.order_id
WHEN NOT MATCHED THEN
DO NOTHING;
NOTICE: issuing MERGE INTO merge_schema.target_xxxxxxx t USING merge_schema.source_xxxxxxx s ON ((t.customer_id OPERATOR(pg_catalog.=) s.customer_id) AND (t.customer_id OPERATOR(pg_catalog.=) 30002)) WHEN MATCHED AND ((t.order_center)::text OPERATOR(pg_catalog.=) 'XX'::text) THEN DELETE WHEN MATCHED THEN UPDATE SET last_order_id = s.order_id, order_count = (t.order_count OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN DO NOTHING
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
SET citus.log_remote_commands to false;
SELECT * from target t WHERE t.customer_id = 30002;
customer_id | last_order_id | order_center | order_count | last_order
---------------------------------------------------------------------
30002 | 103 | AX | 0 | Sun Jan 17 19:53:00 2021
(1 row)
-- Deletes the row with customer_id = 30004
SELECT * from target t WHERE t.customer_id = 30004;
customer_id | last_order_id | order_center | order_count | last_order
---------------------------------------------------------------------
30004 | 99 | XX | -1 | Fri Sep 11 03:23:00 2020
(1 row)
MERGE INTO target t
USING source s
ON (t.customer_id = s.customer_id) AND t.customer_id = 30004
WHEN MATCHED AND t.order_center = 'XX' THEN
DELETE
WHEN MATCHED THEN
UPDATE SET -- Existing customer, update the order count and last_order_id
order_count = t.order_count + 1,
last_order_id = s.order_id
WHEN NOT MATCHED THEN -- New entry, record it.
INSERT (customer_id, last_order_id, order_center, order_count, last_order)
VALUES (customer_id, s.order_id, s.order_center, 123, s.order_time);
SELECT * from target t WHERE t.customer_id = 30004;
customer_id | last_order_id | order_center | order_count | last_order
---------------------------------------------------------------------
(0 rows)
-- Updating distribution column is allowed if the operation is a no-op
SELECT * from target t WHERE t.customer_id = 30000;
customer_id | last_order_id | order_center | order_count | last_order
---------------------------------------------------------------------
30000 | 101 | WX | 123 | Sat Jan 01 00:00:00 2022
(1 row)
MERGE INTO target t
USING SOURCE s
ON (t.customer_id = s.customer_id AND t.customer_id = 30000)
WHEN MATCHED THEN
UPDATE SET customer_id = 30000;
MERGE INTO target t
USING SOURCE s
ON (t.customer_id = s.customer_id AND t.customer_id = 30000)
WHEN MATCHED THEN
UPDATE SET customer_id = t.customer_id;
SELECT * from target t WHERE t.customer_id = 30000;
customer_id | last_order_id | order_center | order_count | last_order
---------------------------------------------------------------------
30000 | 101 | WX | 123 | Sat Jan 01 00:00:00 2022
(1 row)
--
-- Test MERGE with CTE as source
--
CREATE TABLE t1(id int, val int);
CREATE TABLE s1(id int, val int);
-- load(): seeds s1 (MERGE source) and t1 (MERGE target) for the CTE-as-source
-- tests. Ids 1 and 2 exist in both tables, ids 3, 4 and 6 exist only in s1, and
-- id 5 exists only in t1. Takes no arguments and returns VOID; callers TRUNCATE
-- both tables before re-running it.
CREATE FUNCTION load() RETURNS VOID AS $$
INSERT INTO s1 VALUES(1, 0); -- Matches DELETE clause
INSERT INTO s1 VALUES(2, 1); -- Matches UPDATE clause
INSERT INTO s1 VALUES(3, 1); -- No Match INSERT clause
INSERT INTO s1 VALUES(4, 1); -- No Match INSERT clause
INSERT INTO s1 VALUES(6, 1); -- No Match INSERT clause
INSERT INTO t1 VALUES(1, 0); -- Will be deleted
INSERT INTO t1 VALUES(2, 0); -- Will be updated
INSERT INTO t1 VALUES(5, 0); -- Will be intact
$$
LANGUAGE SQL;
SELECT 'Testing PG tables';
?column?
---------------------------------------------------------------------
Testing PG tables
(1 row)
SELECT load();
load
---------------------------------------------------------------------
(1 row)
WITH pg_res AS (
SELECT * FROM s1
)
MERGE INTO t1
USING pg_res ON (pg_res.id = t1.id)
WHEN MATCHED AND pg_res.val = 0 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET val = t1.val + 1
WHEN NOT MATCHED THEN
INSERT (id, val) VALUES (pg_res.id, pg_res.val);
SELECT * FROM t1 order by id;
id | val
---------------------------------------------------------------------
2 | 1
3 | 1
4 | 1
5 | 0
6 | 1
(5 rows)
SELECT * INTO merge_result FROM t1 order by id;
-- Test Citus local tables
TRUNCATE t1;
TRUNCATE s1;
SELECT load();
load
---------------------------------------------------------------------
(1 row)
SELECT citus_add_local_table_to_metadata('t1');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
SELECT citus_add_local_table_to_metadata('s1');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
WITH s1_res AS (
SELECT * FROM s1
)
MERGE INTO t1
USING s1_res ON (s1_res.id = t1.id)
WHEN MATCHED AND s1_res.val = 0 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET val = t1.val + 1
WHEN NOT MATCHED THEN
INSERT (id, val) VALUES (s1_res.id, s1_res.val);
-- id 1 is deleted, id 2 has val incremented, ids 3, 4 and 6 are inserted, id 5 is untouched
SELECT * FROM t1 order by id;
id | val
---------------------------------------------------------------------
2 | 1
3 | 1
4 | 1
5 | 0
6 | 1
(5 rows)
-- Should be empty
SELECT *
FROM merge_result p
WHERE NOT EXISTS (SELECT 1 FROM t1 c WHERE c.id = p.id AND c.val = p.val);
id | val
---------------------------------------------------------------------
(0 rows)
SELECT 'Testing dist - dist';
?column?
---------------------------------------------------------------------
Testing dist - dist
(1 row)
SELECT undistribute_table('t1');
NOTICE: creating a new table for merge_schema.t1
NOTICE: moving the data of merge_schema.t1
NOTICE: dropping the old merge_schema.t1
NOTICE: renaming the new table to merge_schema.t1
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT undistribute_table('s1');
NOTICE: creating a new table for merge_schema.s1
NOTICE: moving the data of merge_schema.s1
NOTICE: dropping the old merge_schema.s1
NOTICE: renaming the new table to merge_schema.s1
undistribute_table
---------------------------------------------------------------------
(1 row)
TRUNCATE t1;
TRUNCATE s1;
SELECT load();
load
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('t1', 'id');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.t1$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('s1', 'id', colocate_with=>'t1');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.s1$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT * FROM t1 order by id;
id | val
---------------------------------------------------------------------
1 | 0
2 | 0
5 | 0
(3 rows)
SET citus.log_remote_commands to true;
WITH s1_res AS (
SELECT * FROM s1
)
MERGE INTO t1
USING s1_res ON (s1_res.id = t1.id) AND t1.id = 6
WHEN MATCHED AND s1_res.val = 0 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET val = t1.val + 1
WHEN NOT MATCHED THEN
INSERT (id, val) VALUES (s1_res.id, s1_res.val);
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing WITH s1_res AS (SELECT s1.id, s1.val FROM merge_schema.s1_xxxxxxx s1) MERGE INTO merge_schema.t1_xxxxxxx t1 USING s1_res ON ((s1_res.id OPERATOR(pg_catalog.=) t1.id) AND (t1.id OPERATOR(pg_catalog.=) 6)) WHEN MATCHED AND (s1_res.val OPERATOR(pg_catalog.=) 0) THEN DELETE WHEN MATCHED THEN UPDATE SET val = (t1.val OPERATOR(pg_catalog.+) 1) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx'
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
SET citus.log_remote_commands to false;
-- The join condition requires t1.id = 6 and t1 has no such row, so every source row
-- (including id 6) is a NO match and is inserted into the target
SELECT * FROM t1 order by 1, 2;
id | val
---------------------------------------------------------------------
1 | 0
1 | 0
2 | 0
2 | 1
3 | 1
4 | 1
5 | 0
6 | 1
(8 rows)
--
-- Test with multiple join conditions
--
CREATE TABLE t2(id int, val int, src text);
CREATE TABLE s2(id int, val int, src text);
-- Redefines insert_data() (same signature, so the earlier source/target seeding
-- body is replaced) to seed t2 and s2 for the multiple-join-condition tests.
-- The MERGE statements on these tables join on both id and src, which is why
-- the s2 row (2, 0, 'source') does not match the t2 row (2, 0, 'target').
CREATE OR REPLACE FUNCTION insert_data() RETURNS VOID AS $$
INSERT INTO t2 VALUES(1, 0, 'target');
INSERT INTO t2 VALUES(2, 0, 'target');
INSERT INTO t2 VALUES(3, 1, 'match');
INSERT INTO t2 VALUES(4, 0, 'match');
INSERT INTO s2 VALUES(2, 0, 'source'); -- No match insert
INSERT INTO s2 VALUES(4, 0, 'match'); -- Match delete
INSERT INTO s2 VALUES(3, 10, 'match'); -- Match update
$$
LANGUAGE SQL;
SELECT 'Testing PG tables';
?column?
---------------------------------------------------------------------
Testing PG tables
(1 row)
SELECT insert_data();
insert_data
---------------------------------------------------------------------
(1 row)
MERGE INTO t2
USING s2
ON t2.id = s2.id AND t2.src = s2.src
WHEN MATCHED AND t2.val = 1 THEN
UPDATE SET val = s2.val + 10
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT (id, val, src) VALUES (s2.id, s2.val, s2.src);
SELECT * FROM t2 ORDER BY 1;
id | val | src
---------------------------------------------------------------------
1 | 0 | target
2 | 0 | target
2 | 0 | source
3 | 20 | match
(4 rows)
SELECT * INTO pg_t2 FROM t2;
SELECT 'Testing Citus local tables';
?column?
---------------------------------------------------------------------
Testing Citus local tables
(1 row)
TRUNCATE t2;
TRUNCATE s2;
SELECT insert_data();
insert_data
---------------------------------------------------------------------
(1 row)
SELECT citus_add_local_table_to_metadata('t2');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
SELECT citus_add_local_table_to_metadata('s2');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
MERGE INTO t2
USING s2
ON t2.id = s2.id AND t2.src = s2.src
WHEN MATCHED AND t2.val = 1 THEN
UPDATE SET val = s2.val + 10
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT (id, val, src) VALUES (s2.id, s2.val, s2.src);
SELECT * FROM t2 ORDER BY 1;
id | val | src
---------------------------------------------------------------------
1 | 0 | target
2 | 0 | target
2 | 0 | source
3 | 20 | match
(4 rows)
-- Should be empty
SELECT *
FROM pg_t2 p
WHERE NOT EXISTS (SELECT 1 FROM t2 c WHERE c.id = p.id AND c.val = p.val AND c.src = p.src);
id | val | src
---------------------------------------------------------------------
(0 rows)
SELECT 'Testing Dist - Dist';
?column?
---------------------------------------------------------------------
Testing Dist - Dist
(1 row)
-- Clean the slate
TRUNCATE t2;
TRUNCATE s2;
SELECT insert_data();
insert_data
---------------------------------------------------------------------
(1 row)
SELECT undistribute_table('t2');
NOTICE: creating a new table for merge_schema.t2
NOTICE: moving the data of merge_schema.t2
NOTICE: dropping the old merge_schema.t2
NOTICE: renaming the new table to merge_schema.t2
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT undistribute_table('s2');
NOTICE: creating a new table for merge_schema.s2
NOTICE: moving the data of merge_schema.s2
NOTICE: dropping the old merge_schema.s2
NOTICE: renaming the new table to merge_schema.s2
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('t2', 'id');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.t2$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('s2', 'id', colocate_with => 't2');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.s2$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT * FROM t2 ORDER BY 1;
id | val | src
---------------------------------------------------------------------
1 | 0 | target
2 | 0 | target
3 | 1 | match
4 | 0 | match
(4 rows)
SET citus.log_remote_commands to true;
MERGE INTO t2
USING s2
ON t2.id = s2.id AND t2.src = s2.src AND t2.id = 4
WHEN MATCHED AND t2.val = 1 THEN
UPDATE SET val = s2.val + 10
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
DO NOTHING;
NOTICE: issuing MERGE INTO merge_schema.t2_xxxxxxx t2 USING merge_schema.s2_xxxxxxx s2 ON ((t2.id OPERATOR(pg_catalog.=) s2.id) AND (t2.src OPERATOR(pg_catalog.=) s2.src) AND (t2.id OPERATOR(pg_catalog.=) 4)) WHEN MATCHED AND (t2.val OPERATOR(pg_catalog.=) 1) THEN UPDATE SET val = (s2.val OPERATOR(pg_catalog.+) 10) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN DO NOTHING
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
SET citus.log_remote_commands to false;
-- Row with id = 4 is a match for delete clause, row should be deleted
-- Row with id = 3 is a NO match (the join condition requires t2.id = 4); because of
-- DO NOTHING the source row is not inserted and the target row stays unchanged
SELECT * FROM t2 ORDER BY 1;
id | val | src
---------------------------------------------------------------------
1 | 0 | target
2 | 0 | target
3 | 1 | match
(3 rows)
--
-- With sub-query as the MERGE source
--
TRUNCATE t2;
TRUNCATE s2;
SELECT undistribute_table('t2');
NOTICE: creating a new table for merge_schema.t2
NOTICE: moving the data of merge_schema.t2
NOTICE: dropping the old merge_schema.t2
NOTICE: renaming the new table to merge_schema.t2
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT undistribute_table('s2');
NOTICE: creating a new table for merge_schema.s2
NOTICE: moving the data of merge_schema.s2
NOTICE: dropping the old merge_schema.s2
NOTICE: renaming the new table to merge_schema.s2
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT citus_add_local_table_to_metadata('t2');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
SELECT citus_add_local_table_to_metadata('s2');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
SELECT insert_data();
insert_data
---------------------------------------------------------------------
(1 row)
MERGE INTO t2 t
USING (SELECT * FROM s2) s
ON t.id = s.id AND t.src = s.src
WHEN MATCHED AND t.val = 1 THEN
UPDATE SET val = s.val + 10
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT (id, val, src) VALUES (s.id, s.val, s.src);
SELECT * FROM t2 ORDER BY 1;
id | val | src
---------------------------------------------------------------------
1 | 0 | target
2 | 0 | target
2 | 0 | source
3 | 20 | match
(4 rows)
SELECT * INTO dist_res FROM t2 ORDER BY 1;
-- Should be equal
SELECT c.*, p.*
FROM t2 c, pg_t2 p
WHERE c.id = p.id AND c.src = p.src
ORDER BY 1,2;
id | val | src | id | val | src
---------------------------------------------------------------------
1 | 0 | target | 1 | 0 | target
2 | 0 | source | 2 | 0 | source
2 | 0 | target | 2 | 0 | target
3 | 20 | match | 3 | 20 | match
(4 rows)
-- Should be empty
SELECT *
FROM pg_t2 p
WHERE NOT EXISTS (SELECT 1 FROM t2 c WHERE c.id = p.id AND c.val = p.val AND c.src = p.src);
id | val | src
---------------------------------------------------------------------
(0 rows)
--
-- Using two source tables
--
CREATE TABLE t3(id int, val int, src text);
CREATE TABLE s3_1(id int, val int, src text);
CREATE TABLE s3_2(id int, val int, src text);
-- Redefines insert_data() once more, now seeding t3 (target) and the two source
-- tables s3_1 and s3_2. The tests build the MERGE source by joining s3_1 with
-- s3_2 on id, taking id from s3_1 and val/src from s3_2, so only ids present in
-- both source tables (2, 3 and 4) reach the MERGE.
CREATE OR REPLACE FUNCTION insert_data() RETURNS VOID AS $$
INSERT INTO t3 VALUES(1, 0, 'target'); -- Intact
INSERT INTO t3 VALUES(2, 0, 'target');
INSERT INTO t3 VALUES(3, 0, 'target');
INSERT INTO t3 VALUES(5, 0, 'target'); -- Intact
INSERT INTO s3_1 VALUES(2, 0, 'source1');
INSERT INTO s3_1 VALUES(3, 0, 'source1');
INSERT INTO s3_1 VALUES(4, 0, 'source1');
INSERT INTO s3_2 VALUES(2, 1, 'source2'); -- Match update
INSERT INTO s3_2 VALUES(3, 0, 'source2'); -- Match delete
INSERT INTO s3_2 VALUES(4, 0, 'source2'); -- No match insert
INSERT INTO s3_2 VALUES(6, 0, 'source2'); -- Will miss the source-subquery-join
$$
LANGUAGE SQL;
SELECT insert_data();
insert_data
---------------------------------------------------------------------
(1 row)
MERGE INTO t3
USING (SELECT s3_1.id, s3_2.val, s3_2.src FROM s3_1, s3_2 WHERE s3_1.id = s3_2.id) sub
ON (t3.id = sub.id)
WHEN MATCHED AND sub.val = 1 THEN
UPDATE SET val = t3.val + 10
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT (id, val, src) VALUES (sub.id, sub.val, sub.src);
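-- Joining on columns inside the sub-query, run a second time against the already-merged
-- target: id 2 is incremented again (10 -> 11), id 3 (deleted by the previous MERGE) is
-- re-inserted, and id 4 (inserted by the previous MERGE) is now matched and deleted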
MERGE INTO t3
USING (SELECT s3_1.id, s3_2.val, s3_2.src FROM s3_1, s3_2 WHERE s3_1.id = s3_2.id) sub
ON (t3.id = sub.id)
WHEN MATCHED AND sub.val = 1 THEN
UPDATE SET val = t3.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT (id, val, src) VALUES (sub.id, sub.val, sub.src);
-- Constant Join condition
-- ON (FALSE) can never be satisfied, so neither WHEN MATCHED action fires:
-- every row of s3_res takes the NOT MATCHED branch and is inserted into t3.
WITH s3_res AS (
SELECT * FROM s3_1
)
MERGE INTO t3
USING s3_res ON (FALSE)
WHEN MATCHED AND s3_res.val = 0 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET val = t3.val + 1
WHEN NOT MATCHED THEN
INSERT (id, val, src) VALUES (s3_res.id, s3_res.val, s3_res.src);
SELECT * FROM t3 ORDER BY 1,3;
id | val | src
---------------------------------------------------------------------
1 | 0 | target
2 | 0 | source1
2 | 11 | target
3 | 0 | source1
3 | 0 | source2
4 | 0 | source1
5 | 0 | target
(7 rows)
SELECT * INTO pg_t3 FROM t3 ORDER BY 1;
SELECT 'Testing Local - Local';
?column?
---------------------------------------------------------------------
Testing Local - Local
(1 row)
TRUNCATE t3;
TRUNCATE s3_1;
TRUNCATE s3_2;
SELECT citus_add_local_table_to_metadata('t3');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
SELECT citus_add_local_table_to_metadata('s3_1');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
SELECT citus_add_local_table_to_metadata('s3_2');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
SELECT insert_data();
insert_data
---------------------------------------------------------------------
(1 row)
MERGE INTO t3
USING (SELECT s3_1.id, s3_2.val, s3_2.src FROM s3_1, s3_2 WHERE s3_1.id = s3_2.id) sub
ON (t3.id = sub.id)
WHEN MATCHED AND sub.val = 1 THEN
UPDATE SET val = t3.val + 10
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT (id, val, src) VALUES (sub.id, sub.val, sub.src);
-- Joining on columns inside the sub-query
MERGE INTO t3
USING (SELECT s3_1.id, s3_2.val, s3_2.src FROM s3_1, s3_2 WHERE s3_1.id = s3_2.id) sub
ON (t3.id = sub.id)
WHEN MATCHED AND sub.val = 1 THEN
UPDATE SET val = t3.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT (id, val, src) VALUES (sub.id, sub.val, sub.src);
-- Constant Join condition
WITH s3_res AS (
SELECT * FROM s3_1
)
MERGE INTO t3
USING s3_res ON (FALSE)
WHEN MATCHED AND s3_res.val = 0 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET val = t3.val + 1
WHEN NOT MATCHED THEN
INSERT (id, val, src) VALUES (s3_res.id, s3_res.val, s3_res.src);
SELECT * FROM t3 ORDER BY 1,3;
id | val | src
---------------------------------------------------------------------
1 | 0 | target
2 | 0 | source1
2 | 11 | target
3 | 0 | source1
3 | 0 | source2
4 | 0 | source1
5 | 0 | target
(7 rows)
SELECT * INTO local_t3 FROM t3 ORDER BY 1;
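-- Should be equal; the join is on id only, so the duplicated ids 2 and 3 each
-- produce 2 x 2 row combinations (11 rows in total for the 7-row tables)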
SELECT c.*, p.*
FROM t3 c, pg_t3 p
WHERE c.id = p.id
ORDER BY 1,2;
id | val | src | id | val | src
---------------------------------------------------------------------
1 | 0 | target | 1 | 0 | target
2 | 0 | source1 | 2 | 0 | source1
2 | 0 | source1 | 2 | 11 | target
2 | 11 | target | 2 | 0 | source1
2 | 11 | target | 2 | 11 | target
3 | 0 | source2 | 3 | 0 | source2
3 | 0 | source2 | 3 | 0 | source1
3 | 0 | source1 | 3 | 0 | source2
3 | 0 | source1 | 3 | 0 | source1
4 | 0 | source1 | 4 | 0 | source1
5 | 0 | target | 5 | 0 | target
(11 rows)
-- Should be empty
SELECT *
FROM pg_t3 p
WHERE NOT EXISTS (SELECT 1 FROM local_t3 c WHERE c.id = p.id AND c.val = p.val AND c.src = p.src);
id | val | src
---------------------------------------------------------------------
(0 rows)
--
-- Test table functions
--
CREATE TABLE tf_target(first_name varchar, last_name varchar, eid text);
-- MERGE whose source is a table function: the doc CTE holds an XML document
-- with two <person> elements, xmltable() decodes it into
-- (first_name, last_name, eid) rows, and each row whose first_name is not
-- already present in tf_target is inserted. tf_target was just created, so
-- both rows are inserted.
WITH doc as (
SELECT '<people>
<person>
<first_name>foo</first_name>
<last_name>bar</last_name>
<eid>100</eid>
</person>
<person>
<first_name>moo</first_name>
<last_name>bar</last_name>
<eid>200</eid>
</person>
</people>'::xml as source_xml
)
MERGE INTO tf_target
USING (
SELECT decoded.*
FROM doc, xmltable(
'//people/person'
passing source_xml
columns
first_name text,
last_name text,
eid text) as decoded) as tf_source
ON tf_source.first_name = tf_target.first_name
WHEN NOT MATCHED THEN
INSERT VALUES (tf_source.first_name, tf_source.last_name, tf_source.eid);
-- Our gold result to compare against
SELECT * INTO tf_result FROM tf_target ORDER BY 1 ;
TRUNCATE tf_target;
SELECT citus_add_local_table_to_metadata('tf_target');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
WITH doc as (
SELECT '<people>
<person>
<first_name>foo</first_name>
<last_name>bar</last_name>
<eid>100</eid>
</person>
<person>
<first_name>moo</first_name>
<last_name>bar</last_name>
<eid>200</eid>
</person>
</people>'::xml as source_xml
)
MERGE INTO tf_target
USING (
SELECT decoded.*
FROM doc, xmltable(
'//people/person'
passing source_xml
columns
first_name text,
last_name text,
eid text) as decoded) as tf_source
ON tf_source.first_name = tf_target.first_name
WHEN NOT MATCHED THEN
INSERT VALUES (tf_source.first_name, tf_source.last_name, tf_source.eid);
SELECT * INTO tf_local FROM tf_target ORDER BY 1 ;
-- Should be equal
SELECT c.*, p.*
FROM tf_local c, tf_result p
WHERE c.eid = p.eid
ORDER BY 1,2;
first_name | last_name | eid | first_name | last_name | eid
---------------------------------------------------------------------
foo | bar | 100 | foo | bar | 100
moo | bar | 200 | moo | bar | 200
(2 rows)
-- Must return zero rows
SELECT *
FROM tf_result p
WHERE NOT EXISTS (SELECT FROM tf_local c WHERE c.eid = p.eid);
first_name | last_name | eid
---------------------------------------------------------------------
(0 rows)
--
-- Test VALUES RTE type
--
CREATE TABLE vl_target(id int, value varchar);
INSERT INTO vl_target VALUES(100, 'target');
MERGE INTO vl_target
USING (SELECT *
FROM (VALUES(100, 'source1'), (200, 'source2')) AS vl (ID, value)) as vl_source
ON vl_source.ID = vl_target.ID
WHEN MATCHED THEN
UPDATE SET value = vl_source.value, id = vl_target.id + 1
WHEN NOT MATCHED THEN
INSERT VALUES(vl_source.ID, vl_source.value);
-- Our gold result to compare against
SELECT * INTO vl_result FROM vl_target ORDER BY 1 ;
-- Clean the slate
TRUNCATE vl_target;
INSERT INTO vl_target VALUES(100, 'target');
SELECT citus_add_local_table_to_metadata('vl_target');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
SET client_min_messages TO DEBUG1;
MERGE INTO vl_target
USING (SELECT *
FROM (VALUES(100, 'source1'), (200, 'source2')) AS vl (ID, value)) as vl_source
ON vl_source.ID = vl_target.ID
WHEN MATCHED THEN
UPDATE SET value = vl_source.value, id = vl_target.id + 1
WHEN NOT MATCHED THEN
INSERT VALUES(vl_source.ID, vl_source.value);
DEBUG: Creating MERGE router plan
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.vl_target_xxxxxxx vl_target USING (SELECT vl.id, vl.value FROM (VALUES (100,'source1'::text), (200,'source2'::text)) vl(id, value)) vl_source ON (vl_source.id OPERATOR(pg_catalog.=) vl_target.id) WHEN MATCHED THEN UPDATE SET id = (vl_target.id OPERATOR(pg_catalog.+) 1), value = vl_source.value WHEN NOT MATCHED THEN INSERT (id, value) VALUES (vl_source.id, vl_source.value)>
RESET client_min_messages;
SELECT * INTO vl_local FROM vl_target ORDER BY 1 ;
-- Should be equal
SELECT c.*, p.*
FROM vl_local c, vl_result p
WHERE c.id = p.id
ORDER BY 1,2;
id | value | id | value
---------------------------------------------------------------------
101 | source1 | 101 | source1
200 | source2 | 200 | source2
(2 rows)
-- Must return zero rows
SELECT *
FROM vl_result p
WHERE NOT EXISTS (SELECT FROM vl_local c WHERE c.id = p.id);
id | value
---------------------------------------------------------------------
(0 rows)
--
-- Test function scan
--
-- Identity function on an integer, declared IMMUTABLE. It is used below as
-- the function-scan source inside the USING clause of the MERGE statements.
CREATE FUNCTION f_immutable(i integer) RETURNS INTEGER AS
$$ BEGIN RETURN i; END; $$ LANGUAGE PLPGSQL IMMUTABLE;
CREATE TABLE rs_target(id int);
MERGE INTO rs_target
USING (SELECT * FROM f_immutable(99) id WHERE id in (SELECT 99)) AS rs_source
ON rs_source.id = rs_target.id
WHEN MATCHED THEN
DO NOTHING
WHEN NOT MATCHED THEN
INSERT VALUES(rs_source.id);
-- Our gold result to compare against
SELECT * INTO rs_result FROM rs_target ORDER BY 1 ;
-- Clean the slate
TRUNCATE rs_target;
SELECT citus_add_local_table_to_metadata('rs_target');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
SET client_min_messages TO DEBUG1;
MERGE INTO rs_target
USING (SELECT * FROM f_immutable(99) id WHERE id in (SELECT 99)) AS rs_source
ON rs_source.id = rs_target.id
WHEN MATCHED THEN
DO NOTHING
WHEN NOT MATCHED THEN
INSERT VALUES(rs_source.id);
DEBUG: Creating MERGE router plan
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.rs_target_xxxxxxx rs_target USING (SELECT id.id FROM merge_schema.f_immutable(99) id(id) WHERE (id.id OPERATOR(pg_catalog.=) ANY (SELECT 99))) rs_source ON (rs_source.id OPERATOR(pg_catalog.=) rs_target.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED THEN INSERT (id) VALUES (rs_source.id)>
RESET client_min_messages;
SELECT * INTO rs_local FROM rs_target ORDER BY 1 ;
-- Should be equal
SELECT c.*, p.*
FROM rs_local c, rs_result p
WHERE c.id = p.id
ORDER BY 1,2;
id | id
---------------------------------------------------------------------
99 | 99
(1 row)
-- Must return zero rows
SELECT *
FROM rs_result p
WHERE NOT EXISTS (SELECT FROM rs_local c WHERE c.id = p.id);
id
---------------------------------------------------------------------
(0 rows)
--
-- Test Materialized view
--
CREATE TABLE mv_target(id int, val varchar);
CREATE TABLE mv_source_table(id int, val varchar);
INSERT INTO mv_source_table VALUES(1, 'src1');
INSERT INTO mv_source_table VALUES(2, 'src2');
CREATE MATERIALIZED VIEW mv_source AS
SELECT * FROM mv_source_table;
MERGE INTO mv_target
USING mv_source
ON mv_source.id = mv_target.id
WHEN MATCHED THEN
DO NOTHING
WHEN NOT MATCHED THEN
INSERT VALUES(mv_source.id, mv_source.val);
-- Our gold result to compare against
SELECT * INTO mv_result FROM mv_target ORDER BY 1 ;
-- Clean the slate
TRUNCATE mv_target;
SELECT citus_add_local_table_to_metadata('mv_target');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
SELECT citus_add_local_table_to_metadata('mv_source_table');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
DROP MATERIALIZED VIEW mv_source;
CREATE MATERIALIZED VIEW mv_source AS
SELECT * FROM mv_source_table;
MERGE INTO mv_target
USING mv_source
ON mv_source.id = mv_target.id
WHEN MATCHED THEN
DO NOTHING
WHEN NOT MATCHED THEN
INSERT VALUES(mv_source.id, mv_source.val);
SELECT * INTO mv_local FROM mv_target ORDER BY 1 ;
-- Should be equal
SELECT c.*, p.*
FROM mv_local c, mv_result p
WHERE c.id = p.id
ORDER BY 1,2;
id | val | id | val
---------------------------------------------------------------------
1 | src1 | 1 | src1
2 | src2 | 2 | src2
(2 rows)
-- Must return zero rows
SELECT *
FROM mv_result p
WHERE NOT EXISTS (SELECT FROM mv_local c WHERE c.id = p.id);
id | val
---------------------------------------------------------------------
(0 rows)
--
-- Distributed table as source (indirect)
--
CREATE TABLE dist_table(id int, source varchar);
INSERT INTO dist_table VALUES(2, 'dist_table');
INSERT INTO dist_table VALUES(3, 'dist_table');
INSERT INTO dist_table VALUES(100, 'dist_table');
-- Returns every (id, source) row of dist_table. Because the return type is
-- SETOF RECORD, a caller has to supply a column definition list, e.g.
-- f_dist() f(id integer, source varchar).
-- NOTE(review): in this section the function is only referenced from the
-- commented-out USING lines of the two MERGE statements that follow; the
-- MERGE statements read dist_table directly instead.
CREATE FUNCTION f_dist() returns SETOF RECORD AS
$$
BEGIN
RETURN QUERY SELECT id, source FROM dist_table;
END;
$$ language plpgsql volatile;
CREATE TABLE fn_target(id int, data varchar);
MERGE INTO fn_target
--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source
USING (SELECT id, source FROM dist_table) as fn_source
ON fn_source.id = fn_target.id
WHEN MATCHED THEN
DO NOTHING
WHEN NOT MATCHED THEN
INSERT VALUES(fn_source.id, fn_source.source);
-- Our gold result to compare against
SELECT * INTO fn_result FROM fn_target ORDER BY 1 ;
-- Clean the slate
TRUNCATE TABLE fn_target;
SELECT citus_add_local_table_to_metadata('fn_target');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
SELECT citus_add_local_table_to_metadata('dist_table');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
SET client_min_messages TO DEBUG1;
MERGE INTO fn_target
--USING (SELECT * FROM f_dist() f(id integer, source varchar)) as fn_source
USING (SELECT id, source FROM dist_table) as fn_source
ON fn_source.id = fn_target.id
WHEN MATCHED THEN
DO NOTHING
WHEN NOT MATCHED THEN
INSERT VALUES(fn_source.id, fn_source.source);
DEBUG: Creating MERGE router plan
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.fn_target_xxxxxxx fn_target USING (SELECT dist_table.id, dist_table.source FROM merge_schema.dist_table_xxxxxxx dist_table) fn_source ON (fn_source.id OPERATOR(pg_catalog.=) fn_target.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED THEN INSERT (id, data) VALUES (fn_source.id, fn_source.source)>
RESET client_min_messages;
SELECT * INTO fn_local FROM fn_target ORDER BY 1 ;
-- Should be equal
SELECT c.*, p.*
FROM fn_local c, fn_result p
WHERE c.id = p.id
ORDER BY 1,2;
id | data | id | data
---------------------------------------------------------------------
2 | dist_table | 2 | dist_table
3 | dist_table | 3 | dist_table
100 | dist_table | 100 | dist_table
(3 rows)
-- Must return zero rows
SELECT *
FROM fn_result p
WHERE NOT EXISTS (SELECT FROM fn_local c WHERE c.id = p.id);
id | data
---------------------------------------------------------------------
(0 rows)
--
-- Foreign tables
--
CREATE TABLE ft_target (id integer NOT NULL, user_val varchar);
CREATE TABLE ft_source (id integer NOT NULL, user_val varchar);
SELECT citus_add_local_table_to_metadata('ft_source');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
INSERT INTO ft_target VALUES (1, 'target');
INSERT INTO ft_target VALUES (2, 'target');
INSERT INTO ft_source VALUES (2, 'source');
INSERT INTO ft_source VALUES (3, 'source');
SELECT * FROM ft_target;
id | user_val
---------------------------------------------------------------------
1 | target
2 | target
(2 rows)
CREATE EXTENSION postgres_fdw;
CREATE SERVER foreign_server
FOREIGN DATA WRAPPER postgres_fdw
OPTIONS (host 'localhost', port :'master_port', dbname 'regression');
CREATE USER MAPPING FOR CURRENT_USER
SERVER foreign_server
OPTIONS (user 'postgres');
CREATE FOREIGN TABLE foreign_table (
id integer NOT NULL,
user_val text
)
SERVER foreign_server
OPTIONS (schema_name 'merge_schema', table_name 'ft_source');
SELECT citus_add_local_table_to_metadata('foreign_table');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
-- Foreign table as source
SET client_min_messages TO DEBUG1;
MERGE INTO ft_target
USING foreign_table ON (foreign_table.id = ft_target.id)
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT (id, user_val) VALUES (foreign_table.id, foreign_table.user_val);
DEBUG: Creating MERGE router plan
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.ft_target USING merge_schema.foreign_table_xxxxxxx foreign_table ON (foreign_table.id OPERATOR(pg_catalog.=) ft_target.id) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, user_val) VALUES (foreign_table.id, foreign_table.user_val)>
RESET client_min_messages;
SELECT * FROM ft_target;
id | user_val
---------------------------------------------------------------------
1 | target
3 | source
(2 rows)
--
-- complex joins on the source side
--
-- the source relation is an unaliased join of two relations
CREATE TABLE target_cj(tid int, src text, val int);
CREATE TABLE source_cj1(sid1 int, src1 text, val1 int);
CREATE TABLE source_cj2(sid2 int, src2 text, val2 int);
INSERT INTO target_cj VALUES (1, 'target', 0);
INSERT INTO target_cj VALUES (2, 'target', 0);
INSERT INTO target_cj VALUES (2, 'target', 0);
INSERT INTO target_cj VALUES (3, 'target', 0);
INSERT INTO source_cj1 VALUES (2, 'source-1', 10);
INSERT INTO source_cj2 VALUES (2, 'source-2', 20);
BEGIN;
MERGE INTO target_cj t
USING source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2
ON t.tid = sid1 AND t.tid = 2
WHEN MATCHED THEN
UPDATE SET src = src2
WHEN NOT MATCHED THEN
DO NOTHING;
-- Gold result to compare against
SELECT * FROM target_cj ORDER BY 1;
tid | src | val
---------------------------------------------------------------------
1 | target | 0
2 | source-2 | 0
2 | source-2 | 0
3 | target | 0
(4 rows)
ROLLBACK;
BEGIN;
-- try accessing columns from either side of the source join
MERGE INTO target_cj t
USING source_cj1 s2
INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10
ON t.tid = sid1 AND t.tid = 2
WHEN MATCHED THEN
UPDATE SET tid = sid2, src = src1, val = val2
WHEN NOT MATCHED THEN
DO NOTHING;
-- Gold result to compare against
SELECT * FROM target_cj ORDER BY 1;
tid | src | val
---------------------------------------------------------------------
1 | target | 0
2 | source-1 | 20
2 | source-1 | 20
3 | target | 0
(4 rows)
ROLLBACK;
-- Test the same scenarios with distributed tables
SELECT create_distributed_table('target_cj', 'tid');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.target_cj$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('source_cj1', 'sid1', colocate_with => 'target_cj');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_cj1$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('source_cj2', 'sid2', colocate_with => 'target_cj');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_cj2$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
BEGIN;
MERGE INTO target_cj t
USING (SELECT * FROM source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = sid2) s
ON t.tid = sid1 AND t.tid = 2
WHEN MATCHED THEN
UPDATE SET src = src2
WHEN NOT MATCHED THEN
DO NOTHING;
SELECT * FROM target_cj ORDER BY 1;
tid | src | val
---------------------------------------------------------------------
1 | target | 0
2 | source-2 | 0
2 | source-2 | 0
3 | target | 0
(4 rows)
ROLLBACK;
BEGIN;
-- try accessing columns from either side of the source join
MERGE INTO target_cj t
USING (SELECT * FROM source_cj1 s2
INNER JOIN source_cj2 s1 ON sid1 = sid2 AND val1 = 10) s
ON t.tid = sid1 AND t.tid = 2
WHEN MATCHED THEN
UPDATE SET src = src1, val = val2
WHEN NOT MATCHED THEN
DO NOTHING;
SELECT * FROM target_cj ORDER BY 1;
tid | src | val
---------------------------------------------------------------------
1 | target | 0
2 | source-1 | 20
2 | source-1 | 20
3 | target | 0
(4 rows)
ROLLBACK;
-- sub-query as a source
BEGIN;
MERGE INTO target_cj t
USING (SELECT * FROM source_cj1 WHERE sid1 = 2) sub
ON t.tid = sub.sid1 AND t.tid = 2
WHEN MATCHED THEN
UPDATE SET src = sub.src1, val = val1
WHEN NOT MATCHED THEN
DO NOTHING;
SELECT * FROM target_cj ORDER BY 1;
tid | src | val
---------------------------------------------------------------------
1 | target | 0
2 | source-1 | 10
2 | source-1 | 10
3 | target | 0
(4 rows)
ROLLBACK;
-- Test self-join
BEGIN;
SELECT * FROM target_cj ORDER BY 1;
tid | src | val
---------------------------------------------------------------------
1 | target | 0
2 | target | 0
2 | target | 0
3 | target | 0
(4 rows)
set citus.log_remote_commands to true;
MERGE INTO target_cj t1
USING (SELECT * FROM target_cj) sub
ON t1.tid = sub.tid AND t1.tid = 3
WHEN MATCHED THEN
UPDATE SET src = sub.src, val = sub.val + 100
WHEN NOT MATCHED THEN
DO NOTHING;
NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx t1 USING (SELECT target_cj.tid, target_cj.src, target_cj.val FROM merge_schema.target_cj_xxxxxxx target_cj) sub ON ((t1.tid OPERATOR(pg_catalog.=) sub.tid) AND (t1.tid OPERATOR(pg_catalog.=) 3)) WHEN MATCHED THEN UPDATE SET src = sub.src, val = (sub.val OPERATOR(pg_catalog.+) 100) WHEN NOT MATCHED THEN DO NOTHING
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
set citus.log_remote_commands to false;
SELECT * FROM target_cj ORDER BY 1;
tid | src | val
---------------------------------------------------------------------
1 | target | 0
2 | target | 0
2 | target | 0
3 | target | 100
(4 rows)
ROLLBACK;
-- Test PREPARE
-- merge_prepare takes a single int parameter ($1) that is compared against
-- target.tid, the distribution column of target_cj. The statement is executed
-- several times in a row below.
-- NOTE(review): the repeated executions are presumably there so that the
-- cached (generic) plan path is exercised as well; confirm.
PREPARE merge_prepare(int) AS
MERGE INTO target_cj target
USING (SELECT * FROM source_cj1) sub
ON target.tid = sub.sid1 AND target.tid = $1
WHEN MATCHED THEN
UPDATE SET val = sub.val1
WHEN NOT MATCHED THEN
DO NOTHING;
SELECT * FROM target_cj ORDER BY 1;
tid | src | val
---------------------------------------------------------------------
1 | target | 0
2 | target | 0
2 | target | 0
3 | target | 0
(4 rows)
BEGIN;
EXECUTE merge_prepare(2);
EXECUTE merge_prepare(2);
EXECUTE merge_prepare(2);
EXECUTE merge_prepare(2);
EXECUTE merge_prepare(2);
SELECT * FROM target_cj ORDER BY 1;
tid | src | val
---------------------------------------------------------------------
1 | target | 0
2 | target | 10
2 | target | 10
3 | target | 0
(4 rows)
ROLLBACK;
BEGIN;
SET citus.log_remote_commands to true;
SET client_min_messages TO DEBUG1;
EXECUTE merge_prepare(2);
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_cj_xxxxxxx target USING (SELECT source_cj1.sid1, source_cj1.src1, source_cj1.val1 FROM merge_schema.source_cj1_xxxxxxx source_cj1) sub ON ((target.tid OPERATOR(pg_catalog.=) sub.sid1) AND (target.tid OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = sub.val1 WHEN NOT MATCHED THEN DO NOTHING>
DEBUG: Creating MERGE router plan
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx target USING (SELECT source_cj1.sid1, source_cj1.src1, source_cj1.val1 FROM merge_schema.source_cj1_xxxxxxx source_cj1) sub ON ((target.tid OPERATOR(pg_catalog.=) sub.sid1) AND (target.tid OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = sub.val1 WHEN NOT MATCHED THEN DO NOTHING
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
RESET client_min_messages;
EXECUTE merge_prepare(2);
NOTICE: issuing MERGE INTO merge_schema.target_cj_xxxxxxx target USING (SELECT source_cj1.sid1, source_cj1.src1, source_cj1.val1 FROM merge_schema.source_cj1_xxxxxxx source_cj1) sub ON ((target.tid OPERATOR(pg_catalog.=) sub.sid1) AND (target.tid OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = sub.val1 WHEN NOT MATCHED THEN DO NOTHING
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
SET citus.log_remote_commands to false;
SELECT * FROM target_cj ORDER BY 1;
tid | src | val
---------------------------------------------------------------------
1 | target | 0
2 | target | 10
2 | target | 10
3 | target | 0
(4 rows)
ROLLBACK;
-- Test distributed tables; they must be co-located and joined on the distribution column.
--
-- We create two sets of source and target tables: one set is plain Postgres and the
-- other is Citus distributed. We run the _exact_ same MERGE SQL on both sets and compare
-- the final contents of the Postgres and Citus target tables; the results should match.
-- This is repeated for various MERGE SQL combinations.
--
CREATE TABLE pg_target(id int, val varchar);
CREATE TABLE pg_source(id int, val varchar);
CREATE TABLE citus_target(id int, val varchar);
CREATE TABLE citus_source(id int, val varchar);
-- Half of the source rows do not match
INSERT INTO pg_target SELECT i, 'target' FROM generate_series(250, 500) i;
INSERT INTO pg_source SELECT i, 'source' FROM generate_series(1, 500) i;
INSERT INTO citus_target SELECT i, 'target' FROM generate_series(250, 500) i;
INSERT INTO citus_source SELECT i, 'source' FROM generate_series(1, 500) i;
SELECT create_distributed_table('citus_target', 'id');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.citus_target$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('citus_source', 'id', colocate_with => 'citus_target');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.citus_source$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
--
-- This routine compares the target tables of Postgres and Citus and
-- returns true if they match, false if the results do not match.
--
-- How it works: pg_target and citus_target are full-outer-joined on
-- (id, val). A row that has no partner in the other table comes out with a
-- NULL id for that other side and is counted; a count of zero means the
-- two tables match.
--
-- NOTE(review): matching is purely by value, so a row that is duplicated a
-- different number of times in the two tables is not detected, and a row
-- with a NULL id or val never pairs up and is always counted as a mismatch.
-- Neither case occurs with the data loaded by this test; confirm before
-- reusing the routine elsewhere.
--
CREATE OR REPLACE FUNCTION compare_tables() RETURNS BOOLEAN AS $$
DECLARE ret BOOL;
BEGIN
SELECT count(1) = 0 INTO ret
FROM pg_target
FULL OUTER JOIN citus_target
USING (id, val)
WHERE pg_target.id IS NULL
OR citus_target.id IS NULL;
RETURN ret;
END
$$ LANGUAGE PLPGSQL;
-- Make sure we start with identical data in Postgres and Citus
SELECT compare_tables();
compare_tables
---------------------------------------------------------------------
t
(1 row)
-- Run the MERGE on both Postgres and Citus, and compare the final target tables
BEGIN;
SET citus.log_remote_commands to true;
MERGE INTO pg_target t
USING pg_source s
ON t.id = s.id
WHEN MATCHED AND t.id > 400 THEN
UPDATE SET val = t.val || 'Updated by Merge'
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
MERGE INTO citus_target t
USING citus_source s
ON t.id = s.id
WHEN MATCHED AND t.id > 400 THEN
UPDATE SET val = t.val || 'Updated by Merge'
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by Merge'::text) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by Merge'::text) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by Merge'::text) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by Merge'::text) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
SET citus.log_remote_commands to false;
SELECT compare_tables();
compare_tables
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
--
-- ON clause filter on source
--
BEGIN;
SET citus.log_remote_commands to true;
MERGE INTO pg_target t
USING pg_source s
ON t.id = s.id AND s.id < 100
WHEN MATCHED AND t.id > 400 THEN
UPDATE SET val = t.val || 'Updated by Merge'
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
MERGE INTO citus_target t
USING citus_source s
ON t.id = s.id AND s.id < 100
WHEN MATCHED AND t.id > 400 THEN
UPDATE SET val = t.val || 'Updated by Merge'
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by Merge'::text) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by Merge'::text) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by Merge'::text) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by Merge'::text) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
SET citus.log_remote_commands to false;
SELECT compare_tables();
compare_tables
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
--
-- ON clause filter on target
--
BEGIN;
SET citus.log_remote_commands to true;
MERGE INTO pg_target t
USING pg_source s
ON t.id = s.id AND t.id < 100
WHEN MATCHED AND t.id > 400 THEN
UPDATE SET val = t.val || 'Updated by Merge'
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
MERGE INTO citus_target t
USING citus_source s
ON t.id = s.id AND t.id < 100
WHEN MATCHED AND t.id > 400 THEN
UPDATE SET val = t.val || 'Updated by Merge'
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by Merge'::text) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by Merge'::text) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by Merge'::text) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (t.id OPERATOR(pg_catalog.<) 100)) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 400) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by Merge'::text) WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
SET citus.log_remote_commands to false;
SELECT compare_tables();
compare_tables
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
--
-- NOT MATCHED clause filter on source
--
BEGIN;
SET citus.log_remote_commands to true;
MERGE INTO pg_target t
USING pg_source s
ON t.id = s.id
WHEN MATCHED THEN
DO NOTHING
WHEN NOT MATCHED AND s.id < 100 THEN
INSERT VALUES(s.id, s.val);
MERGE INTO citus_target t
USING citus_source s
ON t.id = s.id
WHEN MATCHED THEN
DO NOTHING
WHEN NOT MATCHED AND s.id < 100 THEN
INSERT VALUES(s.id, s.val);
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN MATCHED THEN DO NOTHING WHEN NOT MATCHED AND (s.id OPERATOR(pg_catalog.<) 100) THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
SET citus.log_remote_commands to false;
SELECT compare_tables();
compare_tables
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
--
-- Test constant filter in ON clause to check if shards are pruned
-- with restriction information
--
--
-- Though constant filter is present, this won't prune shards as
-- NOT MATCHED clause is present
--
BEGIN;
SET citus.log_remote_commands to true;
MERGE INTO pg_target t
USING pg_source s
ON t.id = s.id AND s.id = 250
WHEN MATCHED THEN
UPDATE SET val = t.val || 'Updated by Merge'
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
MERGE INTO citus_target t
USING citus_source s
ON t.id = s.id AND s.id = 250
WHEN MATCHED THEN
UPDATE SET val = t.val || 'Updated by Merge'
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by Merge'::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by Merge'::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by Merge'::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by Merge'::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s.id, s.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
SET citus.log_remote_commands to false;
SELECT compare_tables();
compare_tables
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
-- let's create source and target table
CREATE TABLE source_pushdowntest (id integer);
CREATE TABLE target_pushdowntest (id integer );
-- let's distribute both table on id field
SELECT create_distributed_table('source_pushdowntest', 'id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('target_pushdowntest', 'id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- we are running on a single-node setup; let's look up the colocation id of both tables.
-- both have the same colocation id, so they are colocated.
select colocationid,logicalrelid from pg_dist_partition where logicalrelid = 'source_pushdowntest'::regclass OR logicalrelid = 'target_pushdowntest'::regclass;
colocationid | logicalrelid
---------------------------------------------------------------------
13000 | source_pushdowntest
13000 | target_pushdowntest
(2 rows)
SET client_min_messages TO DEBUG1;
-- Test 1 : tables are colocated AND query is multisharded AND Join On distributed column : should push down to workers.
EXPLAIN (costs off, timing off, summary off)
MERGE INTO target_pushdowntest t
USING source_pushdowntest s
ON t.id = s.id
WHEN NOT MATCHED THEN
INSERT (id)
VALUES (s.id);
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_pushdowntest_xxxxxxx t USING merge_schema.source_pushdowntest_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN NOT MATCHED THEN INSERT (id) VALUES (s.id)>
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_pushdowntest_xxxxxxx t USING merge_schema.source_pushdowntest_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN NOT MATCHED THEN INSERT (id) VALUES (s.id)>
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_pushdowntest_xxxxxxx t USING merge_schema.source_pushdowntest_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN NOT MATCHED THEN INSERT (id) VALUES (s.id)>
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_pushdowntest_xxxxxxx t USING merge_schema.source_pushdowntest_xxxxxxx s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN NOT MATCHED THEN INSERT (id) VALUES (s.id)>
DEBUG: Creating MERGE router plan
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Merge on target_pushdowntest_4000068 t
-> Merge Left Join
Merge Cond: (s.id = t.id)
-> Sort
Sort Key: s.id
-> Seq Scan on source_pushdowntest_4000064 s
-> Sort
Sort Key: t.id
-> Seq Scan on target_pushdowntest_4000068 t
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Merge on target_pushdowntest_4000069 t
-> Merge Left Join
Merge Cond: (s.id = t.id)
-> Sort
Sort Key: s.id
-> Seq Scan on source_pushdowntest_4000065 s
-> Sort
Sort Key: t.id
-> Seq Scan on target_pushdowntest_4000069 t
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Merge on target_pushdowntest_4000070 t
-> Merge Left Join
Merge Cond: (s.id = t.id)
-> Sort
Sort Key: s.id
-> Seq Scan on source_pushdowntest_4000066 s
-> Sort
Sort Key: t.id
-> Seq Scan on target_pushdowntest_4000070 t
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Merge on target_pushdowntest_4000071 t
-> Merge Left Join
Merge Cond: (s.id = t.id)
-> Sort
Sort Key: s.id
-> Seq Scan on source_pushdowntest_4000067 s
-> Sort
Sort Key: t.id
-> Seq Scan on target_pushdowntest_4000071 t
(47 rows)
-- Test 2 : tables are colocated AND source query is not multisharded : should push down to worker.
EXPLAIN (costs off, timing off, summary off)
MERGE INTO target_pushdowntest t
USING (SELECT * from source_pushdowntest where id = 1) s
on t.id = s.id
WHEN NOT MATCHED THEN
INSERT (id)
VALUES (s.id);
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_pushdowntest_xxxxxxx t USING (SELECT source_pushdowntest.id FROM merge_schema.source_pushdowntest_xxxxxxx source_pushdowntest WHERE (source_pushdowntest.id OPERATOR(pg_catalog.=) 1)) s ON (t.id OPERATOR(pg_catalog.=) s.id) WHEN NOT MATCHED THEN INSERT (id) VALUES (s.id)>
DEBUG: Creating MERGE router plan
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Merge on target_pushdowntest_4000068 t
-> Nested Loop Left Join
-> Seq Scan on source_pushdowntest_4000064 source_pushdowntest
Filter: (id = 1)
-> Materialize
-> Seq Scan on target_pushdowntest_4000068 t
Filter: (id = 1)
(12 rows)
-- Test 3 : tables are colocated AND source query is single sharded, but the insert does not use the source distribution column : should not push down.
INSERT INTO source_pushdowntest (id) VALUES (3);
EXPLAIN (costs off, timing off, summary off)
MERGE INTO target_pushdowntest t
USING (SELECT 1 as somekey, id from source_pushdowntest where id = 1) s
on t.id = s.somekey
WHEN NOT MATCHED THEN
INSERT (id)
VALUES (s.somekey);
DEBUG: MERGE INSERT must use the source table distribution column value for push down to workers. Otherwise, repartitioning will be applied
DEBUG: MERGE INSERT must use the source table distribution column value for push down to workers. Otherwise, repartitioning will be applied
DEBUG: Creating MERGE repartition plan
DEBUG: Using column - index:0 from the source list to redistribute
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus MERGE INTO ...)
MERGE INTO target_pushdowntest method: pull to coordinator
-> Custom Scan (Citus Adaptive)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on source_pushdowntest_4000064 source_pushdowntest
Filter: (id = 1)
(9 rows)
-- let's verify what happens when some other source column supplies the value of the distribution column in target.
-- the row should be inserted into the correct shard of target.
CREATE TABLE source_withdata (id integer, some_number integer);
CREATE TABLE target_table (id integer, name text);
SELECT create_distributed_table('source_withdata', 'id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('target_table', 'id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO source_withdata (id, some_number) VALUES (1, 3);
-- we will use the some_number column from source_withdata to insert into the distribution column of target.
-- the value of some_number is 3; let's verify which shard it should go to.
select worker_hash(3);
worker_hash
---------------------------------------------------------------------
-28094569
(1 row)
-- it should go to the second shard of target, as target has 4 shards and the hash "-28094569" falls in the range of the second shard.
MERGE INTO target_table t
USING (SELECT id, some_number from source_withdata where id = 1) s
on t.id = s.some_number
WHEN NOT MATCHED THEN
INSERT (id, name)
VALUES (s.some_number, 'parag');
DEBUG: Sub-query is not pushable, try repartitioning
DEBUG: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Creating MERGE repartition plan
DEBUG: Using column - index:1 from the source list to redistribute
DEBUG: Collect source query results on coordinator
DEBUG: Create a MERGE task list that needs to be routed
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_table_xxxxxxx t USING (SELECT intermediate_result.id, intermediate_result.some_number FROM read_intermediate_result('merge_into_XXX_4000076'::text, 'binary'::citus_copy_format) intermediate_result(id integer, some_number integer)) s ON (t.id OPERATOR(pg_catalog.=) s.some_number) WHEN NOT MATCHED THEN INSERT (id, name) VALUES (s.some_number, 'parag'::text)>
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_table_xxxxxxx t USING (SELECT intermediate_result.id, intermediate_result.some_number FROM read_intermediate_result('merge_into_XXX_4000077'::text, 'binary'::citus_copy_format) intermediate_result(id integer, some_number integer)) s ON (t.id OPERATOR(pg_catalog.=) s.some_number) WHEN NOT MATCHED THEN INSERT (id, name) VALUES (s.some_number, 'parag'::text)>
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_table_xxxxxxx t USING (SELECT intermediate_result.id, intermediate_result.some_number FROM read_intermediate_result('merge_into_XXX_4000078'::text, 'binary'::citus_copy_format) intermediate_result(id integer, some_number integer)) s ON (t.id OPERATOR(pg_catalog.=) s.some_number) WHEN NOT MATCHED THEN INSERT (id, name) VALUES (s.some_number, 'parag'::text)>
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_table_xxxxxxx t USING (SELECT intermediate_result.id, intermediate_result.some_number FROM read_intermediate_result('merge_into_XXX_4000079'::text, 'binary'::citus_copy_format) intermediate_result(id integer, some_number integer)) s ON (t.id OPERATOR(pg_catalog.=) s.some_number) WHEN NOT MATCHED THEN INSERT (id, name) VALUES (s.some_number, 'parag'::text)>
DEBUG: Execute MERGE task list
-- let's verify that the data was inserted into the second shard of target.
EXPLAIN (analyze on, costs off, timing off, summary off) SELECT * FROM target_table;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus Adaptive) (actual rows=1 loops=1)
Task Count: 4
Tuple data received from nodes: 9 bytes
Tasks Shown: All
-> Task
Tuple data received from node: 0 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on target_table_4000076 target_table (actual rows=0 loops=1)
-> Task
Tuple data received from node: 9 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on target_table_4000077 target_table (actual rows=1 loops=1)
-> Task
Tuple data received from node: 0 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on target_table_4000078 target_table (actual rows=0 loops=1)
-> Task
Tuple data received from node: 0 bytes
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on target_table_4000079 target_table (actual rows=0 loops=1)
(20 rows)
-- let's verify target data too.
SELECT * FROM target_table;
id | name
---------------------------------------------------------------------
3 | parag
(1 row)
-- test UPDATE : when source is single sharded and tables are colocated
MERGE INTO target_table t
USING (SELECT id, some_number from source_withdata where id = 1) s
on t.id = s.some_number
WHEN MATCHED THEN
UPDATE SET name = 'parag jain';
DEBUG: Sub-query is not pushable, try repartitioning
DEBUG: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Creating MERGE repartition plan
DEBUG: Using column - index:1 from the source list to redistribute
DEBUG: Collect source query results on coordinator
DEBUG: Create a MERGE task list that needs to be routed
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_table_xxxxxxx t USING (SELECT intermediate_result.id, intermediate_result.some_number FROM read_intermediate_result('merge_into_XXX_4000076'::text, 'binary'::citus_copy_format) intermediate_result(id integer, some_number integer)) s ON (t.id OPERATOR(pg_catalog.=) s.some_number) WHEN MATCHED THEN UPDATE SET name = 'parag jain'::text>
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_table_xxxxxxx t USING (SELECT intermediate_result.id, intermediate_result.some_number FROM read_intermediate_result('merge_into_XXX_4000077'::text, 'binary'::citus_copy_format) intermediate_result(id integer, some_number integer)) s ON (t.id OPERATOR(pg_catalog.=) s.some_number) WHEN MATCHED THEN UPDATE SET name = 'parag jain'::text>
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_table_xxxxxxx t USING (SELECT intermediate_result.id, intermediate_result.some_number FROM read_intermediate_result('merge_into_XXX_4000078'::text, 'binary'::citus_copy_format) intermediate_result(id integer, some_number integer)) s ON (t.id OPERATOR(pg_catalog.=) s.some_number) WHEN MATCHED THEN UPDATE SET name = 'parag jain'::text>
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_table_xxxxxxx t USING (SELECT intermediate_result.id, intermediate_result.some_number FROM read_intermediate_result('merge_into_XXX_4000079'::text, 'binary'::citus_copy_format) intermediate_result(id integer, some_number integer)) s ON (t.id OPERATOR(pg_catalog.=) s.some_number) WHEN MATCHED THEN UPDATE SET name = 'parag jain'::text>
DEBUG: Execute MERGE task list
-- let's verify if data updated properly.
SELECT * FROM target_table;
id | name
---------------------------------------------------------------------
3 | parag jain
(1 row)
-- let's see what happens when we try to update the distribution key of the target table
MERGE INTO target_table t
USING (SELECT id, some_number from source_withdata where id = 1) s
on t.id = s.some_number
WHEN MATCHED THEN
UPDATE SET id = 1500;
ERROR: updating the distribution column is not allowed in MERGE actions
SELECT * FROM target_table;
id | name
---------------------------------------------------------------------
3 | parag jain
(1 row)
-- test DELETE : when source is single sharded and tables are colocated
MERGE INTO target_table t
USING (SELECT id, some_number from source_withdata where id = 1) s
on t.id = s.some_number
WHEN MATCHED THEN
DELETE;
DEBUG: Sub-query is not pushable, try repartitioning
DEBUG: MERGE command is only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Creating MERGE repartition plan
DEBUG: Using column - index:1 from the source list to redistribute
DEBUG: Collect source query results on coordinator
DEBUG: Create a MERGE task list that needs to be routed
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_table_xxxxxxx t USING (SELECT intermediate_result.id, intermediate_result.some_number FROM read_intermediate_result('merge_into_XXX_4000076'::text, 'binary'::citus_copy_format) intermediate_result(id integer, some_number integer)) s ON (t.id OPERATOR(pg_catalog.=) s.some_number) WHEN MATCHED THEN DELETE>
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_table_xxxxxxx t USING (SELECT intermediate_result.id, intermediate_result.some_number FROM read_intermediate_result('merge_into_XXX_4000077'::text, 'binary'::citus_copy_format) intermediate_result(id integer, some_number integer)) s ON (t.id OPERATOR(pg_catalog.=) s.some_number) WHEN MATCHED THEN DELETE>
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_table_xxxxxxx t USING (SELECT intermediate_result.id, intermediate_result.some_number FROM read_intermediate_result('merge_into_XXX_4000078'::text, 'binary'::citus_copy_format) intermediate_result(id integer, some_number integer)) s ON (t.id OPERATOR(pg_catalog.=) s.some_number) WHEN MATCHED THEN DELETE>
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_table_xxxxxxx t USING (SELECT intermediate_result.id, intermediate_result.some_number FROM read_intermediate_result('merge_into_XXX_4000079'::text, 'binary'::citus_copy_format) intermediate_result(id integer, some_number integer)) s ON (t.id OPERATOR(pg_catalog.=) s.some_number) WHEN MATCHED THEN DELETE>
DEBUG: Execute MERGE task list
-- let's verify if data deleted properly.
SELECT * FROM target_table;
id | name
---------------------------------------------------------------------
(0 rows)
-- MERGE INSERT that supplies a literal for the target's distribution column; expected to be rejected
DELETE FROM source_withdata;
DELETE FROM target_table;
-- NOTE(review): this inserts into "source", not "source_withdata" (the table the MERGE below reads, emptied just above) -- confirm this is intended
INSERT INTO source VALUES (1,1);
merge into target_table sda
using source_withdata sdn
on sda.id = sdn.id AND sda.id = 1
when not matched then
insert (id)
values (10000);
ERROR: MERGE INSERT is using unsupported expression type for distribution column
DETAIL: Inserting arbitrary values that don't correspond to the joined column values can lead to unpredictable outcomes where rows are incorrectly distributed among different shards
SELECT * FROM target_table WHERE id = 10000;
id | name
---------------------------------------------------------------------
(0 rows)
RESET client_min_messages;
-- This will prune shards using the restriction information, as the NOT MATCHED action is DO NOTHING
BEGIN;
SET citus.log_remote_commands to true;
MERGE INTO pg_target t
USING pg_source s
ON t.id = s.id AND s.id = 250
WHEN MATCHED THEN
UPDATE SET val = t.val || 'Updated by Merge'
WHEN NOT MATCHED THEN
DO NOTHING;
MERGE INTO citus_target t
USING citus_source s
ON t.id = s.id AND s.id = 250
WHEN MATCHED THEN
UPDATE SET val = t.val || 'Updated by Merge'
WHEN NOT MATCHED THEN
DO NOTHING;
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING merge_schema.citus_source_xxxxxxx s ON ((t.id OPERATOR(pg_catalog.=) s.id) AND (s.id OPERATOR(pg_catalog.=) 250)) WHEN MATCHED THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by Merge'::text) WHEN NOT MATCHED THEN DO NOTHING
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
SET citus.log_remote_commands to false;
SELECT compare_tables();
compare_tables
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
-- Test CTE with distributed tables
CREATE VIEW pg_source_view AS SELECT * FROM pg_source WHERE id < 400;
WARNING: "view pg_source_view" has dependency to "table pg_source" that is not in Citus' metadata
DETAIL: "view pg_source_view" will be created only locally
HINT: Distribute "table pg_source" first to distribute "view pg_source_view"
CREATE VIEW citus_source_view AS SELECT * FROM citus_source WHERE id < 400;
BEGIN;
SEt citus.log_remote_commands to true;
WITH cte AS (
SELECT * FROM pg_source_view
)
MERGE INTO pg_target t
USING cte
ON cte.id = t.id
WHEN MATCHED AND t.id > 350 THEN
UPDATE SET val = t.val || 'Updated by CTE'
WHEN NOT MATCHED THEN
INSERT VALUES (cte.id, cte.val)
WHEN MATCHED AND t.id < 350 THEN
DELETE;
WITH cte AS (
SELECT * FROM citus_source_view
)
MERGE INTO citus_target t
USING cte
ON cte.id = t.id
WHEN MATCHED AND t.id > 350 THEN
UPDATE SET val = t.val || 'Updated by CTE'
WHEN NOT MATCHED THEN
INSERT VALUES (cte.id, cte.val)
WHEN MATCHED AND t.id < 350 THEN
DELETE;
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by CTE'::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by CTE'::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by CTE'::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing WITH cte AS (SELECT citus_source_view.id, citus_source_view.val FROM (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source WHERE (citus_source.id OPERATOR(pg_catalog.<) 400)) citus_source_view) MERGE INTO merge_schema.citus_target_xxxxxxx t USING cte ON (cte.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by CTE'::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (cte.id, cte.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
SET citus.log_remote_commands to false;
SELECT compare_tables();
compare_tables
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
-- Test sub-query with distributed tables
BEGIN;
SEt citus.log_remote_commands to true;
MERGE INTO pg_target t
USING (SELECT * FROM pg_source) subq
ON subq.id = t.id
WHEN MATCHED AND t.id > 350 THEN
UPDATE SET val = t.val || 'Updated by subquery'
WHEN NOT MATCHED THEN
INSERT VALUES (subq.id, subq.val)
WHEN MATCHED AND t.id < 350 THEN
DELETE;
MERGE INTO citus_target t
USING (SELECT * FROM citus_source) subq
ON subq.id = t.id
WHEN MATCHED AND t.id > 350 THEN
UPDATE SET val = t.val || 'Updated by subquery'
WHEN NOT MATCHED THEN
INSERT VALUES (subq.id, subq.val)
WHEN MATCHED AND t.id < 350 THEN
DELETE;
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by subquery'::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by subquery'::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by subquery'::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx t USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) subq ON (subq.id OPERATOR(pg_catalog.=) t.id) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.>) 350) THEN UPDATE SET val = ((t.val)::text OPERATOR(pg_catalog.||) 'Updated by subquery'::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (subq.id, subq.val) WHEN MATCHED AND (t.id OPERATOR(pg_catalog.<) 350) THEN DELETE
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
SET citus.log_remote_commands to false;
SELECT compare_tables();
compare_tables
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
-- Test PREPARE
-- pg_prep: parameterized MERGE into pg_target, reading every row of pg_source.
-- $1 is an additional filter on the target id inside the ON clause, so a source
-- row is MATCHED only when its id equals a target id that is also equal to $1;
-- every other source row takes the NOT MATCHED (INSERT) branch.
PREPARE pg_prep(int) AS
MERGE INTO pg_target
USING (SELECT * FROM pg_source) sub
ON pg_target.id = sub.id AND pg_target.id = $1
WHEN MATCHED THEN
UPDATE SET val = 'Updated by prepare using ' || sub.val
WHEN NOT MATCHED THEN
INSERT VALUES (sub.id, sub.val);
-- citus_prep: same statement shape as the pg_target/pg_source prepared MERGE,
-- but it merges into citus_target and reads from citus_source.
-- $1 again restricts the MATCHED branch to the single target id passed at EXECUTE time.
PREPARE citus_prep(int) AS
MERGE INTO citus_target
USING (SELECT * FROM citus_source) sub
ON citus_target.id = sub.id AND citus_target.id = $1
WHEN MATCHED THEN
UPDATE SET val = 'Updated by prepare using ' || sub.val
WHEN NOT MATCHED THEN
INSERT VALUES (sub.id, sub.val);
BEGIN;
SELECT * FROM pg_target WHERE id = 500; -- before merge
id | val
---------------------------------------------------------------------
500 | target
(1 row)
SELECT count(*) FROM pg_target; -- before merge
count
---------------------------------------------------------------------
251
(1 row)
EXECUTE pg_prep(500);
SELECT * FROM pg_target WHERE id = 500; -- non-cached
id | val
---------------------------------------------------------------------
500 | Updated by prepare using source
(1 row)
EXECUTE pg_prep(500);
EXECUTE pg_prep(500);
EXECUTE pg_prep(500);
EXECUTE pg_prep(500);
EXECUTE pg_prep(500);
SELECT * FROM pg_target WHERE id = 500; -- cached
id | val
---------------------------------------------------------------------
500 | Updated by prepare using source
(1 row)
SELECT count(*) FROM pg_target; -- cached
count
---------------------------------------------------------------------
3245
(1 row)
SELECT * FROM citus_target WHERE id = 500; -- before merge
id | val
---------------------------------------------------------------------
500 | target
(1 row)
SELECT count(*) FROM citus_target; -- before merge
count
---------------------------------------------------------------------
251
(1 row)
SET citus.log_remote_commands to true;
EXECUTE citus_prep(500);
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
SELECT * FROM citus_target WHERE id = 500; -- non-cached
NOTICE: issuing SELECT id, val FROM merge_schema.citus_target_xxxxxxx citus_target WHERE (id OPERATOR(pg_catalog.=) 500)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
id | val
---------------------------------------------------------------------
500 | Updated by prepare using source
(1 row)
EXECUTE citus_prep(500);
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
EXECUTE citus_prep(500);
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
EXECUTE citus_prep(500);
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
EXECUTE citus_prep(500);
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
EXECUTE citus_prep(500);
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
NOTICE: issuing MERGE INTO merge_schema.citus_target_xxxxxxx citus_target USING (SELECT citus_source.id, citus_source.val FROM merge_schema.citus_source_xxxxxxx citus_source) sub ON ((citus_target.id OPERATOR(pg_catalog.=) sub.id) AND (citus_target.id OPERATOR(pg_catalog.=) $1)) WHEN MATCHED THEN UPDATE SET val = ('Updated by prepare using '::text OPERATOR(pg_catalog.||) (sub.val)::text) WHEN NOT MATCHED THEN INSERT (id, val) VALUES (sub.id, sub.val)
DETAIL: on server postgres@localhost:xxxxx connectionId: xxxxxxx
SET citus.log_remote_commands to false;
SELECT * FROM citus_target WHERE id = 500; -- cached
id | val
---------------------------------------------------------------------
500 | Updated by prepare using source
(1 row)
SELECT count(*) FROM citus_target; -- cached
count
---------------------------------------------------------------------
3245
(1 row)
SELECT compare_tables();
compare_tables
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
-- Test partitions + distributed tables
CREATE TABLE pg_pa_target (tid integer, balance float, val text)
PARTITION BY LIST (tid);
CREATE TABLE citus_pa_target (tid integer, balance float, val text)
PARTITION BY LIST (tid);
CREATE TABLE part1 PARTITION OF pg_pa_target FOR VALUES IN (1,4)
WITH (autovacuum_enabled=off);
CREATE TABLE part2 PARTITION OF pg_pa_target FOR VALUES IN (2,5,6)
WITH (autovacuum_enabled=off);
CREATE TABLE part3 PARTITION OF pg_pa_target FOR VALUES IN (3,8,9)
WITH (autovacuum_enabled=off);
CREATE TABLE part4 PARTITION OF pg_pa_target DEFAULT
WITH (autovacuum_enabled=off);
CREATE TABLE part5 PARTITION OF citus_pa_target FOR VALUES IN (1,4)
WITH (autovacuum_enabled=off);
CREATE TABLE part6 PARTITION OF citus_pa_target FOR VALUES IN (2,5,6)
WITH (autovacuum_enabled=off);
CREATE TABLE part7 PARTITION OF citus_pa_target FOR VALUES IN (3,8,9)
WITH (autovacuum_enabled=off);
CREATE TABLE part8 PARTITION OF citus_pa_target DEFAULT
WITH (autovacuum_enabled=off);
CREATE TABLE pg_pa_source (sid integer, delta float);
CREATE TABLE citus_pa_source (sid integer, delta float);
-- insert many rows to the source table
INSERT INTO pg_pa_source SELECT id, id * 10 FROM generate_series(1,14) AS id;
INSERT INTO citus_pa_source SELECT id, id * 10 FROM generate_series(1,14) AS id;
-- insert a few rows in the target table (odd numbered tid)
INSERT INTO pg_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id;
INSERT INTO citus_pa_target SELECT id, id * 100, 'initial' FROM generate_series(1,14,2) AS id;
SELECT create_distributed_table('citus_pa_target', 'tid');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part5$$)
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part6$$)
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part7$$)
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.part8$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('citus_pa_source', 'sid');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.citus_pa_source$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE OR REPLACE FUNCTION pa_compare_tables() RETURNS BOOLEAN AS $$
-- Returns true when no row of pg_pa_target or citus_pa_target lacks a
-- (tid, balance, val) match in the other table. The full outer join leaves
-- tid NULL on the side where a row has no partner, so counting those rows
-- detects any difference in either direction.
DECLARE
    tables_match BOOLEAN;
BEGIN
    SELECT count(*) = 0
      INTO tables_match
      FROM pg_pa_target AS p
      FULL OUTER JOIN citus_pa_target AS c USING (tid, balance, val)
     WHERE p.tid IS NULL
        OR c.tid IS NULL;

    RETURN tables_match;
END
$$ LANGUAGE PLPGSQL;
-- try simple MERGE
BEGIN;
MERGE INTO pg_pa_target t
USING pg_pa_source s
ON t.tid = s.sid
WHEN MATCHED THEN
UPDATE SET balance = balance + delta, val = val || ' updated by merge'
WHEN NOT MATCHED THEN
INSERT VALUES (sid, delta, 'inserted by merge');
MERGE INTO citus_pa_target t
USING citus_pa_source s
ON t.tid = s.sid
WHEN MATCHED THEN
UPDATE SET balance = balance + delta, val = val || ' updated by merge'
WHEN NOT MATCHED THEN
INSERT VALUES (sid, delta, 'inserted by merge');
SELECT pa_compare_tables();
pa_compare_tables
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
-- same with a constant qual
BEGIN;
MERGE INTO pg_pa_target t
USING pg_pa_source s
ON t.tid = s.sid AND tid = 1
WHEN MATCHED THEN
UPDATE SET balance = balance + delta, val = val || ' updated by merge'
WHEN NOT MATCHED THEN
INSERT VALUES (sid, delta, 'inserted by merge');
MERGE INTO citus_pa_target t
USING citus_pa_source s
ON t.tid = s.sid AND tid = 1
WHEN MATCHED THEN
UPDATE SET balance = balance + delta, val = val || ' updated by merge'
WHEN NOT MATCHED THEN
INSERT VALUES (sid, delta, 'inserted by merge');
SELECT pa_compare_tables();
pa_compare_tables
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
CREATE TABLE source_json( id integer, z int, d jsonb);
CREATE TABLE target_json( id integer, z int, d jsonb);
INSERT INTO source_json SELECT i,i FROM generate_series(0,5)i;
SELECT create_distributed_table('target_json','id'), create_distributed_table('source_json', 'id');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_json$$)
create_distributed_table | create_distributed_table
---------------------------------------------------------------------
|
(1 row)
-- single-shard query: source_json is filtered, and Postgres is smart enough to push
-- the filter down to target_json as well
SELECT public.coordinator_plan($Q$
EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda
USING (SELECT * FROM source_json WHERE id = 1) sdn
ON sda.id = sdn.id
WHEN NOT matched THEN
INSERT (id, z) VALUES (sdn.id, 5);
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) (actual rows=0 loops=1)
Task Count: 1
(2 rows)
SELECT * FROM target_json ORDER BY 1;
id | z | d
---------------------------------------------------------------------
1 | 5 |
(1 row)
-- zero shard query as filters do not match
--SELECT public.coordinator_plan($Q$
--EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda
--USING (SELECT * FROM source_json WHERE id = 1) sdn
--ON sda.id = sdn.id AND sda.id = 2
--WHEN NOT matched THEN
-- INSERT (id, z) VALUES (sdn.id, 5);
--$Q$);
--SELECT * FROM target_json ORDER BY 1;
-- join for source_json is happening at a different place
SELECT public.coordinator_plan($Q$
EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda
USING source_json s1 LEFT JOIN (SELECT * FROM source_json) s2 USING(z)
ON sda.id = s1.id AND s1.id = s2.id
WHEN NOT matched THEN
INSERT (id, z) VALUES (s2.id, 5);
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) (actual rows=0 loops=1)
Task Count: 4
(2 rows)
SELECT * FROM target_json ORDER BY 1;
id | z | d
---------------------------------------------------------------------
0 | 5 |
1 | 5 |
2 | 5 |
3 | 5 |
4 | 5 |
5 | 5 |
(6 rows)
-- update JSON column
SELECT public.coordinator_plan($Q$
EXPLAIN (ANALYZE ON, TIMING OFF) MERGE INTO target_json sda
USING source_json sdn
ON sda.id = sdn.id
WHEN matched THEN
UPDATE SET d = '{"a" : 5}';
$Q$);
coordinator_plan
---------------------------------------------------------------------
Custom Scan (Citus Adaptive) (cost=0.00..0.00 rows=0 width=0) (actual rows=0 loops=1)
Task Count: 4
(2 rows)
SELECT * FROM target_json ORDER BY 1;
id | z | d
---------------------------------------------------------------------
0 | 5 | {"a": 5}
1 | 5 | {"a": 5}
2 | 5 | {"a": 5}
3 | 5 | {"a": 5}
4 | 5 | {"a": 5}
5 | 5 | {"a": 5}
(6 rows)
-- Hashes the text form of the argument added to itself; a NULL argument
-- yields NULL without evaluating the body.
CREATE FUNCTION immutable_hash(int) RETURNS int
LANGUAGE SQL
IMMUTABLE
STRICT
AS $$
    SELECT hashtext(($1 + $1)::text);
$$;
MERGE INTO target_json sda
USING source_json sdn
ON sda.id = sdn.id
WHEN matched THEN
UPDATE SET z = immutable_hash(sdn.z);
-- Test bigserial
CREATE TABLE source_serial (id integer, z int, d bigserial);
CREATE TABLE target_serial (id integer, z int, d bigserial);
INSERT INTO source_serial SELECT i,i FROM generate_series(0,100)i;
SELECT create_distributed_table('source_serial', 'id'),
create_distributed_table('target_serial', 'id');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.source_serial$$)
create_distributed_table | create_distributed_table
---------------------------------------------------------------------
|
(1 row)
MERGE INTO target_serial sda
USING source_serial sdn
ON sda.id = sdn.id
WHEN NOT matched THEN
INSERT (id, z) VALUES (id, z);
ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables
SELECT count(*) from source_serial;
count
---------------------------------------------------------------------
101
(1 row)
SELECT count(*) from target_serial;
count
---------------------------------------------------------------------
0
(1 row)
SELECT count(distinct d) from source_serial;
count
---------------------------------------------------------------------
101
(1 row)
SELECT count(distinct d) from target_serial;
count
---------------------------------------------------------------------
0
(1 row)
-- Test set operations
CREATE TABLE target_set(t1 int, t2 int);
CREATE TABLE source_set(s1 int, s2 int);
SELECT create_distributed_table('target_set', 't1'),
create_distributed_table('source_set', 's1');
create_distributed_table | create_distributed_table
---------------------------------------------------------------------
|
(1 row)
INSERT INTO target_set VALUES(1, 0);
INSERT INTO source_set VALUES(1, 1);
INSERT INTO source_set VALUES(2, 2);
MERGE INTO target_set
USING (SELECT * FROM source_set UNION SELECT * FROM source_set) AS foo ON target_set.t1 = foo.s1
WHEN MATCHED THEN
UPDATE SET t2 = t2 + 100
WHEN NOT MATCHED THEN
INSERT VALUES(foo.s1);
SELECT * FROM target_set ORDER BY 1, 2;
t1 | t2
---------------------------------------------------------------------
1 | 100
2 |
(2 rows)
--
-- Reference as a source
--
CREATE TABLE reftarget_local(t1 int, t2 int);
CREATE TABLE refsource_ref(s1 int, s2 int);
INSERT INTO reftarget_local VALUES(1, 0);
INSERT INTO reftarget_local VALUES(3, 100);
INSERT INTO refsource_ref VALUES(1, 1);
INSERT INTO refsource_ref VALUES(2, 2);
INSERT INTO refsource_ref VALUES(3, 3);
MERGE INTO reftarget_local
USING (SELECT * FROM refsource_ref UNION SELECT * FROM refsource_ref) AS foo ON reftarget_local.t1 = foo.s1
WHEN MATCHED AND reftarget_local.t2 = 100 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET t2 = t2 + 100
WHEN NOT MATCHED THEN
INSERT VALUES(foo.s1);
DROP TABLE IF EXISTS pg_result;
SELECT * INTO pg_result FROM reftarget_local ORDER BY 1, 2;
-- Make the source a reference table (the target stays a plain Postgres table)
TRUNCATE reftarget_local;
TRUNCATE refsource_ref;
INSERT INTO reftarget_local VALUES(1, 0);
INSERT INTO reftarget_local VALUES(3, 100);
INSERT INTO refsource_ref VALUES(1, 1);
INSERT INTO refsource_ref VALUES(2, 2);
INSERT INTO refsource_ref VALUES(3, 3);
SELECT create_reference_table('refsource_ref');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.refsource_ref$$)
create_reference_table
---------------------------------------------------------------------
(1 row)
MERGE INTO reftarget_local
USING (SELECT * FROM refsource_ref UNION SELECT * FROM refsource_ref) AS foo ON reftarget_local.t1 = foo.s1
WHEN MATCHED AND reftarget_local.t2 = 100 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET t2 = t2 + 100
WHEN NOT MATCHED THEN
INSERT VALUES(foo.s1);
SELECT * INTO pg_ref FROM reftarget_local ORDER BY 1, 2;
-- Should be equal
SELECT c.*, p.*
FROM pg_ref c, pg_result p
WHERE c.t1 = p.t1
ORDER BY 1,2;
t1 | t2 | t1 | t2
---------------------------------------------------------------------
1 | 100 | 1 | 100
2 | | 2 |
(2 rows)
-- The count must be zero (no unmatched rows on either side)
SELECT count(*)
FROM pg_result FULL OUTER JOIN pg_ref ON pg_result.t1 = pg_ref.t1
WHERE pg_result.t1 IS NULL OR pg_ref.t1 IS NULL;
count
---------------------------------------------------------------------
0
(1 row)
-- Now make both tables Citus tables: reference table as source, Citus local table as target
TRUNCATE reftarget_local;
TRUNCATE refsource_ref;
INSERT INTO reftarget_local VALUES(1, 0);
INSERT INTO reftarget_local VALUES(3, 100);
INSERT INTO refsource_ref VALUES(1, 1);
INSERT INTO refsource_ref VALUES(2, 2);
INSERT INTO refsource_ref VALUES(3, 3);
SELECT citus_add_local_table_to_metadata('reftarget_local');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
MERGE INTO reftarget_local
USING (SELECT * FROM refsource_ref UNION SELECT * FROM refsource_ref) AS foo ON reftarget_local.t1 = foo.s1
WHEN MATCHED AND reftarget_local.t2 = 100 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET t2 = t2 + 100
WHEN NOT MATCHED THEN
INSERT VALUES(foo.s1);
SELECT * INTO local_ref FROM reftarget_local ORDER BY 1, 2;
-- Should be equal
SELECT c.*, p.*
FROM local_ref c, pg_result p
WHERE c.t1 = p.t1
ORDER BY 1,2;
t1 | t2 | t1 | t2
---------------------------------------------------------------------
1 | 100 | 1 | 100
2 | | 2 |
(2 rows)
-- The count must be zero (no unmatched rows on either side)
SELECT count(*)
FROM pg_result FULL OUTER JOIN local_ref ON pg_result.t1 = local_ref.t1
WHERE pg_result.t1 IS NULL OR local_ref.t1 IS NULL;
count
---------------------------------------------------------------------
0
(1 row)
-- Now make the target distributed, keeping the reference table as the source
TRUNCATE reftarget_local;
TRUNCATE refsource_ref;
INSERT INTO reftarget_local VALUES(1, 0);
INSERT INTO reftarget_local VALUES(3, 100);
INSERT INTO refsource_ref VALUES(1, 1);
INSERT INTO refsource_ref VALUES(2, 2);
INSERT INTO refsource_ref VALUES(3, 3);
SELECT create_distributed_table('reftarget_local', 't1');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.reftarget_local$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
MERGE INTO reftarget_local
USING (SELECT * FROM refsource_ref UNION SELECT * FROM refsource_ref) AS foo ON reftarget_local.t1 = foo.s1
WHEN MATCHED AND reftarget_local.t2 = 100 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET t2 = t2 + 100
WHEN NOT MATCHED THEN
INSERT VALUES(foo.s1);
SELECT * INTO dist_reftarget FROM reftarget_local ORDER BY 1, 2;
-- Should be equal
SELECT c.*, p.*
FROM dist_reftarget c, pg_result p
WHERE c.t1 = p.t1
ORDER BY 1,2;
t1 | t2 | t1 | t2
---------------------------------------------------------------------
1 | 100 | 1 | 100
2 | | 2 |
(2 rows)
-- The count must be zero (no unmatched rows on either side)
SELECT count(*)
FROM pg_result FULL OUTER JOIN dist_reftarget ON pg_result.t1 = dist_reftarget.t1
WHERE pg_result.t1 IS NULL OR dist_reftarget.t1 IS NULL;
count
---------------------------------------------------------------------
0
(1 row)
--
-- Distributed (target), Reference(source)
--
CREATE TABLE demo_distributed(id1 int, val1 int);
CREATE TABLE demo_source_table(id2 int, val2 int);
-- Loads the fixed fixture rows into demo_distributed and demo_source_table.
-- Some ids exist in only one of the two tables, and some demo_distributed rows
-- carry val1 = 0.
CREATE FUNCTION setup_demo_data() RETURNS VOID AS $$
    INSERT INTO demo_distributed (id1, val1)
    VALUES
        (1, 100),
        (7, 100),
        (15, 100),
        (100, 0),
        (300, 100),
        (400, 0);

    INSERT INTO demo_source_table (id2, val2)
    VALUES
        (1, 77),
        (15, 77),
        (75, 77),
        (100, 77),
        (300, 77),
        (400, 77),
        (500, 77);
$$
LANGUAGE SQL;
-- Merges demo_source_table into demo_distributed on id2 = id1:
--   * matched rows whose val1 is 0 are deleted,
--   * every other matched row has the source val2 added to val1,
--   * source rows without a match are inserted as (id2, val2).
CREATE FUNCTION merge_demo_data() RETURNS VOID AS $$
    MERGE INTO demo_distributed AS tgt
    USING demo_source_table AS src
        ON src.id2 = tgt.id1
    WHEN MATCHED AND tgt.val1 = 0 THEN
        DELETE
    WHEN MATCHED THEN
        UPDATE SET val1 = tgt.val1 + src.val2
    WHEN NOT MATCHED THEN
        INSERT (id1, val1) VALUES (src.id2, src.val2);
$$
LANGUAGE SQL;
SELECT setup_demo_data();
setup_demo_data
---------------------------------------------------------------------
(1 row)
SELECT merge_demo_data();
merge_demo_data
---------------------------------------------------------------------
(1 row)
SELECT * INTO pg_demo_result FROM demo_distributed ORDER BY 1, 2;
TRUNCATE demo_distributed;
TRUNCATE demo_source_table;
SELECT create_distributed_table('demo_distributed', 'id1');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_reference_table('demo_source_table');
create_reference_table
---------------------------------------------------------------------
(1 row)
SELECT setup_demo_data();
setup_demo_data
---------------------------------------------------------------------
(1 row)
SELECT merge_demo_data();
merge_demo_data
---------------------------------------------------------------------
(1 row)
SELECT * INTO dist_demo_result FROM demo_distributed ORDER BY 1, 2;
-- Should be equal
SELECT c.*, p.*
FROM dist_demo_result c, pg_demo_result p
WHERE c.id1 = p.id1
ORDER BY 1,2;
id1 | val1 | id1 | val1
---------------------------------------------------------------------
1 | 177 | 1 | 177
7 | 100 | 7 | 100
15 | 177 | 15 | 177
75 | 77 | 75 | 77
300 | 177 | 300 | 177
500 | 77 | 500 | 77
(6 rows)
-- The count must be zero (no unmatched rows on either side)
SELECT count(*)
FROM pg_demo_result p FULL OUTER JOIN dist_demo_result d ON p.id1 = d.id1
WHERE p.id1 IS NULL OR d.id1 IS NULL;
count
---------------------------------------------------------------------
0
(1 row)
-- Now make the source distributed as well, but not colocated with the target
DROP TABLE pg_demo_result, dist_demo_result;
SELECT undistribute_table('demo_distributed');
NOTICE: creating a new table for merge_schema.demo_distributed
NOTICE: moving the data of merge_schema.demo_distributed
NOTICE: dropping the old merge_schema.demo_distributed
NOTICE: renaming the new table to merge_schema.demo_distributed
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT undistribute_table('demo_source_table');
NOTICE: creating a new table for merge_schema.demo_source_table
NOTICE: moving the data of merge_schema.demo_source_table
NOTICE: dropping the old merge_schema.demo_source_table
NOTICE: renaming the new table to merge_schema.demo_source_table
undistribute_table
---------------------------------------------------------------------
(1 row)
-- Redefines merge_demo_data() with a set-operation source: the second UNION
-- branch swaps the columns, so val2 values also act as id2 join keys.
-- Every matched demo_distributed row gets val1 incremented by 1; nothing is
-- inserted or deleted.
CREATE OR REPLACE FUNCTION merge_demo_data() RETURNS VOID AS $$
    MERGE INTO demo_distributed AS tgt
    USING (
        SELECT id2, val2 FROM demo_source_table
        UNION
        SELECT val2, id2 FROM demo_source_table
    ) AS src
        ON tgt.id1 = src.id2
    WHEN MATCHED THEN
        UPDATE SET val1 = tgt.val1 + 1;
$$
LANGUAGE SQL;
TRUNCATE demo_distributed;
TRUNCATE demo_source_table;
SELECT setup_demo_data();
setup_demo_data
---------------------------------------------------------------------
(1 row)
SELECT merge_demo_data();
merge_demo_data
---------------------------------------------------------------------
(1 row)
SELECT * INTO pg_demo_result FROM demo_distributed ORDER BY 1, 2;
SELECT create_distributed_table('demo_distributed', 'id1');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.demo_distributed$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('demo_source_table', 'id2', colocate_with=>'none');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.demo_source_table$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
TRUNCATE demo_distributed;
TRUNCATE demo_source_table;
SELECT setup_demo_data();
setup_demo_data
---------------------------------------------------------------------
(1 row)
SELECT merge_demo_data();
merge_demo_data
---------------------------------------------------------------------
(1 row)
SELECT * INTO dist_demo_result FROM demo_distributed ORDER BY 1, 2;
-- Should be equal
SELECT c.*, p.*
FROM dist_demo_result c, pg_demo_result p
WHERE c.id1 = p.id1
ORDER BY 1,2;
id1 | val1 | id1 | val1
---------------------------------------------------------------------
1 | 101 | 1 | 101
7 | 100 | 7 | 100
15 | 101 | 15 | 101
100 | 1 | 100 | 1
300 | 101 | 300 | 101
400 | 1 | 400 | 1
(6 rows)
-- Must return zero rows
SELECT count(*)
FROM pg_demo_result p FULL OUTER JOIN dist_demo_result d ON p.id1 = d.id1
WHERE p.id1 IS NULL OR d.id1 IS NULL;
count
---------------------------------------------------------------------
0
(1 row)
-- Test with LIMIT
-- Redefine merge_demo_data() so that the source contains a LIMIT sub-query:
-- "foo" is the first three rows of demo_source_table (ORDER BY 1, i.e. by its
-- first column), left-joined back to demo_source_table on id2, with the
-- constant 999 exposed as s3.
-- Matched target rows get val1 = s3; unmatched source rows are inserted as
-- (id2, s3).
CREATE OR REPLACE FUNCTION merge_demo_data() RETURNS VOID AS $$
MERGE INTO demo_distributed t
USING (SELECT 999 as s3, demo_source_table.* FROM (SELECT * FROM demo_source_table ORDER BY 1 LIMIT 3) as foo LEFT JOIN demo_source_table USING(id2)) AS s
ON t.id1 = s.id2
WHEN MATCHED THEN
UPDATE SET val1 = s3
WHEN NOT MATCHED THEN
INSERT VALUES(id2, s3);
$$
LANGUAGE SQL;
DROP TABLE pg_demo_result, dist_demo_result;
SELECT undistribute_table('demo_distributed');
NOTICE: creating a new table for merge_schema.demo_distributed
NOTICE: moving the data of merge_schema.demo_distributed
NOTICE: dropping the old merge_schema.demo_distributed
NOTICE: renaming the new table to merge_schema.demo_distributed
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT undistribute_table('demo_source_table');
NOTICE: creating a new table for merge_schema.demo_source_table
NOTICE: moving the data of merge_schema.demo_source_table
NOTICE: dropping the old merge_schema.demo_source_table
NOTICE: renaming the new table to merge_schema.demo_source_table
undistribute_table
---------------------------------------------------------------------
(1 row)
TRUNCATE demo_distributed;
TRUNCATE demo_source_table;
SELECT setup_demo_data();
setup_demo_data
---------------------------------------------------------------------
(1 row)
SELECT merge_demo_data();
merge_demo_data
---------------------------------------------------------------------
(1 row)
SELECT * INTO pg_demo_result FROM demo_distributed ORDER BY 1, 2;
SELECT create_distributed_table('demo_distributed', 'id1');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.demo_distributed$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('demo_source_table', 'id2', colocate_with=>'none');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.demo_source_table$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
TRUNCATE demo_distributed;
TRUNCATE demo_source_table;
SELECT setup_demo_data();
setup_demo_data
---------------------------------------------------------------------
(1 row)
SELECT merge_demo_data();
merge_demo_data
---------------------------------------------------------------------
(1 row)
SELECT * INTO dist_demo_result FROM demo_distributed ORDER BY 1, 2;
-- Should be equal
SELECT c.*, p.*
FROM dist_demo_result c, pg_demo_result p
WHERE c.id1 = p.id1
ORDER BY 1,2;
id1 | val1 | id1 | val1
---------------------------------------------------------------------
1 | 999 | 1 | 999
7 | 100 | 7 | 100
15 | 999 | 15 | 999
75 | 999 | 75 | 999
100 | 0 | 100 | 0
300 | 100 | 300 | 100
400 | 0 | 400 | 0
(7 rows)
-- Must return zero rows
SELECT count(*)
FROM pg_demo_result p FULL OUTER JOIN dist_demo_result d ON p.id1 = d.id1
WHERE p.id1 IS NULL OR d.id1 IS NULL;
count
---------------------------------------------------------------------
0
(1 row)
-- Test explain with repartition
SET citus.explain_all_tasks TO false;
EXPLAIN (COSTS OFF)
MERGE INTO demo_distributed t
USING (SELECT 999 as s3, demo_source_table.* FROM (SELECT * FROM demo_source_table ORDER BY 1 LIMIT 3) as foo LEFT JOIN demo_source_table USING(id2)) AS s
ON t.id1 = s.id2
WHEN MATCHED THEN
UPDATE SET val1 = s3
WHEN NOT MATCHED THEN
INSERT VALUES(id2, s3);
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus MERGE INTO ...)
MERGE INTO demo_distributed method: pull to coordinator
-> Custom Scan (Citus Adaptive)
-> Distributed Subplan XXX_1
-> Limit
-> Sort
Sort Key: remote_scan.id2
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Limit
-> Sort
Sort Key: id2
-> Seq Scan on demo_source_table_4000151 demo_source_table
-> Distributed Subplan XXX_2
-> Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on demo_source_table_4000151 demo_source_table
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Merge Left Join
Merge Cond: (intermediate_result.id2 = intermediate_result_1.id2)
-> Sort
Sort Key: intermediate_result.id2
-> Function Scan on read_intermediate_result intermediate_result
-> Sort
Sort Key: intermediate_result_1.id2
-> Function Scan on read_intermediate_result intermediate_result_1
(35 rows)
-- Test multiple join conditions on distribution column
MERGE INTO demo_distributed t
USING (SELECT id2+1 as key, id2+3 as key2 FROM demo_source_table) s
ON t.id1 = s.key2 ANd t.id1 = s.key
WHEN NOT MATCHED THEN
INSERT VALUES(s.key2, 333);
MERGE INTO demo_distributed t
USING (SELECT id2+1 as key, id2+2 as key2 FROM demo_source_table) s
ON t.id1 = s.key2 AND t.id1 = s.key
WHEN NOT MATCHED THEN
DO NOTHING;
MERGE INTO demo_distributed t
USING (SELECT id2+1 as key, id2+3 as key2 FROM demo_source_table) s
ON t.val1 = s.key2 AND t.id1 = s.key AND t.id1 = s.key2
WHEN NOT MATCHED THEN
INSERT VALUES(s.key2, 444);
-- Test aggregate functions in source-query
SELECT COUNT(*) FROM demo_distributed where val1 = 150;
count
---------------------------------------------------------------------
0
(1 row)
SELECT COUNT(*) FROM demo_distributed where id1 = 2;
count
---------------------------------------------------------------------
0
(1 row)
-- One row with Key=7 updated in demo_distributed to 150
MERGE INTO demo_distributed t
USING (SELECT count(DISTINCT id2)::int4 as key FROM demo_source_table GROUP BY val2) s
ON t.id1 = s.key
WHEN NOT MATCHED THEN INSERT VALUES(s.key, 1)
WHEN MATCHED THEN UPDATE SET val1 = 150;
-- Seven rows with Key=2 inserted in demo_distributed
MERGE INTO demo_distributed t
USING (SELECT (count(DISTINCT val2) + 1)::int4 as key FROM demo_source_table GROUP BY id2) s
ON t.id1 = s.key
WHEN NOT MATCHED THEN INSERT VALUES(s.key, 1)
WHEN MATCHED THEN UPDATE SET val1 = 150;
SELECT COUNT(*) FROM demo_distributed where val1 = 150;
count
---------------------------------------------------------------------
1
(1 row)
SELECT COUNT(*) FROM demo_distributed where id1 = 2;
count
---------------------------------------------------------------------
7
(1 row)
--
-- Test FALSE filters
--
CREATE TABLE source_filter(order_id INT, customer_id INT, order_center VARCHAR, order_time timestamp);
CREATE TABLE target_filter(customer_id INT, last_order_id INT, order_center VARCHAR, order_count INT, last_order timestamp);
SELECT create_distributed_table('source_filter', 'customer_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('target_filter', 'customer_id', colocate_with => 'source_filter');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- load_filter(): reset target_filter and source_filter to a known state.
-- After the call the target holds customer_id 100 and 200, and the source
-- holds customer_id 100, 200 and 300, so customer 300 is the only source row
-- without a matching target row.
CREATE FUNCTION load_filter() RETURNS VOID AS $$
TRUNCATE target_filter;
TRUNCATE source_filter;
INSERT INTO target_filter VALUES(100, 11, 'trg', -1, '2022-01-01 00:00:00'); -- Match UPDATE
INSERT INTO target_filter VALUES(200, 11, 'trg', -1, '2022-01-01 00:00:00'); -- Match DELETE
INSERT INTO source_filter VALUES(12, 100, 'src', '2022-01-01 00:00:00');
INSERT INTO source_filter VALUES(12, 200, 'src', '2022-01-01 00:00:00');
INSERT INTO source_filter VALUES(12, 300, 'src', '2022-01-01 00:00:00');
$$
LANGUAGE SQL;
--WHEN MATCH and FALSE
SELECT load_filter();
load_filter
---------------------------------------------------------------------
(1 row)
MERGE INTO target_filter t
USING source_filter s
ON s.customer_id = t.customer_id
WHEN MATCHED AND t.customer_id = 100 AND (FALSE) THEN
UPDATE SET order_count = 999
WHEN MATCHED AND t.customer_id = 200 THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.customer_id, s.order_id, s.order_center, 1, s.order_time);
SELECT * FROM target_filter ORDER BY 1, 2;
customer_id | last_order_id | order_center | order_count | last_order
---------------------------------------------------------------------
100 | 11 | trg | -1 | Sat Jan 01 00:00:00 2022
300 | 12 | src | 1 | Sat Jan 01 00:00:00 2022
(2 rows)
--WHEN NOT MATCH and 1=0
SELECT load_filter();
load_filter
---------------------------------------------------------------------
(1 row)
MERGE INTO target_filter t
USING source_filter s
ON s.customer_id = t.customer_id
WHEN MATCHED AND t.customer_id = 100 THEN
UPDATE SET order_count = 999
WHEN MATCHED AND t.customer_id = 200 THEN
DELETE
WHEN NOT MATCHED AND (1=0) THEN
INSERT VALUES(s.customer_id, s.order_id, s.order_center, 1, s.order_time);
SELECT * FROM target_filter ORDER BY 1, 2;
customer_id | last_order_id | order_center | order_count | last_order
---------------------------------------------------------------------
100 | 11 | trg | 999 | Sat Jan 01 00:00:00 2022
(1 row)
--ON t.key = s.key AND 1 < 0
SELECT load_filter();
load_filter
---------------------------------------------------------------------
(1 row)
MERGE INTO target_filter t
USING source_filter s
ON s.customer_id = t.customer_id AND 1 < 0
WHEN MATCHED AND t.customer_id = 100 THEN
UPDATE SET order_count = 999
WHEN MATCHED AND t.customer_id = 200 THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.customer_id, s.order_id, s.order_center, 1, s.order_time);
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
SELECT * FROM target_filter ORDER BY 1, 2;
customer_id | last_order_id | order_center | order_count | last_order
---------------------------------------------------------------------
100 | 11 | trg | -1 | Sat Jan 01 00:00:00 2022
200 | 11 | trg | -1 | Sat Jan 01 00:00:00 2022
(2 rows)
--(SELECT * FROM source_filter WHERE false) as source_filter
SELECT load_filter();
load_filter
---------------------------------------------------------------------
(1 row)
MERGE INTO target_filter t
USING (SELECT * FROM source_filter WHERE false) s
ON s.customer_id = t.customer_id
WHEN MATCHED AND t.customer_id = 100 THEN
UPDATE SET order_count = 999
WHEN MATCHED AND t.customer_id = 200 THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.customer_id, s.order_id, s.order_center, 1, s.order_time);
SELECT * FROM target_filter ORDER BY 1, 2;
customer_id | last_order_id | order_center | order_count | last_order
---------------------------------------------------------------------
100 | 11 | trg | -1 | Sat Jan 01 00:00:00 2022
200 | 11 | trg | -1 | Sat Jan 01 00:00:00 2022
(2 rows)
-- Bug 6785
CREATE TABLE source_6785( id integer, z int, d jsonb);
CREATE TABLE target_6785( id integer, z int, d jsonb);
SELECT create_distributed_table('target_6785','id'), create_distributed_table('source_6785', 'id');
create_distributed_table | create_distributed_table
---------------------------------------------------------------------
|
(1 row)
INSERT INTO source_6785 SELECT i,i FROM generate_series(0,5)i;
SET client_min_messages TO DEBUG1;
MERGE INTO target_6785 sda
USING (SELECT * FROM source_6785 WHERE id = 1) sdn
ON sda.id = sdn.id AND sda.id = 2
WHEN NOT matched THEN
INSERT (id, z) VALUES (sdn.id, 5);
DEBUG: Target relation has a filter of the form: false (AND ..), which results in empty shards, but we still need to evaluate NOT-MATCHED clause, try repartitioning
DEBUG: Routing query is not possible with no shards for target
DEBUG: Creating MERGE repartition plan
DEBUG: Using column - index:0 from the source list to redistribute
DEBUG: Collect source query results on coordinator
DEBUG: Create a MERGE task list that needs to be routed
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_6785_xxxxxxx sda USING (SELECT intermediate_result.id, intermediate_result.z, intermediate_result.d FROM read_intermediate_result('merge_into_XXX_4000163'::text, 'binary'::citus_copy_format) intermediate_result(id integer, z integer, d jsonb)) sdn ON ((sda.id OPERATOR(pg_catalog.=) sdn.id) AND (sda.id OPERATOR(pg_catalog.=) 2)) WHEN NOT MATCHED THEN INSERT (id, z) VALUES (sdn.id, 5)>
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_6785_xxxxxxx sda USING (SELECT intermediate_result.id, intermediate_result.z, intermediate_result.d FROM read_intermediate_result('merge_into_XXX_4000164'::text, 'binary'::citus_copy_format) intermediate_result(id integer, z integer, d jsonb)) sdn ON ((sda.id OPERATOR(pg_catalog.=) sdn.id) AND (sda.id OPERATOR(pg_catalog.=) 2)) WHEN NOT MATCHED THEN INSERT (id, z) VALUES (sdn.id, 5)>
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_6785_xxxxxxx sda USING (SELECT intermediate_result.id, intermediate_result.z, intermediate_result.d FROM read_intermediate_result('merge_into_XXX_4000165'::text, 'binary'::citus_copy_format) intermediate_result(id integer, z integer, d jsonb)) sdn ON ((sda.id OPERATOR(pg_catalog.=) sdn.id) AND (sda.id OPERATOR(pg_catalog.=) 2)) WHEN NOT MATCHED THEN INSERT (id, z) VALUES (sdn.id, 5)>
DEBUG: <Deparsed MERGE query: MERGE INTO merge_schema.target_6785_xxxxxxx sda USING (SELECT intermediate_result.id, intermediate_result.z, intermediate_result.d FROM read_intermediate_result('merge_into_XXX_4000166'::text, 'binary'::citus_copy_format) intermediate_result(id integer, z integer, d jsonb)) sdn ON ((sda.id OPERATOR(pg_catalog.=) sdn.id) AND (sda.id OPERATOR(pg_catalog.=) 2)) WHEN NOT MATCHED THEN INSERT (id, z) VALUES (sdn.id, 5)>
DEBUG: Execute MERGE task list
RESET client_min_messages;
SELECT * FROM target_6785 ORDER BY 1;
id | z | d
---------------------------------------------------------------------
1 | 5 |
(1 row)
--
-- Error and Unsupported scenarios
--
-- Test explain analyze with repartition
EXPLAIN ANALYZE
MERGE INTO demo_distributed t
USING (SELECT 999 as s3, demo_source_table.* FROM (SELECT * FROM demo_source_table ORDER BY 1 LIMIT 3) as foo LEFT JOIN demo_source_table USING(id2)) AS s
ON t.id1 = s.id2
WHEN MATCHED THEN
UPDATE SET val1 = s3
WHEN NOT MATCHED THEN
INSERT VALUES(id2, s3);
ERROR: EXPLAIN ANALYZE is currently not supported for MERGE INTO ... commands with repartitioning
-- Source without a table
MERGE INTO target_cj t
USING (VALUES (1, 1), (2, 1), (3, 3)) as s (sid, val)
ON t.tid = s.sid AND t.tid = 2
WHEN MATCHED THEN
UPDATE SET val = s.val
WHEN NOT MATCHED THEN
DO NOTHING;
ERROR: To MERGE into a distributed table, source must be Citus table(s)
-- Incomplete source
MERGE INTO target_cj t
USING (source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = val2) s
ON t.tid = s.sid1 AND t.tid = 2
WHEN MATCHED THEN
UPDATE SET src = src2
WHEN NOT MATCHED THEN
DO NOTHING;
ERROR: Source is not an explicit query
HINT: Source query is a Join expression, try converting into a query as SELECT * FROM (..Join..)
-- Reference as a target and local as source
MERGE INTO refsource_ref
USING (SELECT * FROM reftarget_local UNION SELECT * FROM reftarget_local) AS foo ON refsource_ref.s1 = foo.t1
WHEN MATCHED THEN
UPDATE SET s2 = s2 + 100
WHEN NOT MATCHED THEN
INSERT VALUES(foo.t1);
ERROR: Reference table as target is not allowed in MERGE command
MERGE INTO target_set
USING source_set AS foo ON target_set.t1 = foo.s1
WHEN MATCHED THEN
UPDATE SET ctid = '(0,100)';
ERROR: cannot assign to system column "ctid"
-- modifying CTE not supported
EXPLAIN
WITH cte_1 AS (DELETE FROM target_json RETURNING *)
MERGE INTO target_json sda
USING cte_1 sdn
ON sda.id = sdn.id
WHEN NOT matched THEN
INSERT (id, z) VALUES (sdn.id, 5);
ERROR: CTEs with modifying actions are not yet supported in MERGE
-- Grouping sets not supported
MERGE INTO citus_target t
USING (SELECT count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val)) subq
ON subq.id = t.id
WHEN MATCHED AND t.id > 350 THEN
UPDATE SET val = t.val || 'Updated'
WHEN NOT MATCHED THEN
INSERT VALUES (subq.id, 99)
WHEN MATCHED AND t.id < 350 THEN
DELETE;
ERROR: could not run distributed query with GROUPING SETS, CUBE, or ROLLUP
HINT: Consider using an equality filter on the distributed table's partition column.
WITH subq AS
(
SELECT count(*), id FROM citus_source GROUP BY GROUPING SETS (id, val)
)
MERGE INTO citus_target t
USING subq
ON subq.id = t.id
WHEN MATCHED AND t.id > 350 THEN
UPDATE SET val = t.val || 'Updated'
WHEN NOT MATCHED THEN
INSERT VALUES (subq.id, 99)
WHEN MATCHED AND t.id < 350 THEN
DELETE;
ERROR: could not run distributed query with GROUPING SETS, CUBE, or ROLLUP
HINT: Consider using an equality filter on the distributed table's partition column.
-- try inserting unmatched distribution column value
MERGE INTO citus_target t
USING citus_source s
ON t.id = s.id
WHEN NOT MATCHED THEN
INSERT DEFAULT VALUES;
ERROR: cannot perform MERGE INSERT with DEFAULTS
DETAIL: Inserting arbitrary values that don't correspond to the joined column values can lead to unpredictable outcomes where rows are incorrectly distributed among different shards
MERGE INTO citus_target t
USING citus_source s
ON t.id = s.id
WHEN NOT MATCHED THEN
INSERT VALUES(10000);
ERROR: MERGE INSERT is using unsupported expression type for distribution column
DETAIL: Inserting arbitrary values that don't correspond to the joined column values can lead to unpredictable outcomes where rows are incorrectly distributed among different shards
MERGE INTO citus_target t
USING citus_source s
ON t.id = s.id
WHEN NOT MATCHED THEN
INSERT (id) VALUES(1000);
ERROR: MERGE INSERT is using unsupported expression type for distribution column
DETAIL: Inserting arbitrary values that don't correspond to the joined column values can lead to unpredictable outcomes where rows are incorrectly distributed among different shards
-- Colocated merge
MERGE INTO t1 t
USING s1 s
ON t.id = s.id
WHEN NOT MATCHED THEN
INSERT (id) VALUES(s.val);
ERROR: MERGE INSERT must use the source's joining column for target's distribution column
MERGE INTO t1 t
USING s1 s
ON t.id = s.id
WHEN NOT MATCHED THEN
INSERT (val) VALUES(s.val);
ERROR: MERGE INSERT must have distribution column as value
-- Non-colocated merge
MERGE INTO t1 t
USING s1 s
ON t.id = s.val
WHEN NOT MATCHED THEN
INSERT (id) VALUES(s.id);
ERROR: MERGE INSERT must use the source's joining column for target's distribution column
-- try updating the distribution key column
BEGIN;
MERGE INTO target_cj t
USING source_cj1 s
ON t.tid = s.sid1 AND t.tid = 2
WHEN MATCHED THEN
UPDATE SET tid = tid + 9, src = src || ' updated by merge'
WHEN NOT MATCHED THEN
INSERT VALUES (sid1, 'inserted by merge', val1);
ERROR: updating the distribution column is not allowed in MERGE actions
ROLLBACK;
-- Foreign table as target
MERGE INTO foreign_table
USING ft_target ON (foreign_table.id = ft_target.id)
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT (id, user_val) VALUES (ft_target.id, ft_target.user_val);
ERROR: cannot execute MERGE on relation "foreign_table"
DETAIL: This operation is not supported for foreign tables.
TRUNCATE t1;
TRUNCATE s1;
SELECT undistribute_table('t1');
NOTICE: creating a new table for merge_schema.t1
NOTICE: moving the data of merge_schema.t1
NOTICE: dropping the old merge_schema.t1
NOTICE: renaming the new table to merge_schema.t1
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT undistribute_table('s1');
NOTICE: creating a new table for merge_schema.s1
NOTICE: moving the data of merge_schema.s1
NOTICE: dropping the old merge_schema.s1
NOTICE: renaming the new table to merge_schema.s1
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT citus_add_local_table_to_metadata('t1');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('s1', 'id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT load();
load
---------------------------------------------------------------------
(1 row)
-- Combination of Citus local table and distributed table
MERGE INTO t1
USING s1 ON (s1.id = t1.val) -- val is not a distribution column
WHEN MATCHED AND s1.val = 0 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET val = t1.val + 1
WHEN NOT MATCHED THEN
INSERT (id, val) VALUES (s1.id, s1.val);
ERROR: MERGE involving repartition of rows is supported only if the target is distributed
-- Now both s1 and t1 are distributed tables
SELECT undistribute_table('t1');
NOTICE: creating a new table for merge_schema.t1
NOTICE: moving the data of merge_schema.t1
NOTICE: dropping the old merge_schema.t1
NOTICE: renaming the new table to merge_schema.t1
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('t1', 'id');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.t1$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- We have a potential pitfall where a function invoked in the MERGE
-- conditions can itself insert into or update an arbitrary shard.
-- merge_when_and_write() models that case: it writes a row into t1 and
-- returns TRUE. No volatility category is declared, so it gets the
-- PostgreSQL default (VOLATILE); the MERGE statements below that call it are
-- expected to be rejected as using a non-IMMUTABLE function.
CREATE OR REPLACE function merge_when_and_write() RETURNS BOOLEAN
LANGUAGE PLPGSQL AS
$$
BEGIN
INSERT INTO t1 VALUES (100, 100);
RETURN TRUE;
END;
$$;
-- Test functions executing in a MERGE statement. This is to prevent the functions from
-- running arbitrary SQL, which may be executed on a remote node or modify the target
-- relation, leading to unexpected/surprising results.
MERGE INTO t1 USING (SELECT * FROM s1 WHERE true) s1 ON
t1.id = s1.id AND s1.id = 2
WHEN matched THEN
UPDATE SET id = s1.id, val = random();
ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables
-- Test STABLE function
-- add_s(a, b) returns a + b and yields NULL when either argument is NULL.
-- It is declared STABLE rather than IMMUTABLE, so the MERGE that calls it
-- below is expected to be rejected.
CREATE FUNCTION add_s(integer, integer) RETURNS integer
AS 'select $1 + $2;'
LANGUAGE SQL
STABLE RETURNS NULL ON NULL INPUT;
MERGE INTO t1
USING s1 ON t1.id = s1.id
WHEN NOT MATCHED THEN
INSERT VALUES(s1.id, add_s(s1.val, 2));
ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables
-- Test preventing "ON" join condition from writing to the database
BEGIN;
MERGE INTO t1
USING s1 ON t1.id = s1.id AND t1.id = 2 AND (merge_when_and_write())
WHEN MATCHED THEN
UPDATE SET val = t1.val + s1.val;
ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables
ROLLBACK;
-- Test preventing WHEN clause(s) from writing to the database
BEGIN;
MERGE INTO t1
USING s1 ON t1.id = s1.id AND t1.id = 2
WHEN MATCHED AND (merge_when_and_write()) THEN
UPDATE SET val = t1.val + s1.val;
ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables
ROLLBACK;
-- Joining on non-partition columns with CTE source, but INSERT incorrect column
WITH s1_res AS (
SELECT * FROM s1
)
MERGE INTO t1
USING s1_res ON (s1_res.val = t1.id)
WHEN MATCHED AND s1_res.val = 0 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET val = t1.val + 1
WHEN NOT MATCHED THEN
INSERT (id, val) VALUES (s1_res.id, s1_res.val);
ERROR: MERGE INSERT must use the source's joining column for target's distribution column
-- Constant Join condition
WITH s1_res AS (
SELECT * FROM s1
)
MERGE INTO t1
USING s1_res ON (TRUE)
WHEN MATCHED AND s1_res.val = 0 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET val = t1.val + 1
WHEN NOT MATCHED THEN
INSERT (id, val) VALUES (s1_res.id, s1_res.val);
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
-- Join condition without target distribution column
WITH s1_res AS (
SELECT * FROM s1
)
MERGE INTO t1 USING s1_res ON (s1_res.id = t1.val)
WHEN MATCHED THEN DELETE
WHEN NOT MATCHED THEN
INSERT (id, val) VALUES (s1_res.id, s1_res.val);
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
--
-- Reference tables
--
SELECT undistribute_table('t1');
NOTICE: creating a new table for merge_schema.t1
NOTICE: moving the data of merge_schema.t1
NOTICE: dropping the old merge_schema.t1
NOTICE: renaming the new table to merge_schema.t1
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT undistribute_table('s1');
NOTICE: creating a new table for merge_schema.s1
NOTICE: moving the data of merge_schema.s1
NOTICE: dropping the old merge_schema.s1
NOTICE: renaming the new table to merge_schema.s1
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT create_reference_table('t1');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.t1$$)
create_reference_table
---------------------------------------------------------------------
(1 row)
SELECT create_reference_table('s1');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.s1$$)
create_reference_table
---------------------------------------------------------------------
(1 row)
MERGE INTO t1
USING s1 ON (s1.id = t1.id)
WHEN MATCHED AND s1.val = 0 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET val = t1.val + 1
WHEN NOT MATCHED THEN
INSERT (id, val) VALUES (s1.id, s1.val);
ERROR: Reference table as target is not allowed in MERGE command
--
-- Postgres + Citus-Distributed table
--
SELECT undistribute_table('t1');
NOTICE: creating a new table for merge_schema.t1
NOTICE: moving the data of merge_schema.t1
NOTICE: dropping the old merge_schema.t1
NOTICE: renaming the new table to merge_schema.t1
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT undistribute_table('s1');
NOTICE: creating a new table for merge_schema.s1
NOTICE: moving the data of merge_schema.s1
NOTICE: dropping the old merge_schema.s1
NOTICE: renaming the new table to merge_schema.s1
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('t1', 'id');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.t1$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
MERGE INTO t1
USING s1 ON (s1.id = t1.id)
WHEN MATCHED AND s1.val = 0 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET val = t1.val + 1
WHEN NOT MATCHED THEN
INSERT (id, val) VALUES (s1.id, s1.val);
ERROR: MERGE INTO an distributed table from Postgres table is not yet supported
MERGE INTO t1
USING (SELECT * FROM s1) sub ON (sub.id = t1.id)
WHEN MATCHED AND sub.val = 0 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET val = t1.val + 1
WHEN NOT MATCHED THEN
INSERT (id, val) VALUES (sub.id, sub.val);
ERROR: MERGE INTO an distributed table from Postgres table is not yet supported
CREATE TABLE pg(val int);
SELECT create_distributed_table('s1', 'id');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.s1$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Both t1 and s1 are citus distributed tables now, mix Postgres table in sub-query
MERGE INTO t1
USING (SELECT s1.id, pg.val FROM s1, pg) sub ON (sub.id = t1.id)
WHEN MATCHED AND sub.val = 0 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET val = t1.val + 1
WHEN NOT MATCHED THEN
INSERT (id, val) VALUES (sub.id, sub.val);
ERROR: MERGE INTO an distributed table from Postgres table is not yet supported
-- Mix Postgres table in CTE
WITH pg_res AS (
SELECT * FROM pg
)
MERGE INTO t1
USING (SELECT s1.id, pg_res.val FROM s1, pg_res) sub ON (sub.id = t1.id)
WHEN MATCHED AND sub.val = 0 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET val = t1.val + 1
WHEN NOT MATCHED THEN
INSERT (id, val) VALUES (sub.id, sub.val);
ERROR: MERGE INTO an distributed table from Postgres table is not yet supported
-- Matching more than one source row should fail, the same as in Postgres
SELECT undistribute_table('t1');
NOTICE: creating a new table for merge_schema.t1
NOTICE: moving the data of merge_schema.t1
NOTICE: dropping the old merge_schema.t1
NOTICE: renaming the new table to merge_schema.t1
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT undistribute_table('s1');
NOTICE: creating a new table for merge_schema.s1
NOTICE: moving the data of merge_schema.s1
NOTICE: dropping the old merge_schema.s1
NOTICE: renaming the new table to merge_schema.s1
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT citus_add_local_table_to_metadata('t1');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
SELECT citus_add_local_table_to_metadata('s1');
citus_add_local_table_to_metadata
---------------------------------------------------------------------
(1 row)
INSERT INTO s1 VALUES(1, 1); -- From load(), we already have row with id = 1
MERGE INTO t1
USING s1 ON (s1.id = t1.id)
WHEN MATCHED AND s1.val = 0 THEN
DELETE
WHEN MATCHED THEN
UPDATE SET val = t1.val + 1
WHEN NOT MATCHED THEN
INSERT (id, val) VALUES (s1.id, s1.val);
ERROR: MERGE command cannot affect row a second time
HINT: Ensure that not more than one source row matches any one target row.
-- Materialized view as target is not allowed
-- The target is mv_source, which the expected DETAIL line identifies as a materialized view.
-- NOTE(review): the INSERT values reference mv_source (the target) rather than the source;
-- the statement errors out on the target relation check, so this is presumably never
-- evaluated -- confirm that is intended.
MERGE INTO mv_source
USING mv_target
ON mv_source.id = mv_target.id
WHEN MATCHED THEN
DO NOTHING
WHEN NOT MATCHED THEN
INSERT VALUES(mv_source.id, mv_source.val);
ERROR: cannot execute MERGE on relation "mv_source"
DETAIL: This operation is not supported for materialized views.
-- Do not allow constant values into the distribution column
-- dist_target and dist_source are both distributed on id; dist_source is explicitly
-- created without colocation (colocate_with => 'none').
CREATE TABLE dist_target(id int, val varchar);
SELECT create_distributed_table('dist_target', 'id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE dist_source(id int, val varchar);
SELECT create_distributed_table('dist_source', 'id', colocate_with => 'none');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- The source is a constant-only sub-query (SELECT 100 id) that reads no table at all;
-- the expected error requires the source to be Citus table(s).
MERGE INTO dist_target
USING (SELECT 100 id) AS source
ON dist_target.id = source.id AND dist_target.val = 'const'
WHEN MATCHED THEN
UPDATE SET val = 'source'
WHEN NOT MATCHED THEN
INSERT VALUES(source.id, 'source');
ERROR: To MERGE into a distributed table, source must be Citus table(s)
-- Non-hash distributed tables (append/range).
-- Helper view: lists the partition method recorded in pg_dist_partition for
-- dist_target and dist_source, ordered by logicalrelid.
CREATE VIEW show_tables AS
SELECT logicalrelid, partmethod
FROM pg_dist_partition
WHERE (logicalrelid = 'dist_target'::regclass) OR (logicalrelid = 'dist_source'::regclass)
ORDER BY 1;
-- Re-create dist_source as an append-distributed table.
-- NOTE(review): the NOTICE below reports show_tables being dropped by the CASCADE, yet the
-- view is queried successfully afterwards, so undistribute_table presumably re-creates
-- dependent views -- confirm.
SELECT undistribute_table('dist_source');
NOTICE: creating a new table for merge_schema.dist_source
NOTICE: moving the data of merge_schema.dist_source
NOTICE: dropping the old merge_schema.dist_source
NOTICE: drop cascades to view show_tables
CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE"
NOTICE: renaming the new table to merge_schema.dist_source
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('dist_source', 'id', 'append');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Confirm the setup: dist_target is still 'h' while dist_source now reports 'a'.
SELECT * FROM show_tables;
logicalrelid | partmethod
---------------------------------------------------------------------
dist_target | h
dist_source | a
(2 rows)
-- Hash-distributed target with an append-distributed source: expected to be rejected.
MERGE INTO dist_target
USING dist_source
ON dist_target.id = dist_source.id
WHEN MATCHED THEN
UPDATE SET val = dist_source.val
WHEN NOT MATCHED THEN
INSERT VALUES(dist_source.id, dist_source.val);
ERROR: For MERGE command, append/range distribution table is not supported yet
-- Re-create dist_source as a range-distributed table and repeat the MERGE.
SELECT undistribute_table('dist_source');
NOTICE: creating a new table for merge_schema.dist_source
NOTICE: moving the data of merge_schema.dist_source
NOTICE: dropping the old merge_schema.dist_source
NOTICE: drop cascades to view show_tables
CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE"
NOTICE: renaming the new table to merge_schema.dist_source
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('dist_source', 'id', 'range');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Confirm the setup: dist_source now reports partmethod 'r'.
SELECT * FROM show_tables;
logicalrelid | partmethod
---------------------------------------------------------------------
dist_target | h
dist_source | r
(2 rows)
-- Hash-distributed target with a range-distributed source: expected to be rejected.
MERGE INTO dist_target
USING dist_source
ON dist_target.id = dist_source.id
WHEN MATCHED THEN
UPDATE SET val = dist_source.val
WHEN NOT MATCHED THEN
INSERT VALUES(dist_source.id, dist_source.val);
ERROR: For MERGE command, append/range distribution table is not supported yet
-- Both are append tables
-- Undistribute target and source, then re-create each with the 'append' method.
SELECT undistribute_table('dist_target');
NOTICE: creating a new table for merge_schema.dist_target
NOTICE: moving the data of merge_schema.dist_target
NOTICE: dropping the old merge_schema.dist_target
NOTICE: drop cascades to view show_tables
CONTEXT: SQL statement "DROP TABLE merge_schema.dist_target CASCADE"
NOTICE: renaming the new table to merge_schema.dist_target
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT undistribute_table('dist_source');
NOTICE: creating a new table for merge_schema.dist_source
NOTICE: moving the data of merge_schema.dist_source
NOTICE: dropping the old merge_schema.dist_source
NOTICE: drop cascades to view show_tables
CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE"
NOTICE: renaming the new table to merge_schema.dist_source
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('dist_target', 'id', 'append');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('dist_source', 'id', 'append');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Confirm the setup: both tables report partmethod 'a'.
SELECT * FROM show_tables;
logicalrelid | partmethod
---------------------------------------------------------------------
dist_target | a
dist_source | a
(2 rows)
-- Append-distributed target and source: expected to be rejected.
MERGE INTO dist_target
USING dist_source
ON dist_target.id = dist_source.id
WHEN MATCHED THEN
UPDATE SET val = dist_source.val
WHEN NOT MATCHED THEN
INSERT VALUES(dist_source.id, dist_source.val);
ERROR: For MERGE command, append/range distribution table is not supported yet
-- Both are range tables
-- Undistribute target and source, then re-create each with the 'range' method.
SELECT undistribute_table('dist_target');
NOTICE: creating a new table for merge_schema.dist_target
NOTICE: moving the data of merge_schema.dist_target
NOTICE: dropping the old merge_schema.dist_target
NOTICE: drop cascades to view show_tables
CONTEXT: SQL statement "DROP TABLE merge_schema.dist_target CASCADE"
NOTICE: renaming the new table to merge_schema.dist_target
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT undistribute_table('dist_source');
NOTICE: creating a new table for merge_schema.dist_source
NOTICE: moving the data of merge_schema.dist_source
NOTICE: dropping the old merge_schema.dist_source
NOTICE: drop cascades to view show_tables
CONTEXT: SQL statement "DROP TABLE merge_schema.dist_source CASCADE"
NOTICE: renaming the new table to merge_schema.dist_source
undistribute_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('dist_target', 'id', 'range');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('dist_source', 'id', 'range');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- Confirm the setup: both tables report partmethod 'r'.
SELECT * FROM show_tables;
logicalrelid | partmethod
---------------------------------------------------------------------
dist_target | r
dist_source | r
(2 rows)
-- Range-distributed target and source: expected to be rejected.
MERGE INTO dist_target
USING dist_source
ON dist_target.id = dist_source.id
WHEN MATCHED THEN
UPDATE SET val = dist_source.val
WHEN NOT MATCHED THEN
INSERT VALUES(dist_source.id, dist_source.val);
ERROR: For MERGE command, append/range distribution table is not supported yet
-- Test Columnar table
-- A columnar table distributed on cid is used as the MERGE target; the expected error rejects it.
CREATE TABLE target_columnar(cid int, name text) USING columnar;
SELECT create_distributed_table('target_columnar', 'cid');
create_distributed_table
---------------------------------------------------------------------
(1 row)
MERGE INTO target_columnar t
USING demo_source_table s
ON t.cid = s.id2
WHEN MATCHED THEN
UPDATE SET name = 'Columnar table updated by MERGE'
WHEN NOT MATCHED THEN
DO NOTHING;
ERROR: Columnar table as target is not allowed in MERGE command
-- A function call (generate_series) as the source is neither a table, a sub-query, nor a CTE,
-- so the expected error rejects it.
MERGE INTO demo_distributed t
USING generate_series(0,100) as source(key)
ON (source.key + 1 = t.id1)
WHEN MATCHED THEN UPDATE SET val1 = 15;
ERROR: Currently, Citus only supports table, subquery, and CTEs as valid sources for the MERGE operation
-- This should fail in planning stage itself
-- The statement is only EXPLAINed, not executed. The ON clause compares s.id2 + 1 with t.id1;
-- presumably t.id1 is the distribution column of demo_distributed (defined outside this
-- section) -- the expected error reports the missing join on the target's distribution column.
EXPLAIN MERGE INTO demo_distributed t
USING demo_source_table s
ON (s.id2 + 1 = t.id1)
WHEN MATCHED THEN UPDATE SET val1 = 15;
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
-- Sub-queries and CTEs are not allowed in actions and ON clause
-- Setup: target_1 and source_2 are both distributed on column a. The statements below
-- join target_1.a to the source's column b, and each expected error mentions repartitioning.
CREATE TABLE target_1 (a int, b int, c int);
SELECT create_distributed_table('target_1', 'a');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE source_2 (a int, b int, c int);
SELECT create_distributed_table('source_2', 'a');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO target_1 VALUES(1, 2, 3);
INSERT INTO target_1 VALUES(4, 5, 6);
INSERT INTO target_1 VALUES(11, 12, 13);
INSERT INTO source_2 VALUES(1, 2, 3);
-- The source is an aggregating CTE, and the WHEN NOT MATCHED condition runs a
-- scalar sub-query against that same CTE.
WITH cte_1 as (SELECT max(a) as max_a, max(b) as b FROM source_2)
MERGE INTO target_1
USING cte_1
ON (target_1.a = cte_1.b)
WHEN NOT MATCHED AND (SELECT max_a > 10 FROM cte_1) THEN
INSERT VALUES (cte_1.b, 100);
ERROR: Sub-queries and CTEs are not allowed in actions for MERGE with repartitioning
HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query
-- Same shape with a non-aggregating CTE as the source.
WITH cte_1 as (SELECT a, b FROM source_2)
MERGE INTO target_1
USING cte_1
ON (target_1.a = cte_1.b)
WHEN NOT MATCHED AND (SELECT a > 10 FROM cte_1) THEN
INSERT VALUES (cte_1.b, 100);
ERROR: Sub-queries and CTEs are not allowed in actions for MERGE with repartitioning
HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query
-- Plain table as the source; the WHEN NOT MATCHED condition uses an inline
-- sub-query that aggregates over the target table.
MERGE INTO target_1
USING source_2
ON (target_1.a = source_2.b)
WHEN NOT MATCHED AND (SELECT max_a > 10 FROM (SELECT max(a) as max_a, max(b) as b FROM target_1) as foo) THEN
INSERT VALUES (source_2.b, 100);
ERROR: Sub-queries and CTEs are not allowed in actions for MERGE with repartitioning
HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query
-- or same with CTEs
-- CTE variant: the WHEN NOT MATCHED condition reads the aggregate from cte_1 (defined over
-- target_1) so that the CTE path is exercised, rather than repeating an inline sub-query
-- and leaving cte_1 unreferenced. The expected error is the one reported for sub-queries
-- and CTEs in actions.
WITH cte_1 as (SELECT max(a) as max_a, max(b) as b FROM target_1)
MERGE INTO target_1
USING source_2
ON (target_1.a = source_2.b)
WHEN NOT MATCHED AND (SELECT max_a > 10 FROM cte_1) THEN
INSERT VALUES (source_2.b, 100);
ERROR: Sub-queries and CTEs are not allowed in actions for MERGE with repartitioning
HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query
-- Two CTEs over the target are declared; only cte_2 is referenced, from the
-- WHEN NOT MATCHED condition. The source is a sub-query over source_2.
WITH cte_1 as (SELECT a, b FROM target_1), cte_2 as (select b,a from target_1)
MERGE INTO target_1
USING (SELECT * FROM source_2) as subq
ON (target_1.a = subq.b)
WHEN NOT MATCHED AND (SELECT a > 10 FROM cte_2) THEN
INSERT VALUES (subq.b, 100);
ERROR: Sub-queries and CTEs are not allowed in actions for MERGE with repartitioning
HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query
-- Roles swapped (source_2 is the target here); scalar sub-query in the UPDATE SET expression.
MERGE INTO source_2
USING target_1
ON (target_1.a = source_2.a)
WHEN MATCHED THEN
UPDATE SET b = (SELECT max(a) FROM source_2);
ERROR: Sub-queries and CTEs are not allowed in actions for MERGE with repartitioning
HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query
-- Scalar sub-query inside the INSERT VALUES list.
MERGE INTO source_2
USING target_1
ON (target_1.a = source_2.a)
WHEN NOT MATCHED THEN
INSERT VALUES (target_1.a,(select max(a) from target_1));
ERROR: Sub-queries and CTEs are not allowed in actions for MERGE with repartitioning
HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query
-- Aggregate sub-query over the source table in the WHEN NOT MATCHED condition.
MERGE INTO target_1
USING source_2
ON (target_1.a = source_2.b)
WHEN NOT MATCHED AND (SELECT max(c) > 10 FROM source_2) THEN
INSERT VALUES (source_2.b, 100);
ERROR: Sub-queries and CTEs are not allowed in actions for MERGE with repartitioning
HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query
-- Test in ON clause
-- Each statement ANDs a scalar sub-query into the join condition; the expected error
-- names the ON clause rather than the actions.
-- Constant-valued sub-query (1=1) over the target table.
MERGE INTO target_1 t2
USING (SELECT * FROM source_2) AS t1
ON (t1.a = t2.a AND (SELECT 1=1 FROM target_1))
WHEN MATCHED THEN
DELETE;
ERROR: Sub-queries and CTEs are not allowed in ON clause for MERGE with repartitioning
HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query
-- Aggregate sub-query over the target table.
MERGE INTO target_1 t2
USING (SELECT * FROM source_2) AS t1
ON (t1.a = t2.a AND (SELECT max(a) > 55 FROM target_1))
WHEN MATCHED THEN
DELETE;
ERROR: Sub-queries and CTEs are not allowed in ON clause for MERGE with repartitioning
HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query
-- CTE variant: cte_1 feeds the source sub-query and cte_2 is read by the ON-clause sub-query.
WITH cte_1 as (SELECT a, b FROM target_1), cte_2 as (select b,a from target_1)
MERGE INTO target_1 t2
USING (SELECT * FROM cte_1) AS t1
ON (t1.a = t2.a AND (SELECT max(a) > 55 FROM cte_2))
WHEN MATCHED THEN
DELETE;
ERROR: Sub-queries and CTEs are not allowed in ON clause for MERGE with repartitioning
HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query
-- Datatype mismatch between target and source join column
-- The source exposes FLOOR(b) as its join column a, while the target's distribution
-- column a is declared int; the expected error reports the resulting type mismatch.
WITH src AS (SELECT FLOOR(b) AS a FROM source_2)
MERGE INTO target_1 t
USING src
ON t.a = src.a
WHEN MATCHED THEN DELETE;
ERROR: In the MERGE ON clause, there is a datatype mismatch between target's distribution column and the expression originating from the source.
DETAIL: If the types are different, Citus uses different hash functions for the two column types, which might lead to incorrect repartitioning of the result data
-- Cleanup: restore the default message level, then drop the foreign server, the
-- merge_when_and_write() function, and finally the whole merge_schema with everything in it.
RESET client_min_messages;
DROP SERVER foreign_server CASCADE;
NOTICE: drop cascades to 3 other objects
DETAIL: drop cascades to user mapping for postgres on server foreign_server
drop cascades to foreign table foreign_table_4000043
drop cascades to foreign table foreign_table
NOTICE: foreign table "foreign_table_4000043" does not exist, skipping
CONTEXT: SQL statement "SELECT citus_drop_all_shards(v_obj.objid, v_obj.schema_name, v_obj.object_name, drop_shards_metadata_only := false)"
PL/pgSQL function citus_drop_trigger() line XX at PERFORM
DROP FUNCTION merge_when_and_write();
DROP SCHEMA merge_schema CASCADE;
NOTICE: drop cascades to 107 other objects
DETAIL: drop cascades to function insert_data()
drop cascades to table local_local
drop cascades to table target
drop cascades to table source
drop cascades to function load()
drop cascades to table merge_result
drop cascades to table pg_t2
drop cascades to table t2_4000030
drop cascades to table s2_4000031
drop cascades to table t2
drop cascades to table s2
drop cascades to table dist_res
drop cascades to table t3_4000032
drop cascades to table s3_1_4000033
drop cascades to table s3_2_4000034
drop cascades to table pg_t3
drop cascades to table t3
drop cascades to table s3_1
drop cascades to table s3_2
drop cascades to table local_t3
drop cascades to table tf_target_4000035
drop cascades to table tf_result
drop cascades to table tf_target
drop cascades to table tf_local
drop cascades to table vl_target_4000036
drop cascades to table vl_result
drop cascades to table vl_target
drop cascades to table vl_local
drop cascades to function f_immutable(integer)
drop cascades to table rs_target_4000037
drop cascades to table rs_result
drop cascades to table rs_target
drop cascades to table rs_local
drop cascades to table mv_target_4000038
drop cascades to table mv_source_table_4000039
drop cascades to table mv_result
drop cascades to table mv_target
drop cascades to table mv_source_table
drop cascades to materialized view mv_source
drop cascades to table mv_local
drop cascades to table dist_table_4000041
drop cascades to function f_dist()
drop cascades to table fn_target_4000040
drop cascades to table fn_result
drop cascades to table fn_target
drop cascades to table dist_table
drop cascades to table fn_local
drop cascades to table ft_target
drop cascades to table ft_source_4000042
drop cascades to table ft_source
drop cascades to extension postgres_fdw
drop cascades to table target_cj
drop cascades to table source_cj1
drop cascades to table source_cj2
drop cascades to table pg_target
drop cascades to table pg_source
drop cascades to table citus_target
drop cascades to table citus_source
drop cascades to function compare_tables()
drop cascades to table source_pushdowntest
drop cascades to table target_pushdowntest
drop cascades to table source_withdata
drop cascades to table target_table
drop cascades to view pg_source_view
drop cascades to view citus_source_view
drop cascades to table pg_pa_target
drop cascades to table citus_pa_target
drop cascades to table pg_pa_source
drop cascades to table citus_pa_source
drop cascades to function pa_compare_tables()
drop cascades to table source_json
drop cascades to table target_json
drop cascades to function immutable_hash(integer)
drop cascades to table source_serial
drop cascades to table target_serial
drop cascades to table target_set
drop cascades to table source_set
drop cascades to table refsource_ref
drop cascades to table pg_result
drop cascades to table refsource_ref_4000128
drop cascades to table pg_ref
drop cascades to table local_ref
drop cascades to table reftarget_local
drop cascades to table dist_reftarget
drop cascades to function setup_demo_data()
drop cascades to function merge_demo_data()
drop cascades to table demo_distributed
drop cascades to table demo_source_table
drop cascades to table pg_demo_result
drop cascades to table dist_demo_result
drop cascades to table source_filter
drop cascades to table target_filter
drop cascades to function load_filter()
drop cascades to table source_6785
drop cascades to table target_6785
drop cascades to function add_s(integer,integer)
drop cascades to table pg
drop cascades to table t1_4000190
drop cascades to table s1_4000191
drop cascades to table t1
and 7 other objects (see server log for list)