-- Regression transcript for CTEs that contain data-modifying statements,
-- run inside and outside explicit transactions against distributed tables.
-- Each SQL statement is followed by the client output recorded for it
-- (result tables, DEBUG lines, NOTICE/DETAIL lines).
-- NOTE(review): this looks like an expected-output file; any comment change
-- here presumably has to be mirrored in the companion input script — confirm.

-- Everything is created inside a dedicated schema so that the final
-- DROP SCHEMA ... CASCADE removes both tables and the helper function.
CREATE SCHEMA with_transactions;
SET search_path TO with_transactions, public;
-- Citus settings applied before the tables below are distributed.
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1; -- https://github.com/citusdata/citus/issues/3061
SET citus.next_placement_id TO 800000;
-- Two tables with identical columns, both distributed on tenant_id.
CREATE TABLE with_transactions.raw_table (tenant_id int, income float, created_at timestamptz);
SELECT create_distributed_table('raw_table', 'tenant_id');
 create_distributed_table 
--------------------------
 
(1 row)

CREATE TABLE with_transactions.second_raw_table (tenant_id int, income float, created_at timestamptz);
SELECT create_distributed_table('second_raw_table', 'tenant_id');
 create_distributed_table 
--------------------------
 
(1 row)

-- Load 101 rows (i = 0..100): tenant_id cycles through 0..9, income is i * 10,
-- and created_at advances by one day per row starting at 2014-01-10 20:00:00.
INSERT INTO raw_table (tenant_id, income, created_at)
    SELECT i % 10, i * 10.0, timestamp '2014-01-10 20:00:00' + i * interval '1 day'
    FROM generate_series (0, 100) i;
-- Copy the same rows into the second table.
INSERT INTO second_raw_table SELECT * FROM raw_table;
-- Emit DEBUG1 messages so the subplan generation for each CTE is recorded below.
SET client_min_messages TO DEBUG1;
-- run a transaction whose modifying CTE DELETEs rows and whose outer statement
-- UPDATEs the tenants returned by that DELETE, then roll the transaction back
BEGIN;
WITH ids_to_delete AS (
    SELECT tenant_id FROM raw_table WHERE income < 250
),
deleted_ids AS (
    DELETE FROM raw_table WHERE created_at < '2014-02-10 20:00:00' AND tenant_id IN (SELECT * from ids_to_delete) RETURNING tenant_id
)
UPDATE raw_table SET income = income * 2 WHERE tenant_id IN (SELECT tenant_id FROM deleted_ids);
DEBUG:  generating subplan 3_1 for CTE ids_to_delete: SELECT tenant_id FROM with_transactions.raw_table WHERE (income OPERATOR(pg_catalog.<) (250)::double precision)
DEBUG:  generating subplan 3_2 for CTE deleted_ids: DELETE FROM with_transactions.raw_table WHERE ((created_at OPERATOR(pg_catalog.<) 'Mon Feb 10 20:00:00 2014 PST'::timestamp with time zone) AND (tenant_id OPERATOR(pg_catalog.=) ANY (SELECT ids_to_delete.tenant_id FROM (SELECT intermediate_result.tenant_id FROM read_intermediate_result('3_1'::text, 'binary'::citus_copy_format) intermediate_result(tenant_id integer)) ids_to_delete))) RETURNING tenant_id
DEBUG:  Plan 3 query after replacing subqueries and CTEs: UPDATE with_transactions.raw_table SET income = (income OPERATOR(pg_catalog.*) (2)::double precision) WHERE (tenant_id OPERATOR(pg_catalog.=) ANY (SELECT deleted_ids.tenant_id FROM (SELECT intermediate_result.tenant_id FROM read_intermediate_result('3_2'::text, 'binary'::citus_copy_format) intermediate_result(tenant_id integer)) deleted_ids))
ROLLBACK;
-- see that both the UPDATE and the DELETE were rolled back: all 101 rows are
-- still present and the largest income is still 1000 (100 * 10, not doubled)
SELECT count(*) FROM raw_table;
 count 
-------
   101
(1 row)

SELECT max(income) FROM raw_table;
 max  
------
 1000
(1 row)

-- multi-statement multi shard modifying statements should work:
-- one transaction runs a SELECT, a CTE-driven INSERT + UPDATE, and a TRUNCATE
BEGIN;
SELECT count (*) FROM second_raw_table;
 count 
-------
   101
(1 row)

WITH distinct_count AS (
    SELECT count(DISTINCT created_at) FROM raw_table
),
ids_inserted AS (
    INSERT INTO raw_table VALUES (11, 1000, now()) RETURNING tenant_id
)
UPDATE raw_table SET created_at = '2001-02-10 20:00:00'
WHERE tenant_id IN (SELECT tenant_id FROM ids_inserted) AND tenant_id < (SELECT count FROM distinct_count);
DEBUG:  generating subplan 9_1 for CTE distinct_count: SELECT count(DISTINCT created_at) AS count FROM with_transactions.raw_table
DEBUG:  generating subplan 9_2 for CTE ids_inserted: INSERT INTO with_transactions.raw_table (tenant_id, income, created_at) VALUES (11, 1000, now()) RETURNING tenant_id
DEBUG:  Plan 9 query after replacing subqueries and CTEs: UPDATE with_transactions.raw_table SET created_at = 'Sat Feb 10 20:00:00 2001 PST'::timestamp with time zone WHERE ((tenant_id OPERATOR(pg_catalog.=) ANY (SELECT ids_inserted.tenant_id FROM (SELECT intermediate_result.tenant_id FROM read_intermediate_result('9_2'::text, 'binary'::citus_copy_format) intermediate_result(tenant_id integer)) ids_inserted)) AND (tenant_id OPERATOR(pg_catalog.<) (SELECT distinct_count.count FROM (SELECT intermediate_result.count FROM read_intermediate_result('9_1'::text, 'binary'::citus_copy_format) intermediate_result(count bigint)) distinct_count)))
TRUNCATE second_raw_table;
COMMIT;
-- sequential insert followed by parallel update works just fine
-- (this statement runs outside an explicit transaction block)
WITH ids_inserted AS (
    INSERT INTO raw_table VALUES (11, 1000, now()), (12, 1000, now()), (13, 1000, now()) RETURNING tenant_id
)
UPDATE raw_table SET created_at = '2001-02-10 20:00:00' WHERE tenant_id IN (SELECT tenant_id FROM ids_inserted);
DEBUG:  generating subplan 12_1 for CTE ids_inserted: INSERT INTO with_transactions.raw_table (tenant_id, income, created_at) VALUES (11,1000,now()), (12,1000,now()), (13,1000,now()) RETURNING raw_table.tenant_id
DEBUG:  Plan 12 query after replacing subqueries and CTEs: UPDATE with_transactions.raw_table SET created_at = 'Sat Feb 10 20:00:00 2001 PST'::timestamp with time zone WHERE (tenant_id OPERATOR(pg_catalog.=) ANY (SELECT ids_inserted.tenant_id FROM (SELECT intermediate_result.tenant_id FROM read_intermediate_result('12_1'::text, 'binary'::citus_copy_format) intermediate_result(tenant_id integer)) ids_inserted))
-- make sure that everything was committed: 101 original rows + 1 + 3 inserted
-- rows = 105, the four rows with tenant_id 11, 12 or 13 carry the updated
-- created_at, and second_raw_table is empty after the TRUNCATE
SELECT count(*) FROM raw_table;
 count 
-------
   105
(1 row)

SELECT count(*) FROM raw_table WHERE created_at = '2001-02-10 20:00:00';
 count 
-------
     4
(1 row)

SELECT count(*) FROM second_raw_table;
 count 
-------
     0
(1 row)

-- sequential insert followed by a sequential real-time query should be fine
-- (second_raw_table was truncated above, so the SELECT returns no rows;
-- the INSERT is undone by the ROLLBACK that follows)
BEGIN;
SET LOCAL citus.multi_shard_modify_mode TO 'sequential';
WITH ids_inserted AS (
    INSERT INTO raw_table (tenant_id) VALUES (11), (12), (13), (14) RETURNING tenant_id
)
SELECT income FROM second_raw_table WHERE tenant_id IN (SELECT * FROM ids_inserted) ORDER BY 1 DESC LIMIT 3;
DEBUG:  generating subplan 17_1 for CTE ids_inserted: INSERT INTO with_transactions.raw_table (tenant_id) VALUES (11), (12), (13), (14) RETURNING raw_table.tenant_id
DEBUG:  Plan 17 query after replacing subqueries and CTEs: SELECT income FROM with_transactions.second_raw_table WHERE (tenant_id OPERATOR(pg_catalog.=) ANY (SELECT ids_inserted.tenant_id FROM (SELECT intermediate_result.tenant_id FROM read_intermediate_result('17_1'::text, 'binary'::citus_copy_format) intermediate_result(tenant_id integer)) ids_inserted)) ORDER BY income DESC LIMIT 3
DEBUG:  push down of limit count: 3
 income 
--------
(0 rows)

ROLLBACK;
-- Restore the default message level; no DEBUG lines are recorded from here on.
RESET client_min_messages;
-- run_ctes(id): through a CTE over the raw_table rows with tenant_id < 10,
-- counts the rows whose tenant_id equals the id argument and returns that
-- count as text.
CREATE OR REPLACE FUNCTION run_ctes(id int)
RETURNS text
LANGUAGE 'plpgsql'
AS $BODY$
DECLARE
    value text;
BEGIN
    WITH dist AS (SELECT tenant_id FROM raw_table WHERE tenant_id < 10 OFFSET 0)
    SELECT count(*) INTO value FROM dist WHERE id = tenant_id;
    RETURN value ;
END;
$BODY$;
-- Call run_ctes once per generated value, max_connections + 2 times in total;
-- the recorded count of 102 corresponds to max_connections = 100.
-- NOTE(review): presumably this guards against the function exhausting
-- connections when it is invoked more often than max_connections — confirm.
SELECT count(*) FROM (SELECT run_ctes(s) FROM generate_series(1,current_setting('max_connections')::int+2) s) a;
 count 
-------
   102
(1 row)

-- Clean up: dropping the schema cascades to both tables and to run_ctes.
DROP SCHEMA with_transactions CASCADE;
NOTICE:  drop cascades to 3 other objects
DETAIL:  drop cascades to table raw_table
drop cascades to table second_raw_table
drop cascades to function run_ctes(integer)