-- -- Test DDL command propagation failures -- Different dimensions we're testing: -- Replication factor, 1PC-2PC, sequential-parallel modes -- CREATE SCHEMA ddl_failure; SET citus.force_max_query_parallelization TO ON; SET search_path TO 'ddl_failure'; -- do not cache any connections SET citus.max_cached_conns_per_worker TO 0; -- we don't want to see the prepared transaction numbers in the warnings SET client_min_messages TO WARNING; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) SET citus.next_shard_id TO 100800; -- we'll start with replication factor 1, 2PC and parallel mode SET citus.shard_count = 4; SET citus.shard_replication_factor = 1; CREATE TABLE test_table (key int, value int); SELECT create_distributed_table('test_table', 'key'); create_distributed_table --------------------------------------------------------------------- (1 row) -- in the first test, kill just in the first -- response we get from the worker SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; ERROR: connection to the remote node postgres@localhost:xxxxx failed with the following error: connection not open SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,value} (1 row) -- cancel just in the first -- response we get from the worker SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; ERROR: canceling statement due to user request SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg 
--------------------------------------------------------------------- {key,value} (1 row) -- kill as soon as the coordinator sends begin SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; WARNING: connection not open CONTEXT: while executing command on localhost:xxxxx ERROR: failure on connection marked as essential: localhost:xxxxx SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,value} (1 row) -- cancel as soon as the coordinator sends begin SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; ERROR: canceling statement due to user request SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,value} (1 row) -- kill as soon as the coordinator sends worker_apply_shard_ddl_command SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; ERROR: connection to the remote node postgres@localhost:xxxxx failed with the following error: connection not open -- show that we've never commited the changes SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,value} (1 row) -- cancel as 
soon as the coordinator sends worker_apply_shard_ddl_command SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; ERROR: canceling statement due to user request -- show that we've never commited the changes SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,value} (1 row) -- manually drop & re-create the table for the next tests SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) DROP TABLE test_table; SET citus.next_shard_id TO 100800; SET citus.shard_count = 4; SET citus.shard_replication_factor = 1; CREATE TABLE test_table (key int, value int); SELECT create_distributed_table('test_table', 'key'); create_distributed_table --------------------------------------------------------------------- (1 row) -- cancel as soon as the coordinator sends COMMIT SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; WARNING: CONTEXT: while executing command on localhost:xxxxx WARNING: failed to commit transaction on localhost:xxxxx SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) -- interrupts are held during COMMIT/ROLLBACK, so the command -- should have been applied without any issues since cancel is ignored SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- 
{key,new_column,value} (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100800,t,"{key,new_column,value}") (localhost,9060,100802,t,"{key,new_column,value}") (localhost,57637,100801,t,"{key,new_column,value}") (localhost,57637,100803,t,"{key,new_column,value}") (4 rows) -- the following tests rely the column not exists, so drop manually ALTER TABLE test_table DROP COLUMN new_column; -- but now kill just after the worker sends response to -- COMMIT command, so we'll have lots of warnings but the command -- should have been committed both on the distributed table and the placements SET client_min_messages TO ERROR; SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,new_column,value} (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100800,t,"{key,new_column,value}") (localhost,9060,100802,t,"{key,new_column,value}") (localhost,57637,100801,t,"{key,new_column,value}") (localhost,57637,100803,t,"{key,new_column,value}") (4 rows) -- now cancel just after the worker sends response to -- but Postgres doesn't accepts interrupts 
during COMMIT and ROLLBACK -- so should not cancel at all, so not an effective test but adding in -- case Citus messes up this behaviour SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table DROP COLUMN new_column; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) -- the remaining tests rely on table having new_column ALTER TABLE test_table ADD COLUMN new_column INT; -- finally, test failing on ROLLBACK with 1PC -- fail just after the coordinator sends the ROLLBACK -- so the command can be rollbacked SELECT citus.mitmproxy('conn.onQuery(query="ROLLBACK").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) BEGIN; SET LOCAL client_min_messages TO WARNING; ALTER TABLE test_table DROP COLUMN new_column; ROLLBACK; WARNING: connection not open CONTEXT: while executing command on localhost:xxxxx WARNING: connection not open CONTEXT: while executing command on localhost:xxxxx -- now cancel just after the worker sends response to -- but Postgres doesn't accepts interrupts during COMMIT and ROLLBACK -- so should not cancel at all, so not an effective test but adding in -- case Citus messes up this behaviour SELECT citus.mitmproxy('conn.onQuery(query="ROLLBACK").cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- (1 row) BEGIN; ALTER TABLE test_table DROP COLUMN new_column; ROLLBACK; -- but now kill just after the worker sends response to -- ROLLBACK command, so we'll have lots of warnings but the command -- should have been rollbacked both on the distributed table and the placements SELECT citus.mitmproxy('conn.onCommandComplete(command="ROLLBACK").kill()'); mitmproxy 
--------------------------------------------------------------------- (1 row) BEGIN; ALTER TABLE test_table DROP COLUMN new_column; ROLLBACK; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,new_column,value} (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100800,t,"{key,new_column,value}") (localhost,9060,100802,t,"{key,new_column,value}") (localhost,57637,100801,t,"{key,new_column,value}") (localhost,57637,100803,t,"{key,new_column,value}") (4 rows) -- in the first test, kill just in the first -- response we get from the worker SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table DROP COLUMN new_column; ERROR: connection to the remote node postgres@localhost:xxxxx failed with the following error: connection not open SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,new_column,value} (1 row) -- cancel just in the first -- response we get from the worker SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table DROP COLUMN new_column; ERROR: canceling statement due to user request SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid 
= 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,new_column,value} (1 row) -- kill as soon as the coordinator sends begin SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table DROP COLUMN new_column; ERROR: failure on connection marked as essential: localhost:xxxxx SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,new_column,value} (1 row) -- cancel as soon as the coordinator sends begin SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table DROP COLUMN new_column; ERROR: canceling statement due to user request SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,new_column,value} (1 row) -- kill as soon as the coordinator sends worker_apply_shard_ddl_command SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table DROP COLUMN new_column; ERROR: connection to the remote node postgres@localhost:xxxxx failed with the following error: connection not open SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,new_column,value} (1 row) -- cancel as soon as the coordinator sends 
worker_apply_shard_ddl_command SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table DROP COLUMN new_column; ERROR: canceling statement due to user request SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,new_column,value} (1 row) -- killing on PREPARE should be fine, everything should be rollbacked SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table DROP COLUMN new_column; ERROR: connection not open CONTEXT: while executing command on localhost:xxxxx SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,new_column,value} (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100800,t,"{key,new_column,value}") (localhost,9060,100802,t,"{key,new_column,value}") (localhost,57637,100801,t,"{key,new_column,value}") (localhost,57637,100803,t,"{key,new_column,value}") (4 rows) -- we should be able to recover the transaction and -- see that the command is rollbacked SELECT recover_prepared_transactions(); recover_prepared_transactions --------------------------------------------------------------------- 2 (1 row) SELECT 
run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100800,t,"{key,new_column,value}") (localhost,9060,100802,t,"{key,new_column,value}") (localhost,57637,100801,t,"{key,new_column,value}") (localhost,57637,100803,t,"{key,new_column,value}") (4 rows) -- cancelling on PREPARE should be fine, everything should be rollbacked SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table DROP COLUMN new_column; ERROR: canceling statement due to user request SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,new_column,value} (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100800,t,"{key,new_column,value}") (localhost,9060,100802,t,"{key,new_column,value}") (localhost,57637,100801,t,"{key,new_column,value}") (localhost,57637,100803,t,"{key,new_column,value}") (4 rows) -- we should be able to recover the transaction and -- see that the command is rollbacked SELECT recover_prepared_transactions(); recover_prepared_transactions --------------------------------------------------------------------- 1 (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY 
name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100800,t,"{key,new_column,value}") (localhost,9060,100802,t,"{key,new_column,value}") (localhost,57637,100801,t,"{key,new_column,value}") (localhost,57637,100803,t,"{key,new_column,value}") (4 rows) -- killing on command complete of COMMIT PREPARE, we should see that the command succeeds -- and all the workers committed SELECT citus.mitmproxy('conn.onCommandComplete(command="COMMIT PREPARED").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table DROP COLUMN new_column; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,value} (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100800,t,"{key,value}") (localhost,9060,100802,t,"{key,value}") (localhost,57637,100801,t,"{key,value}") (localhost,57637,100803,t,"{key,value}") (4 rows) -- we shouldn't have any prepared transactions in the workers SELECT recover_prepared_transactions(); recover_prepared_transactions --------------------------------------------------------------------- 0 (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- 
(localhost,9060,100800,t,"{key,value}") (localhost,9060,100802,t,"{key,value}") (localhost,57637,100801,t,"{key,value}") (localhost,57637,100803,t,"{key,value}") (4 rows) -- kill as soon as the coordinator sends COMMIT SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) -- some of the placements would be missing the new column -- since we've not commited the prepared transactions SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,new_column,value} (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100800,t,"{key,value}") (localhost,9060,100802,t,"{key,value}") (localhost,57637,100801,t,"{key,new_column,value}") (localhost,57637,100803,t,"{key,new_column,value}") (4 rows) -- we should be able to recover the transaction and -- see that the command is committed SELECT recover_prepared_transactions(); recover_prepared_transactions --------------------------------------------------------------------- 2 (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100800,t,"{key,new_column,value}") (localhost,9060,100802,t,"{key,new_column,value}") 
(localhost,57637,100801,t,"{key,new_column,value}") (localhost,57637,100803,t,"{key,new_column,value}") (4 rows) -- finally, test failing on ROLLBACK with 2PC -- fail just after the coordinator sends the ROLLBACK -- so the command can be rollbacked SELECT citus.mitmproxy('conn.onQuery(query="ROLLBACK").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) BEGIN; ALTER TABLE test_table DROP COLUMN new_column; ROLLBACK; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) -- ROLLBACK should have failed on the distributed table and the placements SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,new_column,value} (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100800,t,"{key,new_column,value}") (localhost,9060,100802,t,"{key,new_column,value}") (localhost,57637,100801,t,"{key,new_column,value}") (localhost,57637,100803,t,"{key,new_column,value}") (4 rows) -- but now kill just after the worker sends response to -- ROLLBACK command, so we'll have lots of warnings but the command -- should have been rollbacked both on the distributed table and the placements SELECT citus.mitmproxy('conn.onCommandComplete(command="ROLLBACK").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) BEGIN; ALTER TABLE test_table DROP COLUMN new_column; ROLLBACK; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) -- make sure that the transaction is rollbacked SELECT 
recover_prepared_transactions(); recover_prepared_transactions --------------------------------------------------------------------- 0 (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100800,t,"{key,new_column,value}") (localhost,9060,100802,t,"{key,new_column,value}") (localhost,57637,100801,t,"{key,new_column,value}") (localhost,57637,100803,t,"{key,new_column,value}") (4 rows) -- another set of tests with 2PC and replication factor = 2 SET citus.shard_count = 4; SET citus.shard_replication_factor = 2; -- re-create the table with replication factor 2 DROP TABLE test_table; CREATE TABLE test_table (key int, value int); SELECT create_distributed_table('test_table', 'key'); create_distributed_table --------------------------------------------------------------------- (1 row) -- in the first test, kill just in the first -- response we get from the worker SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; ERROR: connection to the remote node postgres@localhost:xxxxx failed with the following error: connection not open SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,value} (1 row) -- cancel just in the first -- response we get from the worker SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; ERROR: canceling statement due to user request SELECT array_agg(name::text ORDER BY 
name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,value} (1 row) -- kill as soon as the coordinator sends begin SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; ERROR: failure on connection marked as essential: localhost:xxxxx SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,value} (1 row) -- cancel as soon as the coordinator sends begin SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; ERROR: canceling statement due to user request SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,value} (1 row) -- kill as soon as the coordinator sends worker_apply_shard_ddl_command SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; ERROR: connection to the remote node postgres@localhost:xxxxx failed with the following error: connection not open SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,value} (1 row) -- cancel as soon as the coordinator sends 
worker_apply_shard_ddl_command SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; ERROR: canceling statement due to user request SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,value} (1 row) -- killing on PREPARE should be fine, everything should be rollbacked SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; ERROR: connection not open CONTEXT: while executing command on localhost:xxxxx SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) -- we should be able to recover the transaction and -- see that the command is rollbacked on all workers -- note that in this case recover_prepared_transactions() -- sends ROLLBACK PREPARED to the workers given that -- the transaction has not been commited on any placement yet SELECT recover_prepared_transactions(); recover_prepared_transactions --------------------------------------------------------------------- 4 (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100804,t,"{key,value}") (localhost,9060,100805,t,"{key,value}") (localhost,9060,100806,t,"{key,value}") (localhost,9060,100807,t,"{key,value}") (localhost,57637,100804,t,"{key,value}") (localhost,57637,100805,t,"{key,value}") 
(localhost,57637,100806,t,"{key,value}") (localhost,57637,100807,t,"{key,value}") (8 rows) -- killing on command complete of COMMIT PREPARE, we should see that the command succeeds -- and all the workers committed SELECT citus.mitmproxy('conn.onCommandComplete(command="COMMIT PREPARED").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,new_column,value} (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100804,t,"{key,new_column,value}") (localhost,9060,100805,t,"{key,new_column,value}") (localhost,9060,100806,t,"{key,new_column,value}") (localhost,9060,100807,t,"{key,new_column,value}") (localhost,57637,100804,t,"{key,new_column,value}") (localhost,57637,100805,t,"{key,new_column,value}") (localhost,57637,100806,t,"{key,new_column,value}") (localhost,57637,100807,t,"{key,new_column,value}") (8 rows) -- we shouldn't have any prepared transactions in the workers SELECT recover_prepared_transactions(); recover_prepared_transactions --------------------------------------------------------------------- 0 (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100804,t,"{key,new_column,value}") 
(localhost,9060,100805,t,"{key,new_column,value}") (localhost,9060,100806,t,"{key,new_column,value}") (localhost,9060,100807,t,"{key,new_column,value}") (localhost,57637,100804,t,"{key,new_column,value}") (localhost,57637,100805,t,"{key,new_column,value}") (localhost,57637,100806,t,"{key,new_column,value}") (localhost,57637,100807,t,"{key,new_column,value}") (8 rows) -- kill as soon as the coordinator sends COMMIT SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table DROP COLUMN new_column; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) -- some of the placements would be missing the new column -- since we've not commited the prepared transactions SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,value} (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100804,t,"{key,new_column,value}") (localhost,9060,100805,t,"{key,new_column,value}") (localhost,9060,100806,t,"{key,new_column,value}") (localhost,9060,100807,t,"{key,new_column,value}") (localhost,57637,100804,t,"{key,value}") (localhost,57637,100805,t,"{key,value}") (localhost,57637,100806,t,"{key,value}") (localhost,57637,100807,t,"{key,value}") (8 rows) -- we should be able to recover the transaction and -- see that the command is committed SELECT recover_prepared_transactions(); recover_prepared_transactions --------------------------------------------------------------------- 4 (1 row) SELECT 
run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100804,t,"{key,value}") (localhost,9060,100805,t,"{key,value}") (localhost,9060,100806,t,"{key,value}") (localhost,9060,100807,t,"{key,value}") (localhost,57637,100804,t,"{key,value}") (localhost,57637,100805,t,"{key,value}") (localhost,57637,100806,t,"{key,value}") (localhost,57637,100807,t,"{key,value}") (8 rows) -- finally, test failing on ROLLBACK with 2PC -- fail just after the coordinator sends the ROLLBACK -- so the command can be rolled back SELECT citus.mitmproxy('conn.onQuery(query="ROLLBACK").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) BEGIN; ALTER TABLE test_table ADD COLUMN new_column INT; ROLLBACK; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) -- the ALTER TABLE should not be visible on the distributed table or the placements SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,value} (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100804,t,"{key,value}") (localhost,9060,100805,t,"{key,value}") (localhost,9060,100806,t,"{key,value}") (localhost,9060,100807,t,"{key,value}") (localhost,57637,100804,t,"{key,value}") (localhost,57637,100805,t,"{key,value}") (localhost,57637,100806,t,"{key,value}") (localhost,57637,100807,t,"{key,value}") (8 rows) -- but now kill just after the worker sends 
response to -- ROLLBACK command, so we'll have lots of warnings but the command -- should have been rolled back both on the distributed table and the placements SELECT citus.mitmproxy('conn.onCommandComplete(command="ROLLBACK").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) BEGIN; ALTER TABLE test_table ADD COLUMN new_column INT; ROLLBACK; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) -- make sure that the transaction is rolled back SELECT recover_prepared_transactions(); recover_prepared_transactions --------------------------------------------------------------------- 0 (1 row) SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; run_command_on_placements --------------------------------------------------------------------- (localhost,9060,100804,t,"{key,value}") (localhost,9060,100805,t,"{key,value}") (localhost,9060,100806,t,"{key,value}") (localhost,9060,100807,t,"{key,value}") (localhost,57637,100804,t,"{key,value}") (localhost,57637,100805,t,"{key,value}") (localhost,57637,100806,t,"{key,value}") (localhost,57637,100807,t,"{key,value}") (8 rows) -- now do some tests with sequential mode SET citus.multi_shard_modify_mode TO 'sequential'; -- kill as soon as the coordinator sends begin SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; ERROR: failure on connection marked as essential: localhost:xxxxx SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,value} (1 row) -- cancel as soon as the 
coordinator sends begin SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; ERROR: canceling statement due to user request SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; array_agg --------------------------------------------------------------------- {key,value} (1 row) -- kill as soon as the coordinator sends worker_apply_shard_ddl_command SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; ERROR: connection to the remote node postgres@localhost:xxxxx failed with the following error: connection not open -- kill as soon as the coordinator sends worker_apply_shard_ddl_command the 2nd time SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").after(2).kill()'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; ERROR: connection to the remote node postgres@localhost:xxxxx failed with the following error: connection not open -- cancel as soon as the coordinator sends worker_apply_shard_ddl_command the 2nd time SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").after(2).cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE test_table ADD COLUMN new_column INT; ERROR: canceling statement due to user request SET search_path TO 'public'; DROP SCHEMA ddl_failure CASCADE;