diff --git a/.circleci/config.yml b/.circleci/config.yml index 37834ee07..c52b90f47 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -6,7 +6,7 @@ orbs: parameters: image_suffix: type: string - default: '-dev-b07a192' + default: '-dev-aa7ace1' pg13_version: type: string default: '13.4' diff --git a/src/test/regress/expected/failure_copy_on_hash_1.out b/src/test/regress/expected/failure_copy_on_hash_1.out new file mode 100644 index 000000000..8e54ee13b --- /dev/null +++ b/src/test/regress/expected/failure_copy_on_hash_1.out @@ -0,0 +1,396 @@ +-- +-- Failure tests for COPY to hash distributed tables +-- +CREATE SCHEMA copy_distributed_table; +SET search_path TO 'copy_distributed_table'; +SET citus.next_shard_id TO 1710000; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- With one placement COPY should error out and placement should stay healthy. +SET citus.shard_replication_factor TO 1; +SET citus.shard_count to 4; +SET citus.max_cached_conns_per_worker to 0; +CREATE TABLE test_table(id int, value_1 int); +SELECT create_distributed_table('test_table','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE VIEW unhealthy_shard_count AS + SELECT count(*) + FROM pg_dist_shard_placement pdsp + JOIN + pg_dist_shard pds + ON pdsp.shardid=pds.shardid + WHERE logicalrelid='copy_distributed_table.test_table'::regclass AND shardstate != 1; +-- Just kill the connection after sending the first query to the worker. +SELECT citus.mitmproxy('conn.kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table FROM stdin delimiter ','; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: COPY test_table, line 1: "1,2" +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Now, kill the connection while copying the data +SELECT citus.mitmproxy('conn.onCopyData().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table FROM stdin delimiter ','; +ERROR: failed to COPY to shard xxxxx on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Similar to the above one, but now cancel the connection +-- instead of killing it. +SELECT citus.mitmproxy('conn.onCopyData().cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table FROM stdin delimiter ','; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill the connection after worker sends command complete message +SELECT citus.mitmproxy('conn.onCommandComplete(command="COPY 1").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table FROM stdin delimiter ','; +ERROR: failed to COPY to shard xxxxx on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- similar to above one, but cancel the connection on command complete +SELECT citus.mitmproxy('conn.onCommandComplete(command="COPY 1").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table FROM stdin delimiter ','; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill the connection on PREPARE TRANSACTION +SELECT citus.mitmproxy('conn.onQuery(query="PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table FROM stdin delimiter ','; +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- we don't want to see the prepared transaction numbers in the warnings +SET client_min_messages TO ERROR; +-- kill on command complete on COMMIT PREPARE, command should succeed +SELECT citus.mitmproxy('conn.onCommandComplete(command="COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table FROM stdin delimiter ','; +SET client_min_messages TO NOTICE; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 4 +(1 row) + +TRUNCATE TABLE test_table; +-- kill on ROLLBACK, command could be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +\COPY test_table FROM stdin delimiter ','; +ROLLBACK; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +DROP TABLE test_table CASCADE; +NOTICE: drop cascades to view unhealthy_shard_count +-- With two placement, should we error out or mark untouched shard placements as inactive? +SET citus.shard_replication_factor TO 2; +CREATE TABLE test_table_2(id int, value_1 int); +SELECT create_distributed_table('test_table_2','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT citus.mitmproxy('conn.kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table_2 FROM stdin delimiter ','; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT pds.logicalrelid, pdsd.shardid, pdsd.shardstate + FROM pg_dist_shard_placement as pdsd + INNER JOIN pg_dist_shard as pds + ON pdsd.shardid = pds.shardid + WHERE pds.logicalrelid = 'test_table_2'::regclass + ORDER BY shardid, nodeport; + logicalrelid | shardid | shardstate +--------------------------------------------------------------------- + test_table_2 | 1710004 | 1 + test_table_2 | 1710004 | 1 + test_table_2 | 1710005 | 1 + test_table_2 | 1710005 | 1 + test_table_2 | 1710006 | 1 + test_table_2 | 1710006 | 1 + test_table_2 | 1710007 | 1 + test_table_2 | 1710007 | 1 +(8 rows) + +-- Create test_table_2 again to have healthy one +DROP TABLE test_table_2; +CREATE TABLE test_table_2(id int, value_1 int); +SELECT create_distributed_table('test_table_2','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Kill the connection when we try to start the COPY +-- The query should abort +SELECT citus.mitmproxy('conn.onQuery(query="FROM STDIN WITH").killall()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table_2 FROM stdin delimiter ','; +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +COPY test_table_2, line 1: "1,2" +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT pds.logicalrelid, pdsd.shardid, pdsd.shardstate + FROM pg_dist_shard_placement as pdsd + INNER JOIN pg_dist_shard as pds + ON pdsd.shardid = pds.shardid + WHERE pds.logicalrelid = 'test_table_2'::regclass + ORDER BY shardid, nodeport; + logicalrelid | shardid | shardstate +--------------------------------------------------------------------- + test_table_2 | 1710008 | 1 + test_table_2 | 1710008 | 1 + test_table_2 | 1710009 | 1 + test_table_2 | 1710009 | 1 + test_table_2 | 1710010 | 1 + test_table_2 | 1710010 | 1 + test_table_2 | 1710011 | 1 + test_table_2 | 1710011 | 1 +(8 rows) + +-- Create test_table_2 again to have healthy one +DROP TABLE test_table_2; +CREATE TABLE test_table_2(id int, value_1 int); +SELECT create_distributed_table('test_table_2','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- When kill on copying data, it will be rollbacked and placements won't be labaled as invalid. +-- Note that now we sent data to shard xxxxx, yet it is not marked as invalid. +-- You can check the issue about this behaviour: https://github.com/citusdata/citus/issues/1933 +SELECT citus.mitmproxy('conn.onCopyData().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table_2 FROM stdin delimiter ','; +ERROR: failed to COPY to shard xxxxx on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT pds.logicalrelid, pdsd.shardid, pdsd.shardstate + FROM pg_dist_shard_placement as pdsd + INNER JOIN pg_dist_shard as pds + ON pdsd.shardid = pds.shardid + WHERE pds.logicalrelid = 'test_table_2'::regclass + ORDER BY shardid, nodeport; + logicalrelid | shardid | shardstate +--------------------------------------------------------------------- + test_table_2 | 1710012 | 1 + test_table_2 | 1710012 | 1 + test_table_2 | 1710013 | 1 + test_table_2 | 1710013 | 1 + test_table_2 | 1710014 | 1 + test_table_2 | 1710014 | 1 + test_table_2 | 1710015 | 1 + test_table_2 | 1710015 | 1 +(8 rows) + +DROP SCHEMA copy_distributed_table CASCADE; +NOTICE: drop cascades to table test_table_2 +SET search_path TO default; diff --git a/src/test/regress/expected/failure_copy_to_reference_1.out b/src/test/regress/expected/failure_copy_to_reference_1.out new file mode 100644 index 000000000..4089fbd31 --- /dev/null +++ b/src/test/regress/expected/failure_copy_to_reference_1.out @@ -0,0 +1,455 @@ +-- +-- Failure tests for COPY to reference tables +-- +CREATE SCHEMA copy_reference_failure; +SET search_path TO 'copy_reference_failure'; +SET citus.next_shard_id TO 130000; +-- we don't want to see the prepared transaction numbers in the warnings +SET client_min_messages TO ERROR; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE test_table(id int, value_1 int); +SELECT create_reference_table('test_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +CREATE VIEW unhealthy_shard_count AS + SELECT count(*) + FROM pg_dist_shard_placement pdsp + JOIN + pg_dist_shard pds + ON pdsp.shardid=pds.shardid + WHERE logicalrelid='copy_reference_failure.test_table'::regclass AND shardstate != 1; +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: failure on connection marked as essential: localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: failure on connection marked as essential: localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill as soon as the coordinator sends COPY command +SELECT citus.mitmproxy('conn.onQuery(query="^COPY").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +COPY test_table, line 1: "1,2" +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel as soon as the coordinator sends COPY command +SELECT citus.mitmproxy('conn.onQuery(query="^COPY").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: canceling statement due to user request +CONTEXT: COPY test_table, line 1: "1,2" +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill as soon as the worker sends CopyComplete +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COPY 3").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: failed to COPY to shard xxxxx on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel as soon as the coordinator sends CopyData +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COPY 3").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill the connection when we try to start the COPY +-- the query should abort +SELECT citus.mitmproxy('conn.onQuery(query="FROM STDIN WITH").killall()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +COPY test_table, line 1: "1,2" +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- killing on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancelling on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- killing on command complete of COMMIT PREPARE, we should see that the command succeeds +-- and all the workers committed +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- we shouldn't have any prepared transactions in the workers +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 3 +(1 row) + +TRUNCATE test_table; +-- kill as soon as the coordinator sends COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- Since we kill connections to one worker after commit arrives but the +-- other worker connections are healthy, we cannot commit on 1 worker +-- which has 1 active shard placements, but the other does. That's why +-- we expect to see 1 recovered prepared transactions. +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 3 +(1 row) + +TRUNCATE test_table; +-- finally, test failing on ROLLBACK just after the coordinator +-- sends the ROLLBACK so the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SET LOCAL client_min_messages TO WARNING; +\copy test_table FROM STDIN DELIMITER ',' +ROLLBACK; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- but now kill just after the worker sends response to +-- ROLLBACK command, command should have been rollbacked +-- both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SET LOCAL client_min_messages TO WARNING; +\copy test_table FROM STDIN DELIMITER ',' +ROLLBACK; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +DROP SCHEMA copy_reference_failure CASCADE; +SET search_path TO default; diff --git a/src/test/regress/expected/failure_create_distributed_table_non_empty_1.out b/src/test/regress/expected/failure_create_distributed_table_non_empty_1.out new file mode 100644 index 000000000..048f10a20 --- /dev/null +++ b/src/test/regress/expected/failure_create_distributed_table_non_empty_1.out @@ -0,0 +1,1006 @@ +-- +-- Failure tests for COPY to reference tables +-- +-- We have to keep two copies of this failure test +-- because if the shards are created via the executor +-- cancellations are processed, otherwise they are not +SET citus.enable_ddl_propagation TO OFF; +CREATE SCHEMA create_distributed_table_non_empty_failure; +SET citus.enable_ddl_propagation TO ON; +SET search_path TO 'create_distributed_table_non_empty_failure'; +SET citus.next_shard_id TO 11000000; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- we'll start with replication factor 1 and 2pc +SET citus.shard_replication_factor TO 1; +SET citus.shard_count to 4; +CREATE TABLE test_table(id int, value_1 int); +INSERT INTO test_table VALUES (1,1),(2,2),(3,3),(4,4); +-- in the first test, kill the first connection we sent from the coordinator +SELECT citus.mitmproxy('conn.kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- in the first test, cancel the first connection we sent from the coordinator +SELECT citus.mitmproxy('conn.cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill as soon as the coordinator sends CREATE SCHEMA +SELECT citus.mitmproxy('conn.onQuery(query="^CREATE SCHEMA").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.schemata WHERE schema_name = 'create_distributed_table_non_empty_failure'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,1) +(2 rows) + +-- cancel as soon as the coordinator sends CREATE SCHEMA +-- Note: Schema should be created in workers because Citus +-- does not check for interrupts until GetRemoteCommandResult is called. +-- Since we already sent the command at this stage, the schemas get created in workers +SELECT citus.mitmproxy('conn.onQuery(query="^CREATE SCHEMA").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.schemata WHERE schema_name = 'create_distributed_table_non_empty_failure'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,1) +(2 rows) + +-- this triggers a schema creation which prevents further transactions around dependency propagation +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +CREATE TYPE schema_proc AS (a int); +DROP TYPE schema_proc; +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.schemata WHERE schema_name = 'create_distributed_table_non_empty_failure'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,1) + (localhost,57637,t,1) +(2 rows) + +-- cancel as soon as the coordinator sends begin +-- if the shards are created via the executor, the table creation will fail +-- otherwise shards will be created because we ignore cancel requests during the shard creation +-- Interrupts are hold in CreateShardsWithRoundRobinPolicy +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.schemata WHERE schema_name = 'create_distributed_table_non_empty_failure'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,1) + (localhost,57637,t,1) +(2 rows) + +DROP TABLE test_table ; +CREATE TABLE test_table(id int, value_1 int); +INSERT INTO test_table VALUES (1,1),(2,2),(3,3),(4,4); +-- kill as soon as the coordinator sends CREATE TABLE +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill as soon as the coordinator sends COPY +SELECT citus.mitmproxy('conn.onQuery(query="COPY").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill when the COPY is completed, it should be rollbacked properly +SELECT citus.mitmproxy('conn.onCommandComplete(command="COPY").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$create_distributed_table_non_empty_failure.test_table$$) +ERROR: failed to COPY to shard xxxxx on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel as soon as the coordinator sends COPY, table +-- should not be created and rollbacked properly +SELECT citus.mitmproxy('conn.onQuery(query="COPY").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel when the COPY is completed, it should be rollbacked properly +SELECT citus.mitmproxy('conn.onCommandComplete(command="COPY").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$create_distributed_table_non_empty_failure.test_table$$) +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- immediately kill when we see prepare transaction to see if the command +-- successfully rollbacked the created shards +-- we don't want to see the prepared transaction numbers in the warnings +SET client_min_messages TO ERROR; +SELECT citus.mitmproxy('conn.onQuery(query="PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- immediately cancel when we see prepare transaction to see if the command +-- successfully rollbacked the created shards +SELECT citus.mitmproxy('conn.onQuery(query="PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +-- kill as soon as the coordinator sends COMMIT +-- shards should be created and kill should not affect +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 4 +(1 row) + +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 2 +(1 row) + +DROP TABLE test_table ; +-- since we want to interrupt the schema creation again we need to drop and recreate +-- for citus to redistribute the dependency +DROP SCHEMA create_distributed_table_non_empty_failure; +CREATE SCHEMA create_distributed_table_non_empty_failure; +CREATE TABLE test_table(id int, value_1 int); +INSERT INTO test_table VALUES (1,1),(2,2),(3,3),(4,4); +-- cancel as soon as the coordinator sends COMMIT +-- shards should be created and kill should not affect +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 4 +(1 row) + +DROP TABLE test_table ; +CREATE TABLE test_table(id int, value_1 int); +INSERT INTO test_table VALUES (1,1),(2,2),(3,3),(4,4); +-- kill as soon as the coordinator sends ROLLBACK +-- the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ROLLBACK; +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel as soon as the coordinator sends ROLLBACK +-- should be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- We are done with pure create_distributed_table testing and now +-- testing for co-located tables. +CREATE TABLE colocated_table(id int, value_1 int); +SELECT create_distributed_table('colocated_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Now, cancel the connection just after transaction is opened on +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Now, kill the connection just after transaction is opened on +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'create_distributed_table_non_empty_failure' and table_name LIKE 'test_table%'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- Now, cancel the connection just after the COPY started to +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^COPY").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Now, kill the connection just after the COPY started to +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^COPY").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'create_distributed_table_non_empty_failure' and table_name LIKE 'test_table%'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- Now, cancel the connection when we issue CREATE TABLE on +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^SELECT worker_apply_shard_ddl_command").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Now, kill the connection when we issue CREATE TABLE on +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^SELECT worker_apply_shard_ddl_command").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'create_distributed_table_non_empty_failure' and table_name LIKE 'test_table%'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- Now run the same tests with 1pc +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE colocated_table; +DROP TABLE test_table; +DROP SCHEMA create_distributed_table_non_empty_failure; +CREATE SCHEMA create_distributed_table_non_empty_failure; +CREATE TABLE test_table(id int, value_1 int); +INSERT INTO test_table VALUES (1,1),(2,2),(3,3),(4,4); +SELECT citus.mitmproxy('conn.kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'create_distributed_table_non_empty_failure' and table_name LIKE 'test_table%'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- in the first test, cancel the first connection we sent from the coordinator +SELECT citus.mitmproxy('conn.cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'create_distributed_table_non_empty_failure' and table_name LIKE 'test_table%'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- this triggers a schema creation which prevents further transactions around dependency propagation +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +CREATE TYPE schema_proc AS (a int); +DROP TYPE schema_proc; +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.schemata WHERE schema_name = 'create_distributed_table_non_empty_failure'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,1) + (localhost,57637,t,1) +(2 rows) + +-- cancel as soon as the coordinator sends begin +-- if the shards are created via the executor, the table creation will fail +-- otherwise shards will be created because we ignore cancel requests during the shard creation +-- Interrupts are hold in CreateShardsWithRoundRobinPolicy +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.schemata WHERE schema_name = 'create_distributed_table_non_empty_failure'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,1) + (localhost,57637,t,1) +(2 rows) + +DROP TABLE test_table ; +CREATE TABLE test_table(id int, value_1 int); +INSERT INTO test_table VALUES (1,1),(2,2),(3,3),(4,4); +-- kill as soon as the coordinator sends CREATE TABLE +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill as soon as the coordinator sends COPY +SELECT citus.mitmproxy('conn.onQuery(query="COPY").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill when the COPY is completed, it should be rollbacked properly +SELECT citus.mitmproxy('conn.onCommandComplete(command="COPY").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: failed to COPY to shard xxxxx on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel as soon as the coordinator sends COPY, table +-- should not be created and rollbacked properly +SELECT citus.mitmproxy('conn.onQuery(query="COPY").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel when the COPY is completed, it should be rollbacked properly +SELECT citus.mitmproxy('conn.onCommandComplete(command="COPY").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill as soon as the coordinator sends ROLLBACK +-- the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ROLLBACK; +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel as soon as the coordinator sends ROLLBACK +-- should be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill as soon as the coordinator sends COMMIT +-- the command can be COMMITed +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +COMMIT; +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 4 +(1 row) + +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 2 +(1 row) + +DROP TABLE test_table; +CREATE TABLE test_table(id int, value_1 int); +INSERT INTO test_table VALUES (1,1),(2,2),(3,3),(4,4); +-- cancel as soon as the coordinator sends COMMIT +-- should be COMMITed +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +COMMIT; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 4 +(1 row) + +DROP TABLE test_table; +CREATE TABLE test_table(id int, value_1 int); +INSERT INTO test_table VALUES (1,1),(2,2),(3,3),(4,4); +CREATE TABLE colocated_table(id int, value_1 int); +SELECT create_distributed_table('colocated_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Now, cancel the connection just after transaction is opened on +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Now, kill the connection just after transaction is opened on +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Now, cancel the connection just after the COPY started to +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^COPY").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Now, kill the connection just after the COPY started to +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^COPY").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'create_distributed_table_non_empty_failure' and table_name LIKE 'test_table%'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +DROP SCHEMA create_distributed_table_non_empty_failure CASCADE; diff --git a/src/test/regress/expected/failure_create_reference_table_1.out b/src/test/regress/expected/failure_create_reference_table_1.out new file mode 100644 index 000000000..901235435 --- /dev/null +++ b/src/test/regress/expected/failure_create_reference_table_1.out @@ -0,0 +1,266 @@ +-- +-- Failure tests for creating reference table +-- +CREATE SCHEMA failure_reference_table; +SET search_path TO 'failure_reference_table'; +SET citus.next_shard_id TO 10000000; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- this is merely used to get the schema creation propagated. Without there are failures +-- not related to reference tables but schema creation due to dependency creation on workers +CREATE TYPE schema_proc AS (a int); +DROP TYPE schema_proc; +CREATE TABLE ref_table(id int); +INSERT INTO ref_table VALUES(1),(2),(3); +-- Kill on sending first query to worker node, should error +-- out and not create any placement +SELECT citus.mitmproxy('conn.onQuery().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Kill after creating transaction on worker node +SELECT citus.mitmproxy('conn.onCommandComplete(command="BEGIN").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Cancel after creating transaction on worker node +SELECT citus.mitmproxy('conn.onCommandComplete(command="BEGIN").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Kill after copying data to worker node +SELECT citus.mitmproxy('conn.onCommandComplete(command="SELECT 1").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Cancel after copying data to worker node +SELECT citus.mitmproxy('conn.onCommandComplete(command="SELECT 1").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Kill after copying data to worker node +SELECT citus.mitmproxy('conn.onCommandComplete(command="COPY 3").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$failure_reference_table.ref_table$$) +ERROR: failed to COPY to shard xxxxx on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Cancel after copying data to worker node +SELECT citus.mitmproxy('conn.onCommandComplete(command="COPY 3").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$failure_reference_table.ref_table$$) +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- we don't want to see the prepared transaction numbers in the warnings +SET client_min_messages TO ERROR; +-- Kill after preparing transaction. Since we don't commit after preparing, we recover +-- prepared transaction afterwards. +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +-- Kill after commiting prepared, this should succeed +SELECT citus.mitmproxy('conn.onCommandComplete(command="COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +SELECT shardid, nodeport, shardstate FROM pg_dist_shard_placement ORDER BY shardid, nodeport; + shardid | nodeport | shardstate +--------------------------------------------------------------------- + 10000008 | 9060 | 1 + 10000008 | 57637 | 1 +(2 rows) + +SET client_min_messages TO NOTICE; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE ref_table; +DROP SCHEMA failure_reference_table; +SET citus.enable_ddl_propagation TO OFF; +CREATE SCHEMA failure_reference_table; +SET citus.enable_ddl_propagation TO ON; +CREATE TABLE ref_table(id int); +INSERT INTO ref_table VALUES(1),(2),(3); +-- Test in transaction +SELECT citus.mitmproxy('conn.onQuery().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_reference_table('ref_table'); +WARNING: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +ERROR: failure on connection marked as essential: localhost:xxxxx +COMMIT; +-- kill on ROLLBACK, should be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_reference_table('ref_table'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$failure_reference_table.ref_table$$) + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +ROLLBACK; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT * FROM pg_dist_shard_placement ORDER BY shardid, nodeport; + shardid | shardstate | shardlength | nodename | nodeport | placementid +--------------------------------------------------------------------- +(0 rows) + +-- cancel when the coordinator send ROLLBACK, should be rollbacked. We ignore cancellations +-- during the ROLLBACK. +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_reference_table('ref_table'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$failure_reference_table.ref_table$$) + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +ROLLBACK; +SELECT * FROM pg_dist_shard_placement ORDER BY shardid, nodeport; + shardid | shardstate | shardlength | nodename | nodeport | placementid +--------------------------------------------------------------------- +(0 rows) + +DROP SCHEMA failure_reference_table CASCADE; +NOTICE: drop cascades to table ref_table +SET search_path TO default; diff --git a/src/test/regress/expected/failure_create_table_1.out b/src/test/regress/expected/failure_create_table_1.out new file mode 100644 index 000000000..28305e7be --- /dev/null +++ b/src/test/regress/expected/failure_create_table_1.out @@ -0,0 +1,724 @@ +-- +-- failure_create_table adds failure tests for creating table without data. +-- +SET citus.enable_ddl_propagation TO OFF; +CREATE SCHEMA failure_create_table; +SET citus.enable_ddl_propagation TO ON; +SET search_path TO 'failure_create_table'; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SET citus.shard_replication_factor TO 1; +SET citus.shard_count to 4; +CREATE TABLE test_table(id int, value_1 int); +-- Kill connection before sending query to the worker +SELECT citus.mitmproxy('conn.kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table','id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- kill as soon as the coordinator sends CREATE SCHEMA +-- Since schemas are created in separate transaction, schema will +-- be created only on the node which is not behind the proxy. +-- https://github.com/citusdata/citus/pull/1652 +SELECT citus.mitmproxy('conn.onQuery(query="^CREATE SCHEMA").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.schemata WHERE schema_name = 'failure_create_table'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,1) +(2 rows) + +-- this is merely used to get the schema creation propagated. Without there are failures +-- not related to reference tables but schema creation due to dependency creation on workers +CREATE TYPE schema_proc AS (a int); +DROP TYPE schema_proc; +-- Now, kill the connection while opening transaction on workers. +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table','id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- Now, kill the connection after sending create table command with worker_apply_shard_ddl_command UDF +SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_apply_shard_ddl_command").after(1).kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table','id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- Kill the connection while creating a distributed table in sequential mode on sending create command +-- with worker_apply_shard_ddl_command UDF. +BEGIN; + SET LOCAL citus.multi_shard_modify_mode TO 'sequential'; + SELECT citus.mitmproxy('conn.onQuery(query="SELECT worker_apply_shard_ddl_command").after(1).kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + + SELECT create_distributed_table('test_table', 'id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +COMMIT; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- Now, cancel the connection while creating transaction +-- workers. Note that, cancel requests will be ignored during +-- shard creation. +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table','id'); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +DROP TABLE test_table; +CREATE TABLE test_table(id int, value_1 int); +-- Kill and cancel the connection with colocate_with option while sending the create table command +CREATE TABLE temp_table(id int, value_1 int); +SELECT create_distributed_table('temp_table','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table','id',colocate_with=>'temp_table'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 4 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table','id',colocate_with=>'temp_table'); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 4 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- Kill and cancel the connection after worker sends "PREPARE TRANSACTION" ack with colocate_with option +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table','id',colocate_with=>'temp_table'); +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 4 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table','id',colocate_with=>'temp_table'); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 4 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- drop tables and schema and recreate to start from a non-distributed schema again +DROP TABLE temp_table; +DROP TABLE test_table; +DROP SCHEMA failure_create_table; +CREATE SCHEMA failure_create_table; +CREATE TABLE test_table(id int, value_1 int); +-- Test inside transaction +-- Kill connection before sending query to the worker +SELECT citus.mitmproxy('conn.kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table','id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- this is merely used to get the schema creation propagated. Without there are failures +-- not related to reference tables but schema creation due to dependency creation on workers +CREATE TYPE schema_proc AS (a int); +DROP TYPE schema_proc; +-- Now, kill the connection while creating transaction on workers in transaction. +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table','id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- Now, cancel the connection while creating the transaction on +-- workers. Note that, cancel requests will be ignored during +-- shard creation again in transaction if we're not relying on the +-- executor. So, we'll have two output files +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table','id'); +ERROR: canceling statement due to user request +COMMIT; +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- drop tables and schema and recreate to start from a non-distributed schema again +DROP TABLE test_table; +DROP SCHEMA failure_create_table; +CREATE SCHEMA failure_create_table; +CREATE TABLE test_table(id int, value_1 int); +-- Kill connection before sending query to the worker with 1pc. +SELECT citus.mitmproxy('conn.kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table','id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- Kill connection while sending create table command with 1pc. +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table','id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- this is merely used to get the schema creation propagated. Without there are failures +-- not related to reference tables but schema creation due to dependency creation on workers +CREATE TYPE schema_proc AS (a int); +DROP TYPE schema_proc; +-- Now, kill the connection while opening transactions on workers with 1pc. Transaction will be opened due to BEGIN. +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table','id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- Now, cancel the connection while creating transactions on +-- workers with 1pc. Note that, cancel requests will be ignored during +-- shard creation unless the executor is used. So, we'll have two output files +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table','id'); +ERROR: canceling statement due to user request +COMMIT; +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +DROP TABLE test_table; +DROP SCHEMA failure_create_table; +CREATE SCHEMA failure_create_table; +-- this function is dropped in Citus10, added here for tests +CREATE OR REPLACE FUNCTION pg_catalog.master_create_distributed_table(table_name regclass, + distribution_column text, + distribution_method citus.distribution_type) + RETURNS void + LANGUAGE C STRICT + AS 'citus', $$master_create_distributed_table$$; +COMMENT ON FUNCTION pg_catalog.master_create_distributed_table(table_name regclass, + distribution_column text, + distribution_method citus.distribution_type) + IS 'define the table distribution functions'; +-- this function is dropped in Citus10, added here for tests +CREATE OR REPLACE FUNCTION pg_catalog.master_create_worker_shards(table_name text, shard_count integer, + replication_factor integer DEFAULT 2) + RETURNS void + AS 'citus', $$master_create_worker_shards$$ + LANGUAGE C STRICT; +-- Test master_create_worker_shards with 2pc +CREATE TABLE test_table_2(id int, value_1 int); +SELECT master_create_distributed_table('test_table_2', 'id', 'hash'); + master_create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Kill connection before sending query to the worker +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_create_worker_shards('test_table_2', 4, 2); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- Kill the connection after worker sends "PREPARE TRANSACTION" ack +SELECT citus.mitmproxy('conn.onCommandComplete(command="^PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_create_worker_shards('test_table_2', 4, 2); +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- Cancel the connection after sending prepare transaction in master_create_worker_shards +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_create_worker_shards('test_table_2', 4, 2); +ERROR: canceling statement due to user request +-- Show that there is no pending transaction +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'failure_create_table' and table_name LIKE 'test_table%' ORDER BY 1$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +DROP SCHEMA failure_create_table CASCADE; +NOTICE: drop cascades to table test_table_2 +SET search_path TO default; diff --git a/src/test/regress/expected/failure_ddl_1.out b/src/test/regress/expected/failure_ddl_1.out new file mode 100644 index 000000000..4a79021b9 --- /dev/null +++ b/src/test/regress/expected/failure_ddl_1.out @@ -0,0 +1,1060 @@ +-- +-- Test DDL command propagation failures +-- Different dimensions we're testing: +-- Replication factor, 1PC-2PC, sequential-parallel modes +-- +CREATE SCHEMA ddl_failure; +SET citus.force_max_query_parallelization TO ON; +SET search_path TO 'ddl_failure'; +-- do not cache any connections +SET citus.max_cached_conns_per_worker TO 0; +-- we don't want to see the prepared transaction numbers in the warnings +SET client_min_messages TO WARNING; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SET citus.next_shard_id TO 100800; +-- we'll start with replication factor 1, 2PC and parallel mode +SET citus.shard_count = 4; +SET citus.shard_replication_factor = 1; +CREATE TABLE test_table (key int, value int); +SELECT create_distributed_table('test_table', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +ERROR: failure on connection marked as essential: localhost:xxxxx +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- kill as soon as the coordinator sends worker_apply_shard_ddl_command +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +-- show that we've never commited the changes +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- cancel as soon as the coordinator sends worker_apply_shard_ddl_command +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: canceling statement due to user request +-- show that we've never commited the changes +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- manually drop & re-create the table for the next tests +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE test_table; +SET citus.next_shard_id TO 100800; +SET citus.shard_count = 4; +SET citus.shard_replication_factor = 1; +CREATE TABLE test_table (key int, value int); +SELECT create_distributed_table('test_table', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- cancel as soon as the coordinator sends COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +WARNING: +CONTEXT: while executing command on localhost:xxxxx +WARNING: failed to commit transaction on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- interrupts are held during COMMIT/ROLLBACK, so the command +-- should have been applied without any issues since cancel is ignored +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- the following tests rely the column not exists, so drop manually +ALTER TABLE test_table DROP COLUMN new_column; +-- but now kill just after the worker sends response to +-- COMMIT command, so we'll have lots of warnings but the command +-- should have been committed both on the distributed table and the placements +SET client_min_messages TO WARNING; +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: failed to commit transaction on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: failed to commit transaction on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SET client_min_messages TO ERROR; +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- now cancel just after the worker sends response to +-- but Postgres doesn't accepts interrupts during COMMIT and ROLLBACK +-- so should not cancel at all, so not an effective test but adding in +-- case Citus messes up this behaviour +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- the remaining tests rely on table having new_column +ALTER TABLE test_table ADD COLUMN new_column INT; +-- finally, test failing on ROLLBACK with 1PC +-- fail just after the coordinator sends the ROLLBACK +-- so the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SET LOCAL client_min_messages TO WARNING; +ALTER TABLE test_table DROP COLUMN new_column; +ROLLBACK; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +-- now cancel just after the worker sends response to +-- but Postgres doesn't accepts interrupts during COMMIT and ROLLBACK +-- so should not cancel at all, so not an effective test but adding in +-- case Citus messes up this behaviour +SELECT citus.mitmproxy('conn.onQuery(query="ROLLBACK").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +ALTER TABLE test_table DROP COLUMN new_column; +ROLLBACK; +-- but now kill just after the worker sends response to +-- ROLLBACK command, so we'll have lots of warnings but the command +-- should have been rollbacked both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +ALTER TABLE test_table DROP COLUMN new_column; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +ERROR: failure on connection marked as essential: localhost:xxxxx +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +-- kill as soon as the coordinator sends worker_apply_shard_ddl_command +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +-- cancel as soon as the coordinator sends worker_apply_shard_ddl_command +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +-- killing on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- we should be able to recover the transaction and +-- see that the command is rollbacked +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 2 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- cancelling on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- we should be able to recover the transaction and +-- see that the command is rollbacked +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- killing on command complete of COMMIT PREPARE, we should see that the command succeeds +-- and all the workers committed +SELECT citus.mitmproxy('conn.onCommandComplete(command="COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,value}") + (localhost,9060,100802,t,"{key,value}") + (localhost,57637,100801,t,"{key,value}") + (localhost,57637,100803,t,"{key,value}") +(4 rows) + +-- we shouldn't have any prepared transactions in the workers +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,value}") + (localhost,9060,100802,t,"{key,value}") + (localhost,57637,100801,t,"{key,value}") + (localhost,57637,100803,t,"{key,value}") +(4 rows) + +-- kill as soon as the coordinator sends COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- some of the placements would be missing the new column +-- since we've not commited the prepared transactions +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,value}") + (localhost,9060,100802,t,"{key,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- we should be able to recover the transaction and +-- see that the command is committed +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 2 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- finally, test failing on ROLLBACK with 2PC +-- fail just after the coordinator sends the ROLLBACK +-- so the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +ALTER TABLE test_table DROP COLUMN new_column; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- ROLLBACK should have failed on the distributed table and the placements +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- but now kill just after the worker sends response to +-- ROLLBACK command, so we'll have lots of warnings but the command +-- should have been rollbacked both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +ALTER TABLE test_table DROP COLUMN new_column; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- make sure that the transaction is rollbacked +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- another set of tests with 2PC and replication factor = 2 +SET citus.shard_count = 4; +SET citus.shard_replication_factor = 2; +-- re-create the table with replication factor 2 +DROP TABLE test_table; +CREATE TABLE test_table (key int, value int); +SELECT create_distributed_table('test_table', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: failure on connection marked as essential: localhost:xxxxx +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- kill as soon as the coordinator sends worker_apply_shard_ddl_command +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- cancel as soon as the coordinator sends worker_apply_shard_ddl_command +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- killing on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- we should be able to recover the transaction and +-- see that the command is rollbacked on all workers +-- note that in this case recover_prepared_transactions() +-- sends ROLLBACK PREPARED to the workers given that +-- the transaction has not been commited on any placement yet +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 4 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100804,t,"{key,value}") + (localhost,9060,100805,t,"{key,value}") + (localhost,9060,100806,t,"{key,value}") + (localhost,9060,100807,t,"{key,value}") + (localhost,57637,100804,t,"{key,value}") + (localhost,57637,100805,t,"{key,value}") + (localhost,57637,100806,t,"{key,value}") + (localhost,57637,100807,t,"{key,value}") +(8 rows) + +-- killing on command complete of COMMIT PREPARE, we should see that the command succeeds +-- and all the workers committed +SELECT citus.mitmproxy('conn.onCommandComplete(command="COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100804,t,"{key,new_column,value}") + (localhost,9060,100805,t,"{key,new_column,value}") + (localhost,9060,100806,t,"{key,new_column,value}") + (localhost,9060,100807,t,"{key,new_column,value}") + (localhost,57637,100804,t,"{key,new_column,value}") + (localhost,57637,100805,t,"{key,new_column,value}") + (localhost,57637,100806,t,"{key,new_column,value}") + (localhost,57637,100807,t,"{key,new_column,value}") +(8 rows) + +-- we shouldn't have any prepared transactions in the workers +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100804,t,"{key,new_column,value}") + (localhost,9060,100805,t,"{key,new_column,value}") + (localhost,9060,100806,t,"{key,new_column,value}") + (localhost,9060,100807,t,"{key,new_column,value}") + (localhost,57637,100804,t,"{key,new_column,value}") + (localhost,57637,100805,t,"{key,new_column,value}") + (localhost,57637,100806,t,"{key,new_column,value}") + (localhost,57637,100807,t,"{key,new_column,value}") +(8 rows) + +-- kill as soon as the coordinator sends COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- some of the placements would be missing the new column +-- since we've not commited the prepared transactions +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100804,t,"{key,new_column,value}") + (localhost,9060,100805,t,"{key,new_column,value}") + (localhost,9060,100806,t,"{key,new_column,value}") + (localhost,9060,100807,t,"{key,new_column,value}") + (localhost,57637,100804,t,"{key,value}") + (localhost,57637,100805,t,"{key,value}") + (localhost,57637,100806,t,"{key,value}") + (localhost,57637,100807,t,"{key,value}") +(8 rows) + +-- we should be able to recover the transaction and +-- see that the command is committed +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 4 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100804,t,"{key,value}") + (localhost,9060,100805,t,"{key,value}") + (localhost,9060,100806,t,"{key,value}") + (localhost,9060,100807,t,"{key,value}") + (localhost,57637,100804,t,"{key,value}") + (localhost,57637,100805,t,"{key,value}") + (localhost,57637,100806,t,"{key,value}") + (localhost,57637,100807,t,"{key,value}") +(8 rows) + +-- finally, test failing on ROLLBACK with 2PC +-- fail just after the coordinator sends the ROLLBACK +-- so the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +ALTER TABLE test_table ADD COLUMN new_column INT; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- ROLLBACK should have failed on the distributed table and the placements +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100804,t,"{key,value}") + (localhost,9060,100805,t,"{key,value}") + (localhost,9060,100806,t,"{key,value}") + (localhost,9060,100807,t,"{key,value}") + (localhost,57637,100804,t,"{key,value}") + (localhost,57637,100805,t,"{key,value}") + (localhost,57637,100806,t,"{key,value}") + (localhost,57637,100807,t,"{key,value}") +(8 rows) + +-- but now kill just after the worker sends response to +-- ROLLBACK command, so we'll have lots of warnings but the command +-- should have been rollbacked both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +ALTER TABLE test_table ADD COLUMN new_column INT; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- make sure that the transaction is rollbacked +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100804,t,"{key,value}") + (localhost,9060,100805,t,"{key,value}") + (localhost,9060,100806,t,"{key,value}") + (localhost,9060,100807,t,"{key,value}") + (localhost,57637,100804,t,"{key,value}") + (localhost,57637,100805,t,"{key,value}") + (localhost,57637,100806,t,"{key,value}") + (localhost,57637,100807,t,"{key,value}") +(8 rows) + +-- now do some tests with sequential mode +SET citus.multi_shard_modify_mode TO 'sequential'; +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: failure on connection marked as essential: localhost:xxxxx +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- kill as soon as the coordinator sends worker_apply_shard_ddl_command +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +-- kill as soon as the coordinator after it sends worker_apply_shard_ddl_command 2nd time +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").after(2).kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +-- cancel as soon as the coordinator after it sends worker_apply_shard_ddl_command 2nd time +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").after(2).cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: canceling statement due to user request +SET search_path TO 'public'; +DROP SCHEMA ddl_failure CASCADE; diff --git a/src/test/regress/expected/failure_multi_dml_1.out b/src/test/regress/expected/failure_multi_dml_1.out new file mode 100644 index 000000000..93974cc38 --- /dev/null +++ b/src/test/regress/expected/failure_multi_dml_1.out @@ -0,0 +1,507 @@ +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SET citus.shard_count = 2; +SET citus.shard_replication_factor = 1; -- one shard per worker +SET citus.next_shard_id TO 103400; +ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART 100; +CREATE TABLE dml_test (id integer, name text); +SELECT create_distributed_table('dml_test', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +COPY dml_test FROM STDIN WITH CSV; +SELECT citus.clear_network_traffic(); + clear_network_traffic +--------------------------------------------------------------------- + +(1 row) + +---- test multiple statements spanning multiple shards, +---- at each significant point. These transactions are 2pc +-- fail at DELETE +SELECT citus.mitmproxy('conn.onQuery(query="^DELETE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +DELETE FROM dml_test WHERE id = 2; +ERROR: current transaction is aborted, commands ignored until end of transaction block +INSERT INTO dml_test VALUES (5, 'Epsilon'); +ERROR: current transaction is aborted, commands ignored until end of transaction block +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +ERROR: current transaction is aborted, commands ignored until end of transaction block +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +ERROR: current transaction is aborted, commands ignored until end of transaction block +COMMIT; +--- shouldn't see any changes performed in failed transaction +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- cancel at DELETE +SELECT citus.mitmproxy('conn.onQuery(query="^DELETE").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +ERROR: canceling statement due to user request +DELETE FROM dml_test WHERE id = 2; +ERROR: current transaction is aborted, commands ignored until end of transaction block +INSERT INTO dml_test VALUES (5, 'Epsilon'); +ERROR: current transaction is aborted, commands ignored until end of transaction block +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +ERROR: current transaction is aborted, commands ignored until end of transaction block +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +ERROR: current transaction is aborted, commands ignored until end of transaction block +COMMIT; +--- shouldn't see any changes performed in failed transaction +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- fail at INSERT +SELECT citus.mitmproxy('conn.onQuery(query="^INSERT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +ERROR: current transaction is aborted, commands ignored until end of transaction block +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +ERROR: current transaction is aborted, commands ignored until end of transaction block +COMMIT; +--- shouldn't see any changes before failed INSERT +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- cancel at INSERT +SELECT citus.mitmproxy('conn.onQuery(query="^INSERT").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +ERROR: canceling statement due to user request +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +ERROR: current transaction is aborted, commands ignored until end of transaction block +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +ERROR: current transaction is aborted, commands ignored until end of transaction block +COMMIT; +--- shouldn't see any changes before failed INSERT +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- fail at UPDATE +SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +ERROR: current transaction is aborted, commands ignored until end of transaction block +COMMIT; +--- shouldn't see any changes after failed UPDATE +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- cancel at UPDATE +SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +ERROR: canceling statement due to user request +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +ERROR: current transaction is aborted, commands ignored until end of transaction block +COMMIT; +--- shouldn't see any changes after failed UPDATE +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- fail at PREPARE TRANSACTION +SELECT citus.mitmproxy('conn.onQuery(query="^PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- this transaction block will be sent to the coordinator as a remote command to hide the +-- error message that is caused during commit. +-- we'll test for the txn side-effects to ensure it didn't run +SELECT master_run_on_worker( + ARRAY['localhost']::text[], + ARRAY[:master_port]::int[], + ARRAY[' +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, ''Epsilon''); +UPDATE dml_test SET name = ''alpha'' WHERE id = 1; +UPDATE dml_test SET name = ''gamma'' WHERE id = 3; +COMMIT; + '], + false +); +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +while executing command on localhost:xxxxx + master_run_on_worker +--------------------------------------------------------------------- + (localhost,57636,t,BEGIN) +(1 row) + +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT shardid FROM pg_dist_shard_placement WHERE shardstate = 3; + shardid +--------------------------------------------------------------------- +(0 rows) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +-- shouldn't see any changes after failed PREPARE +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- cancel at PREPARE TRANSACTION +SELECT citus.mitmproxy('conn.onQuery(query="^PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- we'll test for the txn side-effects to ensure it didn't run +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +COMMIT; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT shardid FROM pg_dist_shard_placement WHERE shardstate = 3; + shardid +--------------------------------------------------------------------- +(0 rows) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +-- shouldn't see any changes after failed PREPARE +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- fail at COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- hide the error message (it has the PID)... +-- we'll test for the txn side-effects to ensure it didn't run +SET client_min_messages TO ERROR; +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +COMMIT; +SET client_min_messages TO DEFAULT; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT shardid FROM pg_dist_shard_placement WHERE shardstate = 3; + shardid +--------------------------------------------------------------------- +(0 rows) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +-- should see changes, because of txn recovery +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 3 | gamma + 4 | Delta + 5 | Epsilon +(3 rows) + +-- cancel at COMMITs are ignored by Postgres +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +COMMIT; +-- should see changes, because cancellation is ignored +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 3 | gamma + 4 | Delta + 5 | Epsilon + 5 | Epsilon +(4 rows) + +-- drop table and recreate with different replication/sharding +DROP TABLE dml_test; +SET citus.shard_count = 1; +SET citus.shard_replication_factor = 2; -- two placements +CREATE TABLE dml_test (id integer, name text); +SELECT create_distributed_table('dml_test', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +COPY dml_test FROM STDIN WITH CSV; +---- test multiple statements against a single shard, but with two placements +-- fail at PREPARED COMMIT as we use 2PC +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +COMMIT; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: failed to commit transaction on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +-- all changes should be committed because we injected +-- the failure on the COMMIT time. And, we should not +-- mark any placements as INVALID +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT shardid FROM pg_dist_shard_placement WHERE shardstate = 3; + shardid +--------------------------------------------------------------------- +(0 rows) + +SET citus.task_assignment_policy TO "round-robin"; +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 3 | gamma + 4 | Delta + 5 | Epsilon +(3 rows) + +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 3 | gamma + 4 | Delta + 5 | Epsilon +(3 rows) + +RESET citus.task_assignment_policy; +-- drop table and recreate as reference table +DROP TABLE dml_test; +SET citus.shard_count = 2; +SET citus.shard_replication_factor = 1; +CREATE TABLE dml_test (id integer, name text); +SELECT create_reference_table('dml_test'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +COPY dml_test FROM STDIN WITH CSV; +-- fail at COMMIT (by failing to PREPARE) +SELECT citus.mitmproxy('conn.onQuery(query="^PREPARE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +COMMIT; +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +--- shouldn't see any changes after failed COMMIT +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- cancel at COMMIT (by cancelling on PREPARE) +SELECT citus.mitmproxy('conn.onQuery(query="^PREPARE").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +COMMIT; +ERROR: canceling statement due to user request +--- shouldn't see any changes after cancelled PREPARE +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- allow connection to allow DROP +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE dml_test; diff --git a/src/test/regress/expected/failure_online_move_shard_placement_1.out b/src/test/regress/expected/failure_online_move_shard_placement_1.out new file mode 100644 index 000000000..52dd1e931 --- /dev/null +++ b/src/test/regress/expected/failure_online_move_shard_placement_1.out @@ -0,0 +1,330 @@ +-- +-- failure_online_move_shard_placement +-- +-- The tests cover moving shard placements using logical replication. +CREATE SCHEMA IF NOT EXISTS move_shard; +SET SEARCH_PATH = move_shard; +SET citus.shard_count TO 4; +SET citus.next_shard_id TO 100; +SET citus.shard_replication_factor TO 1; +SET citus.max_adaptive_executor_pool_size TO 1; +SELECT pg_backend_pid() as pid \gset +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE t(id int PRIMARY KEY, int_data int, data text); +CREATE INDEX index_failure ON t(id); +SELECT create_distributed_table('t', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE VIEW shards_in_workers AS +SELECT shardid, + (CASE WHEN nodeport = :worker_1_port THEN 'worker1' ELSE 'worker2' END) AS worker +FROM pg_dist_placement NATURAL JOIN pg_dist_node +WHERE shardstate != 4 +ORDER BY 1,2 ASC; +-- Insert some data +INSERT INTO t SELECT x, x+1, MD5(random()::text) FROM generate_series(1,100000) AS f(x); +-- Initial shard placements +SELECT * FROM shards_in_workers; + shardid | worker +--------------------------------------------------------------------- + 100 | worker2 + 101 | worker1 + 102 | worker2 + 103 | worker1 +(4 rows) + +-- failure on sanity checks +SELECT citus.mitmproxy('conn.onQuery(query="DROP TABLE IF EXISTS move_shard.t CASCADE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +-- cancellation on sanity checks +SELECT citus.mitmproxy('conn.onQuery(query="DROP TABLE IF EXISTS move_shard.t CASCADE").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +-- failure on move_shard table creation +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE move_shard.t").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +-- cancellation on move_shard table creation +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE move_shard.t").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +-- failure on polling subscription state +SELECT citus.mitmproxy('conn.onQuery(query="^SELECT count\(\*\) FROM pg_subscription_rel").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +-- cancellation on polling subscription state +SELECT citus.mitmproxy('conn.onQuery(query="^SELECT count\(\*\) FROM pg_subscription_rel").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +-- failure on getting subscriber state +SELECT citus.mitmproxy('conn.onQuery(query="^SELECT sum").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +-- cancellation on getting subscriber state +SELECT citus.mitmproxy('conn.onQuery(query="^SELECT sum").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +-- failure on polling last write-ahead log location reported to origin WAL sender +SELECT citus.mitmproxy('conn.onQuery(query="^SELECT min\(latest_end_lsn").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +-- cancellation on polling last write-ahead log location reported to origin WAL sender +SELECT citus.mitmproxy('conn.onQuery(query="^SELECT min\(latest_end_lsn").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +-- failure on dropping subscription +SELECT citus.mitmproxy('conn.onQuery(query="^DROP SUBSCRIPTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); +WARNING: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- cancellation on dropping subscription +SELECT citus.mitmproxy('conn.onQuery(query="^DROP SUBSCRIPTION").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +-- failure on creating the primary key +SELECT citus.mitmproxy('conn.onQuery(query="t_pkey").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +-- cancellation on creating the primary key +SELECT citus.mitmproxy('conn.onQuery(query="t_pkey").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +-- failure on create index +SELECT citus.mitmproxy('conn.matches(b"CREATE INDEX").killall()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); +WARNING: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- lets create few more indexes and fail with both +-- parallel mode and sequential mode +CREATE INDEX index_failure_2 ON t(id); +CREATE INDEX index_failure_3 ON t(id); +CREATE INDEX index_failure_4 ON t(id); +CREATE INDEX index_failure_5 ON t(id); +-- failure on the third create index +ALTER SYSTEM SET citus.max_adaptive_executor_pool_size TO 1; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +SELECT citus.mitmproxy('conn.matches(b"CREATE INDEX").killall()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); +WARNING: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- failure on parallel create index +ALTER SYSTEM RESET citus.max_adaptive_executor_pool_size; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +SELECT citus.mitmproxy('conn.matches(b"CREATE INDEX").killall()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); +WARNING: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +-- Verify that the shard is not moved and the number of rows are still 100k +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM shards_in_workers; + shardid | worker +--------------------------------------------------------------------- + 100 | worker2 + 101 | worker1 + 102 | worker2 + 103 | worker1 +(4 rows) + +SELECT count(*) FROM t; + count +--------------------------------------------------------------------- + 100000 +(1 row) + +-- Verify that shard can be moved after a temporary failure +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port); + master_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM shards_in_workers; + shardid | worker +--------------------------------------------------------------------- + 100 | worker2 + 101 | worker2 + 102 | worker2 + 103 | worker1 +(4 rows) + +SELECT count(*) FROM t; + count +--------------------------------------------------------------------- + 100000 +(1 row) + +DROP SCHEMA move_shard CASCADE ; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table t +drop cascades to view shards_in_workers diff --git a/src/test/regress/expected/failure_savepoints_1.out b/src/test/regress/expected/failure_savepoints_1.out new file mode 100644 index 000000000..fb4c870bb --- /dev/null +++ b/src/test/regress/expected/failure_savepoints_1.out @@ -0,0 +1,384 @@ +-- We have two different output files for this failure test because the +-- failure behaviour of SAVEPOINT and RELEASE commands are different if +-- we use the executor. If we use it, these commands error out if any of +-- the placement commands fail. Otherwise, we might mark the placement +-- as invalid and continue with a WARNING. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SET citus.shard_count = 2; +SET citus.shard_replication_factor = 1; -- one shard per worker +SET citus.next_shard_id TO 100950; +ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART 150; +CREATE TABLE artists ( + id bigint NOT NULL, + name text NOT NULL +); +SELECT create_distributed_table('artists', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- add some data +INSERT INTO artists VALUES (1, 'Pablo Picasso'); +INSERT INTO artists VALUES (2, 'Vincent van Gogh'); +INSERT INTO artists VALUES (3, 'Claude Monet'); +INSERT INTO artists VALUES (4, 'William Kurelek'); +-- simply fail at SAVEPOINT +SELECT citus.mitmproxy('conn.onQuery(query="^SAVEPOINT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +INSERT INTO artists VALUES (5, 'Asher Lev'); +SAVEPOINT s1; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +DELETE FROM artists WHERE id=4; +ERROR: current transaction is aborted, commands ignored until end of transaction block +RELEASE SAVEPOINT s1; +ERROR: current transaction is aborted, commands ignored until end of transaction block +COMMIT; +SELECT * FROM artists WHERE id IN (4, 5); + id | name +--------------------------------------------------------------------- + 4 | William Kurelek +(1 row) + +-- fail at RELEASE +SELECT citus.mitmproxy('conn.onQuery(query="^RELEASE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +UPDATE artists SET name='a'; +SAVEPOINT s1; +DELETE FROM artists WHERE id=4; +RELEASE SAVEPOINT s1; +WARNING: AbortSubTransaction while in COMMIT state +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: savepoint "savepoint_2" does not exist +CONTEXT: while executing command on localhost:xxxxx +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +ROLLBACK; +SELECT * FROM artists WHERE id IN (4, 5); + id | name +--------------------------------------------------------------------- + 4 | William Kurelek +(1 row) + +-- fail at ROLLBACK +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +INSERT INTO artists VALUES (5, 'Asher Lev'); +SAVEPOINT s1; +DELETE FROM artists WHERE id=4; +ROLLBACK TO SAVEPOINT s1; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +COMMIT; +ERROR: failure on connection marked as essential: localhost:xxxxx +SELECT * FROM artists WHERE id IN (4, 5); + id | name +--------------------------------------------------------------------- + 4 | William Kurelek +(1 row) + +-- fail at second RELEASE +SELECT citus.mitmproxy('conn.onQuery(query="^RELEASE").after(1).kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SAVEPOINT s1; +DELETE FROM artists WHERE id=4; +RELEASE SAVEPOINT s1; +SAVEPOINT s2; +INSERT INTO artists VALUES (5, 'Jacob Kahn'); +RELEASE SAVEPOINT s2; +WARNING: AbortSubTransaction while in COMMIT state +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +COMMIT; +SELECT * FROM artists WHERE id IN (4, 5); + id | name +--------------------------------------------------------------------- + 4 | William Kurelek +(1 row) + +-- fail at second ROLLBACK +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").after(1).kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SAVEPOINT s1; +UPDATE artists SET name='A' WHERE id=4; +ROLLBACK TO SAVEPOINT s1; +SAVEPOINT s2; +DELETE FROM artists WHERE id=5; +ROLLBACK TO SAVEPOINT s2; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +COMMIT; +ERROR: failure on connection marked as essential: localhost:xxxxx +SELECT * FROM artists WHERE id IN (4, 5); + id | name +--------------------------------------------------------------------- + 4 | William Kurelek +(1 row) + +SELECT citus.mitmproxy('conn.onQuery(query="^RELEASE").after(1).kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- Release after rollback +BEGIN; +SAVEPOINT s1; +ROLLBACK TO s1; +RELEASE SAVEPOINT s1; +SAVEPOINT s2; +INSERT INTO artists VALUES (6, 'John J. Audubon'); +INSERT INTO artists VALUES (7, 'Emily Carr'); +ROLLBACK TO s2; +RELEASE SAVEPOINT s2; +COMMIT; +SELECT * FROM artists WHERE id=7; + id | name +--------------------------------------------------------------------- +(0 rows) + +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- Recover from errors +\set VERBOSITY terse +BEGIN; +SAVEPOINT s1; +SAVEPOINT s2; +INSERT INTO artists VALUES (6, 'John J. Audubon'); +INSERT INTO artists VALUES (7, 'Emily Carr'); +INSERT INTO artists VALUES (7, 'Emily Carr'); +ROLLBACK TO SAVEPOINT s1; +WARNING: connection not open +WARNING: connection not open +WARNING: connection not open +WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open +WARNING: connection not open +WARNING: connection not open +COMMIT; +ERROR: failure on connection marked as essential: localhost:xxxxx +SELECT * FROM artists WHERE id=6; + id | name +--------------------------------------------------------------------- +(0 rows) + +-- replication factor > 1 +CREATE TABLE researchers ( + id bigint NOT NULL, + lab_id int NOT NULL, + name text NOT NULL +); +SET citus.shard_count = 1; +SET citus.shard_replication_factor = 2; -- single shard, on both workers +SELECT create_distributed_table('researchers', 'lab_id', 'hash'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- simply fail at SAVEPOINT +SELECT citus.mitmproxy('conn.onQuery(query="^SAVEPOINT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +INSERT INTO researchers VALUES (7, 4, 'Jan Plaza'); +SAVEPOINT s1; +WARNING: connection not open +WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open +WARNING: connection not open +WARNING: connection not open +ERROR: connection not open +INSERT INTO researchers VALUES (8, 4, 'Alonzo Church'); +ERROR: current transaction is aborted, commands ignored until end of transaction block +ROLLBACK TO s1; +ERROR: savepoint "s1" does not exist +RELEASE SAVEPOINT s1; +ERROR: current transaction is aborted, commands ignored until end of transaction block +COMMIT; +-- should see correct results from healthy placement and one bad placement +SELECT * FROM researchers WHERE lab_id = 4; + id | lab_id | name +--------------------------------------------------------------------- +(0 rows) + +UPDATE pg_dist_shard_placement SET shardstate = 1 +WHERE shardstate = 3 AND shardid IN ( + SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'researchers'::regclass +) RETURNING placementid; + placementid +--------------------------------------------------------------------- +(0 rows) + +TRUNCATE researchers; +-- fail at rollback +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +INSERT INTO researchers VALUES (7, 4, 'Jan Plaza'); +SAVEPOINT s1; +INSERT INTO researchers VALUES (8, 4, 'Alonzo Church'); +ROLLBACK TO s1; +WARNING: connection not open +WARNING: connection not open +RELEASE SAVEPOINT s1; +COMMIT; +ERROR: failure on connection marked as essential: localhost:xxxxx +-- should see correct results from healthy placement and one bad placement +SELECT * FROM researchers WHERE lab_id = 4; + id | lab_id | name +--------------------------------------------------------------------- +(0 rows) + +UPDATE pg_dist_shard_placement SET shardstate = 1 +WHERE shardstate = 3 AND shardid IN ( + SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'researchers'::regclass +) RETURNING placementid; + placementid +--------------------------------------------------------------------- +(0 rows) + +TRUNCATE researchers; +-- fail at release +SELECT citus.mitmproxy('conn.onQuery(query="^RELEASE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +INSERT INTO researchers VALUES (7, 4, 'Jan Plaza'); +SAVEPOINT s1; +INSERT INTO researchers VALUES (8, 4, 'Alonzo Church'); +ROLLBACK TO s1; +RELEASE SAVEPOINT s1; +WARNING: AbortSubTransaction while in COMMIT state +WARNING: connection not open +WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open +WARNING: connection not open +WARNING: connection not open +WARNING: savepoint "savepoint_3" does not exist +ERROR: connection not open +COMMIT; +-- should see correct results from healthy placement and one bad placement +SELECT * FROM researchers WHERE lab_id = 4; + id | lab_id | name +--------------------------------------------------------------------- +(0 rows) + +UPDATE pg_dist_shard_placement SET shardstate = 1 +WHERE shardstate = 3 AND shardid IN ( + SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'researchers'::regclass +) RETURNING placementid; + placementid +--------------------------------------------------------------------- +(0 rows) + +TRUNCATE researchers; +-- test that we don't mark reference placements unhealthy +CREATE TABLE ref(a int, b int); +SELECT create_reference_table('ref'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SAVEPOINT start; +INSERT INTO ref VALUES (1001,2); +SELECT * FROM ref; + a | b +--------------------------------------------------------------------- + 1001 | 2 +(1 row) + +ROLLBACK TO SAVEPOINT start; +WARNING: connection not open +WARNING: connection not open +SELECT * FROM ref; +WARNING: connection not open +WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open +WARNING: connection not open +WARNING: connection not open +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +END; +-- clean up +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE artists; +DROP TABLE researchers; +DROP TABLE ref; diff --git a/src/test/regress/expected/failure_truncate_1.out b/src/test/regress/expected/failure_truncate_1.out new file mode 100644 index 000000000..92947ee10 --- /dev/null +++ b/src/test/regress/expected/failure_truncate_1.out @@ -0,0 +1,1281 @@ +-- +-- Test TRUNCATE command failures +-- +CREATE SCHEMA truncate_failure; +SET search_path TO 'truncate_failure'; +SET citus.next_shard_id TO 120000; +-- we don't want to see the prepared transaction numbers in the warnings +SET client_min_messages TO ERROR; +-- do not cache any connections +SET citus.max_cached_conns_per_worker TO 0; +-- use a predictable number of connections per task +SET citus.force_max_query_parallelization TO on; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- we'll start with replication factor 1, 2PC and parallel mode +SET citus.shard_count = 4; +SET citus.shard_replication_factor = 1; +CREATE TABLE test_table (key int, value int); +SELECT create_distributed_table('test_table', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +CREATE VIEW unhealthy_shard_count AS + SELECT count(*) + FROM pg_dist_shard_placement pdsp + JOIN + pg_dist_shard pds + ON pdsp.shardid=pds.shardid + WHERE logicalrelid='truncate_failure.test_table'::regclass AND shardstate != 1; +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: failure on connection marked as essential: localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- kill as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="TRUNCATE TABLE truncate_failure.test_table").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="TRUNCATE TABLE truncate_failure.test_table").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- kill as soon as the coordinator sends COMMIT PREPARED +-- the transaction succeeds on one placement, and we need to +-- recover prepared statements to see the other placement as well +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 2 +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- refill the table +TRUNCATE test_table; +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- cancel as soon as the coordinator sends COMMIT +-- interrupts are held during COMMIT/ROLLBACK, so the command +-- should have been applied without any issues since cancel is ignored +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- refill the table +TRUNCATE test_table; +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +SET client_min_messages TO WARNING; +-- now kill just after the worker sends response to +-- COMMIT command, so we'll have lots of warnings but the command +-- should have been committed both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: failed to commit transaction on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: failed to commit transaction on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SET client_min_messages TO ERROR; +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- now cancel just after the worker sends response to +-- but Postgres doesn't accept interrupts during COMMIT and ROLLBACK +-- so should not cancel at all, so not an effective test but adding in +-- case Citus messes up this behaviour +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- Let's test Truncate on reference tables with a FK from a hash distributed table +CREATE TABLE reference_table(i int UNIQUE); +INSERT INTO reference_table SELECT x FROM generate_series(1,20) as f(x); +SELECT create_reference_table('reference_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD CONSTRAINT foreign_key FOREIGN KEY (value) REFERENCES reference_table(i); +-- immediately kill when we see prepare transaction to see if the command +-- still cascaded to referencing table or failed successfuly +SELECT citus.mitmproxy('conn.onQuery(query="PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +SELECT count(*) FROM reference_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- immediately cancel when we see prepare transaction to see if the command +-- still cascaded to referencing table or failed successfuly +SELECT citus.mitmproxy('conn.onQuery(query="PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +SELECT count(*) FROM reference_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- immediately kill when we see cascading TRUNCATE on the hash table to see +-- rollbacked properly +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE TABLE").after(2).kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +SELECT count(*) FROM reference_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- immediately cancel when we see cascading TRUNCATE on the hash table to see +-- if the command still cascaded to referencing table or failed successfuly +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE TABLE").after(2).cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +SELECT count(*) FROM reference_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- immediately kill after we get prepare transaction complete +-- to see if the command still cascaded to referencing table or +-- failed successfuly +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- immediately cancel after we get prepare transaction complete +-- to see if the command still cascaded to referencing table or +-- failed successfuly +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: failure on connection marked as essential: localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- kill as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE TABLE truncate_failure.test_table").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE TABLE truncate_failure.test_table").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- killing on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="^PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- we should be able to revocer the transaction and +-- see that the command is rollbacked +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 2 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancelling on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="^PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- we should be able to revocer the transaction and +-- see that the command is rollbacked +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- killing on command complete of COMMIT PREPARE, we should see that the command succeeds +-- and all the workers committed +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- we shouldn't have any prepared transactions in the workers +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- kill as soon as the coordinator sends COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- Since we kill connections to one worker after commit arrives but the +-- other worker connections are healthy, we cannot commit on 1 worker +-- which has 2 active shard placements, but the other does. That's why +-- we expect to see 2 recovered prepared transactions. +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 2 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- finally, test failing on ROLLBACK with 2CPC +-- fail just after the coordinator sends the ROLLBACK +-- so the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +TRUNCATE test_table; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- but now kill just after the worker sends response to +-- ROLLBACK command, so we'll have lots of warnings but the command +-- should have been rollbacked both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +TRUNCATE test_table; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- final set of tests with 2PC and replication factor = 2 +SET citus.shard_count = 4; +SET citus.shard_replication_factor = 2; +-- re-create the table with replication factor 2 +DROP TABLE test_table CASCADE; +CREATE TABLE test_table (key int, value int); +SELECT create_distributed_table('test_table', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +CREATE VIEW unhealthy_shard_count AS + SELECT count(*) + FROM pg_dist_shard_placement pdsp + JOIN + pg_dist_shard pds + ON pdsp.shardid=pds.shardid + WHERE logicalrelid='truncate_failure.test_table'::regclass AND shardstate != 1; +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: failure on connection marked as essential: localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- kill as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="TRUNCATE TABLE truncate_failure.test_table").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="TRUNCATE TABLE truncate_failure.test_table").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- killing on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- we should be able to revocer the transaction and +-- see that the command is rollbacked +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 4 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- killing on command complete of COMMIT PREPARE, we should see that the command succeeds +-- and all the workers committed +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- we shouldn't have any prepared transactions in the workers +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- kill as soon as the coordinator sends COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Since we kill connections to one worker after commit arrives but the +-- other worker connections are healthy, we cannot commit on 1 worker +-- which has 4 active shard placements (2 shards, replication factor=2), +-- but the other does. That's why we expect to see 4 recovered prepared +-- transactions. +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 4 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- finally, test failing on ROLLBACK with 2CPC +-- fail just after the coordinator sends the ROLLBACK +-- so the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +TRUNCATE test_table; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- but now kill just after the worker sends response to +-- ROLLBACK command, so we'll have lots of warnings but the command +-- should have been rollbacked both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +TRUNCATE test_table; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +DROP SCHEMA truncate_failure CASCADE; +SET search_path TO default; diff --git a/src/test/regress/expected/failure_vacuum_1.out b/src/test/regress/expected/failure_vacuum_1.out index c13096f6d..2e6776f27 100644 --- a/src/test/regress/expected/failure_vacuum_1.out +++ b/src/test/regress/expected/failure_vacuum_1.out @@ -10,7 +10,6 @@ SELECT citus.mitmproxy('conn.allow()'); SET citus.shard_count = 1; SET citus.shard_replication_factor = 2; -- one shard per worker -SET citus.multi_shard_commit_protocol TO '1pc'; CREATE TABLE vacuum_test (key int, value int); SELECT create_distributed_table('vacuum_test', 'key'); create_distributed_table @@ -31,8 +30,7 @@ SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM").kill()'); (1 row) VACUUM vacuum_test; -ERROR: connection error: localhost:xxxxx -DETAIL: server closed the connection unexpectedly +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly This probably means the server terminated abnormally before or while processing the request. SELECT citus.mitmproxy('conn.onQuery(query="^ANALYZE").kill()'); @@ -42,8 +40,7 @@ SELECT citus.mitmproxy('conn.onQuery(query="^ANALYZE").kill()'); (1 row) ANALYZE vacuum_test; -WARNING: connection error: localhost:xxxxx -DETAIL: server closed the connection unexpectedly +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly This probably means the server terminated abnormally before or while processing the request. SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()'); @@ -53,19 +50,31 @@ SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()'); (1 row) ANALYZE vacuum_test; --- ANALYZE transactions being critical is an open question, see #2430 --- show that we marked as INVALID on COMMIT FAILURE -SELECT shardid, shardstate FROM pg_dist_shard_placement where shardstate != 1 AND -shardid in ( SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'vacuum_test'::regclass); - shardid | shardstate +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: failed to commit transaction on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy --------------------------------------------------------------------- - 12000000 | 3 + (1 row) -UPDATE pg_dist_shard_placement SET shardstate = 1 -WHERE shardid IN ( - SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'vacuum_test'::regclass -); +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +-- ANALYZE transactions being critical is an open question, see #2430 +-- show that we never mark as INVALID on COMMIT FAILURE +SELECT shardid, shardstate FROM pg_dist_shard_placement where shardstate != 1 AND +shardid in ( SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'vacuum_test'::regclass); + shardid | shardstate +--------------------------------------------------------------------- +(0 rows) + -- the same tests with cancel SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM").cancel(' || pg_backend_pid() || ')'); mitmproxy @@ -111,7 +120,9 @@ SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM.*other").kill()'); (1 row) VACUUM vacuum_test, other_vacuum_test; -ERROR: syntax error at or near "," +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM.*other").cancel(' || pg_backend_pid() || ')'); mitmproxy --------------------------------------------------------------------- @@ -119,7 +130,7 @@ SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM.*other").cancel(' || pg_bac (1 row) VACUUM vacuum_test, other_vacuum_test; -ERROR: syntax error at or near "," +ERROR: canceling statement due to user request -- ==== Clean up, we're done here ==== SELECT citus.mitmproxy('conn.allow()'); mitmproxy