diff --git a/src/test/regress/expected/failure_copy_on_hash_0.out b/src/test/regress/expected/failure_copy_on_hash_0.out new file mode 100644 index 000000000..c95a4c7e6 --- /dev/null +++ b/src/test/regress/expected/failure_copy_on_hash_0.out @@ -0,0 +1,397 @@ +-- +-- Failure tests for COPY to hash distributed tables +-- +CREATE SCHEMA copy_distributed_table; +SET search_path TO 'copy_distributed_table'; +SET citus.next_shard_id TO 1710000; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- With one placement COPY should error out and placement should stay healthy. +SET citus.shard_replication_factor TO 1; +SET citus.shard_count to 4; +SET citus.max_cached_conns_per_worker to 0; +CREATE TABLE test_table(id int, value_1 int); +SELECT create_distributed_table('test_table','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE VIEW unhealthy_shard_count AS + SELECT count(*) + FROM pg_dist_shard_placement pdsp + JOIN + pg_dist_shard pds + ON pdsp.shardid=pds.shardid + WHERE logicalrelid='copy_distributed_table.test_table'::regclass AND shardstate != 1; +-- Just kill the connection after sending the first query to the worker. +SELECT citus.mitmproxy('conn.kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table FROM stdin delimiter ','; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: COPY test_table, line 1: "1,2" +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Now, kill the connection while copying the data +SELECT citus.mitmproxy('conn.onCopyData().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table FROM stdin delimiter ','; +ERROR: failed to COPY to shard xxxxx on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Similar to the above one, but now cancel the connection +-- instead of killing it. +SELECT citus.mitmproxy('conn.onCopyData().cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table FROM stdin delimiter ','; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill the connection after worker sends command complete message +SELECT citus.mitmproxy('conn.onCommandComplete(command="COPY 1").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table FROM stdin delimiter ','; +ERROR: failed to COPY to shard xxxxx on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- similar to above one, but cancel the connection on command complete +SELECT citus.mitmproxy('conn.onCommandComplete(command="COPY 1").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table FROM stdin delimiter ','; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill the connection on PREPARE TRANSACTION +SELECT citus.mitmproxy('conn.onQuery(query="PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table FROM stdin delimiter ','; +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- we don't want to see the prepared transaction numbers in the warnings +SET client_min_messages TO ERROR; +-- kill on command complete on COMMIT PREPARE, command should succeed +SELECT citus.mitmproxy('conn.onCommandComplete(command="COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table FROM stdin delimiter ','; +SET client_min_messages TO NOTICE; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 4 +(1 row) + +TRUNCATE TABLE test_table; +-- kill on ROLLBACK, command could be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +\COPY test_table FROM stdin delimiter ','; +ROLLBACK; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +DROP TABLE test_table CASCADE; +NOTICE: drop cascades to view unhealthy_shard_count +-- With two placement, should we error out or mark untouched shard placements as inactive? +SET citus.shard_replication_factor TO 2; +CREATE TABLE test_table_2(id int, value_1 int); +SELECT create_distributed_table('test_table_2','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT citus.mitmproxy('conn.kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table_2 FROM stdin delimiter ','; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: COPY test_table_2, line 1: "1,2" +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT pds.logicalrelid, pdsd.shardid, pdsd.shardstate + FROM pg_dist_shard_placement as pdsd + INNER JOIN pg_dist_shard as pds + ON pdsd.shardid = pds.shardid + WHERE pds.logicalrelid = 'test_table_2'::regclass + ORDER BY shardid, nodeport; + logicalrelid | shardid | shardstate +--------------------------------------------------------------------- + test_table_2 | 1710004 | 1 + test_table_2 | 1710004 | 1 + test_table_2 | 1710005 | 1 + test_table_2 | 1710005 | 1 + test_table_2 | 1710006 | 1 + test_table_2 | 1710006 | 1 + test_table_2 | 1710007 | 1 + test_table_2 | 1710007 | 1 +(8 rows) + +-- Create test_table_2 again to have healthy one +DROP TABLE test_table_2; +CREATE TABLE test_table_2(id int, value_1 int); +SELECT create_distributed_table('test_table_2','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Kill the connection when we try to start the COPY +-- The query should abort +SELECT citus.mitmproxy('conn.onQuery(query="FROM STDIN WITH").killall()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table_2 FROM stdin delimiter ','; +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +COPY test_table_2, line 1: "1,2" +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT pds.logicalrelid, pdsd.shardid, pdsd.shardstate + FROM pg_dist_shard_placement as pdsd + INNER JOIN pg_dist_shard as pds + ON pdsd.shardid = pds.shardid + WHERE pds.logicalrelid = 'test_table_2'::regclass + ORDER BY shardid, nodeport; + logicalrelid | shardid | shardstate +--------------------------------------------------------------------- + test_table_2 | 1710008 | 1 + test_table_2 | 1710008 | 1 + test_table_2 | 1710009 | 1 + test_table_2 | 1710009 | 1 + test_table_2 | 1710010 | 1 + test_table_2 | 1710010 | 1 + test_table_2 | 1710011 | 1 + test_table_2 | 1710011 | 1 +(8 rows) + +-- Create test_table_2 again to have healthy one +DROP TABLE test_table_2; +CREATE TABLE test_table_2(id int, value_1 int); +SELECT create_distributed_table('test_table_2','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- When kill on copying data, it will be rollbacked and placements won't be labaled as invalid. +-- Note that now we sent data to shard xxxxx, yet it is not marked as invalid. +-- You can check the issue about this behaviour: https://github.com/citusdata/citus/issues/1933 +SELECT citus.mitmproxy('conn.onCopyData().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\COPY test_table_2 FROM stdin delimiter ','; +ERROR: failed to COPY to shard xxxxx on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT pds.logicalrelid, pdsd.shardid, pdsd.shardstate + FROM pg_dist_shard_placement as pdsd + INNER JOIN pg_dist_shard as pds + ON pdsd.shardid = pds.shardid + WHERE pds.logicalrelid = 'test_table_2'::regclass + ORDER BY shardid, nodeport; + logicalrelid | shardid | shardstate +--------------------------------------------------------------------- + test_table_2 | 1710012 | 1 + test_table_2 | 1710012 | 1 + test_table_2 | 1710013 | 1 + test_table_2 | 1710013 | 1 + test_table_2 | 1710014 | 1 + test_table_2 | 1710014 | 1 + test_table_2 | 1710015 | 1 + test_table_2 | 1710015 | 1 +(8 rows) + +DROP SCHEMA copy_distributed_table CASCADE; +NOTICE: drop cascades to table test_table_2 +SET search_path TO default; \ No newline at end of file diff --git a/src/test/regress/expected/failure_copy_to_reference_0.out b/src/test/regress/expected/failure_copy_to_reference_0.out new file mode 100644 index 000000000..eb625e40c --- /dev/null +++ b/src/test/regress/expected/failure_copy_to_reference_0.out @@ -0,0 +1,458 @@ +-- +-- Failure tests for COPY to reference tables +-- +CREATE SCHEMA copy_reference_failure; +SET search_path TO 'copy_reference_failure'; +SET citus.next_shard_id TO 130000; +-- we don't want to see the prepared transaction numbers in the warnings +SET client_min_messages TO ERROR; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE test_table(id int, value_1 int); +SELECT create_reference_table('test_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +CREATE VIEW unhealthy_shard_count AS + SELECT count(*) + FROM pg_dist_shard_placement pdsp + JOIN + pg_dist_shard pds + ON pdsp.shardid=pds.shardid + WHERE logicalrelid='copy_reference_failure.test_table'::regclass AND shardstate != 1; +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: failure on connection marked as essential: localhost:xxxxx +CONTEXT: COPY test_table, line 1: "1,2" +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: failure on connection marked as essential: localhost:xxxxx +CONTEXT: COPY test_table, line 1: "1,2" +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: canceling statement due to user request +CONTEXT: COPY test_table, line 1: "1,2" +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill as soon as the coordinator sends COPY command +SELECT citus.mitmproxy('conn.onQuery(query="^COPY").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +COPY test_table, line 1: "1,2" +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel as soon as the coordinator sends COPY command +SELECT citus.mitmproxy('conn.onQuery(query="^COPY").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: canceling statement due to user request +CONTEXT: COPY test_table, line 1: "1,2" +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill as soon as the worker sends CopyComplete +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COPY 3").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: failed to COPY to shard xxxxx on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel as soon as the coordinator sends CopyData +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COPY 3").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill the connection when we try to start the COPY +-- the query should abort +SELECT citus.mitmproxy('conn.onQuery(query="FROM STDIN WITH").killall()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +COPY test_table, line 1: "1,2" +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- killing on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancelling on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- killing on command complete of COMMIT PREPARE, we should see that the command succeeds +-- and all the workers committed +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- we shouldn't have any prepared transactions in the workers +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 3 +(1 row) + +TRUNCATE test_table; +-- kill as soon as the coordinator sends COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +\copy test_table FROM STDIN DELIMITER ',' +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- Since we kill connections to one worker after commit arrives but the +-- other worker connections are healthy, we cannot commit on 1 worker +-- which has 1 active shard placements, but the other does. That's why +-- we expect to see 1 recovered prepared transactions. +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 3 +(1 row) + +TRUNCATE test_table; +-- finally, test failing on ROLLBACK just after the coordinator +-- sends the ROLLBACK so the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SET LOCAL client_min_messages TO WARNING; +\copy test_table FROM STDIN DELIMITER ',' +ROLLBACK; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- but now kill just after the worker sends response to +-- ROLLBACK command, command should have been rollbacked +-- both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SET LOCAL client_min_messages TO WARNING; +\copy test_table FROM STDIN DELIMITER ',' +ROLLBACK; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +DROP SCHEMA copy_reference_failure CASCADE; +SET search_path TO default; \ No newline at end of file diff --git a/src/test/regress/expected/failure_create_distributed_table_non_empty_0.out b/src/test/regress/expected/failure_create_distributed_table_non_empty_0.out new file mode 100644 index 000000000..e55f7a1a7 --- /dev/null +++ b/src/test/regress/expected/failure_create_distributed_table_non_empty_0.out @@ -0,0 +1,999 @@ +-- +-- Failure tests for COPY to reference tables +-- +-- We have to keep two copies of this failure test +-- because if the shards are created via the executor +-- cancellations are processed, otherwise they are not +CREATE SCHEMA create_distributed_table_non_empty_failure; +SET search_path TO 'create_distributed_table_non_empty_failure'; +SET citus.next_shard_id TO 11000000; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- we'll start with replication factor 1 and 2pc +SET citus.shard_replication_factor TO 1; +SET citus.shard_count to 4; +CREATE TABLE test_table(id int, value_1 int); +INSERT INTO test_table VALUES (1,1),(2,2),(3,3),(4,4); +-- in the first test, kill the first connection we sent from the coordinator +SELECT citus.mitmproxy('conn.kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- in the first test, cancel the first connection we sent from the coordinator +SELECT citus.mitmproxy('conn.cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill as soon as the coordinator sends CREATE SCHEMA +SELECT citus.mitmproxy('conn.onQuery(query="^CREATE SCHEMA").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.schemata WHERE schema_name = 'create_distributed_table_non_empty_failure'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,1) +(2 rows) + +-- cancel as soon as the coordinator sends CREATE SCHEMA +-- Note: Schema should be created in workers because Citus +-- does not check for interrupts until GetRemoteCommandResult is called. +-- Since we already sent the command at this stage, the schemas get created in workers +SELECT citus.mitmproxy('conn.onQuery(query="^CREATE SCHEMA").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.schemata WHERE schema_name = 'create_distributed_table_non_empty_failure'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,1) +(2 rows) + +SELECT run_command_on_workers($$DROP SCHEMA IF EXISTS create_distributed_table_non_empty_failure$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,"DROP SCHEMA") + (localhost,57637,t,"DROP SCHEMA") +(2 rows) + +-- this triggers a schema creation which prevents further transactions around dependency propagation +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +CREATE TYPE schema_proc AS (a int); +DROP TYPE schema_proc; +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.schemata WHERE schema_name = 'create_distributed_table_non_empty_failure'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,1) + (localhost,57637,t,1) +(2 rows) + +-- cancel as soon as the coordinator sends begin +-- if the shards are created via the executor, the table creation will fail +-- otherwise shards will be created because we ignore cancel requests during the shard creation +-- Interrupts are hold in CreateShardsWithRoundRobinPolicy +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.schemata WHERE schema_name = 'create_distributed_table_non_empty_failure'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,1) + (localhost,57637,t,1) +(2 rows) + +DROP TABLE test_table ; +CREATE TABLE test_table(id int, value_1 int); +INSERT INTO test_table VALUES (1,1),(2,2),(3,3),(4,4); +-- kill as soon as the coordinator sends CREATE TABLE +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill as soon as the coordinator sends COPY +SELECT citus.mitmproxy('conn.onQuery(query="COPY").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill when the COPY is completed, it should be rollbacked properly +SELECT citus.mitmproxy('conn.onCommandComplete(command="COPY").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$create_distributed_table_non_empty_failure.test_table$$) +ERROR: failed to COPY to shard xxxxx on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel as soon as the coordinator sends COPY, table +-- should not be created and rollbacked properly +SELECT citus.mitmproxy('conn.onQuery(query="COPY").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel when the COPY is completed, it should be rollbacked properly +SELECT citus.mitmproxy('conn.onCommandComplete(command="COPY").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$create_distributed_table_non_empty_failure.test_table$$) +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- immediately kill when we see prepare transaction to see if the command +-- successfully rollbacked the created shards +-- we don't want to see the prepared transaction numbers in the warnings +SET client_min_messages TO ERROR; +SELECT citus.mitmproxy('conn.onQuery(query="PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- immediately cancel when we see prepare transaction to see if the command +-- successfully rollbacked the created shards +SELECT citus.mitmproxy('conn.onQuery(query="PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +-- kill as soon as the coordinator sends COMMIT +-- shards should be created and kill should not affect +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 4 +(1 row) + +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 2 +(1 row) + +DROP TABLE test_table ; +-- since we want to interrupt the schema creation again we need to drop and recreate +-- for citus to redistribute the dependency +DROP SCHEMA create_distributed_table_non_empty_failure; +CREATE SCHEMA create_distributed_table_non_empty_failure; +CREATE TABLE test_table(id int, value_1 int); +INSERT INTO test_table VALUES (1,1),(2,2),(3,3),(4,4); +-- cancel as soon as the coordinator sends COMMIT +-- shards should be created and kill should not affect +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 4 +(1 row) + +DROP TABLE test_table ; +CREATE TABLE test_table(id int, value_1 int); +INSERT INTO test_table VALUES (1,1),(2,2),(3,3),(4,4); +-- kill as soon as the coordinator sends ROLLBACK +-- the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ROLLBACK; +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel as soon as the coordinator sends ROLLBACK +-- should be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- We are done with pure create_distributed_table testing and now +-- testing for co-located tables. +CREATE TABLE colocated_table(id int, value_1 int); +SELECT create_distributed_table('colocated_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Now, cancel the connection just after transaction is opened on +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Now, kill the connection just after transaction is opened on +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'create_distributed_table_non_empty_failure' and table_name LIKE 'test_table%'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- Now, cancel the connection just after the COPY started to +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^COPY").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Now, kill the connection just after the COPY started to +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^COPY").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'create_distributed_table_non_empty_failure' and table_name LIKE 'test_table%'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- Now, cancel the connection when we issue CREATE TABLE on +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^SELECT worker_apply_shard_ddl_command").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Now, kill the connection when we issue CREATE TABLE on +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^SELECT worker_apply_shard_ddl_command").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'create_distributed_table_non_empty_failure' and table_name LIKE 'test_table%'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- Now run the same tests with 1pc +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE colocated_table; +DROP TABLE test_table; +DROP SCHEMA create_distributed_table_non_empty_failure; +CREATE SCHEMA create_distributed_table_non_empty_failure; +CREATE TABLE test_table(id int, value_1 int); +INSERT INTO test_table VALUES (1,1),(2,2),(3,3),(4,4); +SELECT citus.mitmproxy('conn.kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'create_distributed_table_non_empty_failure' and table_name LIKE 'test_table%'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- in the first test, cancel the first connection we sent from the coordinator +SELECT citus.mitmproxy('conn.cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'create_distributed_table_non_empty_failure' and table_name LIKE 'test_table%'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +-- this triggers a schema creation which prevents further transactions around dependency propagation +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +CREATE TYPE schema_proc AS (a int); +DROP TYPE schema_proc; +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.schemata WHERE schema_name = 'create_distributed_table_non_empty_failure'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,1) + (localhost,57637,t,1) +(2 rows) + +-- cancel as soon as the coordinator sends begin +-- if the shards are created via the executor, the table creation will fail +-- otherwise shards will be created because we ignore cancel requests during the shard creation +-- Interrupts are hold in CreateShardsWithRoundRobinPolicy +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.schemata WHERE schema_name = 'create_distributed_table_non_empty_failure'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,1) + (localhost,57637,t,1) +(2 rows) + +DROP TABLE test_table ; +CREATE TABLE test_table(id int, value_1 int); +INSERT INTO test_table VALUES (1,1),(2,2),(3,3),(4,4); +-- kill as soon as the coordinator sends CREATE TABLE +SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill as soon as the coordinator sends COPY +SELECT citus.mitmproxy('conn.onQuery(query="COPY").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill when the COPY is completed, it should be rollbacked properly +SELECT citus.mitmproxy('conn.onCommandComplete(command="COPY").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: failed to COPY to shard xxxxx on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel as soon as the coordinator sends COPY, table +-- should not be created and rollbacked properly +SELECT citus.mitmproxy('conn.onQuery(query="COPY").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel when the COPY is completed, it should be rollbacked properly +SELECT citus.mitmproxy('conn.onCommandComplete(command="COPY").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill as soon as the coordinator sends ROLLBACK +-- the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ROLLBACK; +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- cancel as soon as the coordinator sends ROLLBACK +-- should be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- kill as soon as the coordinator sends COMMIT +-- the command can be COMMITed +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +COMMIT; +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 4 +(1 row) + +DROP TABLE test_table; +CREATE TABLE test_table(id int, value_1 int); +INSERT INTO test_table VALUES (1,1),(2,2),(3,3),(4,4); +-- cancel as soon as the coordinator sends COMMIT +-- should be COMMITed +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_distributed_table('test_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +COMMIT; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 4 +(1 row) + +DROP TABLE test_table; +CREATE TABLE test_table(id int, value_1 int); +INSERT INTO test_table VALUES (1,1),(2,2),(3,3),(4,4); +CREATE TABLE colocated_table(id int, value_1 int); +SELECT create_distributed_table('colocated_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Now, cancel the connection just after transaction is opened on +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Now, kill the connection just after transaction is opened on +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Now, cancel the connection just after the COPY started to +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^COPY").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Now, kill the connection just after the COPY started to +-- workers. Note that, when there is a colocated table, interrupts +-- are not held and we can cancel in the middle of the execution +SELECT citus.mitmproxy('conn.onQuery(query="^COPY").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('test_table', 'id', colocate_with => 'colocated_table'); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard WHERE logicalrelid='create_distributed_table_non_empty_failure.test_table'::regclass; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_workers($$SELECT count(*) FROM information_schema.tables WHERE table_schema = 'create_distributed_table_non_empty_failure' and table_name LIKE 'test_table%'$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,9060,t,0) + (localhost,57637,t,0) +(2 rows) + +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +DROP SCHEMA create_distributed_table_non_empty_failure CASCADE; \ No newline at end of file diff --git a/src/test/regress/expected/failure_create_reference_table_0.out b/src/test/regress/expected/failure_create_reference_table_0.out new file mode 100644 index 000000000..37c4914ba --- /dev/null +++ b/src/test/regress/expected/failure_create_reference_table_0.out @@ -0,0 +1,264 @@ +-- +-- Failure tests for creating reference table +-- +CREATE SCHEMA failure_reference_table; +SET search_path TO 'failure_reference_table'; +SET citus.next_shard_id TO 10000000; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- this is merely used to get the schema creation propagated. Without there are failures +-- not related to reference tables but schema creation due to dependency creation on workers +CREATE TYPE schema_proc AS (a int); +DROP TYPE schema_proc; +CREATE TABLE ref_table(id int); +INSERT INTO ref_table VALUES(1),(2),(3); +-- Kill on sending first query to worker node, should error +-- out and not create any placement +SELECT citus.mitmproxy('conn.onQuery().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Kill after creating transaction on worker node +SELECT citus.mitmproxy('conn.onCommandComplete(command="BEGIN").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Cancel after creating transaction on worker node +SELECT citus.mitmproxy('conn.onCommandComplete(command="BEGIN").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Kill after copying data to worker node +SELECT citus.mitmproxy('conn.onCommandComplete(command="SELECT 1").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT count(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Cancel after copying data to worker node +SELECT citus.mitmproxy('conn.onCommandComplete(command="SELECT 1").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Kill after copying data to worker node +SELECT citus.mitmproxy('conn.onCommandComplete(command="COPY 3").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$failure_reference_table.ref_table$$) +ERROR: failed to COPY to shard xxxxx on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Cancel after copying data to worker node +SELECT citus.mitmproxy('conn.onCommandComplete(command="COPY 3").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$failure_reference_table.ref_table$$) +ERROR: canceling statement due to user request +SELECT count(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- we don't want to see the prepared transaction numbers in the warnings +SET client_min_messages TO ERROR; +-- Kill after preparing transaction. Since we don't commit after preparing, we recover +-- prepared transaction afterwards. +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT count(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +-- Kill after commiting prepared, this should succeed +SELECT citus.mitmproxy('conn.onCommandComplete(command="COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('ref_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +SELECT shardid, nodeport, shardstate FROM pg_dist_shard_placement ORDER BY shardid, nodeport; + shardid | nodeport | shardstate +--------------------------------------------------------------------- + 10000008 | 9060 | 1 + 10000008 | 57637 | 1 +(2 rows) + +SET client_min_messages TO NOTICE; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE ref_table; +DROP SCHEMA failure_reference_table; +CREATE SCHEMA failure_reference_table; +CREATE TABLE ref_table(id int); +INSERT INTO ref_table VALUES(1),(2),(3); +-- Test in transaction +SELECT citus.mitmproxy('conn.onQuery().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_reference_table('ref_table'); +WARNING: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +ERROR: failure on connection marked as essential: localhost:xxxxx +COMMIT; +-- kill on ROLLBACK, should be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_reference_table('ref_table'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$failure_reference_table.ref_table$$) + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +ROLLBACK; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT * FROM pg_dist_shard_placement ORDER BY shardid, nodeport; + shardid | shardstate | shardlength | nodename | nodeport | placementid +--------------------------------------------------------------------- +(0 rows) + +-- cancel when the coordinator send ROLLBACK, should be rollbacked. We ignore cancellations +-- during the ROLLBACK. +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SELECT create_reference_table('ref_table'); +NOTICE: Copying data from local table... +NOTICE: copying the data has completed +DETAIL: The local data in the table is no longer visible, but is still on disk. +HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$failure_reference_table.ref_table$$) + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +ROLLBACK; +SELECT * FROM pg_dist_shard_placement ORDER BY shardid, nodeport; + shardid | shardstate | shardlength | nodename | nodeport | placementid +--------------------------------------------------------------------- +(0 rows) + +DROP SCHEMA failure_reference_table CASCADE; +NOTICE: drop cascades to table ref_table +SET search_path TO default; diff --git a/src/test/regress/expected/failure_create_table_0.out b/src/test/regress/expected/failure_create_table_0.out new file mode 100644 index 000000000..e69de29bb diff --git a/src/test/regress/expected/failure_ddl_0.out b/src/test/regress/expected/failure_ddl_0.out new file mode 100644 index 000000000..081a43830 --- /dev/null +++ b/src/test/regress/expected/failure_ddl_0.out @@ -0,0 +1,1063 @@ +-- +-- Test DDL command propagation failures +-- Different dimensions we're testing: +-- Replication factor, 1PC-2PC, sequential-parallel modes +-- +CREATE SCHEMA ddl_failure; +SET citus.force_max_query_parallelization TO ON; +SET search_path TO 'ddl_failure'; +-- do not cache any connections +SET citus.max_cached_conns_per_worker TO 0; +-- we don't want to see the prepared transaction numbers in the warnings +SET client_min_messages TO ERROR; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SET citus.next_shard_id TO 100800; +-- we'll start with replication factor 1, 2PC and parallel mode +SET citus.shard_count = 4; +SET citus.shard_replication_factor = 1; +CREATE TABLE test_table (key int, value int); +SELECT create_distributed_table('test_table', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- kill as soon as the coordinator sends worker_apply_shard_ddl_command +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +-- show that we've never commited the changes +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- cancel as soon as the coordinator sends worker_apply_shard_ddl_command +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: canceling statement due to user request +-- show that we've never commited the changes +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- manually drop & re-create the table for the next tests +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE test_table; +SET citus.next_shard_id TO 100800; +SET citus.shard_count = 4; +SET citus.shard_replication_factor = 1; +CREATE TABLE test_table (key int, value int); +SELECT create_distributed_table('test_table', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- cancel as soon as the coordinator sends COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- interrupts are held during COMMIT/ROLLBACK, so the command +-- should have been applied without any issues since cancel is ignored +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- the following tests rely the column not exists, so drop manually +ALTER TABLE test_table DROP COLUMN new_column; +-- but now kill just after the worker sends response to +-- COMMIT command, so we'll have lots of warnings but the command +-- should have been committed both on the distributed table and the placements +SET client_min_messages TO WARNING; +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: failed to commit transaction on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: failed to commit transaction on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SET client_min_messages TO ERROR; +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- now cancel just after the worker sends response to +-- but Postgres doesn't accepts interrupts during COMMIT and ROLLBACK +-- so should not cancel at all, so not an effective test but adding in +-- case Citus messes up this behaviour +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- the remaining tests rely on table having new_column +ALTER TABLE test_table ADD COLUMN new_column INT; +-- finally, test failing on ROLLBACK with 1PC +-- fail just after the coordinator sends the ROLLBACK +-- so the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SET LOCAL client_min_messages TO WARNING; +ALTER TABLE test_table DROP COLUMN new_column; +ROLLBACK; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +-- now cancel just after the worker sends response to +-- but Postgres doesn't accepts interrupts during COMMIT and ROLLBACK +-- so should not cancel at all, so not an effective test but adding in +-- case Citus messes up this behaviour +SELECT citus.mitmproxy('conn.onQuery(query="ROLLBACK").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +ALTER TABLE test_table DROP COLUMN new_column; +ROLLBACK; +-- but now kill just after the worker sends response to +-- ROLLBACK command, so we'll have lots of warnings but the command +-- should have been rollbacked both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +ALTER TABLE test_table DROP COLUMN new_column; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +-- kill as soon as the coordinator sends worker_apply_shard_ddl_command +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +-- cancel as soon as the coordinator sends worker_apply_shard_ddl_command +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +-- killing on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- we should be able to recover the transaction and +-- see that the command is rollbacked +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 2 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- cancelling on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- we should be able to recover the transaction and +-- see that the command is rollbacked +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- killing on command complete of COMMIT PREPARE, we should see that the command succeeds +-- and all the workers committed +SELECT citus.mitmproxy('conn.onCommandComplete(command="COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,value}") + (localhost,9060,100802,t,"{key,value}") + (localhost,57637,100801,t,"{key,value}") + (localhost,57637,100803,t,"{key,value}") +(4 rows) + +-- we shouldn't have any prepared transactions in the workers +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,value}") + (localhost,9060,100802,t,"{key,value}") + (localhost,57637,100801,t,"{key,value}") + (localhost,57637,100803,t,"{key,value}") +(4 rows) + +-- kill as soon as the coordinator sends COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- some of the placements would be missing the new column +-- since we've not commited the prepared transactions +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,value}") + (localhost,9060,100802,t,"{key,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- we should be able to recover the transaction and +-- see that the command is committed +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 2 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- finally, test failing on ROLLBACK with 2PC +-- fail just after the coordinator sends the ROLLBACK +-- so the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +ALTER TABLE test_table DROP COLUMN new_column; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- ROLLBACK should have failed on the distributed table and the placements +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- but now kill just after the worker sends response to +-- ROLLBACK command, so we'll have lots of warnings but the command +-- should have been rollbacked both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +ALTER TABLE test_table DROP COLUMN new_column; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- make sure that the transaction is rollbacked +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100800,t,"{key,new_column,value}") + (localhost,9060,100802,t,"{key,new_column,value}") + (localhost,57637,100801,t,"{key,new_column,value}") + (localhost,57637,100803,t,"{key,new_column,value}") +(4 rows) + +-- another set of tests with 2PC and replication factor = 2 +SET citus.shard_count = 4; +SET citus.shard_replication_factor = 2; +-- re-create the table with replication factor 2 +DROP TABLE test_table; +CREATE TABLE test_table (key int, value int); +SELECT create_distributed_table('test_table', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- kill as soon as the coordinator sends worker_apply_shard_ddl_command +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- cancel as soon as the coordinator sends worker_apply_shard_ddl_command +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- killing on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- we should be able to recover the transaction and +-- see that the command is rollbacked on all workers +-- note that in this case recover_prepared_transactions() +-- sends ROLLBACK PREPARED to the workers given that +-- the transaction has not been commited on any placement yet +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 4 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100804,t,"{key,value}") + (localhost,9060,100805,t,"{key,value}") + (localhost,9060,100806,t,"{key,value}") + (localhost,9060,100807,t,"{key,value}") + (localhost,57637,100804,t,"{key,value}") + (localhost,57637,100805,t,"{key,value}") + (localhost,57637,100806,t,"{key,value}") + (localhost,57637,100807,t,"{key,value}") +(8 rows) + +-- killing on command complete of COMMIT PREPARE, we should see that the command succeeds +-- and all the workers committed +SELECT citus.mitmproxy('conn.onCommandComplete(command="COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,new_column,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100804,t,"{key,new_column,value}") + (localhost,9060,100805,t,"{key,new_column,value}") + (localhost,9060,100806,t,"{key,new_column,value}") + (localhost,9060,100807,t,"{key,new_column,value}") + (localhost,57637,100804,t,"{key,new_column,value}") + (localhost,57637,100805,t,"{key,new_column,value}") + (localhost,57637,100806,t,"{key,new_column,value}") + (localhost,57637,100807,t,"{key,new_column,value}") +(8 rows) + +-- we shouldn't have any prepared transactions in the workers +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100804,t,"{key,new_column,value}") + (localhost,9060,100805,t,"{key,new_column,value}") + (localhost,9060,100806,t,"{key,new_column,value}") + (localhost,9060,100807,t,"{key,new_column,value}") + (localhost,57637,100804,t,"{key,new_column,value}") + (localhost,57637,100805,t,"{key,new_column,value}") + (localhost,57637,100806,t,"{key,new_column,value}") + (localhost,57637,100807,t,"{key,new_column,value}") +(8 rows) + +-- kill as soon as the coordinator sends COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table DROP COLUMN new_column; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- some of the placements would be missing the new column +-- since we've not commited the prepared transactions +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100804,t,"{key,new_column,value}") + (localhost,9060,100805,t,"{key,new_column,value}") + (localhost,9060,100806,t,"{key,new_column,value}") + (localhost,9060,100807,t,"{key,new_column,value}") + (localhost,57637,100804,t,"{key,value}") + (localhost,57637,100805,t,"{key,value}") + (localhost,57637,100806,t,"{key,value}") + (localhost,57637,100807,t,"{key,value}") +(8 rows) + +-- we should be able to recover the transaction and +-- see that the command is committed +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 4 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100804,t,"{key,value}") + (localhost,9060,100805,t,"{key,value}") + (localhost,9060,100806,t,"{key,value}") + (localhost,9060,100807,t,"{key,value}") + (localhost,57637,100804,t,"{key,value}") + (localhost,57637,100805,t,"{key,value}") + (localhost,57637,100806,t,"{key,value}") + (localhost,57637,100807,t,"{key,value}") +(8 rows) + +-- finally, test failing on ROLLBACK with 2PC +-- fail just after the coordinator sends the ROLLBACK +-- so the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +ALTER TABLE test_table ADD COLUMN new_column INT; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- ROLLBACK should have failed on the distributed table and the placements +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100804,t,"{key,value}") + (localhost,9060,100805,t,"{key,value}") + (localhost,9060,100806,t,"{key,value}") + (localhost,9060,100807,t,"{key,value}") + (localhost,57637,100804,t,"{key,value}") + (localhost,57637,100805,t,"{key,value}") + (localhost,57637,100806,t,"{key,value}") + (localhost,57637,100807,t,"{key,value}") +(8 rows) + +-- but now kill just after the worker sends response to +-- ROLLBACK command, so we'll have lots of warnings but the command +-- should have been rollbacked both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +ALTER TABLE test_table ADD COLUMN new_column INT; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- make sure that the transaction is rollbacked +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT run_command_on_placements('test_table', $$SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = '%s'::regclass;$$) ORDER BY 1; + run_command_on_placements +--------------------------------------------------------------------- + (localhost,9060,100804,t,"{key,value}") + (localhost,9060,100805,t,"{key,value}") + (localhost,9060,100806,t,"{key,value}") + (localhost,9060,100807,t,"{key,value}") + (localhost,57637,100804,t,"{key,value}") + (localhost,57637,100805,t,"{key,value}") + (localhost,57637,100806,t,"{key,value}") + (localhost,57637,100807,t,"{key,value}") +(8 rows) + +-- now do some tests with sequential mode +SET citus.multi_shard_modify_mode TO 'sequential'; +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: canceling statement due to user request +SELECT array_agg(name::text ORDER BY name::text) FROM public.table_attrs where relid = 'test_table'::regclass; + array_agg +--------------------------------------------------------------------- + {key,value} +(1 row) + +-- kill as soon as the coordinator sends worker_apply_shard_ddl_command +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +-- kill as soon as the coordinator after it sends worker_apply_shard_ddl_command 2nd time +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").after(2).kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +-- cancel as soon as the coordinator after it sends worker_apply_shard_ddl_command 2nd time +SELECT citus.mitmproxy('conn.onQuery(query="worker_apply_shard_ddl_command").after(2).cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD COLUMN new_column INT; +ERROR: canceling statement due to user request +SET search_path TO 'public'; +DROP SCHEMA ddl_failure CASCADE; \ No newline at end of file diff --git a/src/test/regress/expected/failure_multi_dml_0.out b/src/test/regress/expected/failure_multi_dml_0.out new file mode 100644 index 000000000..2b7ac3f38 --- /dev/null +++ b/src/test/regress/expected/failure_multi_dml_0.out @@ -0,0 +1,504 @@ +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SET citus.shard_count = 2; +SET citus.shard_replication_factor = 1; -- one shard per worker +SET citus.next_shard_id TO 103400; +ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART 100; +CREATE TABLE dml_test (id integer, name text); +SELECT create_distributed_table('dml_test', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +COPY dml_test FROM STDIN WITH CSV; +SELECT citus.clear_network_traffic(); + clear_network_traffic +--------------------------------------------------------------------- + +(1 row) + +---- test multiple statements spanning multiple shards, +---- at each significant point. These transactions are 2pc +-- fail at DELETE +SELECT citus.mitmproxy('conn.onQuery(query="^DELETE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +DELETE FROM dml_test WHERE id = 2; +ERROR: current transaction is aborted, commands ignored until end of transaction block +INSERT INTO dml_test VALUES (5, 'Epsilon'); +ERROR: current transaction is aborted, commands ignored until end of transaction block +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +ERROR: current transaction is aborted, commands ignored until end of transaction block +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +ERROR: current transaction is aborted, commands ignored until end of transaction block +COMMIT; +--- shouldn't see any changes performed in failed transaction +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- cancel at DELETE +SELECT citus.mitmproxy('conn.onQuery(query="^DELETE").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +ERROR: canceling statement due to user request +DELETE FROM dml_test WHERE id = 2; +ERROR: current transaction is aborted, commands ignored until end of transaction block +INSERT INTO dml_test VALUES (5, 'Epsilon'); +ERROR: current transaction is aborted, commands ignored until end of transaction block +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +ERROR: current transaction is aborted, commands ignored until end of transaction block +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +ERROR: current transaction is aborted, commands ignored until end of transaction block +COMMIT; +--- shouldn't see any changes performed in failed transaction +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- fail at INSERT +SELECT citus.mitmproxy('conn.onQuery(query="^INSERT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +ERROR: current transaction is aborted, commands ignored until end of transaction block +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +ERROR: current transaction is aborted, commands ignored until end of transaction block +COMMIT; +--- shouldn't see any changes before failed INSERT +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- cancel at INSERT +SELECT citus.mitmproxy('conn.onQuery(query="^INSERT").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +ERROR: canceling statement due to user request +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +ERROR: current transaction is aborted, commands ignored until end of transaction block +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +ERROR: current transaction is aborted, commands ignored until end of transaction block +COMMIT; +--- shouldn't see any changes before failed INSERT +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- fail at UPDATE +SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +ERROR: current transaction is aborted, commands ignored until end of transaction block +COMMIT; +--- shouldn't see any changes after failed UPDATE +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- cancel at UPDATE +SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +ERROR: canceling statement due to user request +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +ERROR: current transaction is aborted, commands ignored until end of transaction block +COMMIT; +--- shouldn't see any changes after failed UPDATE +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- fail at PREPARE TRANSACTION +SELECT citus.mitmproxy('conn.onQuery(query="^PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- this transaction block will be sent to the coordinator as a remote command to hide the +-- error message that is caused during commit. +-- we'll test for the txn side-effects to ensure it didn't run +SELECT master_run_on_worker( + ARRAY['localhost']::text[], + ARRAY[:master_port]::int[], + ARRAY[' +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, ''Epsilon''); +UPDATE dml_test SET name = ''alpha'' WHERE id = 1; +UPDATE dml_test SET name = ''gamma'' WHERE id = 3; +COMMIT; + '], + false +); + master_run_on_worker +--------------------------------------------------------------------- + (localhost,57636,t,BEGIN) +(1 row) + +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT shardid FROM pg_dist_shard_placement WHERE shardstate = 3; + shardid +--------------------------------------------------------------------- +(0 rows) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +-- shouldn't see any changes after failed PREPARE +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- cancel at PREPARE TRANSACTION +SELECT citus.mitmproxy('conn.onQuery(query="^PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- we'll test for the txn side-effects to ensure it didn't run +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +COMMIT; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT shardid FROM pg_dist_shard_placement WHERE shardstate = 3; + shardid +--------------------------------------------------------------------- +(0 rows) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +-- shouldn't see any changes after failed PREPARE +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- fail at COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- hide the error message (it has the PID)... +-- we'll test for the txn side-effects to ensure it didn't run +SET client_min_messages TO ERROR; +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +COMMIT; +SET client_min_messages TO DEFAULT; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT shardid FROM pg_dist_shard_placement WHERE shardstate = 3; + shardid +--------------------------------------------------------------------- +(0 rows) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +-- should see changes, because of txn recovery +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 3 | gamma + 4 | Delta + 5 | Epsilon +(3 rows) + +-- cancel at COMMITs are ignored by Postgres +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +COMMIT; +-- should see changes, because cancellation is ignored +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 3 | gamma + 4 | Delta + 5 | Epsilon + 5 | Epsilon +(4 rows) + +-- drop table and recreate with different replication/sharding +DROP TABLE dml_test; +SET citus.shard_count = 1; +SET citus.shard_replication_factor = 2; -- two placements +CREATE TABLE dml_test (id integer, name text); +SELECT create_distributed_table('dml_test', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +COPY dml_test FROM STDIN WITH CSV; +---- test multiple statements against a single shard, but with two placements +-- fail at PREPARED COMMIT as we use 2PC +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +COMMIT; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: failed to commit transaction on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +-- all changes should be committed because we injected +-- the failure on the COMMIT time. And, we should not +-- mark any placements as INVALID +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT shardid FROM pg_dist_shard_placement WHERE shardstate = 3; + shardid +--------------------------------------------------------------------- +(0 rows) + +SET citus.task_assignment_policy TO "round-robin"; +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 3 | gamma + 4 | Delta + 5 | Epsilon +(3 rows) + +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 3 | gamma + 4 | Delta + 5 | Epsilon +(3 rows) + +RESET citus.task_assignment_policy; +-- drop table and recreate as reference table +DROP TABLE dml_test; +SET citus.shard_count = 2; +SET citus.shard_replication_factor = 1; +CREATE TABLE dml_test (id integer, name text); +SELECT create_reference_table('dml_test'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +COPY dml_test FROM STDIN WITH CSV; +-- fail at COMMIT (by failing to PREPARE) +SELECT citus.mitmproxy('conn.onQuery(query="^PREPARE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +COMMIT; +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +--- shouldn't see any changes after failed COMMIT +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- cancel at COMMIT (by cancelling on PREPARE) +SELECT citus.mitmproxy('conn.onQuery(query="^PREPARE").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +DELETE FROM dml_test WHERE id = 1; +DELETE FROM dml_test WHERE id = 2; +INSERT INTO dml_test VALUES (5, 'Epsilon'); +UPDATE dml_test SET name = 'alpha' WHERE id = 1; +UPDATE dml_test SET name = 'gamma' WHERE id = 3; +COMMIT; +ERROR: canceling statement due to user request +--- shouldn't see any changes after cancelled PREPARE +SELECT * FROM dml_test ORDER BY id ASC; + id | name +--------------------------------------------------------------------- + 1 | Alpha + 2 | Beta + 3 | Gamma + 4 | Delta +(4 rows) + +-- allow connection to allow DROP +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE dml_test; \ No newline at end of file diff --git a/src/test/regress/expected/failure_mx_metadata_sync_0.out b/src/test/regress/expected/failure_mx_metadata_sync_0.out new file mode 100644 index 000000000..31da72324 --- /dev/null +++ b/src/test/regress/expected/failure_mx_metadata_sync_0.out @@ -0,0 +1,317 @@ +-- +-- failure_mx_metadata_sync.sql +-- +CREATE SCHEMA IF NOT EXISTS mx_metadata_sync; +SET SEARCH_PATH = mx_metadata_sync; +SET citus.shard_count TO 2; +SET citus.next_shard_id TO 16000000; +SET citus.shard_replication_factor TO 1; +SELECT pg_backend_pid() as pid \gset +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE t1 (id int PRIMARY KEY); +SELECT create_distributed_table('t1', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO t1 SELECT x FROM generate_series(1,100) AS f(x); +-- Initial metadata status +SELECT hasmetadata FROM pg_dist_node WHERE nodeport=:worker_2_proxy_port; + hasmetadata +--------------------------------------------------------------------- + f +(1 row) + +-- Failure to set groupid in the worker +SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE pg_dist_local_group SET groupid").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT start_metadata_sync_to_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE pg_dist_local_group SET groupid").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT start_metadata_sync_to_node('localhost', :worker_2_proxy_port); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +-- Failure to drop all tables in pg_dist_partition +SELECT citus.mitmproxy('conn.onQuery(query="^SELECT worker_drop_distributed_table").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT start_metadata_sync_to_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="^SELECT worker_drop_distributed_table").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT start_metadata_sync_to_node('localhost', :worker_2_proxy_port); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +-- Failure to truncate pg_dist_node in the worker +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE pg_dist_node CASCADE").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT start_metadata_sync_to_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE pg_dist_node CASCADE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT start_metadata_sync_to_node('localhost', :worker_2_proxy_port); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +-- Failure to populate pg_dist_node in the worker +SELECT citus.mitmproxy('conn.onQuery(query="^INSERT INTO pg_dist_node").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT start_metadata_sync_to_node('localhost', :worker_2_proxy_port); +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="^INSERT INTO pg_dist_node").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT start_metadata_sync_to_node('localhost', :worker_2_proxy_port); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +-- Verify that coordinator knows worker does not have valid metadata +SELECT hasmetadata FROM pg_dist_node WHERE nodeport=:worker_2_proxy_port; + hasmetadata +--------------------------------------------------------------------- + f +(1 row) + +-- Verify we can sync metadata after unsuccessful attempts +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT start_metadata_sync_to_node('localhost', :worker_2_proxy_port); + start_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +SELECT hasmetadata FROM pg_dist_node WHERE nodeport=:worker_2_proxy_port; + hasmetadata +--------------------------------------------------------------------- + t +(1 row) + +-- Check failures on DDL command propagation +CREATE TABLE t2 (id int PRIMARY KEY); +SELECT citus.mitmproxy('conn.onParse(query="citus_internal_add_placement_metadata").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('t2', 'id'); +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.onParse(query="citus_internal_add_shard_metadata").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('t2', 'id'); +ERROR: canceling statement due to user request +-- Verify that the table was not distributed +SELECT count(*) > 0 AS is_table_distributed +FROM pg_dist_partition +WHERE logicalrelid='t2'::regclass; + is_table_distributed +--------------------------------------------------------------------- + f +(1 row) + +-- Failure to set groupid in the worker +SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE pg_dist_local_group SET groupid").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT stop_metadata_sync_to_node('localhost', :worker_2_proxy_port); +NOTICE: dropping metadata on the node (localhost,9060) +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="^UPDATE pg_dist_local_group SET groupid").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT stop_metadata_sync_to_node('localhost', :worker_2_proxy_port); +NOTICE: dropping metadata on the node (localhost,9060) +WARNING: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx + stop_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +-- Failure to drop all tables in pg_dist_partition +SELECT citus.mitmproxy('conn.onQuery(query="^SELECT worker_drop_distributed_table").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT stop_metadata_sync_to_node('localhost', :worker_2_proxy_port); +NOTICE: dropping metadata on the node (localhost,9060) +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="^SELECT worker_drop_distributed_table").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT stop_metadata_sync_to_node('localhost', :worker_2_proxy_port); +NOTICE: dropping metadata on the node (localhost,9060) +WARNING: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx + stop_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +-- Failure to truncate pg_dist_node in the worker +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE pg_dist_node CASCADE").cancel(' || :pid || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT stop_metadata_sync_to_node('localhost', :worker_2_proxy_port); +NOTICE: dropping metadata on the node (localhost,9060) +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE pg_dist_node CASCADE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT stop_metadata_sync_to_node('localhost', :worker_2_proxy_port); +NOTICE: dropping metadata on the node (localhost,9060) +WARNING: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx + stop_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +\c - - - :worker_2_port +SELECT count(*) FROM pg_dist_node; + count +--------------------------------------------------------------------- + 2 +(1 row) + +\c - - - :master_port +SELECT hasmetadata FROM pg_dist_node WHERE nodeport=:worker_2_proxy_port; + hasmetadata +--------------------------------------------------------------------- + f +(1 row) + +-- Verify we can drop metadata after unsuccessful attempts +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT stop_metadata_sync_to_node('localhost', :worker_2_proxy_port); +NOTICE: dropping metadata on the node (localhost,9060) + stop_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +\c - - - :worker_2_port +SELECT count(*) FROM pg_dist_node; + count +--------------------------------------------------------------------- + 0 +(1 row) + +\c - - - :master_port +SELECT hasmetadata FROM pg_dist_node WHERE nodeport=:worker_2_proxy_port; + hasmetadata +--------------------------------------------------------------------- + f +(1 row) + +SET SEARCH_PATH = mx_metadata_sync; +DROP TABLE t1; +DROP TABLE t2; +DROP SCHEMA mx_metadata_sync CASCADE; \ No newline at end of file diff --git a/src/test/regress/expected/failure_savepoints_0.out b/src/test/regress/expected/failure_savepoints_0.out new file mode 100644 index 000000000..c23ee975e --- /dev/null +++ b/src/test/regress/expected/failure_savepoints_0.out @@ -0,0 +1,384 @@ +-- We have two different output files for this failure test because the +-- failure behaviour of SAVEPOINT and RELEASE commands are different if +-- we use the executor. If we use it, these commands error out if any of +-- the placement commands fail. Otherwise, we might mark the placement +-- as invalid and continue with a WARNING. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SET citus.shard_count = 2; +SET citus.shard_replication_factor = 1; -- one shard per worker +SET citus.next_shard_id TO 100950; +ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART 150; +CREATE TABLE artists ( + id bigint NOT NULL, + name text NOT NULL +); +SELECT create_distributed_table('artists', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- add some data +INSERT INTO artists VALUES (1, 'Pablo Picasso'); +INSERT INTO artists VALUES (2, 'Vincent van Gogh'); +INSERT INTO artists VALUES (3, 'Claude Monet'); +INSERT INTO artists VALUES (4, 'William Kurelek'); +-- simply fail at SAVEPOINT +SELECT citus.mitmproxy('conn.onQuery(query="^SAVEPOINT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +INSERT INTO artists VALUES (5, 'Asher Lev'); +SAVEPOINT s1; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +DELETE FROM artists WHERE id=4; +ERROR: current transaction is aborted, commands ignored until end of transaction block +RELEASE SAVEPOINT s1; +ERROR: current transaction is aborted, commands ignored until end of transaction block +COMMIT; +SELECT * FROM artists WHERE id IN (4, 5); + id | name +--------------------------------------------------------------------- + 4 | William Kurelek +(1 row) + +-- fail at RELEASE +SELECT citus.mitmproxy('conn.onQuery(query="^RELEASE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +UPDATE artists SET name='a'; +SAVEPOINT s1; +DELETE FROM artists WHERE id=4; +RELEASE SAVEPOINT s1; +WARNING: AbortSubTransaction while in COMMIT state +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: savepoint "savepoint_2" does not exist +CONTEXT: while executing command on localhost:xxxxx +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +ROLLBACK; +SELECT * FROM artists WHERE id IN (4, 5); + id | name +--------------------------------------------------------------------- + 4 | William Kurelek +(1 row) + +-- fail at ROLLBACK +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +INSERT INTO artists VALUES (5, 'Asher Lev'); +SAVEPOINT s1; +DELETE FROM artists WHERE id=4; +ROLLBACK TO SAVEPOINT s1; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +COMMIT; +ERROR: failure on connection marked as essential: localhost:xxxxx +SELECT * FROM artists WHERE id IN (4, 5); + id | name +--------------------------------------------------------------------- + 4 | William Kurelek +(1 row) + +-- fail at second RELEASE +SELECT citus.mitmproxy('conn.onQuery(query="^RELEASE").after(1).kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SAVEPOINT s1; +DELETE FROM artists WHERE id=4; +RELEASE SAVEPOINT s1; +SAVEPOINT s2; +INSERT INTO artists VALUES (5, 'Jacob Kahn'); +RELEASE SAVEPOINT s2; +WARNING: AbortSubTransaction while in COMMIT state +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +COMMIT; +SELECT * FROM artists WHERE id IN (4, 5); + id | name +--------------------------------------------------------------------- + 4 | William Kurelek +(1 row) + +-- fail at second ROLLBACK +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").after(1).kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SAVEPOINT s1; +UPDATE artists SET name='A' WHERE id=4; +ROLLBACK TO SAVEPOINT s1; +SAVEPOINT s2; +DELETE FROM artists WHERE id=5; +ROLLBACK TO SAVEPOINT s2; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +COMMIT; +ERROR: failure on connection marked as essential: localhost:xxxxx +SELECT * FROM artists WHERE id IN (4, 5); + id | name +--------------------------------------------------------------------- + 4 | William Kurelek +(1 row) + +SELECT citus.mitmproxy('conn.onQuery(query="^RELEASE").after(1).kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- Release after rollback +BEGIN; +SAVEPOINT s1; +ROLLBACK TO s1; +RELEASE SAVEPOINT s1; +SAVEPOINT s2; +INSERT INTO artists VALUES (6, 'John J. Audubon'); +INSERT INTO artists VALUES (7, 'Emily Carr'); +ROLLBACK TO s2; +RELEASE SAVEPOINT s2; +COMMIT; +SELECT * FROM artists WHERE id=7; + id | name +--------------------------------------------------------------------- +(0 rows) + +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- Recover from errors +\set VERBOSITY terse +BEGIN; +SAVEPOINT s1; +SAVEPOINT s2; +INSERT INTO artists VALUES (6, 'John J. Audubon'); +INSERT INTO artists VALUES (7, 'Emily Carr'); +INSERT INTO artists VALUES (7, 'Emily Carr'); +ROLLBACK TO SAVEPOINT s1; +WARNING: connection not open +WARNING: connection not open +WARNING: connection not open +WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open +WARNING: connection not open +WARNING: connection not open +COMMIT; +ERROR: failure on connection marked as essential: localhost:xxxxx +SELECT * FROM artists WHERE id=6; + id | name +--------------------------------------------------------------------- +(0 rows) + +-- replication factor > 1 +CREATE TABLE researchers ( + id bigint NOT NULL, + lab_id int NOT NULL, + name text NOT NULL +); +SET citus.shard_count = 1; +SET citus.shard_replication_factor = 2; -- single shard, on both workers +SELECT create_distributed_table('researchers', 'lab_id', 'hash'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- simply fail at SAVEPOINT +SELECT citus.mitmproxy('conn.onQuery(query="^SAVEPOINT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +INSERT INTO researchers VALUES (7, 4, 'Jan Plaza'); +SAVEPOINT s1; +WARNING: connection not open +WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open +WARNING: connection not open +WARNING: connection not open +ERROR: connection not open +INSERT INTO researchers VALUES (8, 4, 'Alonzo Church'); +ERROR: current transaction is aborted, commands ignored until end of transaction block +ROLLBACK TO s1; +ERROR: savepoint "s1" does not exist +RELEASE SAVEPOINT s1; +ERROR: current transaction is aborted, commands ignored until end of transaction block +COMMIT; +-- should see correct results from healthy placement and one bad placement +SELECT * FROM researchers WHERE lab_id = 4; + id | lab_id | name +--------------------------------------------------------------------- +(0 rows) + +UPDATE pg_dist_shard_placement SET shardstate = 1 +WHERE shardstate = 3 AND shardid IN ( + SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'researchers'::regclass +) RETURNING placementid; + placementid +--------------------------------------------------------------------- +(0 rows) + +TRUNCATE researchers; +-- fail at rollback +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +INSERT INTO researchers VALUES (7, 4, 'Jan Plaza'); +SAVEPOINT s1; +INSERT INTO researchers VALUES (8, 4, 'Alonzo Church'); +ROLLBACK TO s1; +WARNING: connection not open +WARNING: connection not open +RELEASE SAVEPOINT s1; +COMMIT; +ERROR: failure on connection marked as essential: localhost:xxxxx +-- should see correct results from healthy placement and one bad placement +SELECT * FROM researchers WHERE lab_id = 4; + id | lab_id | name +--------------------------------------------------------------------- +(0 rows) + +UPDATE pg_dist_shard_placement SET shardstate = 1 +WHERE shardstate = 3 AND shardid IN ( + SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'researchers'::regclass +) RETURNING placementid; + placementid +--------------------------------------------------------------------- +(0 rows) + +TRUNCATE researchers; +-- fail at release +SELECT citus.mitmproxy('conn.onQuery(query="^RELEASE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +INSERT INTO researchers VALUES (7, 4, 'Jan Plaza'); +SAVEPOINT s1; +INSERT INTO researchers VALUES (8, 4, 'Alonzo Church'); +ROLLBACK TO s1; +RELEASE SAVEPOINT s1; +WARNING: AbortSubTransaction while in COMMIT state +WARNING: connection not open +WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open +WARNING: connection not open +WARNING: connection not open +WARNING: savepoint "savepoint_3" does not exist +ERROR: connection not open +COMMIT; +-- should see correct results from healthy placement and one bad placement +SELECT * FROM researchers WHERE lab_id = 4; + id | lab_id | name +--------------------------------------------------------------------- +(0 rows) + +UPDATE pg_dist_shard_placement SET shardstate = 1 +WHERE shardstate = 3 AND shardid IN ( + SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'researchers'::regclass +) RETURNING placementid; + placementid +--------------------------------------------------------------------- +(0 rows) + +TRUNCATE researchers; +-- test that we don't mark reference placements unhealthy +CREATE TABLE ref(a int, b int); +SELECT create_reference_table('ref'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +SAVEPOINT start; +INSERT INTO ref VALUES (1001,2); +SELECT * FROM ref; + a | b +--------------------------------------------------------------------- + 1001 | 2 +(1 row) + +ROLLBACK TO SAVEPOINT start; +WARNING: connection not open +WARNING: connection not open +SELECT * FROM ref; +WARNING: connection not open +WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open +WARNING: connection not open +WARNING: connection not open +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open +END; +-- clean up +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE artists; +DROP TABLE researchers; +DROP TABLE ref; \ No newline at end of file diff --git a/src/test/regress/expected/failure_truncate_0.out b/src/test/regress/expected/failure_truncate_0.out new file mode 100644 index 000000000..bf1c0bf96 --- /dev/null +++ b/src/test/regress/expected/failure_truncate_0.out @@ -0,0 +1,1287 @@ +-- +-- Test TRUNCATE command failures +-- +CREATE SCHEMA truncate_failure; +SET search_path TO 'truncate_failure'; +SET citus.next_shard_id TO 120000; +-- we don't want to see the prepared transaction numbers in the warnings +SET client_min_messages TO ERROR; +-- do not cache any connections +SET citus.max_cached_conns_per_worker TO 0; +-- use a predictable number of connections per task +SET citus.force_max_query_parallelization TO on; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- we'll start with replication factor 1, 2PC and parallel mode +SET citus.shard_count = 4; +SET citus.shard_replication_factor = 1; +CREATE TABLE test_table (key int, value int); +SELECT create_distributed_table('test_table', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +CREATE VIEW unhealthy_shard_count AS + SELECT count(*) + FROM pg_dist_shard_placement pdsp + JOIN + pg_dist_shard pds + ON pdsp.shardid=pds.shardid + WHERE logicalrelid='truncate_failure.test_table'::regclass AND shardstate != 1; +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- kill as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="TRUNCATE TABLE truncate_failure.test_table").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="TRUNCATE TABLE truncate_failure.test_table").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- kill as soon as the coordinator sends COMMIT PREPARED +-- the transaction succeeds on one placement, and we need to +-- recover prepared statements to see the other placement as well +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 2 +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- refill the table +TRUNCATE test_table; +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- cancel as soon as the coordinator sends COMMIT +-- interrupts are held during COMMIT/ROLLBACK, so the command +-- should have been applied without any issues since cancel is ignored +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- refill the table +TRUNCATE test_table; +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +SET client_min_messages TO WARNING; +-- now kill just after the worker sends response to +-- COMMIT command, so we'll have lots of warnings but the command +-- should have been committed both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: failed to commit transaction on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: failed to commit transaction on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SET client_min_messages TO ERROR; +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- now cancel just after the worker sends response to +-- but Postgres doesn't accept interrupts during COMMIT and ROLLBACK +-- so should not cancel at all, so not an effective test but adding in +-- case Citus messes up this behaviour +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- Let's test Truncate on reference tables with a FK from a hash distributed table +CREATE TABLE reference_table(i int UNIQUE); +INSERT INTO reference_table SELECT x FROM generate_series(1,20) as f(x); +SELECT create_reference_table('reference_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE test_table ADD CONSTRAINT foreign_key FOREIGN KEY (value) REFERENCES reference_table(i); +-- immediately kill when we see prepare transaction to see if the command +-- still cascaded to referencing table or failed successfuly +SELECT citus.mitmproxy('conn.onQuery(query="PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +SELECT count(*) FROM reference_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- immediately cancel when we see prepare transaction to see if the command +-- still cascaded to referencing table or failed successfuly +SELECT citus.mitmproxy('conn.onQuery(query="PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +SELECT count(*) FROM reference_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- immediately kill when we see cascading TRUNCATE on the hash table to see +-- rollbacked properly +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE TABLE").after(2).kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +SELECT count(*) FROM reference_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- immediately cancel when we see cascading TRUNCATE on the hash table to see +-- if the command still cascaded to referencing table or failed successfuly +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE TABLE").after(2).cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +SELECT count(*) FROM reference_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- immediately kill after we get prepare transaction complete +-- to see if the command still cascaded to referencing table or +-- failed successfuly +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- immediately cancel after we get prepare transaction complete +-- to see if the command still cascaded to referencing table or +-- failed successfuly +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- kill as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE TABLE truncate_failure.test_table").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE TABLE truncate_failure.test_table").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- killing on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="^PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- we should be able to revocer the transaction and +-- see that the command is rollbacked +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 2 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancelling on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="^PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- we should be able to revocer the transaction and +-- see that the command is rollbacked +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- killing on command complete of COMMIT PREPARE, we should see that the command succeeds +-- and all the workers committed +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- we shouldn't have any prepared transactions in the workers +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- kill as soon as the coordinator sends COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- Since we kill connections to one worker after commit arrives but the +-- other worker connections are healthy, we cannot commit on 1 worker +-- which has 2 active shard placements, but the other does. That's why +-- we expect to see 2 recovered prepared transactions. +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 2 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- finally, test failing on ROLLBACK with 2CPC +-- fail just after the coordinator sends the ROLLBACK +-- so the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +TRUNCATE test_table; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- but now kill just after the worker sends response to +-- ROLLBACK command, so we'll have lots of warnings but the command +-- should have been rollbacked both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +TRUNCATE test_table; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- final set of tests with 2PC and replication factor = 2 +SET citus.shard_count = 4; +SET citus.shard_replication_factor = 2; +-- re-create the table with replication factor 2 +DROP TABLE test_table CASCADE; +CREATE TABLE test_table (key int, value int); +SELECT create_distributed_table('test_table', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +CREATE VIEW unhealthy_shard_count AS + SELECT count(*) + FROM pg_dist_shard_placement pdsp + JOIN + pg_dist_shard pds + ON pdsp.shardid=pds.shardid + WHERE logicalrelid='truncate_failure.test_table'::regclass AND shardstate != 1; +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- kill as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="TRUNCATE TABLE truncate_failure.test_table").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="TRUNCATE TABLE truncate_failure.test_table").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- killing on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- we should be able to revocer the transaction and +-- see that the command is rollbacked +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 4 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- killing on command complete of COMMIT PREPARE, we should see that the command succeeds +-- and all the workers committed +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- we shouldn't have any prepared transactions in the workers +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- kill as soon as the coordinator sends COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Since we kill connections to one worker after commit arrives but the +-- other worker connections are healthy, we cannot commit on 1 worker +-- which has 4 active shard placements (2 shards, replication factor=2), +-- but the other does. That's why we expect to see 4 recovered prepared +-- transactions. +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 4 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 0 +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- finally, test failing on ROLLBACK with 2CPC +-- fail just after the coordinator sends the ROLLBACK +-- so the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +TRUNCATE test_table; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +-- but now kill just after the worker sends response to +-- ROLLBACK command, so we'll have lots of warnings but the command +-- should have been rollbacked both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="^ROLLBACK").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +BEGIN; +TRUNCATE test_table; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +--------------------------------------------------------------------- + 20 +(1 row) + +DROP SCHEMA truncate_failure CASCADE; +SET search_path TO default; \ No newline at end of file diff --git a/src/test/regress/expected/failure_vacuum_2.out b/src/test/regress/expected/failure_vacuum_2.out new file mode 100644 index 000000000..bdae46e27 --- /dev/null +++ b/src/test/regress/expected/failure_vacuum_2.out @@ -0,0 +1,141 @@ +-- We have different output files for the executor. This is because +-- we don't mark transactions with ANALYZE as critical anymore, and +-- get WARNINGs instead of ERRORs. +SET citus.next_shard_id TO 12000000; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SET citus.shard_count = 1; +SET citus.shard_replication_factor = 2; -- one shard per worker +CREATE TABLE vacuum_test (key int, value int); +SELECT create_distributed_table('vacuum_test', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT citus.clear_network_traffic(); + clear_network_traffic +--------------------------------------------------------------------- + +(1 row) + +SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +VACUUM vacuum_test; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.onQuery(query="^ANALYZE").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ANALYZE vacuum_test; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ANALYZE vacuum_test; +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: failed to commit transaction on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +--------------------------------------------------------------------- + 1 +(1 row) + +-- ANALYZE transactions being critical is an open question, see #2430 +-- show that we never mark as INVALID on COMMIT FAILURE +SELECT shardid, shardstate FROM pg_dist_shard_placement where shardstate != 1 AND +shardid in ( SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'vacuum_test'::regclass); + shardid | shardstate +--------------------------------------------------------------------- +(0 rows) + +-- the same tests with cancel +SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +VACUUM vacuum_test; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.onQuery(query="^ANALYZE").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ANALYZE vacuum_test; +ERROR: canceling statement due to user request +-- cancel during COMMIT should be ignored +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +ANALYZE vacuum_test; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE other_vacuum_test (key int, value int); +SELECT create_distributed_table('other_vacuum_test', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM.*other").kill()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +VACUUM vacuum_test, other_vacuum_test; +ERROR: connection to the remote node localhost:xxxxx failed with the following error: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.onQuery(query="^VACUUM.*other").cancel(' || pg_backend_pid() || ')'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +VACUUM vacuum_test, other_vacuum_test; +ERROR: canceling statement due to user request +-- ==== Clean up, we're done here ==== +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE vacuum_test, other_vacuum_test; \ No newline at end of file