diff --git a/src/test/regress/expected/failure_truncate.out b/src/test/regress/expected/failure_truncate.out new file mode 100644 index 000000000..4954b8574 --- /dev/null +++ b/src/test/regress/expected/failure_truncate.out @@ -0,0 +1,1286 @@ +-- +-- Test TRUNCATE command failures +-- +CREATE SCHEMA truncate_failure; +SET search_path TO 'truncate_failure'; +SET citus.next_shard_id TO 120000; +-- we don't want to see the prepared transaction numbers in the warnings +SET client_min_messages TO ERROR; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +-- we'll start with replication factor 1, 1PC and parallel mode +SET citus.multi_shard_commit_protocol TO '1pc'; +SET citus.shard_count = 4; +SET citus.shard_replication_factor = 1; +CREATE TABLE test_table (key int, value int); +SELECT create_distributed_table('test_table', 'key'); + create_distributed_table +-------------------------- + +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +CREATE VIEW unhealthy_shard_count AS + SELECT count(*) + FROM pg_dist_shard_placement pdsp + JOIN + pg_dist_shard pds + ON pdsp.shardid=pds.shardid + WHERE logicalrelid='truncate_failure.test_table'::regclass AND shardstate != 1; +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection error: localhost:57640 +DETAIL: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: failure on connection marked as essential: localhost:57640 +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- kill as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="TRUNCATE TABLE truncate_failure.test_table").kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:57640 +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="TRUNCATE TABLE truncate_failure.test_table").cancel(' || pg_backend_pid() || ')'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- kill as soon as the coordinator sends COMMIT +-- One shard should not get truncated but the other should +-- since it is sent from another connection. +-- Thus, we should see a partially successful truncate +-- Note: This is the result of using 1pc and there is no way to recover from it +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 12 +(1 row) + +-- refill the table +TRUNCATE test_table; +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- cancel as soon as the coordinator sends COMMIT +-- interrupts are held during COMMIT/ROLLBACK, so the command +-- should have been applied without any issues since cancel is ignored +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").cancel(' || pg_backend_pid() || ')'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 0 +(1 row) + +-- refill the table +TRUNCATE test_table; +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +SET client_min_messages TO WARNING; +-- now kill just after the worker sends response to +-- COMMIT command, so we'll have lots of warnings but the command +-- should have been committed both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +WARNING: connection not open +CONTEXT: while executing command on localhost:57640 +WARNING: failed to commit critical transaction on localhost:57640, metadata is likely out of sync +WARNING: connection not open +CONTEXT: while executing command on localhost:57640 +WARNING: connection not open +CONTEXT: while executing command on localhost:57640 +WARNING: failed to commit critical transaction on localhost:57640, metadata is likely out of sync +WARNING: connection not open +CONTEXT: while executing command on localhost:57640 +WARNING: could not commit transaction for shard 120001 on any active node +WARNING: could not commit transaction for shard 120003 on any active node +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 0 +(1 row) + +SET client_min_messages TO ERROR; +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- now cancel just after the worker sends response to +-- but Postgres doesn't accept interrupts during COMMIT and ROLLBACK +-- so should not cancel at all, so not an effective test but adding in +-- case Citus messes up this behaviour +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").cancel(' || pg_backend_pid() || ')'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 0 +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- Let's test Truncate on reference tables with a FK from a hash distributed table +CREATE TABLE reference_table(i int UNIQUE); +INSERT INTO reference_table SELECT x FROM generate_series(1,20) as f(x); +SELECT create_reference_table('reference_table'); + create_reference_table +------------------------ + +(1 row) + +ALTER TABLE test_table ADD CONSTRAINT foreign_key FOREIGN KEY (value) REFERENCES reference_table(i); +-- immediately kill when we see prepare transaction to see if the command +-- still cascaded to referencing table or failed successfuly +SELECT citus.mitmproxy('conn.onQuery(query="PREPARE TRANSACTION").kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: connection not open +CONTEXT: while executing command on localhost:57640 +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +SELECT count(*) FROM reference_table; + count +------- + 20 +(1 row) + +-- immediately cancel when we see prepare transaction to see if the command +-- still cascaded to referencing table or failed successfuly +SELECT citus.mitmproxy('conn.onQuery(query="PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +----------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +SELECT count(*) FROM reference_table; + count +------- + 20 +(1 row) + +-- immediately kill when we see cascading TRUNCATE on the hash table to see +-- rollbacked properly +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE TABLE").after(2).kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:57640 +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +SELECT count(*) FROM reference_table; + count +------- + 20 +(1 row) + +-- immediately cancel when we see cascading TRUNCATE on the hash table to see +-- if the command still cascaded to referencing table or failed successfuly +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE TABLE").after(2).cancel(' || pg_backend_pid() || ')'); + mitmproxy +----------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +SELECT count(*) FROM reference_table; + count +------- + 20 +(1 row) + +-- immediately kill after we get prepare transaction complete +-- to see if the command still cascaded to referencing table or +-- failed successfuly +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: connection not open +CONTEXT: while executing command on localhost:57640 +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +------------------------------- + 1 +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- immediately cancel after we get prepare transaction complete +-- to see if the command still cascaded to referencing table or +-- failed successfuly +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +----------- + +(1 row) + +TRUNCATE reference_table CASCADE; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +------------------------------- + 0 +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- now, lets test with 2PC +SET citus.multi_shard_commit_protocol TO '2pc'; +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection error: localhost:57640 +DETAIL: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: failure on connection marked as essential: localhost:57640 +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- kill as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE TABLE truncate_failure.test_table").kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:57640 +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE TABLE truncate_failure.test_table").cancel(' || pg_backend_pid() || ')'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- killing on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="^PREPARE TRANSACTION").kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection not open +CONTEXT: while executing command on localhost:57640 +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +-- we should be able to revocer the transaction and +-- see that the command is rollbacked +SELECT recover_prepared_transactions(); + recover_prepared_transactions +------------------------------- + 2 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- cancelling on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="^PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +-- we should be able to revocer the transaction and +-- see that the command is rollbacked +SELECT recover_prepared_transactions(); + recover_prepared_transactions +------------------------------- + 1 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- killing on command complete of COMMIT PREPARE, we should see that the command succeeds +-- and all the workers committed +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT PREPARED").kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +-- we shouldn't have any prepared transactions in the workers +SELECT recover_prepared_transactions(); + recover_prepared_transactions +------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 0 +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- kill as soon as the coordinator sends COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +-- Since we kill connections to one worker after commit arrives but the +-- other worker connections are healthy, we cannot commit on 1 worker +-- which has 2 active shard placements, but the other does. That's why +-- we expect to see 2 recovered prepared transactions. +SELECT recover_prepared_transactions(); + recover_prepared_transactions +------------------------------- + 2 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 0 +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- finally, test failing on ROLLBACK with 2CPC +-- fail just after the coordinator sends the ROLLBACK +-- so the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +----------- + +(1 row) + +BEGIN; +TRUNCATE test_table; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- but now kill just after the worker sends response to +-- ROLLBACK command, so we'll have lots of warnings but the command +-- should have been rollbacked both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="^ROLLBACK").kill()'); + mitmproxy +----------- + +(1 row) + +BEGIN; +TRUNCATE test_table; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- final set of tests with 2PC and replication factor = 2 +SET citus.multi_shard_commit_protocol TO '2pc'; +SET citus.shard_count = 4; +SET citus.shard_replication_factor = 2; +-- re-create the table with replication factor 2 +DROP TABLE test_table CASCADE; +CREATE TABLE test_table (key int, value int); +SELECT create_distributed_table('test_table', 'key'); + create_distributed_table +-------------------------- + +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +CREATE VIEW unhealthy_shard_count AS + SELECT count(*) + FROM pg_dist_shard_placement pdsp + JOIN + pg_dist_shard pds + ON pdsp.shardid=pds.shardid + WHERE logicalrelid='truncate_failure.test_table'::regclass AND shardstate != 1; +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection error: localhost:57640 +DETAIL: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: failure on connection marked as essential: localhost:57640 +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- kill as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="TRUNCATE TABLE truncate_failure.test_table").kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: while executing command on localhost:57640 +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- cancel as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="TRUNCATE TABLE truncate_failure.test_table").cancel(' || pg_backend_pid() || ')'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: canceling statement due to user request +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- killing on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +ERROR: connection not open +CONTEXT: while executing command on localhost:57640 +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +-- we should be able to revocer the transaction and +-- see that the command is rollbacked +SELECT recover_prepared_transactions(); + recover_prepared_transactions +------------------------------- + 4 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- killing on command complete of COMMIT PREPARE, we should see that the command succeeds +-- and all the workers committed +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT PREPARED").kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +-- we shouldn't have any prepared transactions in the workers +SELECT recover_prepared_transactions(); + recover_prepared_transactions +------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 0 +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- kill as soon as the coordinator sends COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").kill()'); + mitmproxy +----------- + +(1 row) + +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +-- Since we kill connections to one worker after commit arrives but the +-- other worker connections are healthy, we cannot commit on 1 worker +-- which has 4 active shard placements (2 shards, replication factor=2), +-- but the other does. That's why we expect to see 4 recovered prepared +-- transactions. +SELECT recover_prepared_transactions(); + recover_prepared_transactions +------------------------------- + 4 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 0 +(1 row) + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); +-- finally, test failing on ROLLBACK with 2CPC +-- fail just after the coordinator sends the ROLLBACK +-- so the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); + mitmproxy +----------- + +(1 row) + +BEGIN; +TRUNCATE test_table; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +-- but now kill just after the worker sends response to +-- ROLLBACK command, so we'll have lots of warnings but the command +-- should have been rollbacked both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="^ROLLBACK").kill()'); + mitmproxy +----------- + +(1 row) + +BEGIN; +TRUNCATE test_table; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +----------- + +(1 row) + +SELECT * FROM unhealthy_shard_count; + count +------- + 0 +(1 row) + +SELECT recover_prepared_transactions(); + recover_prepared_transactions +------------------------------- + 0 +(1 row) + +SELECT count(*) FROM test_table; + count +------- + 20 +(1 row) + +DROP SCHEMA truncate_failure CASCADE; +SET search_path TO default; diff --git a/src/test/regress/failure_schedule b/src/test/regress/failure_schedule index e9849bf6e..afd672da1 100644 --- a/src/test/regress/failure_schedule +++ b/src/test/regress/failure_schedule @@ -6,4 +6,5 @@ test: failure_setup test: multi_test_helpers test: failure_ddl +test: failure_truncate test: failure_create_index_concurrently diff --git a/src/test/regress/sql/failure_truncate.sql b/src/test/regress/sql/failure_truncate.sql new file mode 100644 index 000000000..3d079d28b --- /dev/null +++ b/src/test/regress/sql/failure_truncate.sql @@ -0,0 +1,430 @@ +-- +-- Test TRUNCATE command failures +-- +CREATE SCHEMA truncate_failure; +SET search_path TO 'truncate_failure'; +SET citus.next_shard_id TO 120000; +-- we don't want to see the prepared transaction numbers in the warnings +SET client_min_messages TO ERROR; + +SELECT citus.mitmproxy('conn.allow()'); + +-- we'll start with replication factor 1, 1PC and parallel mode +SET citus.multi_shard_commit_protocol TO '1pc'; +SET citus.shard_count = 4; +SET citus.shard_replication_factor = 1; + +CREATE TABLE test_table (key int, value int); +SELECT create_distributed_table('test_table', 'key'); + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); + +CREATE VIEW unhealthy_shard_count AS + SELECT count(*) + FROM pg_dist_shard_placement pdsp + JOIN + pg_dist_shard pds + ON pdsp.shardid=pds.shardid + WHERE logicalrelid='truncate_failure.test_table'::regclass AND shardstate != 1; + +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- kill as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="TRUNCATE TABLE truncate_failure.test_table").kill()'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- cancel as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="TRUNCATE TABLE truncate_failure.test_table").cancel(' || pg_backend_pid() || ')'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- kill as soon as the coordinator sends COMMIT +-- One shard should not get truncated but the other should +-- since it is sent from another connection. +-- Thus, we should see a partially successful truncate +-- Note: This is the result of using 1pc and there is no way to recover from it +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- refill the table +TRUNCATE test_table; +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); + +-- cancel as soon as the coordinator sends COMMIT +-- interrupts are held during COMMIT/ROLLBACK, so the command +-- should have been applied without any issues since cancel is ignored +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").cancel(' || pg_backend_pid() || ')'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- refill the table +TRUNCATE test_table; +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); + +SET client_min_messages TO WARNING; +-- now kill just after the worker sends response to +-- COMMIT command, so we'll have lots of warnings but the command +-- should have been committed both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").kill()'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; +SET client_min_messages TO ERROR; + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); + +-- now cancel just after the worker sends response to +-- but Postgres doesn't accept interrupts during COMMIT and ROLLBACK +-- so should not cancel at all, so not an effective test but adding in +-- case Citus messes up this behaviour +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT").cancel(' || pg_backend_pid() || ')'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); + +-- Let's test Truncate on reference tables with a FK from a hash distributed table +CREATE TABLE reference_table(i int UNIQUE); +INSERT INTO reference_table SELECT x FROM generate_series(1,20) as f(x); +SELECT create_reference_table('reference_table'); + +ALTER TABLE test_table ADD CONSTRAINT foreign_key FOREIGN KEY (value) REFERENCES reference_table(i); +-- immediately kill when we see prepare transaction to see if the command +-- still cascaded to referencing table or failed successfuly +SELECT citus.mitmproxy('conn.onQuery(query="PREPARE TRANSACTION").kill()'); +TRUNCATE reference_table CASCADE; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; +SELECT count(*) FROM reference_table; + +-- immediately cancel when we see prepare transaction to see if the command +-- still cascaded to referencing table or failed successfuly +SELECT citus.mitmproxy('conn.onQuery(query="PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); +TRUNCATE reference_table CASCADE; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; +SELECT count(*) FROM reference_table; + +-- immediately kill when we see cascading TRUNCATE on the hash table to see +-- rollbacked properly +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE TABLE").after(2).kill()'); +TRUNCATE reference_table CASCADE; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; +SELECT count(*) FROM reference_table; + +-- immediately cancel when we see cascading TRUNCATE on the hash table to see +-- if the command still cascaded to referencing table or failed successfuly +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE TABLE").after(2).cancel(' || pg_backend_pid() || ')'); +TRUNCATE reference_table CASCADE; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; +SELECT count(*) FROM reference_table; + +-- immediately kill after we get prepare transaction complete +-- to see if the command still cascaded to referencing table or +-- failed successfuly +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").kill()'); +TRUNCATE reference_table CASCADE; +SELECT citus.mitmproxy('conn.allow()'); +SELECT recover_prepared_transactions(); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- immediately cancel after we get prepare transaction complete +-- to see if the command still cascaded to referencing table or +-- failed successfuly +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); +TRUNCATE reference_table CASCADE; +SELECT citus.mitmproxy('conn.allow()'); +SELECT recover_prepared_transactions(); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- now, lets test with 2PC +SET citus.multi_shard_commit_protocol TO '2pc'; + +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- kill as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE TABLE truncate_failure.test_table").kill()'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- cancel as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="^TRUNCATE TABLE truncate_failure.test_table").cancel(' || pg_backend_pid() || ')'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- killing on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="^PREPARE TRANSACTION").kill()'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; + +-- we should be able to revocer the transaction and +-- see that the command is rollbacked +SELECT recover_prepared_transactions(); +SELECT count(*) FROM test_table; + +-- cancelling on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="^PREPARE TRANSACTION").cancel(' || pg_backend_pid() || ')'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; + +-- we should be able to revocer the transaction and +-- see that the command is rollbacked +SELECT recover_prepared_transactions(); +SELECT count(*) FROM test_table; + +-- killing on command complete of COMMIT PREPARE, we should see that the command succeeds +-- and all the workers committed +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT PREPARED").kill()'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); + +-- we shouldn't have any prepared transactions in the workers +SELECT recover_prepared_transactions(); +SELECT count(*) FROM test_table; + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); + +-- kill as soon as the coordinator sends COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").kill()'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +-- Since we kill connections to one worker after commit arrives but the +-- other worker connections are healthy, we cannot commit on 1 worker +-- which has 2 active shard placements, but the other does. That's why +-- we expect to see 2 recovered prepared transactions. +SELECT recover_prepared_transactions(); +SELECT count(*) FROM test_table; + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); + +-- finally, test failing on ROLLBACK with 2CPC +-- fail just after the coordinator sends the ROLLBACK +-- so the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); +BEGIN; +TRUNCATE test_table; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); +SELECT count(*) FROM test_table; + +-- but now kill just after the worker sends response to +-- ROLLBACK command, so we'll have lots of warnings but the command +-- should have been rollbacked both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="^ROLLBACK").kill()'); +BEGIN; +TRUNCATE test_table; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); +SELECT recover_prepared_transactions(); +SELECT count(*) FROM test_table; + +-- final set of tests with 2PC and replication factor = 2 +SET citus.multi_shard_commit_protocol TO '2pc'; +SET citus.shard_count = 4; +SET citus.shard_replication_factor = 2; + +-- re-create the table with replication factor 2 +DROP TABLE test_table CASCADE; +CREATE TABLE test_table (key int, value int); +SELECT create_distributed_table('test_table', 'key'); +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); + +CREATE VIEW unhealthy_shard_count AS + SELECT count(*) + FROM pg_dist_shard_placement pdsp + JOIN + pg_dist_shard pds + ON pdsp.shardid=pds.shardid + WHERE logicalrelid='truncate_failure.test_table'::regclass AND shardstate != 1; + +-- in the first test, kill just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().kill()'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- cancel just in the first +-- response we get from the worker +SELECT citus.mitmproxy('conn.onAuthenticationOk().cancel(' || pg_backend_pid() || ')'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- kill as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- cancel as soon as the coordinator sends begin +SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' || pg_backend_pid() || ')'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- kill as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="TRUNCATE TABLE truncate_failure.test_table").kill()'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- cancel as soon as the coordinator sends TRUNCATE TABLE command +SELECT citus.mitmproxy('conn.onQuery(query="TRUNCATE TABLE truncate_failure.test_table").cancel(' || pg_backend_pid() || ')'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- killing on PREPARE should be fine, everything should be rollbacked +SELECT citus.mitmproxy('conn.onCommandComplete(command="PREPARE TRANSACTION").kill()'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; + +-- we should be able to revocer the transaction and +-- see that the command is rollbacked +SELECT recover_prepared_transactions(); +SELECT count(*) FROM test_table; + +-- killing on command complete of COMMIT PREPARE, we should see that the command succeeds +-- and all the workers committed +SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT PREPARED").kill()'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; + +-- we shouldn't have any prepared transactions in the workers +SELECT recover_prepared_transactions(); +SELECT count(*) FROM test_table; + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); + +-- kill as soon as the coordinator sends COMMIT +SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT PREPARED").kill()'); +TRUNCATE test_table; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +-- Since we kill connections to one worker after commit arrives but the +-- other worker connections are healthy, we cannot commit on 1 worker +-- which has 4 active shard placements (2 shards, replication factor=2), +-- but the other does. That's why we expect to see 4 recovered prepared +-- transactions. +SELECT recover_prepared_transactions(); +SELECT count(*) FROM test_table; + +INSERT INTO test_table SELECT x,x FROM generate_series(1,20) as f(x); + +-- finally, test failing on ROLLBACK with 2CPC +-- fail just after the coordinator sends the ROLLBACK +-- so the command can be rollbacked +SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()'); +BEGIN; +TRUNCATE test_table; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT count(*) FROM test_table; + +-- but now kill just after the worker sends response to +-- ROLLBACK command, so we'll have lots of warnings but the command +-- should have been rollbacked both on the distributed table and the placements +SELECT citus.mitmproxy('conn.onCommandComplete(command="^ROLLBACK").kill()'); +BEGIN; +TRUNCATE test_table; +ROLLBACK; +SELECT citus.mitmproxy('conn.allow()'); +SELECT * FROM unhealthy_shard_count; +SELECT recover_prepared_transactions(); +SELECT count(*) FROM test_table; + +DROP SCHEMA truncate_failure CASCADE; +SET search_path TO default;