Copy to reference table failure tests are added

2018-07-11 15:13:31 +03:00 · 2018-07-11 15:13:31 +03:00 · dde86cb731
parent 39cc54b4b5
commit dde86cb731
3 changed files with 666 additions and 0 deletions
--- a/src/test/regress/expected/failure_copy_to_reference.out
+++ b/src/test/regress/expected/failure_copy_to_reference.out
@ -0,0 +1,461 @@
 -- 
 --  Failure tests for COPY to reference tables 
 -- 
 CREATE SCHEMA copy_reference_failure;
 SET search_path TO 'copy_reference_failure';
 SET citus.next_shard_id TO 130000;
 -- we don't want to see the prepared transaction numbers in the warnings
 SET client_min_messages TO ERROR;
 SELECT citus.mitmproxy('conn.allow()');
 mitmproxy 
 -----------
 (1 row)
 CREATE TABLE test_table(id int, value_1 int);
 SELECT create_reference_table('test_table');
 create_reference_table 
 ------------------------
 (1 row)
 CREATE VIEW unhealthy_shard_count AS 
  SELECT count(*) 
  FROM pg_dist_shard_placement pdsp 
  JOIN 
  pg_dist_shard pds 
  ON pdsp.shardid=pds.shardid 
  WHERE logicalrelid='copy_reference_failure.test_table'::regclass AND shardstate != 1;
 -- in the first test, kill just in the first 
 -- response we get from the worker
 SELECT citus.mitmproxy('conn.kill()');
 mitmproxy 
 -----------
 (1 row)
 \copy test_table FROM STDIN DELIMITER ','
 ERROR:  connection error: localhost:57640
 DETAIL:  server closed the connection unexpectedly
 	This probably means the server terminated abnormally
 	before or while processing the request.
 CONTEXT:  COPY test_table, line 1: "1,2"
 SELECT citus.mitmproxy('conn.allow()');
 mitmproxy 
 -----------
 (1 row)
 SELECT * FROM unhealthy_shard_count;
 count 
 -------
     0
 (1 row)
 SELECT count(*) FROM test_table;
 count 
 -------
     0
 (1 row)
 -- kill as soon as the coordinator sends begin
 SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()');
 mitmproxy 
 -----------
 (1 row)
 \copy test_table FROM STDIN DELIMITER ','
 ERROR:  failure on connection marked as essential: localhost:57640
 CONTEXT:  COPY test_table, line 1: "1,2"
 SELECT citus.mitmproxy('conn.allow()');
 mitmproxy 
 -----------
 (1 row)
 SELECT * FROM unhealthy_shard_count;
 count 
 -------
     0
 (1 row)
 SELECT count(*) FROM test_table;
 count 
 -------
     0
 (1 row)
 -- cancel as soon as the coordinator sends begin
 SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' ||  pg_backend_pid() || ')');
 mitmproxy 
 -----------
 (1 row)
 \copy test_table FROM STDIN DELIMITER ','
 ERROR:  canceling statement due to user request
 CONTEXT:  COPY test_table, line 1: "1,2"
 SELECT citus.mitmproxy('conn.allow()');
 mitmproxy 
 -----------
 (1 row)
 SELECT * FROM unhealthy_shard_count;
 count 
 -------
     0
 (1 row)
 SELECT count(*) FROM test_table;
 count 
 -------
     0
 (1 row)
 -- kill as soon as the coordinator sends COPY command
 SELECT citus.mitmproxy('conn.onQuery(query="^COPY").kill()');
 mitmproxy 
 -----------
 (1 row)
 \copy test_table FROM STDIN DELIMITER ','
 ERROR:  server closed the connection unexpectedly
 	This probably means the server terminated abnormally
 	before or while processing the request.
 CONTEXT:  while executing command on localhost:57640
 COPY test_table, line 1: "1,2"
 SELECT citus.mitmproxy('conn.allow()');
 mitmproxy 
 -----------
 (1 row)
 SELECT * FROM unhealthy_shard_count;
 count 
 -------
     0
 (1 row)
 SELECT count(*) FROM test_table;
 count 
 -------
     0
 (1 row)
 -- cancel as soon as the coordinator sends COPY command
 SELECT citus.mitmproxy('conn.onQuery(query="^COPY").cancel(' ||  pg_backend_pid() || ')');
 mitmproxy 
 -----------
 (1 row)
 \copy test_table FROM STDIN DELIMITER ','
 ERROR:  canceling statement due to user request
 CONTEXT:  COPY test_table, line 1: "1,2"
 SELECT citus.mitmproxy('conn.allow()');
 mitmproxy 
 -----------
 (1 row)
 SELECT * FROM unhealthy_shard_count;
 count 
 -------
     0
 (1 row)
 SELECT count(*) FROM test_table;
 count 
 -------
     0
 (1 row)
 -- kill as soon as the worker sends CopyComplete
 SELECT citus.mitmproxy('conn.onCommandComplete(command="^COPY 3").kill()');
 mitmproxy 
 -----------
 (1 row)
 \copy test_table FROM STDIN DELIMITER ','
 ERROR:  failed to COPY to shard 130000 on localhost:57640
 SELECT citus.mitmproxy('conn.allow()');
 mitmproxy 
 -----------
 (1 row)
 SELECT * FROM unhealthy_shard_count;
 count 
 -------
     0
 (1 row)
 SELECT count(*) FROM test_table;
 count 
 -------
     0
 (1 row)
 -- cancel as soon as the coordinator sends CopyData
 SELECT citus.mitmproxy('conn.onCommandComplete(command="^COPY 3").cancel(' ||  pg_backend_pid() || ')');
 mitmproxy 
 -----------
 (1 row)
 \copy test_table FROM STDIN DELIMITER ','
 ERROR:  canceling statement due to user request
 SELECT citus.mitmproxy('conn.allow()');
 mitmproxy 
 -----------
 (1 row)
 SELECT * FROM unhealthy_shard_count;
 count 
 -------
     0
 (1 row)
 SELECT count(*) FROM test_table;
 count 
 -------
     0
 (1 row)
 -- kill the connection when we try to start the COPY 
 -- the query should abort
 SELECT citus.mitmproxy('conn.onQuery(query="FROM STDIN WITH").killall()');
 mitmproxy 
 -----------
 (1 row)
 \copy test_table FROM STDIN DELIMITER ','
 ERROR:  server closed the connection unexpectedly
 	This probably means the server terminated abnormally
 	before or while processing the request.
 CONTEXT:  while executing command on localhost:57640
 COPY test_table, line 1: "1,2"
 SELECT citus.mitmproxy('conn.allow()');
 mitmproxy 
 -----------
 (1 row)
 SELECT * FROM unhealthy_shard_count;
 count 
 -------
     0
 (1 row)
 SELECT count(*) FROM test_table;
 count 
 -------
     0
 (1 row)
 -- killing on PREPARE should be fine, everything should be rollbacked
 SELECT citus.mitmproxy('conn.onQuery(query="^PREPARE TRANSACTION").kill()');
 mitmproxy 
 -----------
 (1 row)
 \copy test_table FROM STDIN DELIMITER ','
 ERROR:  connection not open
 CONTEXT:  while executing command on localhost:57640
 SELECT citus.mitmproxy('conn.allow()');
 mitmproxy 
 -----------
 (1 row)
 SELECT * FROM unhealthy_shard_count;
 count 
 -------
     0
 (1 row)
 SELECT count(*) FROM test_table;
 count 
 -------
     0
 (1 row)
 -- cancelling on PREPARE should be fine, everything should be rollbacked
 SELECT citus.mitmproxy('conn.onQuery(query="^PREPARE TRANSACTION").cancel(' ||  pg_backend_pid() || ')');
 mitmproxy 
 -----------
 (1 row)
 \copy test_table FROM STDIN DELIMITER ','
 ERROR:  canceling statement due to user request
 SELECT citus.mitmproxy('conn.allow()');
 mitmproxy 
 -----------
 (1 row)
 SELECT * FROM unhealthy_shard_count;
 count 
 -------
     0
 (1 row)
 SELECT count(*) FROM test_table;
 count 
 -------
     0
 (1 row)
 -- killing on command complete of COMMIT PREPARE, we should see that the command succeeds
 -- and all the workers committed
 SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT PREPARED").kill()');
 mitmproxy 
 -----------
 (1 row)
 \copy test_table FROM STDIN DELIMITER ','
 SELECT citus.mitmproxy('conn.allow()');
 mitmproxy 
 -----------
 (1 row)
 -- we shouldn't have any prepared transactions in the workers
 SELECT recover_prepared_transactions();
 recover_prepared_transactions 
 -------------------------------
                             0
 (1 row)
 SELECT * FROM unhealthy_shard_count;
 count 
 -------
     0
 (1 row)
 SELECT count(*) FROM test_table;
 count 
 -------
     3
 (1 row)
 TRUNCATE test_table;
 -- kill as soon as the coordinator sends COMMIT
 SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()');
 mitmproxy 
 -----------
 (1 row)
 \copy test_table FROM STDIN DELIMITER ','
 SELECT citus.mitmproxy('conn.allow()');
 mitmproxy 
 -----------
 (1 row)
 -- Since we kill connections to one worker after commit arrives but the 
 -- other worker connections are healthy, we cannot commit on 1 worker
 -- which has 1 active shard placements, but the other does. That's why
 -- we expect to see 1 recovered prepared transactions.
 SELECT recover_prepared_transactions();
 recover_prepared_transactions 
 -------------------------------
                             1
 (1 row)
 SELECT * FROM unhealthy_shard_count;
 count 
 -------
     0
 (1 row)
 SELECT count(*) FROM test_table;
 count 
 -------
     3
 (1 row)
 TRUNCATE test_table;
 -- finally, test failing on ROLLBACK just after the coordinator
 -- sends the ROLLBACK so the command can be rollbacked
 SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()');
 mitmproxy 
 -----------
 (1 row)
 BEGIN;
 SET LOCAL client_min_messages TO WARNING;
 \copy test_table FROM STDIN DELIMITER ','
 ROLLBACK;
 WARNING:  connection not open
 CONTEXT:  while executing command on localhost:57640
 SELECT citus.mitmproxy('conn.allow()');
 mitmproxy 
 -----------
 (1 row)
 SELECT * FROM unhealthy_shard_count;
 count 
 -------
     0
 (1 row)
 SELECT count(*) FROM test_table;
 count 
 -------
     0
 (1 row)
 -- but now kill just after the worker sends response to 
 -- ROLLBACK command, command should have been rollbacked
 -- both on the distributed table and the placements
 SELECT citus.mitmproxy('conn.onCommandComplete(command="^ROLLBACK").kill()');
 mitmproxy 
 -----------
 (1 row)
 BEGIN;
 SET LOCAL client_min_messages TO WARNING;
 \copy test_table FROM STDIN DELIMITER ','
 ROLLBACK;
 WARNING:  connection not open
 CONTEXT:  while executing command on localhost:57640
 SELECT citus.mitmproxy('conn.allow()');
 mitmproxy 
 -----------
 (1 row)
 SELECT recover_prepared_transactions();
 recover_prepared_transactions 
 -------------------------------
                             0
 (1 row)
 SELECT * FROM unhealthy_shard_count;
 count 
 -------
     0
 (1 row)
 SELECT count(*) FROM test_table;
 count 
 -------
     0
 (1 row)
 DROP SCHEMA copy_reference_failure CASCADE;
 SET search_path TO default;
--- a/src/test/regress/failure_schedule
+++ b/src/test/regress/failure_schedule
@ -9,3 +9,4 @@ test: failure_ddl
 test: failure_truncate
 test: failure_create_index_concurrently
 test: failure_add_disable_node
 test: failure_copy_to_reference
--- a/src/test/regress/sql/failure_copy_to_reference.sql
+++ b/src/test/regress/sql/failure_copy_to_reference.sql
@ -0,0 +1,204 @@
 -- 
 --  Failure tests for COPY to reference tables 
 -- 
 CREATE SCHEMA copy_reference_failure;
 SET search_path TO 'copy_reference_failure';
 SET citus.next_shard_id TO 130000;
 -- we don't want to see the prepared transaction numbers in the warnings
 SET client_min_messages TO ERROR;
 SELECT citus.mitmproxy('conn.allow()');
 CREATE TABLE test_table(id int, value_1 int);
 SELECT create_reference_table('test_table');
 CREATE VIEW unhealthy_shard_count AS 
  SELECT count(*) 
  FROM pg_dist_shard_placement pdsp 
  JOIN 
  pg_dist_shard pds 
  ON pdsp.shardid=pds.shardid 
  WHERE logicalrelid='copy_reference_failure.test_table'::regclass AND shardstate != 1;
 -- in the first test, kill just in the first 
 -- response we get from the worker
 SELECT citus.mitmproxy('conn.kill()');
 \copy test_table FROM STDIN DELIMITER ','
 1,2
 2,3
 3,4
 \.
 SELECT citus.mitmproxy('conn.allow()');
 SELECT * FROM unhealthy_shard_count;
 SELECT count(*) FROM test_table;
 -- kill as soon as the coordinator sends begin
 SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").kill()');
 \copy test_table FROM STDIN DELIMITER ','
 1,2
 2,3
 3,4
 \.
 SELECT citus.mitmproxy('conn.allow()');
 SELECT * FROM unhealthy_shard_count;
 SELECT count(*) FROM test_table;
 -- cancel as soon as the coordinator sends begin
 SELECT citus.mitmproxy('conn.onQuery(query="^BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED").cancel(' ||  pg_backend_pid() || ')');
 \copy test_table FROM STDIN DELIMITER ','
 1,2
 2,3
 3,4
 \.
 SELECT citus.mitmproxy('conn.allow()');
 SELECT * FROM unhealthy_shard_count;
 SELECT count(*) FROM test_table;
 -- kill as soon as the coordinator sends COPY command
 SELECT citus.mitmproxy('conn.onQuery(query="^COPY").kill()');
 \copy test_table FROM STDIN DELIMITER ','
 1,2
 2,3
 3,4
 \.
 SELECT citus.mitmproxy('conn.allow()');
 SELECT * FROM unhealthy_shard_count;
 SELECT count(*) FROM test_table;
 -- cancel as soon as the coordinator sends COPY command
 SELECT citus.mitmproxy('conn.onQuery(query="^COPY").cancel(' ||  pg_backend_pid() || ')');
 \copy test_table FROM STDIN DELIMITER ','
 1,2
 2,3
 3,4
 \.
 SELECT citus.mitmproxy('conn.allow()');
 SELECT * FROM unhealthy_shard_count;
 SELECT count(*) FROM test_table;
 -- kill as soon as the worker sends CopyComplete
 SELECT citus.mitmproxy('conn.onCommandComplete(command="^COPY 3").kill()');
 \copy test_table FROM STDIN DELIMITER ','
 1,2
 2,3
 3,4
 \.
 SELECT citus.mitmproxy('conn.allow()');
 SELECT * FROM unhealthy_shard_count;
 SELECT count(*) FROM test_table;
 -- cancel as soon as the coordinator sends CopyData
 SELECT citus.mitmproxy('conn.onCommandComplete(command="^COPY 3").cancel(' ||  pg_backend_pid() || ')');
 \copy test_table FROM STDIN DELIMITER ','
 1,2
 2,3
 3,4
 \.
 SELECT citus.mitmproxy('conn.allow()');
 SELECT * FROM unhealthy_shard_count;
 SELECT count(*) FROM test_table;
 -- kill the connection when we try to start the COPY 
 -- the query should abort
 SELECT citus.mitmproxy('conn.onQuery(query="FROM STDIN WITH").killall()');
 \copy test_table FROM STDIN DELIMITER ','
 1,2
 2,3
 3,4
 \.
 SELECT citus.mitmproxy('conn.allow()');
 SELECT * FROM unhealthy_shard_count;
 SELECT count(*) FROM test_table;
 -- killing on PREPARE should be fine, everything should be rollbacked
 SELECT citus.mitmproxy('conn.onQuery(query="^PREPARE TRANSACTION").kill()');
 \copy test_table FROM STDIN DELIMITER ','
 1,2
 2,3
 3,4
 \.
 SELECT citus.mitmproxy('conn.allow()');
 SELECT * FROM unhealthy_shard_count;
 SELECT count(*) FROM test_table;
 -- cancelling on PREPARE should be fine, everything should be rollbacked
 SELECT citus.mitmproxy('conn.onQuery(query="^PREPARE TRANSACTION").cancel(' ||  pg_backend_pid() || ')');
 \copy test_table FROM STDIN DELIMITER ','
 1,2
 2,3
 3,4
 \.
 SELECT citus.mitmproxy('conn.allow()');
 SELECT * FROM unhealthy_shard_count;
 SELECT count(*) FROM test_table;
 -- killing on command complete of COMMIT PREPARE, we should see that the command succeeds
 -- and all the workers committed
 SELECT citus.mitmproxy('conn.onCommandComplete(command="^COMMIT PREPARED").kill()');
 \copy test_table FROM STDIN DELIMITER ','
 1,2
 2,3
 3,4
 \.
 SELECT citus.mitmproxy('conn.allow()');
 -- we shouldn't have any prepared transactions in the workers
 SELECT recover_prepared_transactions();
 SELECT * FROM unhealthy_shard_count;
 SELECT count(*) FROM test_table;
 TRUNCATE test_table;
 -- kill as soon as the coordinator sends COMMIT
 SELECT citus.mitmproxy('conn.onQuery(query="^COMMIT").kill()');
 \copy test_table FROM STDIN DELIMITER ','
 1,2
 2,3
 3,4
 \.
 SELECT citus.mitmproxy('conn.allow()');
 -- Since we kill connections to one worker after commit arrives but the 
 -- other worker connections are healthy, we cannot commit on 1 worker
 -- which has 1 active shard placements, but the other does. That's why
 -- we expect to see 1 recovered prepared transactions.
 SELECT recover_prepared_transactions();
 SELECT * FROM unhealthy_shard_count;
 SELECT count(*) FROM test_table;
 TRUNCATE test_table;
 -- finally, test failing on ROLLBACK just after the coordinator
 -- sends the ROLLBACK so the command can be rollbacked
 SELECT citus.mitmproxy('conn.onQuery(query="^ROLLBACK").kill()');
 BEGIN;
 SET LOCAL client_min_messages TO WARNING;
 \copy test_table FROM STDIN DELIMITER ','
 1,2
 2,3
 3,4
 \.
 ROLLBACK;
 SELECT citus.mitmproxy('conn.allow()');
 SELECT * FROM unhealthy_shard_count;
 SELECT count(*) FROM test_table;
 -- but now kill just after the worker sends response to 
 -- ROLLBACK command, command should have been rollbacked
 -- both on the distributed table and the placements
 SELECT citus.mitmproxy('conn.onCommandComplete(command="^ROLLBACK").kill()');
 BEGIN;
 SET LOCAL client_min_messages TO WARNING;
 \copy test_table FROM STDIN DELIMITER ','
 1,2
 2,3
 3,4
 \.
 ROLLBACK;
 SELECT citus.mitmproxy('conn.allow()');
 SELECT recover_prepared_transactions();
 SELECT * FROM unhealthy_shard_count;
 SELECT count(*) FROM test_table;
 DROP SCHEMA copy_reference_failure CASCADE;
 SET search_path TO default;