From 51941114203a6028a13294663bed3855c9bc0191 Mon Sep 17 00:00:00 2001 From: Murat Tuncer Date: Wed, 25 Jan 2017 17:48:15 +0300 Subject: [PATCH] Add failure case for regression tests --- src/test/regress/input/multi_copy.source | 201 +++++++++++++++ src/test/regress/output/multi_copy.source | 287 ++++++++++++++++++++++ 2 files changed, 488 insertions(+) diff --git a/src/test/regress/input/multi_copy.source b/src/test/regress/input/multi_copy.source index 1708dcf12..d20c5c4c1 100644 --- a/src/test/regress/input/multi_copy.source +++ b/src/test/regress/input/multi_copy.source @@ -538,3 +538,204 @@ SELECT master_create_distributed_table('composite_partition_column_table', 'comp 1,"(1,1)" 2,"(2,2)" \. + + +-- Test that copy on append distributed tables does not create shards on removed workers +CREATE TABLE numbers_append (a int, b int); +SELECT master_create_distributed_table('numbers_append', 'a', 'append'); + +-- no shards are created yet +SELECT shardid, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_append'::regclass order by placementid; + +COPY numbers_append FROM STDIN WITH (FORMAT 'csv'); +1,1 +2,2 +\. + +COPY numbers_append FROM STDIN WITH (FORMAT 'csv'); +3,5 +4,6 +\. + +-- verify there are shards at both workers +SELECT shardid, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_append'::regclass order by placementid; + +-- disable the first node +SELECT master_disable_node('localhost', :worker_1_port); +-- set replication factor to 1 so that copy will +-- succeed without replication count error +SET citus.shard_replication_factor TO 1; + +-- add two new shards and verify they are created at the other node +COPY numbers_append FROM STDIN WITH (FORMAT 'csv'); +5,7 +6,8 +\. + +COPY numbers_append FROM STDIN WITH (FORMAT 'csv'); +7,9 +8,10 +\. 
+ +SELECT shardid, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_append'::regclass order by placementid; + +-- add the node back +SELECT master_add_node('localhost', :worker_1_port); +RESET citus.shard_replication_factor; +-- add two new shards and verify they are created at both workers +COPY numbers_append FROM STDIN WITH (FORMAT 'csv'); +9,11 +10,12 +\. + +COPY numbers_append FROM STDIN WITH (FORMAT 'csv'); +11,13 +12,14 +\. + +SELECT shardid, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_append'::regclass order by placementid; +DROP TABLE numbers_append; + +-- Test copy failures against connection failures +-- switch to a test user, it was previously created +\c - test_user +SET citus.shard_count to 4; +CREATE TABLE numbers_hash (a int, b int); +SELECT create_distributed_table('numbers_hash', 'a'); + +COPY numbers_hash FROM STDIN WITH (FORMAT 'csv'); +1,1 +2,2 +3,3 +4,4 +5,5 +6,6 +7,7 +8,8 +\. + +-- verify each placement is active +SELECT shardid, shardstate, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_hash'::regclass order by placementid; + +-- create a reference table +CREATE TABLE numbers_reference(a int, b int); +SELECT create_reference_table('numbers_reference'); +COPY numbers_reference FROM STDIN WITH (FORMAT 'csv'); +1,1 +2,2 +\. + +-- create another hash distributed table +CREATE TABLE numbers_hash_other(a int, b int); +SELECT create_distributed_table('numbers_hash_other', 'a'); +SELECT shardid, shardstate, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_hash_other'::regclass order by placementid; + +-- manually corrupt pg_dist_shard_placement such that both copies of one shard are placed in +-- worker_1. This is to test the behavior when no replica of a shard is accessible. 
+-- Whole copy operation is supposed to fail and rollback. +\c - :default_user +UPDATE pg_dist_shard_placement SET nodeport = :worker_1_port WHERE shardid = 560176; + +-- disable test_user on the first worker +\c - :default_user - :worker_1_port +ALTER USER test_user WITH nologin; +\c - test_user - :master_port + +-- reissue copy +COPY numbers_hash FROM STDIN WITH (FORMAT 'csv'); +1,1 +2,2 +3,3 +4,4 +5,5 +6,6 +7,7 +8,8 +\. + +-- verify shards in the first worker are marked invalid +SELECT shardid, shardstate, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_hash'::regclass order by placementid; + +-- try to insert into a reference table. copy should fail +COPY numbers_reference FROM STDIN WITH (FORMAT 'csv'); +3,1 +4,2 +\. + +-- verify shards for reference table are still valid +SELECT shardid, shardstate, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_reference'::regclass order by placementid; + + +-- try to insert into numbers_hash_other. copy should fail and rollback +-- since it can not insert into either copy of a shard. shards are expected to +-- stay valid since the operation is rolled back. +COPY numbers_hash_other FROM STDIN WITH (FORMAT 'csv'); +1,1 +2,2 +3,3 +\. 
+ +-- verify shards for numbers_hash_other are still valid +-- since copy has failed altogether +SELECT shardid, shardstate, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_hash_other'::regclass order by placementid; + +-- re-enable test_user on the first worker +\c - :default_user - :worker_1_port +ALTER USER test_user WITH login; +\c - test_user - :master_port + +DROP TABLE numbers_hash; +DROP TABLE numbers_hash_other; +DROP TABLE numbers_reference; +\c - :default_user + +-- test copy failure inside the node +-- it will be done by changing definition of a shard table +SET citus.shard_count to 4; +CREATE TABLE numbers_hash(a int, b int); +SELECT create_distributed_table('numbers_hash', 'a'); + +\c - - - :worker_1_port +ALTER TABLE numbers_hash_560180 ADD COLUMN c int; +\c - - - :master_port + +-- operation will fail to modify a shard and roll back +COPY numbers_hash FROM STDIN WITH (FORMAT 'csv'); +1,1 +2,2 +3,3 +4,4 +5,5 +6,6 +7,7 +8,8 +\. 
+ +-- verify no row is inserted +SELECT * FROM numbers_hash; + +-- verify shard is still marked as valid +SELECT shardid, shardstate, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_hash'::regclass order by placementid; + +DROP TABLE numbers_hash; + diff --git a/src/test/regress/output/multi_copy.source b/src/test/regress/output/multi_copy.source index 4722a83c6..5c26f6649 100644 --- a/src/test/regress/output/multi_copy.source +++ b/src/test/regress/output/multi_copy.source @@ -708,3 +708,290 @@ CONTEXT: while executing command on localhost:57638 WARNING: could not get statistics for shard public.composite_partition_column_table_560164 DETAIL: Setting shard statistics to NULL ERROR: failure on connection marked as essential: localhost:57637 +-- Test that copy on append distributed tables does not create shards on removed workers +CREATE TABLE numbers_append (a int, b int); +SELECT master_create_distributed_table('numbers_append', 'a', 'append'); + master_create_distributed_table +--------------------------------- + +(1 row) + +-- no shards are created yet +SELECT shardid, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_append'::regclass order by placementid; + shardid | nodename | nodeport +---------+----------+---------- +(0 rows) + +COPY numbers_append FROM STDIN WITH (FORMAT 'csv'); +COPY numbers_append FROM STDIN WITH (FORMAT 'csv'); +-- verify there are shards at both workers +SELECT shardid, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_append'::regclass order by placementid; + shardid | nodename | nodeport +---------+-----------+---------- + 560165 | localhost | 57637 + 560165 | localhost | 57638 + 560166 | localhost | 57638 + 560166 | localhost | 57637 +(4 rows) + +-- disable the first node +SELECT master_disable_node('localhost', :worker_1_port); +NOTICE: Node 
localhost:57637 has active shard placements. Some queries may fail after this operation. Use select master_add_node('localhost', 57637) to add this node back. + master_disable_node +--------------------- + +(1 row) + +-- set replication factor to 1 so that copy will +-- succeed without replication count error +SET citus.shard_replication_factor TO 1; +-- add two new shards and verify they are created at the other node +COPY numbers_append FROM STDIN WITH (FORMAT 'csv'); +COPY numbers_append FROM STDIN WITH (FORMAT 'csv'); +SELECT shardid, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_append'::regclass order by placementid; + shardid | nodename | nodeport +---------+-----------+---------- + 560165 | localhost | 57637 + 560165 | localhost | 57638 + 560166 | localhost | 57638 + 560166 | localhost | 57637 + 560167 | localhost | 57638 + 560168 | localhost | 57638 +(6 rows) + +-- add the node back +SELECT master_add_node('localhost', :worker_1_port); +NOTICE: Replicating reference table "nation" to all workers +NOTICE: Replicating reference table "supplier" to all workers +NOTICE: Replicating reference table "reference_failure_test" to all workers + master_add_node +--------------------------------- + (3,3,localhost,57637,default,f) +(1 row) + +RESET citus.shard_replication_factor; +-- add two new shards and verify they are created at both workers +COPY numbers_append FROM STDIN WITH (FORMAT 'csv'); +COPY numbers_append FROM STDIN WITH (FORMAT 'csv'); +SELECT shardid, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_append'::regclass order by placementid; + shardid | nodename | nodeport +---------+-----------+---------- + 560165 | localhost | 57637 + 560165 | localhost | 57638 + 560166 | localhost | 57638 + 560166 | localhost | 57637 + 560167 | localhost | 57638 + 560168 | localhost | 57638 + 560169 | localhost | 57637 + 560169 | 
localhost | 57638 + 560170 | localhost | 57638 + 560170 | localhost | 57637 +(10 rows) + +DROP TABLE numbers_append; +-- Test copy failures against connection failures +-- switch to a test user, it was previously created +\c - test_user +SET citus.shard_count to 4; +CREATE TABLE numbers_hash (a int, b int); +SELECT create_distributed_table('numbers_hash', 'a'); + create_distributed_table +-------------------------- + +(1 row) + +COPY numbers_hash FROM STDIN WITH (FORMAT 'csv'); +-- verify each placement is active +SELECT shardid, shardstate, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_hash'::regclass order by placementid; + shardid | shardstate | nodename | nodeport +---------+------------+-----------+---------- + 560171 | 1 | localhost | 57637 + 560171 | 1 | localhost | 57638 + 560172 | 1 | localhost | 57638 + 560172 | 1 | localhost | 57637 + 560173 | 1 | localhost | 57637 + 560173 | 1 | localhost | 57638 + 560174 | 1 | localhost | 57638 + 560174 | 1 | localhost | 57637 +(8 rows) + +-- create a reference table +CREATE TABLE numbers_reference(a int, b int); +SELECT create_reference_table('numbers_reference'); + create_reference_table +------------------------ + +(1 row) + +COPY numbers_reference FROM STDIN WITH (FORMAT 'csv'); +-- create another hash distributed table +CREATE TABLE numbers_hash_other(a int, b int); +SELECT create_distributed_table('numbers_hash_other', 'a'); + create_distributed_table +-------------------------- + +(1 row) + +SELECT shardid, shardstate, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_hash_other'::regclass order by placementid; + shardid | shardstate | nodename | nodeport +---------+------------+-----------+---------- + 560176 | 1 | localhost | 57638 + 560176 | 1 | localhost | 57637 + 560177 | 1 | localhost | 57637 + 560177 | 1 | localhost | 57638 + 560178 | 1 | localhost | 57638 + 560178 | 1 | 
localhost | 57637 + 560179 | 1 | localhost | 57637 + 560179 | 1 | localhost | 57638 +(8 rows) + +-- manually corrupt pg_dist_shard_placement such that both copies of one shard are placed in +-- worker_1. This is to test the behavior when no replica of a shard is accessible. +-- Whole copy operation is supposed to fail and rollback. +\c - :default_user +UPDATE pg_dist_shard_placement SET nodeport = :worker_1_port WHERE shardid = 560176; +-- disable test_user on the first worker +\c - :default_user - :worker_1_port +ALTER USER test_user WITH nologin; +\c - test_user - :master_port +-- reissue copy +COPY numbers_hash FROM STDIN WITH (FORMAT 'csv'); +WARNING: connection error: localhost:57637 +DETAIL: FATAL: role "test_user" is not permitted to log in + +CONTEXT: COPY numbers_hash, line 1: "1,1" +WARNING: connection error: localhost:57637 +DETAIL: FATAL: role "test_user" is not permitted to log in + +CONTEXT: COPY numbers_hash, line 2: "2,2" +WARNING: connection error: localhost:57637 +DETAIL: FATAL: role "test_user" is not permitted to log in + +CONTEXT: COPY numbers_hash, line 3: "3,3" +WARNING: connection error: localhost:57637 +DETAIL: FATAL: role "test_user" is not permitted to log in + +CONTEXT: COPY numbers_hash, line 6: "6,6" +-- verify shards in the first worker are marked invalid +SELECT shardid, shardstate, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_hash'::regclass order by placementid; + shardid | shardstate | nodename | nodeport +---------+------------+-----------+---------- + 560171 | 3 | localhost | 57637 + 560171 | 1 | localhost | 57638 + 560172 | 1 | localhost | 57638 + 560172 | 3 | localhost | 57637 + 560173 | 3 | localhost | 57637 + 560173 | 1 | localhost | 57638 + 560174 | 1 | localhost | 57638 + 560174 | 3 | localhost | 57637 +(8 rows) + +-- try to insert into a reference table. copy should fail +COPY numbers_reference FROM STDIN WITH (FORMAT 'csv'); +ERROR: connection error: 
localhost:57637 +DETAIL: FATAL: role "test_user" is not permitted to log in + +CONTEXT: COPY numbers_reference, line 1: "3,1" +-- verify shards for reference table are still valid +SELECT shardid, shardstate, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_reference'::regclass order by placementid; + shardid | shardstate | nodename | nodeport +---------+------------+-----------+---------- + 560175 | 1 | localhost | 57637 + 560175 | 1 | localhost | 57638 +(2 rows) + +-- try to insert into numbers_hash_other. copy should fail and rollback +-- since it can not insert into either copy of a shard. shards are expected to +-- stay valid since the operation is rolled back. +COPY numbers_hash_other FROM STDIN WITH (FORMAT 'csv'); +WARNING: connection error: localhost:57637 +DETAIL: FATAL: role "test_user" is not permitted to log in + +CONTEXT: COPY numbers_hash_other, line 1: "1,1" +WARNING: connection error: localhost:57637 +DETAIL: FATAL: role "test_user" is not permitted to log in + +CONTEXT: COPY numbers_hash_other, line 1: "1,1" +ERROR: could not connect to any active placements +CONTEXT: COPY numbers_hash_other, line 1: "1,1" +-- verify shards for numbers_hash_other are still valid +-- since copy has failed altogether +SELECT shardid, shardstate, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_hash_other'::regclass order by placementid; + shardid | shardstate | nodename | nodeport +---------+------------+-----------+---------- + 560176 | 1 | localhost | 57637 + 560176 | 1 | localhost | 57637 + 560177 | 1 | localhost | 57637 + 560177 | 1 | localhost | 57638 + 560178 | 1 | localhost | 57638 + 560178 | 1 | localhost | 57637 + 560179 | 1 | localhost | 57637 + 560179 | 1 | localhost | 57638 +(8 rows) + +-- re-enable test_user on the first worker +\c - :default_user - :worker_1_port +ALTER USER test_user WITH login; +\c - test_user - 
:master_port +DROP TABLE numbers_hash; +DROP TABLE numbers_hash_other; +DROP TABLE numbers_reference; +\c - :default_user +-- test copy failure inside the node +-- it will be done by changing definition of a shard table +SET citus.shard_count to 4; +CREATE TABLE numbers_hash(a int, b int); +SELECT create_distributed_table('numbers_hash', 'a'); + create_distributed_table +-------------------------- + +(1 row) + +\c - - - :worker_1_port +ALTER TABLE numbers_hash_560180 ADD COLUMN c int; +\c - - - :master_port +-- operation will fail to modify a shard and roll back +COPY numbers_hash FROM STDIN WITH (FORMAT 'csv'); +ERROR: row field count is 2, expected 3 +DETAIL: (null) +-- verify no row is inserted +SELECT * FROM numbers_hash; + a | b +---+--- +(0 rows) + +-- verify shard is still marked as valid +SELECT shardid, shardstate, nodename, nodeport + FROM pg_dist_shard_placement join pg_dist_shard using(shardid) + WHERE logicalrelid = 'numbers_hash'::regclass order by placementid; + shardid | shardstate | nodename | nodeport +---------+------------+-----------+---------- + 560180 | 1 | localhost | 57637 + 560180 | 1 | localhost | 57638 + 560181 | 1 | localhost | 57638 + 560181 | 1 | localhost | 57637 + 560182 | 1 | localhost | 57637 + 560182 | 1 | localhost | 57638 + 560183 | 1 | localhost | 57638 + 560183 | 1 | localhost | 57637 +(8 rows) + +DROP TABLE numbers_hash;