diff --git a/.circleci/config.yml b/.circleci/config.yml index c847a8823..a7780c471 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -867,3 +867,231 @@ workflows: image_tag: '<< pipeline.parameters.pg14_version >>' make: check-failure requires: [build-14] + + + - test-citus: + name: 'test-13c_check-failure' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13c_check-failure-1' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13c_check-failure-2' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13c_check-failure-3' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13c_check-failure-4' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13c_check-failure-5' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13c_check-failure-6' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13c_check-failure-7' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-14c_check-failure' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - 
test-citus: + name: 'test-14c_check-failure-1' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14c_check-failure-2' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14c_check-failure-3' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14c_check-failure-4' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14c_check-failure-5' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14c_check-failure-6' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14c_check-failure-7' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + + + - test-citus: + name: 'test-13d_check-failure' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13d_check-failure-1' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13d_check-failure-2' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13d_check-failure-3' + pg_major: 13 + image: citus/failtester 
+ image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13d_check-failure-4' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13d_check-failure-5' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13d_check-failure-6' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13d_check-failure-7' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-14d_check-failure' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14d_check-failure-1' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14d_check-failure-2' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14d_check-failure-3' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14d_check-failure-4' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14d_check-failure-5' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: 
[build-14] + - test-citus: + name: 'test-14d_check-failure-6' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14d_check-failure-7' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] diff --git a/src/test/regress/expected/failure_connection_establishment.out b/src/test/regress/expected/failure_connection_establishment.out index e072d2c2c..e0e62f68a 100644 --- a/src/test/regress/expected/failure_connection_establishment.out +++ b/src/test/regress/expected/failure_connection_establishment.out @@ -85,6 +85,19 @@ SELECT citus.mitmproxy('conn.connect_delay(1000)'); ALTER TABLE products ADD CONSTRAINT p_key PRIMARY KEY(product_no); WARNING: could not establish connection after 900 ms ERROR: connection to the remote node localhost:xxxxx failed +RESET citus.node_connection_timeout; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus.clear_network_traffic(); + clear_network_traffic +--------------------------------------------------------------------- + +(1 row) + -- Make sure that we fall back to a working node for reads, even if it's not -- the first choice in our task assignment policy. 
SET citus.node_connection_timeout TO 900; @@ -94,6 +107,7 @@ SELECT citus.mitmproxy('conn.connect_delay(1000)'); (1 row) +-- the read should still succeed via a working node despite the timeout warning SELECT name FROM r1 WHERE id = 2; WARNING: could not establish any connections to the node localhost:xxxxx after 900 ms name @@ -101,6 +115,21 @@ WARNING: could not establish any connections to the node localhost:xxxxx after bar (1 row) +RESET citus.node_connection_timeout; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + +-- verify a connection attempt was made to the intercepted node, this would +-- have caused the connection to have been delayed and thus caused a timeout +SELECT * FROM citus.dump_network_traffic() WHERE conn=0 AND source = 'coordinator'; + conn | source | message +--------------------------------------------------------------------- + 0 | coordinator | [initial message] +(1 row) + -- similar test with the above but this time on a distributed table instead of -- a reference table and with citus.force_max_query_parallelization is set SET citus.force_max_query_parallelization TO ON; @@ -145,10 +174,7 @@ SELECT citus.mitmproxy('conn.connect_delay(1000)'); SELECT count(*) FROM single_replicatated; ERROR: could not establish any connections to the node localhost:xxxxx after 900 ms -SET citus.force_max_query_parallelization TO OFF; --- one similar test, and this time on modification queries --- to see that connection establishement failures could --- fail the transaction (but not mark any placements as INVALID) +RESET citus.force_max_query_parallelization; RESET citus.node_connection_timeout; SELECT citus.mitmproxy('conn.allow()'); mitmproxy @@ -156,6 +182,9 @@ SELECT citus.mitmproxy('conn.allow()'); (1 row) +-- one similar test, and this time on modification queries +-- to see that connection establishement failures could +-- fail the transaction (but not mark any placements as INVALID) BEGIN; SELECT count(*) as 
invalid_placement_count @@ -179,6 +208,13 @@ SELECT citus.mitmproxy('conn.connect_delay(1000)'); INSERT INTO single_replicatated VALUES (100); ERROR: could not establish any connections to the node localhost:xxxxx after 900 ms COMMIT; +RESET citus.node_connection_timeout; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + SELECT count(*) as invalid_placement_count FROM @@ -191,14 +227,6 @@ WHERE 0 (1 row) --- show that INSERT failed -RESET citus.node_connection_timeout; -SELECT citus.mitmproxy('conn.allow()'); - mitmproxy ---------------------------------------------------------------------- - -(1 row) - SELECT count(*) FROM single_replicatated WHERE key = 100; count --------------------------------------------------------------------- @@ -294,6 +322,13 @@ SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port); f (1 row) +RESET citus.node_connection_timeout; +SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + -- tests for citus_check_cluster_node_health -- kill all connectivity checks that originate from this node SELECT citus.mitmproxy('conn.onQuery(query="^SELECT citus_check_connection_to_node").kill()'); diff --git a/src/test/regress/sql/failure_connection_establishment.sql b/src/test/regress/sql/failure_connection_establishment.sql index 4f03fb69e..0435a482d 100644 --- a/src/test/regress/sql/failure_connection_establishment.sql +++ b/src/test/regress/sql/failure_connection_establishment.sql @@ -51,15 +51,24 @@ SET citus.task_assignment_policy TO 'first-replica'; -- investigation into connection establishment problems SET citus.node_connection_timeout TO 900; SELECT citus.mitmproxy('conn.connect_delay(1000)'); - ALTER TABLE products ADD CONSTRAINT p_key PRIMARY KEY(product_no); +RESET citus.node_connection_timeout; +SELECT citus.mitmproxy('conn.allow()'); + +SELECT 
citus.clear_network_traffic(); -- Make sure that we fall back to a working node for reads, even if it's not -- the first choice in our task assignment policy. SET citus.node_connection_timeout TO 900; SELECT citus.mitmproxy('conn.connect_delay(1000)'); - +-- the read should still succeed via a working node despite the timeout warning SELECT name FROM r1 WHERE id = 2; +RESET citus.node_connection_timeout; +SELECT citus.mitmproxy('conn.allow()'); + +-- verify a connection attempt was made to the intercepted node, this would +-- have caused the connection to have been delayed and thus caused a timeout +SELECT * FROM citus.dump_network_traffic() WHERE conn=0 AND source = 'coordinator'; -- similar test with the above but this time on a distributed table instead of -- a reference table and with citus.force_max_query_parallelization is set @@ -67,9 +76,9 @@ SET citus.force_max_query_parallelization TO ON; SET citus.node_connection_timeout TO 900; SELECT citus.mitmproxy('conn.connect_delay(1000)'); SELECT count(*) FROM products; - RESET citus.node_connection_timeout; SELECT citus.mitmproxy('conn.allow()'); + SET citus.shard_replication_factor TO 1; CREATE TABLE single_replicatated(key int); SELECT create_distributed_table('single_replicatated', 'key'); @@ -80,14 +89,14 @@ SET citus.force_max_query_parallelization TO ON; SET citus.node_connection_timeout TO 900; SELECT citus.mitmproxy('conn.connect_delay(1000)'); SELECT count(*) FROM single_replicatated; +RESET citus.force_max_query_parallelization; +RESET citus.node_connection_timeout; +SELECT citus.mitmproxy('conn.allow()'); -SET citus.force_max_query_parallelization TO OFF; -- one similar test, and this time on modification queries -- to see that connection establishement failures could -- fail the transaction (but not mark any placements as INVALID) -RESET citus.node_connection_timeout; -SELECT citus.mitmproxy('conn.allow()'); BEGIN; SELECT count(*) as invalid_placement_count @@ -100,6 +109,8 @@ SET citus.node_connection_timeout TO 900; SELECT 
citus.mitmproxy('conn.connect_delay(1000)'); INSERT INTO single_replicatated VALUES (100); COMMIT; +RESET citus.node_connection_timeout; +SELECT citus.mitmproxy('conn.allow()'); SELECT count(*) as invalid_placement_count FROM @@ -108,9 +119,6 @@ WHERE shardstate = 3 AND shardid IN (SELECT shardid from pg_dist_shard where logicalrelid = 'single_replicatated'::regclass); --- show that INSERT failed -RESET citus.node_connection_timeout; -SELECT citus.mitmproxy('conn.allow()'); SELECT count(*) FROM single_replicatated WHERE key = 100; @@ -150,6 +158,8 @@ SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port); SET citus.node_connection_timeout TO 900; SELECT citus.mitmproxy('conn.connect_delay(1000)'); SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port); +RESET citus.node_connection_timeout; +SELECT citus.mitmproxy('conn.allow()'); -- tests for citus_check_cluster_node_health