From a6ae5756ef350fb8aa950028e5570effebeb577f Mon Sep 17 00:00:00 2001 From: Jelte Fennema Date: Tue, 23 Aug 2022 09:26:00 +0200 Subject: [PATCH] RESET connection_timeout all the time --- .circleci/config.yml | 113 ++++++++++++++++++ .../failure_connection_establishment.out | 14 ++- src/test/regress/failure_schedule | 11 ++ .../sql/failure_connection_establishment.sql | 14 ++- 4 files changed, 146 insertions(+), 6 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d6e5b773c..0ab3ac608 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -642,3 +642,116 @@ workflows: image_tag: '<< pipeline.parameters.pg14_version >>' make: check-failure requires: [build-14] + + - test-citus: + name: 'test-13a_check-failure' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13a_check-failure-1' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13a_check-failure-2' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13a_check-failure-3' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13a_check-failure-4' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13a_check-failure-5' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13a_check-failure-6' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13a_check-failure-7' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-14a_check-failure' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14a_check-failure-1' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14a_check-failure-2' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14a_check-failure-3' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14a_check-failure-4' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14a_check-failure-5' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14a_check-failure-6' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14a_check-failure-7' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] diff --git a/src/test/regress/expected/failure_connection_establishment.out b/src/test/regress/expected/failure_connection_establishment.out index e2a4a729e..47c14720c 100644 --- a/src/test/regress/expected/failure_connection_establishment.out +++ b/src/test/regress/expected/failure_connection_establishment.out @@ -46,6 +46,7 @@ SELECT citus.mitmproxy('conn.delay(500)'); ALTER TABLE products ADD CONSTRAINT p_key PRIMARY KEY(product_no); WARNING: could not establish connection after 400 ms ERROR: connection to the remote node localhost:xxxxx failed +RESET citus.node_connection_timeout; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- @@ -93,6 +94,7 @@ SELECT citus.clear_network_traffic(); (1 row) +SET citus.node_connection_timeout TO 400; SELECT citus.mitmproxy('conn.delay(500)'); mitmproxy --------------------------------------------------------------------- @@ -116,6 +118,7 @@ SELECT * FROM citus.dump_network_traffic() WHERE conn=0 AND source = 'coordinato 0 | coordinator | [initial message] (1 row) +RESET citus.node_connection_timeout; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- @@ -126,6 +129,7 @@ SELECT citus.mitmproxy('conn.allow()'); -- distributed table instead of a reference table -- and with citus.force_max_query_parallelization is set SET citus.force_max_query_parallelization TO ON; +SET citus.node_connection_timeout TO 400; SELECT citus.mitmproxy('conn.delay(500)'); mitmproxy --------------------------------------------------------------------- @@ -139,6 +143,7 @@ WARNING: could not establish any connections to the node localhost:xxxxx after 0 (1 row) +RESET citus.node_connection_timeout; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- @@ -147,7 +152,6 @@ SELECT citus.mitmproxy('conn.allow()'); SET citus.shard_replication_factor TO 1; CREATE TABLE single_replicatated(key int); -RESET citus.node_connection_timeout; -- speed up test and make it less flaky in CI SELECT create_distributed_table('single_replicatated', 'key'); create_distributed_table --------------------------------------------------------------------- @@ -156,8 +160,8 @@ SELECT create_distributed_table('single_replicatated', 'key'); -- this time the table is single replicated and we're still using the -- the max parallelization flag, so the query should fail -SET citus.node_connection_timeout TO 400; SET citus.force_max_query_parallelization TO ON; +SET citus.node_connection_timeout TO 400; SELECT citus.mitmproxy('conn.delay(500)'); mitmproxy --------------------------------------------------------------------- @@ -170,6 +174,7 @@ SET citus.force_max_query_parallelization TO OFF; -- one similar test, and this time on modification queries -- to see that connection establishement failures could -- fail the transaction (but not mark any placements as INVALID) +RESET citus.node_connection_timeout; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- @@ -189,6 +194,7 @@ WHERE 0 (1 row) +SET citus.node_connection_timeout TO 400; SELECT citus.mitmproxy('conn.delay(500)'); mitmproxy --------------------------------------------------------------------- @@ -211,6 +217,7 @@ WHERE (1 row) -- show that INSERT failed +RESET citus.node_connection_timeout; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- @@ -299,6 +306,7 @@ SELECT citus.mitmproxy('conn.onCommandComplete(command="SELECT 1").cancel(' || p SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port); ERROR: canceling statement due to user request -- verify that the checks are not successful when timeouts happen on a connection +SET citus.node_connection_timeout TO 400; SELECT citus.mitmproxy('conn.delay(500)'); mitmproxy --------------------------------------------------------------------- @@ -427,13 +435,13 @@ SELECT citus.mitmproxy('conn.onQuery(query="^SELECT 1$").cancel(' || pg_backend_ SELECT * FROM citus_check_cluster_node_health(); ERROR: canceling statement due to user request RESET client_min_messages; +RESET citus.node_connection_timeout; SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- (1 row) -SET citus.node_connection_timeout TO DEFAULT; DROP SCHEMA fail_connect CASCADE; NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to table products diff --git a/src/test/regress/failure_schedule b/src/test/regress/failure_schedule index 795e5dee2..16ad55ad0 100644 --- a/src/test/regress/failure_schedule +++ b/src/test/regress/failure_schedule @@ -28,3 +28,14 @@ test: failure_connection_establishment test: failure_connection_establishment test: failure_connection_establishment test: failure_connection_establishment +test: failure_connection_establishment +test: failure_connection_establishment +test: failure_connection_establishment +test: failure_connection_establishment +test: failure_connection_establishment +test: failure_connection_establishment +test: failure_connection_establishment +test: failure_connection_establishment +test: failure_connection_establishment +test: failure_connection_establishment +test: failure_connection_establishment diff --git a/src/test/regress/sql/failure_connection_establishment.sql b/src/test/regress/sql/failure_connection_establishment.sql index 6d8d76fd6..ddd3119a8 100644 --- a/src/test/regress/sql/failure_connection_establishment.sql +++ b/src/test/regress/sql/failure_connection_establishment.sql @@ -36,6 +36,7 @@ SELECT citus.mitmproxy('conn.delay(500)'); ALTER TABLE products ADD CONSTRAINT p_key PRIMARY KEY(product_no); +RESET citus.node_connection_timeout; SELECT citus.mitmproxy('conn.allow()'); CREATE TABLE r1 ( @@ -55,6 +56,7 @@ ORDER BY placementid; SELECT citus.clear_network_traffic(); +SET citus.node_connection_timeout TO 400; SELECT citus.mitmproxy('conn.delay(500)'); SET citus.task_assignment_policy TO 'round-robin'; @@ -65,25 +67,27 @@ SELECT name FROM r1 WHERE id = 2; -- connection to have been delayed and thus caused a timeout SELECT * FROM citus.dump_network_traffic() WHERE conn=0 AND source = 'coordinator'; +RESET citus.node_connection_timeout; SELECT citus.mitmproxy('conn.allow()'); -- similar test with the above but this time on a -- distributed table instead of a reference table -- and with citus.force_max_query_parallelization is set SET citus.force_max_query_parallelization TO ON; +SET citus.node_connection_timeout TO 400; SELECT citus.mitmproxy('conn.delay(500)'); SELECT count(*) FROM products; +RESET citus.node_connection_timeout; SELECT citus.mitmproxy('conn.allow()'); SET citus.shard_replication_factor TO 1; CREATE TABLE single_replicatated(key int); -RESET citus.node_connection_timeout; -- speed up test and make it less flaky in CI SELECT create_distributed_table('single_replicatated', 'key'); -- this time the table is single replicated and we're still using the -- the max parallelization flag, so the query should fail -SET citus.node_connection_timeout TO 400; SET citus.force_max_query_parallelization TO ON; +SET citus.node_connection_timeout TO 400; SELECT citus.mitmproxy('conn.delay(500)'); SELECT count(*) FROM single_replicatated; @@ -92,6 +96,7 @@ SET citus.force_max_query_parallelization TO OFF; -- one similar test, and this time on modification queries -- to see that connection establishement failures could -- fail the transaction (but not mark any placements as INVALID) +RESET citus.node_connection_timeout; SELECT citus.mitmproxy('conn.allow()'); BEGIN; SELECT @@ -101,6 +106,7 @@ FROM WHERE shardstate = 3 AND shardid IN (SELECT shardid from pg_dist_shard where logicalrelid = 'single_replicatated'::regclass); +SET citus.node_connection_timeout TO 400; SELECT citus.mitmproxy('conn.delay(500)'); INSERT INTO single_replicatated VALUES (100); COMMIT; @@ -113,6 +119,7 @@ WHERE shardid IN (SELECT shardid from pg_dist_shard where logicalrelid = 'single_replicatated'::regclass); -- show that INSERT failed +RESET citus.node_connection_timeout; SELECT citus.mitmproxy('conn.allow()'); SELECT count(*) FROM single_replicatated WHERE key = 100; @@ -150,6 +157,7 @@ SELECT citus.mitmproxy('conn.onCommandComplete(command="SELECT 1").cancel(' || p SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port); -- verify that the checks are not successful when timeouts happen on a connection +SET citus.node_connection_timeout TO 400; SELECT citus.mitmproxy('conn.delay(500)'); SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port); @@ -193,7 +201,7 @@ SELECT * FROM citus_check_cluster_node_health(); RESET client_min_messages; +RESET citus.node_connection_timeout; SELECT citus.mitmproxy('conn.allow()'); -SET citus.node_connection_timeout TO DEFAULT; DROP SCHEMA fail_connect CASCADE; SET search_path TO default;