From d7b2dee56ec25eeb20e25c703db8e9b44200b446 Mon Sep 17 00:00:00 2001 From: Jelte Fennema Date: Tue, 23 Aug 2022 10:35:44 +0200 Subject: [PATCH] Increase timeouts --- .circleci/config.yml | 112 ++++++++++++++++++ .../failure_connection_establishment.out | 34 +++--- .../failure_failover_to_local_execution.out | 10 +- .../sql/failure_connection_establishment.sql | 24 ++-- .../failure_failover_to_local_execution.sql | 6 +- 5 files changed, 149 insertions(+), 37 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 0ab3ac608..c847a8823 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -755,3 +755,115 @@ workflows: image_tag: '<< pipeline.parameters.pg14_version >>' make: check-failure requires: [build-14] + - test-citus: + name: 'test-13b_check-failure' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13b_check-failure-1' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13b_check-failure-2' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13b_check-failure-3' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13b_check-failure-4' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13b_check-failure-5' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13b_check-failure-6' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-13b_check-failure-7' + pg_major: 13 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg13_version >>' + make: check-failure + requires: [build-13] + - test-citus: + name: 'test-14b_check-failure' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14b_check-failure-1' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14b_check-failure-2' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14b_check-failure-3' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14b_check-failure-4' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14b_check-failure-5' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14b_check-failure-6' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] + - test-citus: + name: 'test-14b_check-failure-7' + pg_major: 14 + image: citus/failtester + image_tag: '<< pipeline.parameters.pg14_version >>' + make: check-failure + requires: [build-14] diff --git a/src/test/regress/expected/failure_connection_establishment.out b/src/test/regress/expected/failure_connection_establishment.out index 5b09a9aab..e072d2c2c 100644 --- a/src/test/regress/expected/failure_connection_establishment.out +++ b/src/test/regress/expected/failure_connection_establishment.out @@ -75,27 +75,27 @@ ORDER BY placementid; SET citus.task_assignment_policy TO 'first-replica'; -- we will insert a connection delay here as this query was the cause for an -- investigation into connection establishment problems -SET citus.node_connection_timeout TO 400; -SELECT citus.mitmproxy('conn.connect_delay(500)'); +SET citus.node_connection_timeout TO 900; +SELECT citus.mitmproxy('conn.connect_delay(1000)'); mitmproxy --------------------------------------------------------------------- (1 row) ALTER TABLE products ADD CONSTRAINT p_key PRIMARY KEY(product_no); -WARNING: could not establish connection after 400 ms +WARNING: could not establish connection after 900 ms ERROR: connection to the remote node localhost:xxxxx failed -- Make sure that we fall back to a working node for reads, even if it's not -- the first choice in our task assignment policy. -SET citus.node_connection_timeout TO 400; -SELECT citus.mitmproxy('conn.connect_delay(500)'); +SET citus.node_connection_timeout TO 900; +SELECT citus.mitmproxy('conn.connect_delay(1000)'); mitmproxy --------------------------------------------------------------------- (1 row) SELECT name FROM r1 WHERE id = 2; -WARNING: could not establish any connections to the node localhost:xxxxx after 400 ms +WARNING: could not establish any connections to the node localhost:xxxxx after 900 ms name --------------------------------------------------------------------- bar @@ -104,15 +104,15 @@ WARNING: could not establish any connections to the node localhost:xxxxx after -- similar test with the above but this time on a distributed table instead of -- a reference table and with citus.force_max_query_parallelization is set SET citus.force_max_query_parallelization TO ON; -SET citus.node_connection_timeout TO 400; -SELECT citus.mitmproxy('conn.connect_delay(500)'); +SET citus.node_connection_timeout TO 900; +SELECT citus.mitmproxy('conn.connect_delay(1000)'); mitmproxy --------------------------------------------------------------------- (1 row) SELECT count(*) FROM products; -WARNING: could not establish any connections to the node localhost:xxxxx after 400 ms +WARNING: could not establish any connections to the node localhost:xxxxx after 900 ms count --------------------------------------------------------------------- 0 @@ -136,15 +136,15 @@ SELECT create_distributed_table('single_replicatated', 'key'); -- this time the table is single replicated and we're still using the -- the max parallelization flag, so the query should fail SET citus.force_max_query_parallelization TO ON; -SET citus.node_connection_timeout TO 400; -SELECT citus.mitmproxy('conn.connect_delay(500)'); +SET citus.node_connection_timeout TO 900; +SELECT citus.mitmproxy('conn.connect_delay(1000)'); mitmproxy --------------------------------------------------------------------- (1 row) SELECT count(*) FROM single_replicatated; -ERROR: could not establish any connections to the node localhost:xxxxx after 400 ms +ERROR: could not establish any connections to the node localhost:xxxxx after 900 ms SET citus.force_max_query_parallelization TO OFF; -- one similar test, and this time on modification queries -- to see that connection establishement failures could @@ -169,15 +169,15 @@ WHERE 0 (1 row) -SET citus.node_connection_timeout TO 400; -SELECT citus.mitmproxy('conn.connect_delay(500)'); +SET citus.node_connection_timeout TO 900; +SELECT citus.mitmproxy('conn.connect_delay(1000)'); mitmproxy --------------------------------------------------------------------- (1 row) INSERT INTO single_replicatated VALUES (100); -ERROR: could not establish any connections to the node localhost:xxxxx after 400 ms +ERROR: could not establish any connections to the node localhost:xxxxx after 900 ms COMMIT; SELECT count(*) as invalid_placement_count @@ -281,8 +281,8 @@ SELECT citus.mitmproxy('conn.onCommandComplete(command="SELECT 1").cancel(' || p SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port); ERROR: canceling statement due to user request -- verify that the checks are not successful when timeouts happen on a connection -SET citus.node_connection_timeout TO 400; -SELECT citus.mitmproxy('conn.connect_delay(500)'); +SET citus.node_connection_timeout TO 900; +SELECT citus.mitmproxy('conn.connect_delay(1000)'); mitmproxy --------------------------------------------------------------------- diff --git a/src/test/regress/expected/failure_failover_to_local_execution.out b/src/test/regress/expected/failure_failover_to_local_execution.out index f50a07a94..356e14465 100644 --- a/src/test/regress/expected/failure_failover_to_local_execution.out +++ b/src/test/regress/expected/failure_failover_to_local_execution.out @@ -35,8 +35,8 @@ SET citus.max_cached_conns_per_worker to 0; INSERT INTO failover_to_local SELECT i, i::text FROM generate_series(0,20)i; -- even if the connection establishment fails, Citus can -- failover to local exection -SET citus.node_connection_timeout TO 400; -SELECT citus.mitmproxy('conn.connect_delay(500)'); +SET citus.node_connection_timeout TO 900; +SELECT citus.mitmproxy('conn.connect_delay(1000)'); mitmproxy --------------------------------------------------------------------- @@ -45,7 +45,7 @@ SELECT citus.mitmproxy('conn.connect_delay(500)'); SET citus.log_local_commands TO ON; SET client_min_messages TO DEBUG1; SELECT count(*) FROM failover_to_local; -DEBUG: could not establish any connections to the node localhost:xxxxx after 400 ms +DEBUG: could not establish any connections to the node localhost:xxxxx after 900 ms NOTICE: executing the command locally: SELECT count(*) AS count FROM failure_failover_to_local_execution.failover_to_local_1980000 failover_to_local WHERE true NOTICE: executing the command locally: SELECT count(*) AS count FROM failure_failover_to_local_execution.failover_to_local_1980002 failover_to_local WHERE true count @@ -68,7 +68,7 @@ CONTEXT: while executing command on localhost:xxxxx -- if the local execution is disabled, Citus does -- not try to fallback to local execution SET citus.enable_local_execution TO false; -SELECT citus.mitmproxy('conn.connect_delay(500)'); +SELECT citus.mitmproxy('conn.connect_delay(1000)'); mitmproxy --------------------------------------------------------------------- @@ -76,7 +76,7 @@ SELECT citus.mitmproxy('conn.connect_delay(500)'); SET citus.log_local_commands TO ON; SELECT count(*) FROM failover_to_local; -ERROR: could not establish any connections to the node localhost:xxxxx after 400 ms +ERROR: could not establish any connections to the node localhost:xxxxx after 900 ms SELECT citus.mitmproxy('conn.allow()'); mitmproxy --------------------------------------------------------------------- diff --git a/src/test/regress/sql/failure_connection_establishment.sql b/src/test/regress/sql/failure_connection_establishment.sql index 4c4394a8d..4f03fb69e 100644 --- a/src/test/regress/sql/failure_connection_establishment.sql +++ b/src/test/regress/sql/failure_connection_establishment.sql @@ -49,23 +49,23 @@ SET citus.task_assignment_policy TO 'first-replica'; -- we will insert a connection delay here as this query was the cause for an -- investigation into connection establishment problems -SET citus.node_connection_timeout TO 400; -SELECT citus.mitmproxy('conn.connect_delay(500)'); +SET citus.node_connection_timeout TO 900; +SELECT citus.mitmproxy('conn.connect_delay(1000)'); ALTER TABLE products ADD CONSTRAINT p_key PRIMARY KEY(product_no); -- Make sure that we fall back to a working node for reads, even if it's not -- the first choice in our task assignment policy. -SET citus.node_connection_timeout TO 400; -SELECT citus.mitmproxy('conn.connect_delay(500)'); +SET citus.node_connection_timeout TO 900; +SELECT citus.mitmproxy('conn.connect_delay(1000)'); SELECT name FROM r1 WHERE id = 2; -- similar test with the above but this time on a distributed table instead of -- a reference table and with citus.force_max_query_parallelization is set SET citus.force_max_query_parallelization TO ON; -SET citus.node_connection_timeout TO 400; -SELECT citus.mitmproxy('conn.connect_delay(500)'); +SET citus.node_connection_timeout TO 900; +SELECT citus.mitmproxy('conn.connect_delay(1000)'); SELECT count(*) FROM products; RESET citus.node_connection_timeout; @@ -77,8 +77,8 @@ SELECT create_distributed_table('single_replicatated', 'key'); -- this time the table is single replicated and we're still using the -- the max parallelization flag, so the query should fail SET citus.force_max_query_parallelization TO ON; -SET citus.node_connection_timeout TO 400; -SELECT citus.mitmproxy('conn.connect_delay(500)'); +SET citus.node_connection_timeout TO 900; +SELECT citus.mitmproxy('conn.connect_delay(1000)'); SELECT count(*) FROM single_replicatated; SET citus.force_max_query_parallelization TO OFF; @@ -96,8 +96,8 @@ FROM WHERE shardstate = 3 AND shardid IN (SELECT shardid from pg_dist_shard where logicalrelid = 'single_replicatated'::regclass); -SET citus.node_connection_timeout TO 400; -SELECT citus.mitmproxy('conn.connect_delay(500)'); +SET citus.node_connection_timeout TO 900; +SELECT citus.mitmproxy('conn.connect_delay(1000)'); INSERT INTO single_replicatated VALUES (100); COMMIT; SELECT @@ -147,8 +147,8 @@ SELECT citus.mitmproxy('conn.onCommandComplete(command="SELECT 1").cancel(' || p SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port); -- verify that the checks are not successful when timeouts happen on a connection -SET citus.node_connection_timeout TO 400; -SELECT citus.mitmproxy('conn.connect_delay(500)'); +SET citus.node_connection_timeout TO 900; +SELECT citus.mitmproxy('conn.connect_delay(1000)'); SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port); -- tests for citus_check_cluster_node_health diff --git a/src/test/regress/sql/failure_failover_to_local_execution.sql b/src/test/regress/sql/failure_failover_to_local_execution.sql index 4730f822e..6e27cfb7b 100644 --- a/src/test/regress/sql/failure_failover_to_local_execution.sql +++ b/src/test/regress/sql/failure_failover_to_local_execution.sql @@ -22,8 +22,8 @@ INSERT INTO failover_to_local SELECT i, i::text FROM generate_series(0,20)i; -- even if the connection establishment fails, Citus can -- failover to local exection -SET citus.node_connection_timeout TO 400; -SELECT citus.mitmproxy('conn.connect_delay(500)'); +SET citus.node_connection_timeout TO 900; +SELECT citus.mitmproxy('conn.connect_delay(1000)'); SET citus.log_local_commands TO ON; SET client_min_messages TO DEBUG1; SELECT count(*) FROM failover_to_local; @@ -37,7 +37,7 @@ SELECT key / 0 FROM failover_to_local; -- if the local execution is disabled, Citus does -- not try to fallback to local execution SET citus.enable_local_execution TO false; -SELECT citus.mitmproxy('conn.connect_delay(500)'); +SELECT citus.mitmproxy('conn.connect_delay(1000)'); SET citus.log_local_commands TO ON; SELECT count(*) FROM failover_to_local; SELECT citus.mitmproxy('conn.allow()');