From b728e73d4273faa70ae52729dcecccbf91d4772e Mon Sep 17 00:00:00 2001 From: Jelte Fennema Date: Tue, 23 Aug 2022 11:40:20 +0200 Subject: [PATCH] Retry --- .../failure_connection_establishment.out | 25 ++++++++++++++++++- .../sql/failure_connection_establishment.sql | 15 ++++++++++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/test/regress/expected/failure_connection_establishment.out b/src/test/regress/expected/failure_connection_establishment.out index e0e62f68a..07a3bf949 100644 --- a/src/test/regress/expected/failure_connection_establishment.out +++ b/src/test/regress/expected/failure_connection_establishment.out @@ -15,6 +15,17 @@ CREATE SCHEMA fail_connect; SET search_path TO 'fail_connect'; SET citus.shard_count TO 4; SET citus.max_cached_conns_per_worker TO 0; +-- We make sure the maintenance daemon doesn't send queries to the workers, +-- because we use dump_network_traffic and thus the maintenance daemon queries +-- would randomly show up there otherwise. 
+ALTER SYSTEM SET citus.distributed_deadlock_detection_factor TO -1; +ALTER SYSTEM SET citus.recover_2pc_interval TO -1; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1450000; ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART 1450000; CREATE TABLE products ( @@ -76,7 +87,7 @@ SET citus.task_assignment_policy TO 'first-replica'; -- we will insert a connection delay here as this query was the cause for an -- investigation into connection establishment problems SET citus.node_connection_timeout TO 900; -SELECT citus.mitmproxy('conn.connect_delay(1000)'); +SELECT citus.mitmproxy('conn.connect_delay(1400)'); mitmproxy --------------------------------------------------------------------- @@ -100,6 +111,10 @@ SELECT citus.clear_network_traffic(); -- Make sure that we fall back to a working node for reads, even if it's not -- the first choice in our task assignment policy. 
+-- -- Instead of looking at the warning below we use dump_network_traffic to -- confirm that the connection to the remote node failed: -- WARNING: connection to the remote node localhost:xxxxx failed with the following error: SET citus.node_connection_timeout TO 900; SELECT citus.mitmproxy('conn.connect_delay(1000)'); mitmproxy @@ -452,6 +467,14 @@ SELECT citus.mitmproxy('conn.allow()'); (1 row) +ALTER SYSTEM RESET citus.distributed_deadlock_detection_factor; +ALTER SYSTEM RESET citus.recover_2pc_interval; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + DROP SCHEMA fail_connect CASCADE; NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to table products diff --git a/src/test/regress/sql/failure_connection_establishment.sql b/src/test/regress/sql/failure_connection_establishment.sql index 0435a482d..52a0c4f3c 100644 --- a/src/test/regress/sql/failure_connection_establishment.sql +++ b/src/test/regress/sql/failure_connection_establishment.sql @@ -13,6 +13,12 @@ SET search_path TO 'fail_connect'; SET citus.shard_count TO 4; SET citus.max_cached_conns_per_worker TO 0; +-- We make sure the maintenance daemon doesn't send queries to the workers, +-- because we use dump_network_traffic and thus the maintenance daemon queries +-- would randomly show up there otherwise. 
+ALTER SYSTEM SET citus.distributed_deadlock_detection_factor TO -1; +ALTER SYSTEM SET citus.recover_2pc_interval TO -1; +SELECT pg_reload_conf(); ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1450000; ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART 1450000; @@ -50,7 +56,7 @@ SET citus.task_assignment_policy TO 'first-replica'; -- we will insert a connection delay here as this query was the cause for an -- investigation into connection establishment problems SET citus.node_connection_timeout TO 900; -SELECT citus.mitmproxy('conn.connect_delay(1000)'); +SELECT citus.mitmproxy('conn.connect_delay(1400)'); ALTER TABLE products ADD CONSTRAINT p_key PRIMARY KEY(product_no); RESET citus.node_connection_timeout; SELECT citus.mitmproxy('conn.allow()'); @@ -59,6 +65,10 @@ SELECT citus.clear_network_traffic(); -- Make sure that we fall back to a working node for reads, even if it's not -- the first choice in our task assignment policy. +-- +-- Instead of looking at the warning below we use dump_network_traffic to +-- confirm that the connection to the remote node failed: +-- WARNING: connection to the remote node localhost:9060 failed with the following error: SET citus.node_connection_timeout TO 900; SELECT citus.mitmproxy('conn.connect_delay(1000)'); -- tests for connectivity checks @@ -203,5 +213,8 @@ SELECT * FROM citus_check_cluster_node_health(); RESET client_min_messages; RESET citus.node_connection_timeout; SELECT citus.mitmproxy('conn.allow()'); +ALTER SYSTEM RESET citus.distributed_deadlock_detection_factor; +ALTER SYSTEM RESET citus.recover_2pc_interval; +SELECT pg_reload_conf(); DROP SCHEMA fail_connect CASCADE; SET search_path TO default;