Only delay initial connection packets

fix-flaky-failure_connection_establishment5
Jelte Fennema 2022-08-23 10:08:30 +02:00
parent f9476898d7
commit 667edd9194
4 changed files with 17 additions and 16 deletions

View File

@ -76,7 +76,7 @@ SET citus.task_assignment_policy TO 'first-replica';
-- we will insert a connection delay here as this query was the cause for an
-- investigation into connection establishment problems
SET citus.node_connection_timeout TO 400;
SELECT citus.mitmproxy('conn.delay(500)');
SELECT citus.mitmproxy('conn.connect_delay(500)');
mitmproxy
---------------------------------------------------------------------
@ -88,7 +88,7 @@ ERROR: connection to the remote node localhost:xxxxx failed
-- Make sure that we fall back to a working node for reads, even if it's not
-- the first choice in our task assignment policy.
SET citus.node_connection_timeout TO 400;
SELECT citus.mitmproxy('conn.delay(500)');
SELECT citus.mitmproxy('conn.connect_delay(500)');
mitmproxy
---------------------------------------------------------------------
@ -105,7 +105,7 @@ WARNING: could not establish any connections to the node localhost:xxxxx after
-- a reference table and with citus.force_max_query_parallelization is set
SET citus.force_max_query_parallelization TO ON;
SET citus.node_connection_timeout TO 400;
SELECT citus.mitmproxy('conn.delay(500)');
SELECT citus.mitmproxy('conn.connect_delay(500)');
mitmproxy
---------------------------------------------------------------------
@ -137,7 +137,7 @@ SELECT create_distributed_table('single_replicatated', 'key');
-- the max parallelization flag, so the query should fail
SET citus.force_max_query_parallelization TO ON;
SET citus.node_connection_timeout TO 400;
SELECT citus.mitmproxy('conn.delay(500)');
SELECT citus.mitmproxy('conn.connect_delay(500)');
mitmproxy
---------------------------------------------------------------------
@ -170,7 +170,7 @@ WHERE
(1 row)
SET citus.node_connection_timeout TO 400;
SELECT citus.mitmproxy('conn.delay(500)');
SELECT citus.mitmproxy('conn.connect_delay(500)');
mitmproxy
---------------------------------------------------------------------
@ -282,7 +282,7 @@ SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port);
ERROR: canceling statement due to user request
-- verify that the checks are not successful when timeouts happen on a connection
SET citus.node_connection_timeout TO 400;
SELECT citus.mitmproxy('conn.delay(500)');
SELECT citus.mitmproxy('conn.connect_delay(500)');
mitmproxy
---------------------------------------------------------------------

View File

@ -114,7 +114,7 @@ class ActionsMixin:
self.next = CancelHandler(self.root, pid)
return self.next
def delay(self, timeMs):
def connect_delay(self, timeMs):
self.next = DelayHandler(self.root, timeMs)
return self.next
@ -180,7 +180,8 @@ class DelayHandler(Handler):
super().__init__(root)
self.timeMs = timeMs
def _handle(self, flow, message):
time.sleep(self.timeMs/1000.0)
if message.is_initial:
time.sleep(self.timeMs/1000.0)
return 'done'
class Contains(Handler, ActionsMixin, FilterableMixin):

View File

@ -50,14 +50,14 @@ SET citus.task_assignment_policy TO 'first-replica';
-- we will insert a connection delay here as this query was the cause for an
-- investigation into connection establishment problems
SET citus.node_connection_timeout TO 400;
SELECT citus.mitmproxy('conn.delay(500)');
SELECT citus.mitmproxy('conn.connect_delay(500)');
ALTER TABLE products ADD CONSTRAINT p_key PRIMARY KEY(product_no);
-- Make sure that we fall back to a working node for reads, even if it's not
-- the first choice in our task assignment policy.
SET citus.node_connection_timeout TO 400;
SELECT citus.mitmproxy('conn.delay(500)');
SELECT citus.mitmproxy('conn.connect_delay(500)');
SELECT name FROM r1 WHERE id = 2;
@ -65,7 +65,7 @@ SELECT name FROM r1 WHERE id = 2;
-- a reference table and with citus.force_max_query_parallelization is set
SET citus.force_max_query_parallelization TO ON;
SET citus.node_connection_timeout TO 400;
SELECT citus.mitmproxy('conn.delay(500)');
SELECT citus.mitmproxy('conn.connect_delay(500)');
SELECT count(*) FROM products;
RESET citus.node_connection_timeout;
@ -78,7 +78,7 @@ SELECT create_distributed_table('single_replicatated', 'key');
-- the max parallelization flag, so the query should fail
SET citus.force_max_query_parallelization TO ON;
SET citus.node_connection_timeout TO 400;
SELECT citus.mitmproxy('conn.delay(500)');
SELECT citus.mitmproxy('conn.connect_delay(500)');
SELECT count(*) FROM single_replicatated;
SET citus.force_max_query_parallelization TO OFF;
@ -97,7 +97,7 @@ WHERE
shardstate = 3 AND
shardid IN (SELECT shardid from pg_dist_shard where logicalrelid = 'single_replicatated'::regclass);
SET citus.node_connection_timeout TO 400;
SELECT citus.mitmproxy('conn.delay(500)');
SELECT citus.mitmproxy('conn.connect_delay(500)');
INSERT INTO single_replicatated VALUES (100);
COMMIT;
SELECT
@ -148,7 +148,7 @@ SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port);
-- verify that the checks are not successful when timeouts happen on a connection
SET citus.node_connection_timeout TO 400;
SELECT citus.mitmproxy('conn.delay(500)');
SELECT citus.mitmproxy('conn.connect_delay(500)');
SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port);
-- tests for citus_check_cluster_node_health

View File

@ -23,7 +23,7 @@ INSERT INTO failover_to_local SELECT i, i::text FROM generate_series(0,20)i;
-- even if the connection establishment fails, Citus can
-- fail over to local execution
SET citus.node_connection_timeout TO 400;
SELECT citus.mitmproxy('conn.delay(500)');
SELECT citus.mitmproxy('conn.connect_delay(500)');
SET citus.log_local_commands TO ON;
SET client_min_messages TO DEBUG1;
SELECT count(*) FROM failover_to_local;
@ -37,7 +37,7 @@ SELECT key / 0 FROM failover_to_local;
-- if the local execution is disabled, Citus does
-- not try to fall back to local execution
SET citus.enable_local_execution TO false;
SELECT citus.mitmproxy('conn.delay(500)');
SELECT citus.mitmproxy('conn.connect_delay(500)');
SET citus.log_local_commands TO ON;
SELECT count(*) FROM failover_to_local;
SELECT citus.mitmproxy('conn.allow()');