Increase connection establishment timeout; 400 ms was too short

fix-flaky-failure_connection_establishment4
Jelte Fennema 2022-08-23 00:37:16 +02:00
parent 5f2ec57ad0
commit 344145daa1
2 changed files with 19 additions and 19 deletions

View File

@ -36,15 +36,15 @@ ERROR: cannot create constraint on "products"
DETAIL: Distributed relations cannot have UNIQUE, EXCLUDE, or PRIMARY KEY constraints that do not include the partition column (with an equality operator if EXCLUDE). DETAIL: Distributed relations cannot have UNIQUE, EXCLUDE, or PRIMARY KEY constraints that do not include the partition column (with an equality operator if EXCLUDE).
-- we will insert a connection delay here as this query was the cause for an investigation -- we will insert a connection delay here as this query was the cause for an investigation
-- into connection establishment problems -- into connection establishment problems
SET citus.node_connection_timeout TO 400; SET citus.node_connection_timeout TO 1400;
SELECT citus.mitmproxy('conn.delay(500)'); SELECT citus.mitmproxy('conn.delay(1500)');
mitmproxy mitmproxy
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
ALTER TABLE products ADD CONSTRAINT p_key PRIMARY KEY(product_no); ALTER TABLE products ADD CONSTRAINT p_key PRIMARY KEY(product_no);
WARNING: could not establish connection after 400 ms WARNING: could not establish connection after 1400 ms
ERROR: connection to the remote node localhost:xxxxx failed ERROR: connection to the remote node localhost:xxxxx failed
SELECT citus.mitmproxy('conn.allow()'); SELECT citus.mitmproxy('conn.allow()');
mitmproxy mitmproxy
@ -93,7 +93,7 @@ SELECT citus.clear_network_traffic();
(1 row) (1 row)
SELECT citus.mitmproxy('conn.delay(500)'); SELECT citus.mitmproxy('conn.delay(1500)');
mitmproxy mitmproxy
--------------------------------------------------------------------- ---------------------------------------------------------------------
@ -102,7 +102,7 @@ SELECT citus.mitmproxy('conn.delay(500)');
SET citus.task_assignment_policy TO 'round-robin'; SET citus.task_assignment_policy TO 'round-robin';
SET citus.task_assignment_round_robin_index TO 0; SET citus.task_assignment_round_robin_index TO 0;
SELECT name FROM r1 WHERE id = 2; SELECT name FROM r1 WHERE id = 2;
WARNING: could not establish any connections to the node localhost:xxxxx after 400 ms WARNING: could not establish any connections to the node localhost:xxxxx after 1400 ms
name name
--------------------------------------------------------------------- ---------------------------------------------------------------------
bar bar
@ -126,14 +126,14 @@ SELECT citus.mitmproxy('conn.allow()');
-- distributed table instead of a reference table -- distributed table instead of a reference table
-- and with citus.force_max_query_parallelization is set -- and with citus.force_max_query_parallelization is set
SET citus.force_max_query_parallelization TO ON; SET citus.force_max_query_parallelization TO ON;
SELECT citus.mitmproxy('conn.delay(500)'); SELECT citus.mitmproxy('conn.delay(1500)');
mitmproxy mitmproxy
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
SELECT count(*) FROM products; SELECT count(*) FROM products;
WARNING: could not establish any connections to the node localhost:xxxxx after 400 ms WARNING: could not establish any connections to the node localhost:xxxxx after 1400 ms
count count
--------------------------------------------------------------------- ---------------------------------------------------------------------
0 0
@ -156,14 +156,14 @@ SELECT create_distributed_table('single_replicatated', 'key');
-- this time the table is single replicated and we're still using the -- this time the table is single replicated and we're still using the
-- the max parallelization flag, so the query should fail -- the max parallelization flag, so the query should fail
SET citus.force_max_query_parallelization TO ON; SET citus.force_max_query_parallelization TO ON;
SELECT citus.mitmproxy('conn.delay(500)'); SELECT citus.mitmproxy('conn.delay(1500)');
mitmproxy mitmproxy
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
SELECT count(*) FROM single_replicatated; SELECT count(*) FROM single_replicatated;
ERROR: could not establish any connections to the node localhost:xxxxx after 400 ms ERROR: could not establish any connections to the node localhost:xxxxx after 1400 ms
SET citus.force_max_query_parallelization TO OFF; SET citus.force_max_query_parallelization TO OFF;
-- one similar test, and this time on modification queries -- one similar test, and this time on modification queries
-- to see that connection establishement failures could -- to see that connection establishement failures could
@ -187,14 +187,14 @@ WHERE
0 0
(1 row) (1 row)
SELECT citus.mitmproxy('conn.delay(500)'); SELECT citus.mitmproxy('conn.delay(1500)');
mitmproxy mitmproxy
--------------------------------------------------------------------- ---------------------------------------------------------------------
(1 row) (1 row)
INSERT INTO single_replicatated VALUES (100); INSERT INTO single_replicatated VALUES (100);
ERROR: could not establish any connections to the node localhost:xxxxx after 400 ms ERROR: could not establish any connections to the node localhost:xxxxx after 1400 ms
COMMIT; COMMIT;
SELECT SELECT
count(*) as invalid_placement_count count(*) as invalid_placement_count
@ -297,7 +297,7 @@ SELECT citus.mitmproxy('conn.onCommandComplete(command="SELECT 1").cancel(' || p
SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port); SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port);
ERROR: canceling statement due to user request ERROR: canceling statement due to user request
-- verify that the checks are not successful when timeouts happen on a connection -- verify that the checks are not successful when timeouts happen on a connection
SELECT citus.mitmproxy('conn.delay(500)'); SELECT citus.mitmproxy('conn.delay(1500)');
mitmproxy mitmproxy
--------------------------------------------------------------------- ---------------------------------------------------------------------

View File

@ -31,8 +31,8 @@ ALTER TABLE products ADD CONSTRAINT p_key PRIMARY KEY(name);
-- we will insert a connection delay here as this query was the cause for an investigation -- we will insert a connection delay here as this query was the cause for an investigation
-- into connection establishment problems -- into connection establishment problems
SET citus.node_connection_timeout TO 400; SET citus.node_connection_timeout TO 1400;
SELECT citus.mitmproxy('conn.delay(500)'); SELECT citus.mitmproxy('conn.delay(1500)');
ALTER TABLE products ADD CONSTRAINT p_key PRIMARY KEY(product_no); ALTER TABLE products ADD CONSTRAINT p_key PRIMARY KEY(product_no);
@ -55,7 +55,7 @@ ORDER BY placementid;
SELECT citus.clear_network_traffic(); SELECT citus.clear_network_traffic();
SELECT citus.mitmproxy('conn.delay(500)'); SELECT citus.mitmproxy('conn.delay(1500)');
SET citus.task_assignment_policy TO 'round-robin'; SET citus.task_assignment_policy TO 'round-robin';
SET citus.task_assignment_round_robin_index TO 0; SET citus.task_assignment_round_robin_index TO 0;
@ -71,7 +71,7 @@ SELECT citus.mitmproxy('conn.allow()');
-- distributed table instead of a reference table -- distributed table instead of a reference table
-- and with citus.force_max_query_parallelization is set -- and with citus.force_max_query_parallelization is set
SET citus.force_max_query_parallelization TO ON; SET citus.force_max_query_parallelization TO ON;
SELECT citus.mitmproxy('conn.delay(500)'); SELECT citus.mitmproxy('conn.delay(1500)');
SELECT count(*) FROM products; SELECT count(*) FROM products;
SELECT citus.mitmproxy('conn.allow()'); SELECT citus.mitmproxy('conn.allow()');
@ -82,7 +82,7 @@ SELECT create_distributed_table('single_replicatated', 'key');
-- this time the table is single replicated and we're still using the -- this time the table is single replicated and we're still using the
-- the max parallelization flag, so the query should fail -- the max parallelization flag, so the query should fail
SET citus.force_max_query_parallelization TO ON; SET citus.force_max_query_parallelization TO ON;
SELECT citus.mitmproxy('conn.delay(500)'); SELECT citus.mitmproxy('conn.delay(1500)');
SELECT count(*) FROM single_replicatated; SELECT count(*) FROM single_replicatated;
SET citus.force_max_query_parallelization TO OFF; SET citus.force_max_query_parallelization TO OFF;
@ -99,7 +99,7 @@ FROM
WHERE WHERE
shardstate = 3 AND shardstate = 3 AND
shardid IN (SELECT shardid from pg_dist_shard where logicalrelid = 'single_replicatated'::regclass); shardid IN (SELECT shardid from pg_dist_shard where logicalrelid = 'single_replicatated'::regclass);
SELECT citus.mitmproxy('conn.delay(500)'); SELECT citus.mitmproxy('conn.delay(1500)');
INSERT INTO single_replicatated VALUES (100); INSERT INTO single_replicatated VALUES (100);
COMMIT; COMMIT;
SELECT SELECT
@ -148,7 +148,7 @@ SELECT citus.mitmproxy('conn.onCommandComplete(command="SELECT 1").cancel(' || p
SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port); SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port);
-- verify that the checks are not successful when timeouts happen on a connection -- verify that the checks are not successful when timeouts happen on a connection
SELECT citus.mitmproxy('conn.delay(500)'); SELECT citus.mitmproxy('conn.delay(1500)');
SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port); SELECT * FROM citus_check_connection_to_node('localhost', :worker_2_proxy_port);
-- tests for citus_check_cluster_node_health -- tests for citus_check_cluster_node_health