Fixup again

fix-flaky-failure_connection_establishment5
Jelte Fennema 2022-08-23 12:08:29 +02:00
parent b728e73d42
commit b1da6b0bec
2 changed files with 6 additions and 54 deletions

src/test/regress/expected/failure_connection_establishment.out

@@ -15,17 +15,6 @@ CREATE SCHEMA fail_connect;
SET search_path TO 'fail_connect';
SET citus.shard_count TO 4;
SET citus.max_cached_conns_per_worker TO 0;
-- We make sure the maintenance daemon doesn't send queries to the workers,
-- because we use dump_network_traffic and thus the maintenance daemon queries
-- would randomly show up there otherwise.
ALTER SYSTEM SET citus.distributed_deadlock_detection_factor TO -1;
ALTER SYSTEM SET citus.recover_2pc_interval TO -1;
SELECT pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
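
Setting citus.distributed_deadlock_detection_factor and citus.recover_2pc_interval to -1 disables those two maintenance-daemon jobs, so their queries cannot show up in the captured traffic. A minimal sketch of double-checking that the reload took effect, assuming a fresh session on the coordinator:

SHOW citus.distributed_deadlock_detection_factor;  -- expected: -1
SHOW citus.recover_2pc_interval;                   -- expected: -1
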
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1450000;
ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART 1450000;
CREATE TABLE products (
@@ -103,18 +92,12 @@ SELECT citus.mitmproxy('conn.allow()');
(1 row)
SELECT citus.clear_network_traffic();
clear_network_traffic
---------------------------------------------------------------------
(1 row)
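
The tests below all follow the same capture workflow: clear the proxy's capture buffer, run the statement under test, then dump what was actually sent over the wire. A minimal sketch of that pattern, using the citus.* helpers from this test harness and a hypothetical table t:

SELECT citus.clear_network_traffic();        -- start from an empty capture
SELECT count(*) FROM t;                      -- statement under test
SELECT * FROM citus.dump_network_traffic();  -- inspect the captured messages
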
-- Make sure that we fall back to a working node for reads, even if it's not
-- the first choice in our task assignment policy.
--
-- Instead of looking at the warning, we use dump_network_traffic to confirm
-- that the fallback happened.
-- This sometimes adds an extra warning line like this one (without a cause
-- after the error):
-- WARNING: connection to the remote node localhost:xxxxx failed with the following error:
-- Because of that we have two output files.
SET citus.node_connection_timeout TO 900;
SELECT citus.mitmproxy('conn.connect_delay(1000)');
mitmproxy
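
The fallback described in the comment above interacts with citus.task_assignment_policy, which decides which placement a read is routed to first. A minimal sketch of toggling it, assuming the policy names documented for Citus:

SET citus.task_assignment_policy TO 'first-replica';  -- always try the first placement first
-- 'greedy' (the default) and 'round-robin' are the other accepted values
RESET citus.task_assignment_policy;
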
@@ -137,14 +120,6 @@ SELECT citus.mitmproxy('conn.allow()');
(1 row)
-- verify a connection attempt was made to the intercepted node; that attempt
-- would have been delayed by the proxy and thus caused the timeout
SELECT * FROM citus.dump_network_traffic() WHERE conn=0 AND source = 'coordinator';
conn | source | message
---------------------------------------------------------------------
0 | coordinator | [initial message]
(1 row)
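
The numbers in this test make the failure deterministic: conn.connect_delay(1000) stalls each connection for 1000 ms while citus.node_connection_timeout is 900 ms, so every attempt through the proxy misses the timeout by roughly 100 ms. A minimal sketch of the whole pattern, assuming the mitmproxy helper used above:

SET citus.node_connection_timeout TO 900;            -- milliseconds
SELECT citus.mitmproxy('conn.connect_delay(1000)');  -- delay longer than the timeout
-- ... run the statement under test, expect a connection timeout ...
RESET citus.node_connection_timeout;
SELECT citus.mitmproxy('conn.allow()');              -- let traffic through again
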
-- similar test to the above, but this time on a distributed table instead of
-- a reference table, and with citus.force_max_query_parallelization set
SET citus.force_max_query_parallelization TO ON;
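
With citus.force_max_query_parallelization enabled, Citus opens a separate connection per shard placement instead of reusing one connection per worker, so the distributed-table variant makes more connection attempts than the reference-table one. A minimal sketch, assuming a hypothetical distributed table dist_t with several shards:

SET citus.force_max_query_parallelization TO ON;
SELECT count(*) FROM dist_t;  -- roughly one connection per shard placement
RESET citus.force_max_query_parallelization;
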
@@ -467,14 +442,6 @@ SELECT citus.mitmproxy('conn.allow()');
(1 row)
ALTER SYSTEM RESET citus.distributed_deadlock_detection_factor;
ALTER SYSTEM RESET citus.recover_2pc_interval;
SELECT pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
DROP SCHEMA fail_connect CASCADE;
NOTICE: drop cascades to 3 other objects
DETAIL: drop cascades to table products

src/test/regress/sql/failure_connection_establishment.sql

@@ -13,12 +13,6 @@ SET search_path TO 'fail_connect';
SET citus.shard_count TO 4;
SET citus.max_cached_conns_per_worker TO 0;
-- We make sure the maintenance daemon doesn't send queries to the workers,
-- because we use dump_network_traffic and thus the maintenance daemon queries
-- would randomly show up there otherwise.
ALTER SYSTEM SET citus.distributed_deadlock_detection_factor TO -1;
ALTER SYSTEM SET citus.recover_2pc_interval TO -1;
SELECT pg_reload_conf();
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1450000;
ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART 1450000;
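
Restarting pg_dist_shardid_seq pins the shard ids, and therefore the shard names on the workers, that this test creates, which keeps the expected output stable between runs. A minimal sketch of checking the pinned range, assuming products gets distributed later in the file:

SELECT shardid
FROM pg_dist_shard
WHERE logicalrelid = 'fail_connect.products'::regclass
ORDER BY shardid;  -- expected to start at 1450000
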
@@ -61,14 +55,12 @@ ALTER TABLE products ADD CONSTRAINT p_key PRIMARY KEY(product_no);
RESET citus.node_connection_timeout;
SELECT citus.mitmproxy('conn.allow()');
SELECT citus.clear_network_traffic();
-- Make sure that we fall back to a working node for reads, even if it's not
-- the first choice in our task assignment policy.
--
-- Instead of looking at the warning, we use dump_network_traffic to confirm
-- that the fallback happened.
-- This sometimes adds an extra warning line like this one (without a cause
-- after the error):
-- WARNING: connection to the remote node localhost:9060 failed with the following error:
-- Because of that we have two output files.
SET citus.node_connection_timeout TO 900;
SELECT citus.mitmproxy('conn.connect_delay(1000)');
-- tests for connectivity checks
@@ -76,10 +68,6 @@ SELECT name FROM r1 WHERE id = 2;
RESET citus.node_connection_timeout;
SELECT citus.mitmproxy('conn.allow()');
-- verify a connection attempt was made to the intercepted node; that attempt
-- would have been delayed by the proxy and thus caused the timeout
SELECT * FROM citus.dump_network_traffic() WHERE conn=0 AND source = 'coordinator';
-- similar test to the above, but this time on a distributed table instead of
-- a reference table, and with citus.force_max_query_parallelization set
SET citus.force_max_query_parallelization TO ON;
@@ -213,8 +201,5 @@ SELECT * FROM citus_check_cluster_node_health();
RESET client_min_messages;
RESET citus.node_connection_timeout;
SELECT citus.mitmproxy('conn.allow()');
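
citus_check_cluster_node_health(), referenced in the hunk header above, probes connectivity between every pair of nodes and returns one row per direction. A minimal sketch of narrowing its output to failures, assuming the columns Citus documents for the function:

SELECT from_nodename, from_nodeport, to_nodename, to_nodeport, result
FROM citus_check_cluster_node_health()
WHERE result IS NOT TRUE;  -- directions where a node could not reach another
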
ALTER SYSTEM RESET citus.distributed_deadlock_detection_factor;
ALTER SYSTEM RESET citus.recover_2pc_interval;
SELECT pg_reload_conf();
DROP SCHEMA fail_connect CASCADE;
SET search_path TO default;