fix-flaky-failure_connection_establishment5
Jelte Fennema 2022-08-23 11:40:20 +02:00
parent 3a373cb277
commit b728e73d42
2 changed files with 38 additions and 2 deletions

View File

@ -15,6 +15,17 @@ CREATE SCHEMA fail_connect;
SET search_path TO 'fail_connect';
SET citus.shard_count TO 4;
SET citus.max_cached_conns_per_worker TO 0;
-- We make sure the maintenance daemon doesn't send queries to the workers,
-- because we use dump_network_traffic and thus the maintenance daemon queries
-- would randomly show up there otherwise.
ALTER SYSTEM SET citus.distributed_deadlock_detection_factor TO -1;
ALTER SYSTEM SET citus.recover_2pc_interval TO -1;
SELECT pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1450000;
ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART 1450000;
CREATE TABLE products (
@ -76,7 +87,7 @@ SET citus.task_assignment_policy TO 'first-replica';
-- we will insert a connection delay here as this query was the cause for an
-- investigation into connection establishment problems
SET citus.node_connection_timeout TO 900;
SELECT citus.mitmproxy('conn.connect_delay(1000)');
SELECT citus.mitmproxy('conn.connect_delay(1400)');
mitmproxy
---------------------------------------------------------------------
@ -100,6 +111,10 @@ SELECT citus.clear_network_traffic();
-- Make sure that we fall back to a working node for reads, even if it's not
-- the first choice in our task assignment policy.
--
-- Instead of looking at the warning below, we use dump_network_traffic to
-- confirm that the connection attempt failed:
-- WARNING: connection to the remote node localhost:xxxxx failed with the following error:
SET citus.node_connection_timeout TO 900;
SELECT citus.mitmproxy('conn.connect_delay(1000)');
mitmproxy
@ -452,6 +467,14 @@ SELECT citus.mitmproxy('conn.allow()');
(1 row)
ALTER SYSTEM RESET citus.distributed_deadlock_detection_factor;
ALTER SYSTEM RESET citus.recover_2pc_interval;
SELECT pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
DROP SCHEMA fail_connect CASCADE;
NOTICE: drop cascades to 3 other objects
DETAIL: drop cascades to table products

View File

@ -13,6 +13,12 @@ SET search_path TO 'fail_connect';
SET citus.shard_count TO 4;
SET citus.max_cached_conns_per_worker TO 0;
-- We make sure the maintenance daemon doesn't send queries to the workers,
-- because we use dump_network_traffic and thus the maintenance daemon queries
-- would randomly show up there otherwise.
ALTER SYSTEM SET citus.distributed_deadlock_detection_factor TO -1;
ALTER SYSTEM SET citus.recover_2pc_interval TO -1;
SELECT pg_reload_conf();
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1450000;
ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART 1450000;
@ -50,7 +56,7 @@ SET citus.task_assignment_policy TO 'first-replica';
-- we will insert a connection delay here as this query was the cause for an
-- investigation into connection establishment problems
SET citus.node_connection_timeout TO 900;
SELECT citus.mitmproxy('conn.connect_delay(1000)');
SELECT citus.mitmproxy('conn.connect_delay(1400)');
ALTER TABLE products ADD CONSTRAINT p_key PRIMARY KEY(product_no);
RESET citus.node_connection_timeout;
SELECT citus.mitmproxy('conn.allow()');
@ -59,6 +65,10 @@ SELECT citus.clear_network_traffic();
-- Make sure that we fall back to a working node for reads, even if it's not
-- the first choice in our task assignment policy.
--
-- Instead of looking at the warning below, we use dump_network_traffic to
-- confirm that the connection attempt failed:
-- WARNING: connection to the remote node localhost:9060 failed with the following error:
SET citus.node_connection_timeout TO 900;
SELECT citus.mitmproxy('conn.connect_delay(1000)');
-- tests for connectivity checks
@ -203,5 +213,8 @@ SELECT * FROM citus_check_cluster_node_health();
RESET client_min_messages;
RESET citus.node_connection_timeout;
SELECT citus.mitmproxy('conn.allow()');
ALTER SYSTEM RESET citus.distributed_deadlock_detection_factor;
ALTER SYSTEM RESET citus.recover_2pc_interval;
SELECT pg_reload_conf();
DROP SCHEMA fail_connect CASCADE;
SET search_path TO default;