fix 3 flaky tests in failure schedule (#6846)

Fixed 3 flaky tests in the failure test schedule that caused flakiness in other
tests due to node and group sequence ids changing during node addition and
removal.

(cherry picked from commit 3286ec59e9)
pull/6862/head
aykut-bozkurt 2023-04-13 13:13:28 +03:00 committed by aykutbozkurt
parent 1a9066c34a
commit 17149b92b2
6 changed files with 36 additions and 32 deletions
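
All three tests are fixed with the same pattern: once a test has removed and re-added nodes, it restarts the catalog sequences that hand out node, group, shard, and placement ids, so any test that runs afterwards sees the same ids on every run. A minimal sketch, assembled from the ALTER SEQUENCE statements the hunks below add (the RESTART values are simply the ones the expected outputs assume):

    -- Restart the id-generating catalog sequences so that ids assigned
    -- after this point are deterministic across test runs.
    ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART 2;
    ALTER SEQUENCE pg_dist_groupid_seq RESTART 2;
    ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 222222;
    ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART 333333;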


@@ -187,6 +187,8 @@ ORDER BY placementid;
 (1 row)
 -- reset cluster to original state
+ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART 2;
+ALTER SEQUENCE pg_dist_groupid_seq RESTART 2;
 SELECT citus.mitmproxy('conn.allow()');
  mitmproxy
 ---------------------------------------------------------------------
@@ -196,7 +198,7 @@ SELECT citus.mitmproxy('conn.allow()');
 SELECT master_add_node('localhost', :worker_2_proxy_port);
  master_add_node
 ---------------------------------------------------------------------
- 4
+ 2
 (1 row)
 -- verify node is added


@@ -12,6 +12,8 @@ SET citus.shard_count TO 2;
 SET citus.shard_replication_factor TO 1;
 SET citus.max_adaptive_executor_pool_size TO 1;
 SELECT pg_backend_pid() as pid \gset
+ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 222222;
+ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART 333333;
 -- make sure coordinator is in the metadata
 SELECT citus_set_coordinator_host('localhost', 57636);
  citus_set_coordinator_host
@@ -189,8 +191,8 @@ SELECT create_distributed_table_concurrently('table_1', 'id');
 SELECT * FROM pg_dist_shard WHERE logicalrelid = 'table_1'::regclass;
  logicalrelid | shardid | shardstorage | shardminvalue | shardmaxvalue
 ---------------------------------------------------------------------
- table_1 | 1880080 | t | -2147483648 | -1
- table_1 | 1880081 | t | 0 | 2147483647
+ table_1 | 222247 | t | -2147483648 | -1
+ table_1 | 222248 | t | 0 | 2147483647
 (2 rows)
 DROP SCHEMA create_dist_tbl_con CASCADE;
@@ -201,3 +203,5 @@ SELECT citus_remove_node('localhost', 57636);
 (1 row)
+ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART 3;
+ALTER SEQUENCE pg_dist_groupid_seq RESTART 3;


@@ -597,8 +597,8 @@ ERROR: connection not open
 SELECT * FROM pg_dist_node ORDER BY nodeport;
  nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards
 ---------------------------------------------------------------------
- 4 | 4 | localhost | 9060 | default | f | t | primary | default | f | t
- 6 | 0 | localhost | 57636 | default | t | t | primary | default | t | f
+ 2 | 2 | localhost | 9060 | default | f | t | primary | default | f | t
+ 3 | 0 | localhost | 57636 | default | t | t | primary | default | t | f
 1 | 1 | localhost | 57637 | default | t | t | primary | default | t | t
 (3 rows)
@@ -626,24 +626,14 @@ UPDATE dist1 SET id = :failed_node_val WHERE id = :failed_node_val;
 -- Show that we can still delete from a shard at the node from coordinator
 DELETE FROM dist1 WHERE id = :failed_node_val;
 -- Show that DDL would still propagate to the node
-SET client_min_messages TO NOTICE;
-SET citus.log_remote_commands TO 1;
 CREATE SCHEMA dummy;
-NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
-NOTICE: issuing BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;SELECT assign_distributed_transaction_id(xx, xx, 'xxxxxxx');
-NOTICE: issuing SET citus.enable_ddl_propagation TO 'off'
-NOTICE: issuing CREATE SCHEMA dummy
-NOTICE: issuing SET citus.enable_ddl_propagation TO 'on'
-NOTICE: issuing SET citus.enable_ddl_propagation TO 'off'
-NOTICE: issuing CREATE SCHEMA dummy
-NOTICE: issuing SET citus.enable_ddl_propagation TO 'on'
-NOTICE: issuing WITH distributed_object_data(typetext, objnames, objargs, distargumentindex, colocationid, force_delegation) AS (VALUES ('schema', ARRAY['dummy']::text[], ARRAY[]::text[], -1, 0, false)) SELECT citus_internal_add_object_metadata(typetext, objnames, objargs, distargumentindex::int, colocationid::int, force_delegation::bool) FROM distributed_object_data;
-NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx'
-NOTICE: issuing PREPARE TRANSACTION 'citus_xx_xx_xx_xx'
-NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx'
-NOTICE: issuing COMMIT PREPARED 'citus_xx_xx_xx_xx'
-SET citus.log_remote_commands TO 0;
-SET client_min_messages TO ERROR;
+SELECT * FROM run_command_on_workers($$SELECT nspname FROM pg_namespace WHERE nspname = 'dummy'$$);
+ nodename | nodeport | success | result
+---------------------------------------------------------------------
+ localhost | 9060 | t | dummy
+ localhost | 57637 | t | dummy
+(2 rows)
 -- Successfully activate the node after many failures
 SELECT citus.mitmproxy('conn.allow()');
  mitmproxy
@@ -654,14 +644,14 @@ SELECT citus.mitmproxy('conn.allow()');
 SELECT citus_activate_node('localhost', :worker_2_proxy_port);
  citus_activate_node
 ---------------------------------------------------------------------
- 4
+ 2
 (1 row)
 -- Activate the node once more to verify it works again with already synced metadata
 SELECT citus_activate_node('localhost', :worker_2_proxy_port);
  citus_activate_node
 ---------------------------------------------------------------------
- 4
+ 2
 (1 row)
 -- Show node metadata info on worker2 and coordinator after success
@@ -669,8 +659,8 @@ SELECT citus_activate_node('localhost', :worker_2_proxy_port);
 SELECT * FROM pg_dist_node ORDER BY nodeport;
  nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards
 ---------------------------------------------------------------------
- 4 | 4 | localhost | 9060 | default | t | t | primary | default | t | t
- 6 | 0 | localhost | 57636 | default | t | t | primary | default | t | f
+ 2 | 2 | localhost | 9060 | default | t | t | primary | default | t | t
+ 3 | 0 | localhost | 57636 | default | t | t | primary | default | t | f
 1 | 1 | localhost | 57637 | default | t | t | primary | default | t | t
 (3 rows)
@@ -678,8 +668,8 @@ SELECT * FROM pg_dist_node ORDER BY nodeport;
 SELECT * FROM pg_dist_node ORDER BY nodeport;
  nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards
 ---------------------------------------------------------------------
- 4 | 4 | localhost | 9060 | default | t | t | primary | default | t | t
- 6 | 0 | localhost | 57636 | default | t | t | primary | default | t | f
+ 2 | 2 | localhost | 9060 | default | t | t | primary | default | t | t
+ 3 | 0 | localhost | 57636 | default | t | t | primary | default | t | f
 1 | 1 | localhost | 57637 | default | t | t | primary | default | t | t
 (3 rows)
@@ -701,3 +691,5 @@ SELECT citus_remove_node('localhost', :master_port);
 (1 row)
+ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART 3;
+ALTER SEQUENCE pg_dist_groupid_seq RESTART 3;


@@ -97,6 +97,8 @@ WHERE s.logicalrelid = 'user_table'::regclass AND n.isactive
 ORDER BY placementid;
 -- reset cluster to original state
+ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART 2;
+ALTER SEQUENCE pg_dist_groupid_seq RESTART 2;
 SELECT citus.mitmproxy('conn.allow()');
 SELECT master_add_node('localhost', :worker_2_proxy_port);


@@ -15,6 +15,9 @@ SET citus.shard_replication_factor TO 1;
 SET citus.max_adaptive_executor_pool_size TO 1;
 SELECT pg_backend_pid() as pid \gset
+ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 222222;
+ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART 333333;
 -- make sure coordinator is in the metadata
 SELECT citus_set_coordinator_host('localhost', 57636);
@@ -108,3 +111,5 @@ SELECT * FROM pg_dist_shard WHERE logicalrelid = 'table_1'::regclass;
 DROP SCHEMA create_dist_tbl_con CASCADE;
 SET search_path TO default;
 SELECT citus_remove_node('localhost', 57636);
+ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART 3;
+ALTER SEQUENCE pg_dist_groupid_seq RESTART 3;


@@ -260,11 +260,8 @@ UPDATE dist1 SET id = :failed_node_val WHERE id = :failed_node_val;
 DELETE FROM dist1 WHERE id = :failed_node_val;
 -- Show that DDL would still propagate to the node
-SET client_min_messages TO NOTICE;
-SET citus.log_remote_commands TO 1;
 CREATE SCHEMA dummy;
-SET citus.log_remote_commands TO 0;
-SET client_min_messages TO ERROR;
+SELECT * FROM run_command_on_workers($$SELECT nspname FROM pg_namespace WHERE nspname = 'dummy'$$);
 -- Successfully activate the node after many failures
 SELECT citus.mitmproxy('conn.allow()');
@@ -285,3 +282,5 @@ DROP SCHEMA mx_metadata_sync_multi_trans CASCADE;
 DROP ROLE foo1;
 DROP ROLE foo2;
 SELECT citus_remove_node('localhost', :master_port);
+ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART 3;
+ALTER SEQUENCE pg_dist_groupid_seq RESTART 3;