From a960799dfbdbbb4d66686bfada1997ea0cfc4e88 Mon Sep 17 00:00:00 2001
From: Naisila Puka <37271756+naisila@users.noreply.github.com>
Date: Tue, 14 Nov 2023 18:50:54 +0300
Subject: [PATCH] Clean up leftover replication slots in tests (#7338)

This commit fixes the flakiness in the `logical_replication` and
`citus_non_blocking_split_shard_cleanup` tests. The flakiness was caused
by leftover replication slots. Below is a flaky example for each test:

logical_replication
https://github.com/citusdata/citus/actions/runs/6721324131/attempts/1#summary-18267030604

citus_non_blocking_split_shard_cleanup
https://github.com/citusdata/citus/actions/runs/6721324131/attempts/1#summary-18267006967

```diff
-- Replication slots should be cleaned up
SELECT slot_name FROM pg_replication_slots;
 slot_name
---------------------------------
-(0 rows)
+ citus_shard_split_slot_19_10_17
+(1 row)
```

The tests are not flaky on their own: 32 flaky-test schedules, each with
20 runs, all completed successfully.
https://github.com/citusdata/citus/actions/runs/6822020127?pr=7338

The conclusion is:
1. `multi_tenant_isolation_nonblocking` is the problematic test running
   before `logical_replication` in the `enterprise_schedule`, so I added
   a cleanup at the end of `multi_tenant_isolation_nonblocking`.
   https://github.com/citusdata/citus/actions/runs/6824334614/attempts/1#summary-18560127461
2. `citus_split_shard_by_split_points_negative` is the problematic test
   running before `citus_non_blocking_split_shard_cleanup` in the split
   schedule, so I added the same cleanup line there as well.

For details on the investigation of the leftover replication slots,
please check the PR https://github.com/citusdata/citus/pull/7338
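All four test files gain a call to `public.wait_for_resource_cleanup()`, the
regress-suite helper that blocks until Citus' deferred cleanup of resources
left behind by nonblocking splits and moves (such as replication slots) has
finished. As a minimal illustrative sketch — not part of the patch itself,
with the slot-name pattern taken from the flaky output above — this is the
kind of check the affected tests rely on afterwards:

```sql
-- Sketch only: assumes a Citus regress environment where the
-- public.wait_for_resource_cleanup() helper is installed.
-- Block until deferred cleanup has processed the resources
-- recorded by nonblocking splits/moves:
SELECT public.wait_for_resource_cleanup();

-- Then, on each node, no split replication slots should remain.
-- The LIKE pattern matches slot names such as the
-- citus_shard_split_slot_19_10_17 seen in the flaky run above.
SELECT slot_name
FROM pg_replication_slots
WHERE slot_name LIKE 'citus_shard_split_slot_%';
```

Expecting zero rows from the second query is exactly the assertion that
was failing intermittently before this change.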
---
 .../citus_split_shard_by_split_points_negative.out     | 6 ++++++
 .../expected/multi_tenant_isolation_nonblocking.out    | 7 +++++++
 .../sql/citus_split_shard_by_split_points_negative.sql | 1 +
 .../regress/sql/multi_tenant_isolation_nonblocking.sql | 3 +++
 4 files changed, 17 insertions(+)

diff --git a/src/test/regress/expected/citus_split_shard_by_split_points_negative.out b/src/test/regress/expected/citus_split_shard_by_split_points_negative.out
index 85b1fc3ee..6a4265f81 100644
--- a/src/test/regress/expected/citus_split_shard_by_split_points_negative.out
+++ b/src/test/regress/expected/citus_split_shard_by_split_points_negative.out
@@ -135,4 +135,10 @@ NOTICE:  drop cascades to 3 other objects
 DETAIL:  drop cascades to table citus_split_shard_by_split_points_negative.range_paritioned_table_to_split
 drop cascades to table citus_split_shard_by_split_points_negative.table_to_split
 drop cascades to table citus_split_shard_by_split_points_negative.table_to_split_replication_factor_2
+SELECT public.wait_for_resource_cleanup();
+ wait_for_resource_cleanup
+---------------------------------------------------------------------
+
+(1 row)
+
 --END : Cleanup
diff --git a/src/test/regress/expected/multi_tenant_isolation_nonblocking.out b/src/test/regress/expected/multi_tenant_isolation_nonblocking.out
index dbd15b056..3daac7dac 100644
--- a/src/test/regress/expected/multi_tenant_isolation_nonblocking.out
+++ b/src/test/regress/expected/multi_tenant_isolation_nonblocking.out
@@ -1275,3 +1275,10 @@ SELECT count(*) FROM pg_catalog.pg_dist_partition WHERE colocationid > 0;
 TRUNCATE TABLE pg_catalog.pg_dist_colocation;
 ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART 100;
 ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART :last_placement_id;
+-- make sure we don't have any replication objects leftover on the nodes
+SELECT public.wait_for_resource_cleanup();
+ wait_for_resource_cleanup
+---------------------------------------------------------------------
+
+(1 row)
+
diff --git a/src/test/regress/sql/citus_split_shard_by_split_points_negative.sql b/src/test/regress/sql/citus_split_shard_by_split_points_negative.sql
index fe37777c7..4c180052f 100644
--- a/src/test/regress/sql/citus_split_shard_by_split_points_negative.sql
+++ b/src/test/regress/sql/citus_split_shard_by_split_points_negative.sql
@@ -113,4 +113,5 @@ SELECT citus_split_shard_by_split_points(
 --BEGIN : Cleanup
 \c - postgres - :master_port
 DROP SCHEMA "citus_split_shard_by_split_points_negative" CASCADE;
+SELECT public.wait_for_resource_cleanup();
 --END : Cleanup
diff --git a/src/test/regress/sql/multi_tenant_isolation_nonblocking.sql b/src/test/regress/sql/multi_tenant_isolation_nonblocking.sql
index f74835108..994f29f0a 100644
--- a/src/test/regress/sql/multi_tenant_isolation_nonblocking.sql
+++ b/src/test/regress/sql/multi_tenant_isolation_nonblocking.sql
@@ -607,3 +607,6 @@ TRUNCATE TABLE pg_catalog.pg_dist_colocation;
 ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART 100;
 
 ALTER SEQUENCE pg_catalog.pg_dist_placement_placementid_seq RESTART :last_placement_id;
+
+-- make sure we don't have any replication objects leftover on the nodes
+SELECT public.wait_for_resource_cleanup();