From 20a4d742aacfa3371ebd047609dafd168b6a7b3a Mon Sep 17 00:00:00 2001
From: Jelte Fennema
Date: Mon, 24 Oct 2022 17:35:31 +0200
Subject: [PATCH] Fix flakiness in failure_split_cleanup (#6450)

Sometimes in CI our failure_split_cleanup test would fail like this:

```diff
 CALL pg_catalog.citus_cleanup_orphaned_resources();
-NOTICE:  cleaned up 79 orphaned resources
+NOTICE:  cleaned up 82 orphaned resources
 SELECT operation_id, object_type, object_name, node_group_id, policy_type
```

Source: https://app.circleci.com/pipelines/github/citusdata/citus/28107/workflows/4ec712c9-98b5-4e90-9806-e02a37d71679/jobs/846107

The reason was that previous tests in the schedule would also create
some orphaned resources. Sometimes the maintenance daemon would already
have cleaned some of those up, resulting in a different number of
cleaned-up resources than expected.

This change cleans up any previously created resources at the start of
the test, without logging exactly how many were cleaned up. As a bonus,
it also allows running this test using check-failure-base.
---
 src/test/regress/expected/failure_split_cleanup.out | 8 ++++++--
 src/test/regress/sql/failure_split_cleanup.sql      | 7 +++++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/test/regress/expected/failure_split_cleanup.out b/src/test/regress/expected/failure_split_cleanup.out
index 7c9cc2739..f86a1de1f 100644
--- a/src/test/regress/expected/failure_split_cleanup.out
+++ b/src/test/regress/expected/failure_split_cleanup.out
@@ -14,6 +14,10 @@ SET citus.next_cleanup_record_id TO 11;
 SET citus.shard_count TO 2;
 SET citus.shard_replication_factor TO 1;
 SELECT pg_backend_pid() as pid \gset
+-- cleanup any leftovers from previous tests so we get consistent output
+SET client_min_messages TO WARNING;
+CALL pg_catalog.citus_cleanup_orphaned_resources();
+RESET client_min_messages;
 -- Disable defer shard delete to stop auto cleanup.
 ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1;
 SELECT pg_reload_conf();
@@ -94,7 +98,7 @@ ERROR:  connection to the remote node localhost:xxxxx failed with the following
 
 \c - postgres - :master_port
 CALL pg_catalog.citus_cleanup_orphaned_resources();
-NOTICE:  cleaned up 79 orphaned resources
+NOTICE:  cleaned up 4 orphaned resources
 SELECT operation_id, object_type, object_name, node_group_id, policy_type
 FROM pg_dist_cleanup where operation_id = 777;
  operation_id | object_type | object_name | node_group_id | policy_type
@@ -683,6 +687,6 @@ CONTEXT:  while executing command on localhost:xxxxx
 
 -- Cleanup
 \c - postgres - :master_port
+SET client_min_messages TO WARNING;
 DROP SCHEMA "citus_failure_split_cleanup_schema" CASCADE;
-NOTICE:  drop cascades to table citus_failure_split_cleanup_schema.table_to_split
 -- Cleanup
diff --git a/src/test/regress/sql/failure_split_cleanup.sql b/src/test/regress/sql/failure_split_cleanup.sql
index 17434917a..21ed31885 100644
--- a/src/test/regress/sql/failure_split_cleanup.sql
+++ b/src/test/regress/sql/failure_split_cleanup.sql
@@ -16,6 +16,11 @@ SET citus.shard_count TO 2;
 SET citus.shard_replication_factor TO 1;
 SELECT pg_backend_pid() as pid \gset
 
+-- cleanup any leftovers from previous tests so we get consistent output
+SET client_min_messages TO WARNING;
+CALL pg_catalog.citus_cleanup_orphaned_resources();
+RESET client_min_messages;
+
 -- Disable defer shard delete to stop auto cleanup.
 ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1;
 SELECT pg_reload_conf();
@@ -304,5 +309,7 @@ SELECT create_distributed_table('table_to_split', 'id');
 
 -- Cleanup
 \c - postgres - :master_port
+
+SET client_min_messages TO WARNING;
 DROP SCHEMA "citus_failure_split_cleanup_schema" CASCADE;
 -- Cleanup
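
As an illustrative sketch (not part of the patch itself), the quiet-cleanup
pattern the test now uses can be reproduced in any psql session connected to
the Citus coordinator; the final query is a variant of the inspection query
the test runs above, without the operation_id filter:

```sql
-- Suppress the "cleaned up N orphaned resources" NOTICE, whose count
-- varies from run to run, by raising the message threshold.
SET client_min_messages TO WARNING;
-- Remove any orphaned resources recorded by earlier operations.
CALL pg_catalog.citus_cleanup_orphaned_resources();
RESET client_min_messages;

-- Remaining cleanup records, if any, can be inspected in pg_dist_cleanup.
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup;
```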