From e3c93c303dec623f6e196ca8f2ca1d1a20c51e6c Mon Sep 17 00:00:00 2001
From: Jelte Fennema-Nio
Date: Wed, 1 Nov 2023 17:21:12 +0100
Subject: [PATCH 1/2] Fix flaky citus_non_blocking_split_shard_cleanup (#7311)

Sometimes in CI citus_non_blocking_split_shard_cleanup failed like this:

```diff
--- /__w/citus/citus/src/test/regress/expected/citus_non_blocking_split_shard_cleanup.out.modified	2023-11-01 15:07:14.280551207 +0000
+++ /__w/citus/citus/src/test/regress/results/citus_non_blocking_split_shard_cleanup.out.modified	2023-11-01 15:07:14.292551358 +0000
@@ -106,21 +106,22 @@
 -----------------------------------
 (1 row)
 
 \c - - - :worker_2_port
 SET search_path TO "citus_split_test_schema";
 -- Replication slots should be cleaned up
 SELECT slot_name FROM pg_replication_slots;
  slot_name
 ---------------------------------
-(0 rows)
+ citus_shard_split_slot_19_10_17
+(1 row)
 
 -- Publications should be cleanedup
 SELECT count(*) FROM pg_publication;
  count
```

It's expected that the replication slot sometimes isn't cleaned up yet
if we don't wait until resource cleanup completes. This PR makes the
test wait for resource cleanup to finish before checking.
---
 .../expected/citus_non_blocking_split_shard_cleanup.out    | 6 ++++++
 .../regress/sql/citus_non_blocking_split_shard_cleanup.sql | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/src/test/regress/expected/citus_non_blocking_split_shard_cleanup.out b/src/test/regress/expected/citus_non_blocking_split_shard_cleanup.out
index e2685c2d7..a559ec442 100644
--- a/src/test/regress/expected/citus_non_blocking_split_shard_cleanup.out
+++ b/src/test/regress/expected/citus_non_blocking_split_shard_cleanup.out
@@ -107,6 +107,12 @@ SELECT pg_catalog.citus_split_shard_by_split_points(
 
 (1 row)
 
+SELECT public.wait_for_resource_cleanup();
+ wait_for_resource_cleanup
+---------------------------------------------------------------------
+
+(1 row)
+
 \c - - - :worker_2_port
 SET search_path TO "citus_split_test_schema";
 -- Replication slots should be cleaned up
diff --git a/src/test/regress/sql/citus_non_blocking_split_shard_cleanup.sql b/src/test/regress/sql/citus_non_blocking_split_shard_cleanup.sql
index ba3f95215..480d81b88 100644
--- a/src/test/regress/sql/citus_non_blocking_split_shard_cleanup.sql
+++ b/src/test/regress/sql/citus_non_blocking_split_shard_cleanup.sql
@@ -79,6 +79,8 @@ SELECT pg_catalog.citus_split_shard_by_split_points(
     ARRAY[:worker_2_node, :worker_2_node, :worker_2_node],
     'force_logical');
 
+SELECT public.wait_for_resource_cleanup();
+
 \c - - - :worker_2_port
 SET search_path TO "citus_split_test_schema";
 -- Replication slots should be cleaned up
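For context, `wait_for_resource_cleanup()` is a helper from the Citus
regression-test suite. A minimal sketch of how such a helper can be written,
assuming it simply polls the `pg_dist_cleanup` catalog (where Citus records
deferred cleanup work such as leftover replication slots and publications)
until the background cleanup has drained it; the suite's actual definition
may differ:

```sql
-- Sketch only: poll pg_dist_cleanup until no cleanup records remain,
-- i.e. the maintenance daemon has dropped all leftover resources.
CREATE OR REPLACE FUNCTION public.wait_for_resource_cleanup()
RETURNS void AS $$
DECLARE
    pending int;
BEGIN
    LOOP
        SELECT count(*) INTO pending FROM pg_dist_cleanup;
        EXIT WHEN pending = 0;
        PERFORM pg_sleep(0.1); -- brief back-off before re-checking
    END LOOP;
END;
$$ LANGUAGE plpgsql;
```
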
From 2cf4c0402319a9616e4d0feb4d9273757b3c1eaf Mon Sep 17 00:00:00 2001
From: Onur Tirtir
Date: Thu, 2 Nov 2023 01:59:41 +0300
Subject: [PATCH 2/2] Fix flaky global_cancel.sql test (#7316)

---
 src/test/regress/expected/global_cancel.out | 10 ++++++++--
 src/test/regress/sql/global_cancel.sql      |  6 ++++--
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/test/regress/expected/global_cancel.out b/src/test/regress/expected/global_cancel.out
index 5adeef3c8..e5ce4fbc6 100644
--- a/src/test/regress/expected/global_cancel.out
+++ b/src/test/regress/expected/global_cancel.out
@@ -9,9 +9,14 @@ SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0);
 RESET client_min_messages;
 -- Kill maintenance daemon so it gets restarted and gets a gpid containing our
 -- nodeid
-SELECT pg_terminate_backend(pid)
+SELECT COUNT(pg_terminate_backend(pid)) >= 0
 FROM pg_stat_activity
-WHERE application_name = 'Citus Maintenance Daemon' \gset
+WHERE application_name = 'Citus Maintenance Daemon';
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
+
 -- reconnect to make sure we get a session with the gpid containing our nodeid
 \c - - - -
 CREATE SCHEMA global_cancel;
@@ -77,6 +82,7 @@ ERROR: must be a superuser to terminate superuser process
 SELECT pg_cancel_backend(citus_backend_gpid());
 ERROR: canceling statement due to user request
 \c - postgres - :master_port
+DROP USER global_cancel_user;
 SET client_min_messages TO DEBUG;
 -- 10000000000 is the node id multiplier for global pid
 SELECT pg_cancel_backend(10000000000 * citus_coordinator_nodeid() + 0);
diff --git a/src/test/regress/sql/global_cancel.sql b/src/test/regress/sql/global_cancel.sql
index 848c3b01a..12330baf2 100644
--- a/src/test/regress/sql/global_cancel.sql
+++ b/src/test/regress/sql/global_cancel.sql
@@ -5,9 +5,9 @@ RESET client_min_messages;
 
 -- Kill maintenance daemon so it gets restarted and gets a gpid containing our
 -- nodeid
-SELECT pg_terminate_backend(pid)
+SELECT COUNT(pg_terminate_backend(pid)) >= 0
 FROM pg_stat_activity
-WHERE application_name = 'Citus Maintenance Daemon' \gset
+WHERE application_name = 'Citus Maintenance Daemon';
 
 -- reconnect to make sure we get a session with the gpid containing our nodeid
 \c - - - -
@@ -58,6 +58,8 @@ SELECT pg_cancel_backend(citus_backend_gpid());
 
 \c - postgres - :master_port
 
+DROP USER global_cancel_user;
+
 SET client_min_messages TO DEBUG;
 -- 10000000000 is the node id multiplier for global pid
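The reworked query in this patch is a common regression-test idiom: psql's
`\gset` requires the query to return exactly one row, so the old form emitted
extra warning output whenever zero or several maintenance-daemon backends
matched, making the recorded output flaky. Wrapping the side-effecting call in
an aggregate always yields exactly one deterministic row (`t`), and the added
`DROP USER global_cancel_user;` keeps the role from leaking into later tests.
A minimal sketch of the idiom (the `application_name` filter and the alias are
just illustrative):

```sql
-- One row of stable output whether 0, 1, or many backends match.
-- count(...) still calls pg_terminate_backend() once per matching row,
-- but the result no longer depends on how many rows that is.
SELECT count(pg_terminate_backend(pid)) >= 0 AS terminated
FROM pg_stat_activity
WHERE application_name = 'some_application';  -- hypothetical filter
```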