From 0d83ab57de8c266764fc47810afd115758da5034 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 1 Nov 2023 11:46:01 +0100 Subject: [PATCH 01/22] Fix flaky multi_cluster_management (#7295) One of our most flaky and most annoying tests is multi_cluster_management. It usually fails like this: ```diff SELECT citus_disable_node('localhost', :worker_2_port); citus_disable_node -------------------- (1 row) SELECT public.wait_until_metadata_sync(60000); +WARNING: waiting for metadata sync timed out wait_until_metadata_sync -------------------------- (1 row) ``` This tries to address that by hardening wait_until_metadata_sync. I believe the reason for this warning is that there is a race condition in wait_until_metadata_sync: it's possible for the pre-check to fail, then have the maintenance daemon send a notification, and only then have the backend start to listen. I tried to fix it in two ways: 1. First run LISTEN, and only then do the pre-check. 2. If we time out, check again just to make sure that we did not miss the notification somehow. And don't show a warning if all metadata is synced after the timeout. It's hard to know for sure that this fixes it because the test is not repeatable and I could not reproduce it locally. Let's just hope for the best. --------- Co-authored-by: Onur Tirtir --- src/backend/distributed/test/metadata_sync.c | 48 +++++++++++-------- .../expected/multi_cluster_management.out | 4 +- .../regress/sql/multi_cluster_management.sql | 4 +- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/src/backend/distributed/test/metadata_sync.c b/src/backend/distributed/test/metadata_sync.c index 46d2303d6..8ad4b15f2 100644 --- a/src/backend/distributed/test/metadata_sync.c +++ b/src/backend/distributed/test/metadata_sync.c @@ -90,6 +90,28 @@ activate_node_snapshot(PG_FUNCTION_ARGS) } +/* + * IsMetadataSynced checks the workers to see if all workers with metadata are + * synced. + */ +static bool +IsMetadataSynced(void) +{ + List *workerList = ActivePrimaryNonCoordinatorNodeList(NoLock); + + WorkerNode *workerNode = NULL; + foreach_ptr(workerNode, workerList) + { + if (workerNode->hasMetadata && !workerNode->metadataSynced) + { + return false; + } + } + + return true; +} + + /* * wait_until_metadata_sync waits until the maintenance daemon does a metadata * sync, or times out. */ @@ -99,19 +121,10 @@ wait_until_metadata_sync(PG_FUNCTION_ARGS) { uint32 timeout = PG_GETARG_UINT32(0); - List *workerList = ActivePrimaryNonCoordinatorNodeList(NoLock); - bool waitNotifications = false; - - WorkerNode *workerNode = NULL; - foreach_ptr(workerNode, workerList) - { - /* if already has metadata, no need to do it again */ - if (workerNode->hasMetadata && !workerNode->metadataSynced) - { - waitNotifications = true; - break; - } - } + /* First we start listening. */ + MultiConnection *connection = GetNodeConnection(FORCE_NEW_CONNECTION, + LOCAL_HOST_NAME, PostPortNumber); + ExecuteCriticalRemoteCommand(connection, "LISTEN " METADATA_SYNC_CHANNEL); /* * If all the metadata nodes have already been synced, we should not wait. * the notification and we'd wait unnecessarily here. Worse, the test outputs * might be inconsistent across executions due to the warning.
*/ - if (!waitNotifications) + if (IsMetadataSynced()) { + CloseConnection(connection); PG_RETURN_VOID(); } - MultiConnection *connection = GetNodeConnection(FORCE_NEW_CONNECTION, - LOCAL_HOST_NAME, PostPortNumber); - ExecuteCriticalRemoteCommand(connection, "LISTEN " METADATA_SYNC_CHANNEL); - int waitFlags = WL_SOCKET_READABLE | WL_TIMEOUT | WL_POSTMASTER_DEATH; int waitResult = WaitLatchOrSocket(NULL, waitFlags, PQsocket(connection->pgConn), timeout, 0); @@ -139,7 +149,7 @@ wait_until_metadata_sync(PG_FUNCTION_ARGS) { ClearResults(connection, true); } - else if (waitResult & WL_TIMEOUT) + else if (waitResult & WL_TIMEOUT && !IsMetadataSynced()) { elog(WARNING, "waiting for metadata sync timed out"); } diff --git a/src/test/regress/expected/multi_cluster_management.out b/src/test/regress/expected/multi_cluster_management.out index b92d8d136..3eb549ab5 100644 --- a/src/test/regress/expected/multi_cluster_management.out +++ b/src/test/regress/expected/multi_cluster_management.out @@ -90,7 +90,7 @@ SELECT citus_disable_node('localhost', :worker_2_port); (1 row) -SELECT public.wait_until_metadata_sync(60000); +SELECT public.wait_until_metadata_sync(20000); wait_until_metadata_sync --------------------------------------------------------------------- @@ -812,7 +812,7 @@ SELECT citus_disable_node('localhost', 9999); (1 row) -SELECT public.wait_until_metadata_sync(60000); +SELECT public.wait_until_metadata_sync(20000); wait_until_metadata_sync --------------------------------------------------------------------- diff --git a/src/test/regress/sql/multi_cluster_management.sql b/src/test/regress/sql/multi_cluster_management.sql index ab268939f..86fbd15b6 100644 --- a/src/test/regress/sql/multi_cluster_management.sql +++ b/src/test/regress/sql/multi_cluster_management.sql @@ -39,7 +39,7 @@ SELECT master_get_active_worker_nodes(); SELECT 1 FROM master_add_node('localhost', :worker_2_port); SELECT citus_disable_node('localhost', :worker_2_port); -SELECT public.wait_until_metadata_sync(60000); +SELECT public.wait_until_metadata_sync(20000); SELECT master_get_active_worker_nodes(); -- add some shard placements to the cluster @@ -328,7 +328,7 @@ SELECT 1 FROM master_add_inactive_node('localhost', 9996, groupid => :worker_2_g SELECT master_add_inactive_node('localhost', 9999, groupid => :worker_2_group, nodecluster => 'olap', noderole => 'secondary'); SELECT master_activate_node('localhost', 9999); SELECT citus_disable_node('localhost', 9999); -SELECT public.wait_until_metadata_sync(60000); +SELECT public.wait_until_metadata_sync(20000); SELECT master_remove_node('localhost', 9999); -- check that you can't manually add two primaries to a group From 2bccb5815770b67cf646a7dc5bb9539d5a29c010 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 1 Nov 2023 13:12:20 +0100 Subject: [PATCH 02/22] Run github actions on main (#7292) We want the nice looking green checkmark on our main branch too. This PR includes running on pushes to release branches too, but that won't come into effect until we have release branches with this workflow file. 
--- .github/workflows/build_and_test.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index d285e4f50..d900fe867 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -10,6 +10,10 @@ on: required: false default: false type: boolean + push: + branches: + - "main" + - "release-*" pull_request: types: [opened, reopened,synchronize] jobs: From c83c5567028d2035651c39f737ac5a944a70db16 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 1 Nov 2023 14:44:45 +0100 Subject: [PATCH 03/22] Fix flaky isolation_master_update_node (#7303) Sometimes in CI isolation_master_update_node fails like this: ```diff ------------------ (1 row) step s2-abort: ABORT; step s1-abort: ABORT; FATAL: terminating connection due to administrator command FATAL: terminating connection due to administrator command SSL connection has been closed unexpectedly +server closed the connection unexpectedly master_remove_node ------------------ ``` This just seesm like a random error line. The only way to reasonably fix this is by adding an extra output file. So that's what this PR does. --- .../isolation_master_update_node_1.out | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 src/test/regress/expected/isolation_master_update_node_1.out diff --git a/src/test/regress/expected/isolation_master_update_node_1.out b/src/test/regress/expected/isolation_master_update_node_1.out new file mode 100644 index 000000000..474956629 --- /dev/null +++ b/src/test/regress/expected/isolation_master_update_node_1.out @@ -0,0 +1,68 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-begin s1-insert s2-begin s2-update-node-1 s1-abort s2-abort +create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +step s1-begin: BEGIN; +step s1-insert: INSERT INTO t1 SELECT generate_series(1, 100); +step s2-begin: BEGIN; +step s2-update-node-1: + -- update a specific node by address + SELECT master_update_node(nodeid, 'localhost', nodeport + 10) + FROM pg_dist_node + WHERE nodename = 'localhost' + AND nodeport = 57637; + +step s1-abort: ABORT; +step s2-update-node-1: <... completed> +master_update_node +--------------------------------------------------------------------- + +(1 row) + +step s2-abort: ABORT; +master_remove_node +--------------------------------------------------------------------- + + +(2 rows) + + +starting permutation: s1-begin s1-insert s2-begin s2-update-node-1-force s2-abort s1-abort +create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +step s1-begin: BEGIN; +step s1-insert: INSERT INTO t1 SELECT generate_series(1, 100); +step s2-begin: BEGIN; +step s2-update-node-1-force: + -- update a specific node by address (force) + SELECT master_update_node(nodeid, 'localhost', nodeport + 10, force => true, lock_cooldown => 100) + FROM pg_dist_node + WHERE nodename = 'localhost' + AND nodeport = 57637; + +step s2-update-node-1-force: <... 
completed> +master_update_node +--------------------------------------------------------------------- + +(1 row) + +step s2-abort: ABORT; +step s1-abort: ABORT; +FATAL: terminating connection due to administrator command +FATAL: terminating connection due to administrator command +SSL connection has been closed unexpectedly +server closed the connection unexpectedly + +master_remove_node +--------------------------------------------------------------------- + + +(2 rows) +
From c9f2fc892d4ce01a4bc23beb508e2ff03f08a774 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 1 Nov 2023 15:08:51 +0100 Subject: [PATCH 04/22] Fix flaky failure_split_cleanup (#7299) Sometimes failure_split_cleanup failed in CI like this: ```diff ERROR: server closed the connection unexpectedly CONTEXT: while executing command on localhost:9060 SELECT operation_id, object_type, object_name, node_group_id, policy_type FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; operation_id | object_type | object_name | node_group_id | policy_type --------------+-------------+-----------------------------------------------------------+---------------+------------- 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 - 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1 777 | 4 | citus_shard_split_publication_1_10_777 | 2 | 0 (5 rows) -- we need to allow connection so that we can connect to proxy ``` Source: https://github.com/citusdata/citus/actions/runs/6717642291/attempts/1#summary-18256014949 It's the common problem where we're missing a column in the ORDER BY clause. This fixes that by adding node_group_id to the ORDER BY of the query in question.
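For context on the failure mode: when an ORDER BY clause does not uniquely determine the row order, PostgreSQL is free to return tied rows in any order, so the two entries that share an object_name can legitimately swap places between runs. A minimal, purely illustrative sketch of the idea (the table and values here are made up, not the real pg_dist_cleanup contents):

```sql
-- Rows that tie on the sort key may come back in either order between runs.
CREATE TABLE cleanup_demo (object_name text, node_group_id int);
INSERT INTO cleanup_demo VALUES
    ('table_to_split_8981002', 2),
    ('table_to_split_8981002', 1);

-- Flaky: object_name alone does not decide which of the two rows comes first.
SELECT * FROM cleanup_demo ORDER BY object_name;

-- Stable: the extra column breaks the tie, which is what this patch does.
SELECT * FROM cleanup_demo ORDER BY object_name, node_group_id;
```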
--- .../regress/expected/failure_split_cleanup.out | 18 +++++++++--------- src/test/regress/sql/failure_split_cleanup.sql | 16 ++++++++-------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/test/regress/expected/failure_split_cleanup.out b/src/test/regress/expected/failure_split_cleanup.out index fe646587c..d81335325 100644 --- a/src/test/regress/expected/failure_split_cleanup.out +++ b/src/test/regress/expected/failure_split_cleanup.out @@ -277,12 +277,12 @@ CONTEXT: while executing command on localhost:xxxxx ERROR: connection not open CONTEXT: while executing command on localhost:xxxxx SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 - 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1 777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0 777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0 @@ -336,7 +336,7 @@ CONTEXT: while executing command on localhost:xxxxx (1 row) SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- (0 rows) @@ -388,7 +388,7 @@ CONTEXT: while executing command on localhost:xxxxx ERROR: connection not open CONTEXT: while executing command on localhost:xxxxx SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 @@ -455,7 +455,7 @@ CONTEXT: while executing command on localhost:xxxxx (1 row) SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- (0 rows) @@ -507,7 +507,7 @@ CONTEXT: while executing command on localhost:xxxxx ERROR: connection not open CONTEXT: while executing command on localhost:xxxxx SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- 777 
| 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 @@ -574,7 +574,7 @@ CONTEXT: while executing command on localhost:xxxxx (1 row) SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- (0 rows) @@ -634,7 +634,7 @@ WARNING: connection to the remote node localhost:xxxxx failed with the followin ERROR: connection not open CONTEXT: while executing command on localhost:xxxxx SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 @@ -701,7 +701,7 @@ CONTEXT: while executing command on localhost:xxxxx (1 row) SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- (0 rows) diff --git a/src/test/regress/sql/failure_split_cleanup.sql b/src/test/regress/sql/failure_split_cleanup.sql index 1b85d3d17..9dfbb245e 100644 --- a/src/test/regress/sql/failure_split_cleanup.sql +++ b/src/test/regress/sql/failure_split_cleanup.sql @@ -136,7 +136,7 @@ SELECT create_distributed_table('table_to_split', 'id'); ARRAY[:worker_1_node, :worker_2_node], 'force_logical'); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; -- we need to allow connection so that we can connect to proxy SELECT citus.mitmproxy('conn.allow()'); @@ -155,7 +155,7 @@ SELECT create_distributed_table('table_to_split', 'id'); \c - postgres - :master_port SELECT public.wait_for_resource_cleanup(); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; \c - - - :worker_2_proxy_port SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; @@ -182,7 +182,7 @@ SELECT create_distributed_table('table_to_split', 'id'); ARRAY[:worker_1_node, :worker_2_node], 'force_logical'); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; -- we need to allow connection so that we can connect to proxy SELECT citus.mitmproxy('conn.allow()'); @@ -201,7 +201,7 @@ SELECT create_distributed_table('table_to_split', 'id'); \c - postgres - :master_port SELECT public.wait_for_resource_cleanup(); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 
ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; \c - - - :worker_2_proxy_port SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; @@ -228,7 +228,7 @@ SELECT create_distributed_table('table_to_split', 'id'); ARRAY[:worker_1_node, :worker_2_node], 'force_logical'); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; -- we need to allow connection so that we can connect to proxy SELECT citus.mitmproxy('conn.allow()'); @@ -247,7 +247,7 @@ SELECT create_distributed_table('table_to_split', 'id'); \c - postgres - :master_port SELECT public.wait_for_resource_cleanup(); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; \c - - - :worker_2_proxy_port SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; @@ -275,7 +275,7 @@ SELECT create_distributed_table('table_to_split', 'id'); 'force_logical'); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; -- we need to allow connection so that we can connect to proxy SELECT citus.mitmproxy('conn.allow()'); @@ -295,7 +295,7 @@ SELECT create_distributed_table('table_to_split', 'id'); \c - postgres - :master_port SELECT public.wait_for_resource_cleanup(); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; \c - - - :worker_2_proxy_port SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;
From 5903196020ed3f444d70d76dca889bab35c756c5 Mon Sep 17 00:00:00 2001 From: Gürkan İndibay Date: Wed, 1 Nov 2023 18:52:22 +0300 Subject: [PATCH 05/22] Removes use-base-schedule flag from CI (#7301) Normally, tests that are written to be independent of other tests can and should use the minimal test schedule. However, our test settings were using the base schedule, which may cause unnecessary dependencies and thus unrelated errors that developers don't see in their local environment. With this change, the default setting will be minimal, so that tests are free of unnecessary dependencies.
--- .github/workflows/build_and_test.yml | 2 +- .github/workflows/flaky_test_debugging.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index d900fe867..804cd0bb9 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -505,7 +505,7 @@ jobs: for test in "${tests_array[@]}" do test_name=$(echo "$test" | sed -r "s/.+\/(.+)\..+/\1/") - gosu circleci src/test/regress/citus_tests/run_test.py $test_name --repeat ${{ env.runs }} --use-base-schedule --use-whole-schedule-line + gosu circleci src/test/regress/citus_tests/run_test.py $test_name --repeat ${{ env.runs }} --use-whole-schedule-line done shell: bash - uses: "./.github/actions/save_logs_and_results" diff --git a/.github/workflows/flaky_test_debugging.yml b/.github/workflows/flaky_test_debugging.yml index a666c1cd5..a744edc3b 100644 --- a/.github/workflows/flaky_test_debugging.yml +++ b/.github/workflows/flaky_test_debugging.yml @@ -71,7 +71,7 @@ jobs: - uses: "./.github/actions/setup_extension" - name: Run minimal tests run: |- - gosu circleci src/test/regress/citus_tests/run_test.py ${{ env.test }} --repeat ${{ env.runs }} --use-base-schedule --use-whole-schedule-line + gosu circleci src/test/regress/citus_tests/run_test.py ${{ env.test }} --repeat ${{ env.runs }} --use-whole-schedule-line shell: bash - uses: "./.github/actions/save_logs_and_results" if: always() From e3c93c303dec623f6e196ca8f2ca1d1a20c51e6c Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 1 Nov 2023 17:21:12 +0100 Subject: [PATCH 06/22] Fix flaky citus_non_blocking_split_shard_cleanup (#7311) Sometimes in CI citus_non_blocking_split_shard_cleanup failed like this: ```diff --- /__w/citus/citus/src/test/regress/expected/citus_non_blocking_split_shard_cleanup.out.modified 2023-11-01 15:07:14.280551207 +0000 +++ /__w/citus/citus/src/test/regress/results/citus_non_blocking_split_shard_cleanup.out.modified 2023-11-01 15:07:14.292551358 +0000 @@ -106,21 +106,22 @@ ----------------------------------- (1 row) \c - - - :worker_2_port SET search_path TO "citus_split_test_schema"; -- Replication slots should be cleaned up SELECT slot_name FROM pg_replication_slots; slot_name --------------------------------- -(0 rows) + citus_shard_split_slot_19_10_17 +(1 row) -- Publications should be cleanedup SELECT count(*) FROM pg_publication; count ``` It's expected that the replication slot is sometimes not cleaned up if we don't wait until resource cleanup completes. This PR starts doing that here. 
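For background: the leftovers of a shard split (replication slots, publications) are dropped asynchronously by the maintenance daemon based on records in pg_dist_cleanup, so the test has to block until that queue is drained before it inspects pg_replication_slots. The test relies on the existing public.wait_for_resource_cleanup() helper for this; purely as an illustration of the concept (not the helper's actual definition), such a wait could be sketched as:

```sql
DO $$
DECLARE
    attempts int := 0;
BEGIN
    -- Poll until no pending cleanup records remain, giving up after ~10 seconds.
    WHILE EXISTS (SELECT 1 FROM pg_dist_cleanup)
    LOOP
        attempts := attempts + 1;
        IF attempts > 100 THEN
            RAISE EXCEPTION 'waiting for resource cleanup timed out';
        END IF;
        PERFORM pg_sleep(0.1);
    END LOOP;
END;
$$;
```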
--- .../expected/citus_non_blocking_split_shard_cleanup.out | 6 ++++++ .../regress/sql/citus_non_blocking_split_shard_cleanup.sql | 2 ++ 2 files changed, 8 insertions(+) diff --git a/src/test/regress/expected/citus_non_blocking_split_shard_cleanup.out b/src/test/regress/expected/citus_non_blocking_split_shard_cleanup.out index e2685c2d7..a559ec442 100644 --- a/src/test/regress/expected/citus_non_blocking_split_shard_cleanup.out +++ b/src/test/regress/expected/citus_non_blocking_split_shard_cleanup.out @@ -107,6 +107,12 @@ SELECT pg_catalog.citus_split_shard_by_split_points( (1 row) +SELECT public.wait_for_resource_cleanup(); + wait_for_resource_cleanup +--------------------------------------------------------------------- + +(1 row) + \c - - - :worker_2_port SET search_path TO "citus_split_test_schema"; -- Replication slots should be cleaned up diff --git a/src/test/regress/sql/citus_non_blocking_split_shard_cleanup.sql b/src/test/regress/sql/citus_non_blocking_split_shard_cleanup.sql index ba3f95215..480d81b88 100644 --- a/src/test/regress/sql/citus_non_blocking_split_shard_cleanup.sql +++ b/src/test/regress/sql/citus_non_blocking_split_shard_cleanup.sql @@ -79,6 +79,8 @@ SELECT pg_catalog.citus_split_shard_by_split_points( ARRAY[:worker_2_node, :worker_2_node, :worker_2_node], 'force_logical'); +SELECT public.wait_for_resource_cleanup(); + \c - - - :worker_2_port SET search_path TO "citus_split_test_schema"; -- Replication slots should be cleaned up From 2cf4c0402319a9616e4d0feb4d9273757b3c1eaf Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Thu, 2 Nov 2023 01:59:41 +0300 Subject: [PATCH 07/22] Fix flaky global_cancel.sql test (#7316) --- src/test/regress/expected/global_cancel.out | 10 ++++++++-- src/test/regress/sql/global_cancel.sql | 6 ++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/test/regress/expected/global_cancel.out b/src/test/regress/expected/global_cancel.out index 5adeef3c8..e5ce4fbc6 100644 --- a/src/test/regress/expected/global_cancel.out +++ b/src/test/regress/expected/global_cancel.out @@ -9,9 +9,14 @@ SELECT 1 FROM master_add_node('localhost', :master_port, groupid => 0); RESET client_min_messages; -- Kill maintenance daemon so it gets restarted and gets a gpid containing our -- nodeid -SELECT pg_terminate_backend(pid) +SELECT COUNT(pg_terminate_backend(pid)) >= 0 FROM pg_stat_activity -WHERE application_name = 'Citus Maintenance Daemon' \gset +WHERE application_name = 'Citus Maintenance Daemon'; + ?column? 
+--------------------------------------------------------------------- + t +(1 row) + -- reconnect to make sure we get a session with the gpid containing our nodeid \c - - - - CREATE SCHEMA global_cancel; @@ -77,6 +82,7 @@ ERROR: must be a superuser to terminate superuser process SELECT pg_cancel_backend(citus_backend_gpid()); ERROR: canceling statement due to user request \c - postgres - :master_port +DROP USER global_cancel_user; SET client_min_messages TO DEBUG; -- 10000000000 is the node id multiplier for global pid SELECT pg_cancel_backend(10000000000 * citus_coordinator_nodeid() + 0); diff --git a/src/test/regress/sql/global_cancel.sql b/src/test/regress/sql/global_cancel.sql index 848c3b01a..12330baf2 100644 --- a/src/test/regress/sql/global_cancel.sql +++ b/src/test/regress/sql/global_cancel.sql @@ -5,9 +5,9 @@ RESET client_min_messages; -- Kill maintenance daemon so it gets restarted and gets a gpid containing our -- nodeid -SELECT pg_terminate_backend(pid) +SELECT COUNT(pg_terminate_backend(pid)) >= 0 FROM pg_stat_activity -WHERE application_name = 'Citus Maintenance Daemon' \gset +WHERE application_name = 'Citus Maintenance Daemon'; -- reconnect to make sure we get a session with the gpid containing our nodeid \c - - - - @@ -58,6 +58,8 @@ SELECT pg_cancel_backend(citus_backend_gpid()); \c - postgres - :master_port +DROP USER global_cancel_user; + SET client_min_messages TO DEBUG; -- 10000000000 is the node id multiplier for global pid
From ea5551689ef90b3a9a0c51349c22fdeaae34a20a Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Thu, 2 Nov 2023 09:23:34 +0100 Subject: [PATCH 08/22] Prepare github actions pipelines for merge queue (#7315) GitHub has a built-in merge queue. I think it would be good to try this out, to speed up merging PRs when multiple people want to merge at the same time. This PR does not enable it yet, but it starts triggering GitHub Actions also for the `merge_group` event. This is a requirement for trying the merge queue out. Announcement: https://github.blog/2023-07-12-github-merge-queue-is-generally-available/ Docs: https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/configuring-pull-request-merges/managing-a-merge-queue --- .github/workflows/build_and_test.yml | 1 + .github/workflows/packaging-test-pipelines.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 804cd0bb9..f80e42f6d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -16,6 +16,7 @@ on: - "release-*" pull_request: types: [opened, reopened,synchronize] + merge_group: jobs: # Since GHA does not interpolate env varibles in matrix context, we need to # define them in a separate job and use them in other jobs.
diff --git a/.github/workflows/packaging-test-pipelines.yml b/.github/workflows/packaging-test-pipelines.yml index 8690fce1f..51bd82503 100644 --- a/.github/workflows/packaging-test-pipelines.yml +++ b/.github/workflows/packaging-test-pipelines.yml @@ -3,6 +3,7 @@ name: Build tests in packaging images on: pull_request: types: [opened, reopened,synchronize] + merge_group: workflow_dispatch: From a6e86884f6aa526476b6a0586848c801db29cc66 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Thu, 2 Nov 2023 10:39:05 +0100 Subject: [PATCH 09/22] Fix flaky isolation_metadata_sync_deadlock (#7312) Sometimes isolation_metadata_sync_deadlock fails in CI like this: ```diff diff -dU10 -w /__w/citus/citus/src/test/regress/expected/isolation_metadata_sync_deadlock.out /__w/citus/citus/src/test/regress/results/isolation_metadata_sync_deadlock.out --- /__w/citus/citus/src/test/regress/expected/isolation_metadata_sync_deadlock.out.modified 2023-11-01 16:03:15.090199229 +0000 +++ /__w/citus/citus/src/test/regress/results/isolation_metadata_sync_deadlock.out.modified 2023-11-01 16:03:15.098199312 +0000 @@ -110,10 +110,14 @@ t (1 row) step s2-stop-connection: SELECT stop_session_level_connection_to_node(); stop_session_level_connection_to_node ------------------------------------- (1 row) + +teardown failed: ERROR: localhost:57638 is a metadata node, but is out of sync +HINT: If the node is up, wait until metadata gets synced to it and try again. +CONTEXT: SQL statement "SELECT master_remove_distributed_table_metadata_from_workers(v_obj.objid, v_obj.schema_name, v_obj.object_name)" ``` Source: https://github.com/citusdata/citus/actions/runs/6721938040/attempts/1#summary-18268946448 To fix this we now wait for the metadata to be fully synced to all nodes at the start of the teardown steps. --- src/test/regress/spec/isolation_metadata_sync_deadlock.spec | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/regress/spec/isolation_metadata_sync_deadlock.spec b/src/test/regress/spec/isolation_metadata_sync_deadlock.spec index 67c20a2b2..411faf889 100644 --- a/src/test/regress/spec/isolation_metadata_sync_deadlock.spec +++ b/src/test/regress/spec/isolation_metadata_sync_deadlock.spec @@ -22,6 +22,7 @@ setup teardown { + SELECT wait_until_metadata_sync(); DROP FUNCTION trigger_metadata_sync(); DROP TABLE deadlock_detection_test; DROP TABLE t2; From 184c8fc1eeeabd1e5de40766e463caff5b142dcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCrkan=20=C4=B0ndibay?= Date: Thu, 2 Nov 2023 12:59:34 +0300 Subject: [PATCH 10/22] Enriches statement propagation document (#7267) Co-authored-by: Onur Tirtir Co-authored-by: Hanefi Onaldi Co-authored-by: Jelte Fennema-Nio --- src/backend/distributed/README.md | 64 ++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/src/backend/distributed/README.md b/src/backend/distributed/README.md index 7c4f43add..225b1f962 100644 --- a/src/backend/distributed/README.md +++ b/src/backend/distributed/README.md @@ -1723,11 +1723,11 @@ Merge command the same principles as INSERT .. SELECT processing. However, due t # DDL -DDL commands are primarily handled via the ProcessUtility hook, which gets the parse tree of the DDL command. For supported DDL commands, we always follow the same sequence of steps: +DDL commands are primarily handled via the citus_ProcessUtility hook, which gets the parse tree of the DDL command. For supported DDL commands, we always follow the same sequence of steps: 1. 
Qualify the table names in the parse tree (simplifies deparsing, avoids sensitivity to search_path changes) 2. Pre-process logic -3. Call original ProcessUtility to execute the command on the local shell table +3. Call original previous ProcessUtility to execute the command on the local shell table 4. Post-process logic 5. Execute command on all other nodes 6. Execute command on shards (in case of table DDL) @@ -1749,6 +1749,66 @@ The reason for handling dependencies and deparsing in post-process step is that Not all table DDL is currently deparsed. In that case, the original command sent by the client is used. That is a shortcoming in our DDL logic that causes user-facing issues and should be addressed. We do not directly construct a separate DDL command for each shard. Instead, we call the `worker_apply_shard_ddl_command(shardid bigint, ddl_command text)` function which parses the DDL command, replaces the table names with shard names in the parse tree according to the shard ID, and then executes the command. That also has some shortcomings, because we cannot support more complex DDL commands in this manner (e.g. adding multiple foreign keys). Ideally, all DDL would be deparsed, and for table DDL the deparsed query string would have shard names, similar to regular queries. +`markDistributed` is used to indicate whether we add a record to `pg_dist_object` to mark the object as "distributed". + +## Defining a new DDL command + +All commands that are propagated by Citus should be defined in DistributeObjectOps struct. Below is a sample DistributeObjectOps for ALTER DATABASE command that is defined in [distribute_object_ops.c](commands/distribute_object_ops.c) file. + +```c +static DistributeObjectOps Database_Alter = { + .deparse = DeparseAlterDatabaseStmt, + .qualify = NULL, + .preprocess = PreprocessAlterDatabaseStmt, + .postprocess = NULL, + .objectType = OBJECT_DATABASE, + .operationType = DIST_OPS_ALTER, + .address = NULL, + .markDistributed = false, +}; +``` + +Each field in the struct is documented in the comments within the `DistributeObjectOps`. When defining a new DDL command, follow these guidelines: + +- **Returning tasks for `preprocess` and `postprocess`**: Ensure that either `preprocess` or `postprocess` returns a list of "DDLJob"s. If both functions return non-empty lists, then you would get an assertion failure. + +- **Generic `preprocess` and `postprocess` methods**: The generic methods, `PreprocessAlterDistributedObjectStmt` and `PostprocessAlterDistributedObjectStmt`, serve as generic pre and post methods utilized for various statements. Both of these methods find application in distributed object operations. + + - The `PreprocessAlterDistributedObjectStmt` method carries out the following operations: + - Performs a qualification operation. + - Deparses the statement and generates a task list. + + - As for the `PostprocessAlterDistributedObjectStmt` method, it: + - Invokes the `EnsureAllObjectDependenciesExistOnAllNodes` function to propagate missing dependencies, both on the coordinator and the worker. + + - Before defining new `preprocess` or `postprocess` methods, it is advisable to assess whether the generic methods can be employed in your specific case. + + +- **`deparse`**: When propagating the command to worker nodes, make sure to define `deparse`. This is necessary because it generates a query string for each worker node. + +- **`markDistributed`**: Set this flag to true if you want to add a record to the `pg_dist_object` table. 
This is particularly important for `CREATE` statements when introducing a new object to the system. + +- **`address`**: If `markDistributed` is set to true, you must define the `address`. Failure to do so will result in a runtime error. The `address` is required to identify the fields that will be stored in the `pg_dist_object` table. + +- **`markDistributed` usage in `DROP` Statements**: Please note that `markDistributed` does not apply to `DROP` statements. For `DROP` statements, instead you need to call `UnmarkObjectDistributed()` for the object either in `preprocess` or `postprocess`. Otherwise, state records in ``pg_dist_object`` table will cause errors in UDF calls such as ``citus_add_node()``, which will try to copy the non-existent db object. + +- **`qualify`**: The `qualify` function is used to qualify the objects based on their schemas in the parse tree. It is employed to prevent sensitivity to changes in the `search_path` on worker nodes. Note that it is not mandatory to define this function for all DDL commands. It is only required for commands that involve objects that are bound to schemas, such as; tables, types, functions and so on. + +After defining the `DistributeObjectOps` structure, this structure should be implemented in the `GetDistributeObjectOps()` function as shown below: + +```c +// Example implementation in C code +const DistributeObjectOps * +GetDistributeObjectOps(Node *node) +{ + switch (nodeTag(node)) + { + case T_AlterDatabaseStmt: + { + return &Database_Alter; + } +... +``` ## Object & dependency propagation From 9867c5b949d5a2b9dd7183a09df722336cc73db6 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Thu, 2 Nov 2023 14:02:34 +0300 Subject: [PATCH 11/22] Fix flaky multi_mx_node_metadata.sql test (#7317) Fixes the flaky test that results in following diff: ```diff --- /__w/citus/citus/src/test/regress/expected/multi_mx_node_metadata.out.modified 2023-11-01 14:22:12.890476575 +0000 +++ /__w/citus/citus/src/test/regress/results/multi_mx_node_metadata.out.modified 2023-11-01 14:22:12.914476657 +0000 @@ -840,24 +840,26 @@ (1 row) \c :datname - - :master_port SELECT datname FROM pg_stat_activity WHERE application_name LIKE 'Citus Met%'; datname ------------ db_to_drop (1 row) DROP DATABASE db_to_drop; +ERROR: database "db_to_drop" is being accessed by other users SELECT datname FROM pg_stat_activity WHERE application_name LIKE 'Citus Met%'; datname ------------ -(0 rows) + db_to_drop +(1 row) -- cleanup DROP SEQUENCE sequence CASCADE; NOTICE: drop cascades to default value for column a of table reference_table ``` --- src/test/regress/citus_tests/run_test.py | 8 +++++++ .../expected/multi_mx_node_metadata.out | 21 ++++++++++++++++--- .../regress/sql/multi_mx_node_metadata.sql | 21 ++++++++++++++++--- 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/src/test/regress/citus_tests/run_test.py b/src/test/regress/citus_tests/run_test.py index b28341e5c..a3bdf368d 100755 --- a/src/test/regress/citus_tests/run_test.py +++ b/src/test/regress/citus_tests/run_test.py @@ -176,6 +176,14 @@ DEPS = { "grant_on_schema_propagation": TestDeps("minimal_schedule"), "propagate_extension_commands": TestDeps("minimal_schedule"), "multi_size_queries": TestDeps("base_schedule", ["multi_copy"]), + "multi_mx_node_metadata": TestDeps( + None, + [ + "multi_extension", + "multi_test_helpers", + "multi_test_helpers_superuser", + ], + ), } diff --git a/src/test/regress/expected/multi_mx_node_metadata.out b/src/test/regress/expected/multi_mx_node_metadata.out index 
707dcc472..6a152b515 100644 --- a/src/test/regress/expected/multi_mx_node_metadata.out +++ b/src/test/regress/expected/multi_mx_node_metadata.out @@ -9,7 +9,7 @@ SET citus.shard_count TO 8; SET citus.shard_replication_factor TO 1; \set VERBOSITY terse -- Simulates a readonly node by setting default_transaction_read_only. -CREATE FUNCTION mark_node_readonly(hostname TEXT, port INTEGER, isreadonly BOOLEAN) +CREATE OR REPLACE FUNCTION mark_node_readonly(hostname TEXT, port INTEGER, isreadonly BOOLEAN) RETURNS TEXT LANGUAGE sql AS $$ @@ -27,7 +27,7 @@ CREATE OR REPLACE FUNCTION raise_error_in_metadata_sync() RETURNS void LANGUAGE C STRICT AS 'citus'; -CREATE PROCEDURE wait_until_process_count(appname text, target_count int) AS $$ +CREATE OR REPLACE PROCEDURE wait_until_process_count(appname text, target_count int) AS $$ declare counter integer := -1; begin @@ -846,7 +846,22 @@ SELECT datname FROM pg_stat_activity WHERE application_name LIKE 'Citus Met%'; db_to_drop (1 row) -DROP DATABASE db_to_drop; +DO $$ +DECLARE + i int := 0; +BEGIN + WHILE NOT (SELECT bool_and(success) from run_command_on_all_nodes('DROP DATABASE IF EXISTS db_to_drop')) + LOOP + BEGIN + i := i + 1; + IF i > 5 THEN + RAISE EXCEPTION 'DROP DATABASE timed out'; + END IF; + PERFORM pg_sleep(1); + END; + END LOOP; +END; +$$; SELECT datname FROM pg_stat_activity WHERE application_name LIKE 'Citus Met%'; datname --------------------------------------------------------------------- diff --git a/src/test/regress/sql/multi_mx_node_metadata.sql b/src/test/regress/sql/multi_mx_node_metadata.sql index 45b4edae1..e0d765a20 100644 --- a/src/test/regress/sql/multi_mx_node_metadata.sql +++ b/src/test/regress/sql/multi_mx_node_metadata.sql @@ -14,7 +14,7 @@ SET citus.shard_replication_factor TO 1; \set VERBOSITY terse -- Simulates a readonly node by setting default_transaction_read_only. -CREATE FUNCTION mark_node_readonly(hostname TEXT, port INTEGER, isreadonly BOOLEAN) +CREATE OR REPLACE FUNCTION mark_node_readonly(hostname TEXT, port INTEGER, isreadonly BOOLEAN) RETURNS TEXT LANGUAGE sql AS $$ @@ -35,7 +35,7 @@ CREATE OR REPLACE FUNCTION raise_error_in_metadata_sync() LANGUAGE C STRICT AS 'citus'; -CREATE PROCEDURE wait_until_process_count(appname text, target_count int) AS $$ +CREATE OR REPLACE PROCEDURE wait_until_process_count(appname text, target_count int) AS $$ declare counter integer := -1; begin @@ -378,7 +378,22 @@ SELECT trigger_metadata_sync(); SELECT datname FROM pg_stat_activity WHERE application_name LIKE 'Citus Met%'; -DROP DATABASE db_to_drop; +DO $$ +DECLARE + i int := 0; +BEGIN + WHILE NOT (SELECT bool_and(success) from run_command_on_all_nodes('DROP DATABASE IF EXISTS db_to_drop')) + LOOP + BEGIN + i := i + 1; + IF i > 5 THEN + RAISE EXCEPTION 'DROP DATABASE timed out'; + END IF; + PERFORM pg_sleep(1); + END; + END LOOP; +END; +$$; SELECT datname FROM pg_stat_activity WHERE application_name LIKE 'Citus Met%'; From 6fed82609c432ffc8d66a3b5967a66dc91d50439 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Thu, 2 Nov 2023 13:13:29 +0100 Subject: [PATCH 12/22] Do not download all artifacts for flaky test detection (#7320) This is causing 404 failures due to a race condition: https://github.com/actions/toolkit/issues/1235 It also makes the tests take unnecessarily long. This was tested by changing a test file and seeing that the flaky test detection was still working. 
--- .github/workflows/build_and_test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index f80e42f6d..e938e3904 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -497,7 +497,6 @@ jobs: matrix: ${{ fromJson(needs.prepare_parallelization_matrix_32.outputs.json) }} steps: - uses: actions/checkout@v3.5.0 - - uses: actions/download-artifact@v3.0.1 - uses: "./.github/actions/setup_extension" - name: Run minimal tests run: |- From 5a48a1602e6a42b1f6261c50c726d02339f153f0 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Thu, 2 Nov 2023 13:15:02 +0100 Subject: [PATCH 13/22] Debug flaky logical_replication test (#7309) Sometimes in CI our logical_replication test fails like this: ```diff +++ /__w/citus/citus/src/test/regress/results/logical_replication.out.modified 2023-11-01 14:15:08.562758546 +0000 @@ -40,21 +40,21 @@ SELECT count(*) from pg_publication; count ------- 0 (1 row) SELECT count(*) from pg_replication_slots; count ------- - 0 + 1 (1 row) SELECT count(*) FROM dist; count ------- ``` It's hard to understand what is going on here, just based on the wrong number. So this PR changes the test to show the name of the subscription, publication and replication slot to make finding the cause easier. In passing this also fixes another flaky test in the same file that our flaky test detection picked up. This is done by waiting for resource cleanup after the shard move. --- .../regress/expected/logical_replication.out | 113 +++++++++--------- src/test/regress/sql/logical_replication.sql | 32 ++--- 2 files changed, 72 insertions(+), 73 deletions(-) diff --git a/src/test/regress/expected/logical_replication.out b/src/test/regress/expected/logical_replication.out index 8a3e96da9..b5a36125a 100644 --- a/src/test/regress/expected/logical_replication.out +++ b/src/test/regress/expected/logical_replication.out @@ -32,23 +32,21 @@ CREATE SUBSCRIPTION citus_shard_move_subscription_:postgres_oid PUBLICATION citus_shard_move_publication_:postgres_oid WITH (enabled=false, slot_name=citus_shard_move_slot_:postgres_oid); NOTICE: created replication slot "citus_shard_move_slot_10" on publisher -SELECT count(*) from pg_subscription; - count +SELECT subname from pg_subscription; + subname --------------------------------------------------------------------- - 1 + citus_shard_move_subscription_10 (1 row) -SELECT count(*) from pg_publication; - count +SELECT pubname from pg_publication; + pubname --------------------------------------------------------------------- - 0 -(1 row) +(0 rows) -SELECT count(*) from pg_replication_slots; - count +SELECT slot_name from pg_replication_slots; + slot_name --------------------------------------------------------------------- - 0 -(1 row) +(0 rows) SELECT count(*) FROM dist; count @@ -58,22 +56,21 @@ SELECT count(*) FROM dist; \c - - - :worker_1_port SET search_path TO logical_replication; -SELECT count(*) from pg_subscription; - count +SELECT subname from pg_subscription; + subname --------------------------------------------------------------------- - 0 +(0 rows) + +SELECT pubname from pg_publication; + pubname +--------------------------------------------------------------------- + citus_shard_move_publication_10 (1 row) -SELECT count(*) from pg_publication; - count +SELECT slot_name from pg_replication_slots; + slot_name --------------------------------------------------------------------- - 1 -(1 row) - -SELECT count(*) from 
pg_replication_slots; - count ---------------------------------------------------------------------- - 1 + citus_shard_move_slot_10 (1 row) SELECT count(*) FROM dist; @@ -90,25 +87,29 @@ select citus_move_shard_placement(6830002, 'localhost', :worker_1_port, 'localho (1 row) +SELECT public.wait_for_resource_cleanup(); + wait_for_resource_cleanup +--------------------------------------------------------------------- + +(1 row) + -- the subscription is still there, as there is no cleanup record for it -- we have created it manually -SELECT count(*) from pg_subscription; - count +SELECT subname from pg_subscription; + subname --------------------------------------------------------------------- - 1 + citus_shard_move_subscription_10 (1 row) -SELECT count(*) from pg_publication; - count +SELECT pubname from pg_publication; + pubname --------------------------------------------------------------------- - 0 -(1 row) +(0 rows) -SELECT count(*) from pg_replication_slots; - count +SELECT slot_name from pg_replication_slots; + slot_name --------------------------------------------------------------------- - 0 -(1 row) +(0 rows) SELECT count(*) from dist; count @@ -120,22 +121,21 @@ SELECT count(*) from dist; SET search_path TO logical_replication; -- the publication and repslot are still there, as there are no cleanup records for them -- we have created them manually -SELECT count(*) from pg_subscription; - count +SELECT subname from pg_subscription; + subname --------------------------------------------------------------------- - 0 +(0 rows) + +SELECT pubname from pg_publication; + pubname +--------------------------------------------------------------------- + citus_shard_move_publication_10 (1 row) -SELECT count(*) from pg_publication; - count +SELECT slot_name from pg_replication_slots; + slot_name --------------------------------------------------------------------- - 1 -(1 row) - -SELECT count(*) from pg_replication_slots; - count ---------------------------------------------------------------------- - 1 + citus_shard_move_slot_10 (1 row) SELECT count(*) from dist; @@ -153,23 +153,20 @@ SELECT pg_drop_replication_slot('citus_shard_move_slot_' || :postgres_oid); \c - - - :worker_2_port SET search_path TO logical_replication; -SELECT count(*) from pg_subscription; - count +SELECT subname from pg_subscription; + subname --------------------------------------------------------------------- - 0 -(1 row) +(0 rows) -SELECT count(*) from pg_publication; - count +SELECT pubname from pg_publication; + pubname --------------------------------------------------------------------- - 0 -(1 row) +(0 rows) -SELECT count(*) from pg_replication_slots; - count +SELECT slot_name from pg_replication_slots; + slot_name --------------------------------------------------------------------- - 0 -(1 row) +(0 rows) SELECT count(*) from dist; count diff --git a/src/test/regress/sql/logical_replication.sql b/src/test/regress/sql/logical_replication.sql index 3f8e048ca..a85c70b08 100644 --- a/src/test/regress/sql/logical_replication.sql +++ b/src/test/regress/sql/logical_replication.sql @@ -35,17 +35,17 @@ CREATE SUBSCRIPTION citus_shard_move_subscription_:postgres_oid WITH (enabled=false, slot_name=citus_shard_move_slot_:postgres_oid); -SELECT count(*) from pg_subscription; -SELECT count(*) from pg_publication; -SELECT count(*) from pg_replication_slots; +SELECT subname from pg_subscription; +SELECT pubname from pg_publication; +SELECT slot_name from pg_replication_slots; SELECT count(*) FROM dist; \c - - - :worker_1_port 
SET search_path TO logical_replication; -SELECT count(*) from pg_subscription; -SELECT count(*) from pg_publication; -SELECT count(*) from pg_replication_slots; +SELECT subname from pg_subscription; +SELECT pubname from pg_publication; +SELECT slot_name from pg_replication_slots; SELECT count(*) FROM dist; \c - - - :master_port @@ -53,11 +53,13 @@ SET search_path TO logical_replication; select citus_move_shard_placement(6830002, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical'); +SELECT public.wait_for_resource_cleanup(); + -- the subscription is still there, as there is no cleanup record for it -- we have created it manually -SELECT count(*) from pg_subscription; -SELECT count(*) from pg_publication; -SELECT count(*) from pg_replication_slots; +SELECT subname from pg_subscription; +SELECT pubname from pg_publication; +SELECT slot_name from pg_replication_slots; SELECT count(*) from dist; \c - - - :worker_1_port @@ -65,9 +67,9 @@ SET search_path TO logical_replication; -- the publication and repslot are still there, as there are no cleanup records for them -- we have created them manually -SELECT count(*) from pg_subscription; -SELECT count(*) from pg_publication; -SELECT count(*) from pg_replication_slots; +SELECT subname from pg_subscription; +SELECT pubname from pg_publication; +SELECT slot_name from pg_replication_slots; SELECT count(*) from dist; DROP PUBLICATION citus_shard_move_publication_:postgres_oid; @@ -76,9 +78,9 @@ SELECT pg_drop_replication_slot('citus_shard_move_slot_' || :postgres_oid); \c - - - :worker_2_port SET search_path TO logical_replication; -SELECT count(*) from pg_subscription; -SELECT count(*) from pg_publication; -SELECT count(*) from pg_replication_slots; +SELECT subname from pg_subscription; +SELECT pubname from pg_publication; +SELECT slot_name from pg_replication_slots; SELECT count(*) from dist; \c - - - :master_port From 0678a2fd895670e0dd0a3b594915144d468f20e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Villemain?= Date: Thu, 2 Nov 2023 13:15:24 +0100 Subject: [PATCH 14/22] Fix #7242, CALL(@0) crash backend (#7288) When executing a prepared CALL, which is not pure SQL but available with some drivers like npgsql and jpgdbc, Citus entered a code path where a plan is not defined, while trying to increase its cost. Thus SIG11 when plan is a NULL pointer. Fix by only increasing plan cost when plan is not null. 
However, it is a bit suspicious to get here with a NULL plan and maybe a better change will be to not call ShardPlacementForFunctionColocatedWithDistTable() with a NULL plan at all (in call.c:134) bug hit with for example: ``` CallableStatement proc = con.prepareCall("{CALL p(?)}"); proc.registerOutParameter(1, java.sql.Types.BIGINT); proc.setInt(1, -100); proc.execute(); ``` where `p(bigint)` is a distributed "function" and the param the distribution key (also in a distributed table), see #7242 for details Fixes #7242 --- .../distributed/planner/distributed_planner.c | 1 + .../planner/function_call_delegation.c | 12 ++++++-- src/test/regress/citus_tests/common.py | 8 +++++ .../test/test_prepared_statements.py | 30 +++++++++++++++++++ 4 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 src/test/regress/citus_tests/test/test_prepared_statements.py diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 65278d1ea..4f7612f8f 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -702,6 +702,7 @@ DissuadePlannerFromUsingPlan(PlannedStmt *plan) * Arbitrarily high cost, but low enough that it can be added up * without overflowing by choose_custom_plan(). */ + Assert(plan != NULL); plan->planTree->total_cost = FLT_MAX / 100000000; } diff --git a/src/backend/distributed/planner/function_call_delegation.c b/src/backend/distributed/planner/function_call_delegation.c index 2f8da29c0..ce9c818d7 100644 --- a/src/backend/distributed/planner/function_call_delegation.c +++ b/src/backend/distributed/planner/function_call_delegation.c @@ -525,8 +525,16 @@ ShardPlacementForFunctionColocatedWithDistTable(DistObjectCacheEntry *procedure, if (partitionParam->paramkind == PARAM_EXTERN) { - /* Don't log a message, we should end up here again without a parameter */ - DissuadePlannerFromUsingPlan(plan); + /* + * Don't log a message, we should end up here again without a + * parameter. + * Note that "plan" can be null, for example when a CALL statement + * is prepared. + */ + if (plan) + { + DissuadePlannerFromUsingPlan(plan); + } return NULL; } } diff --git a/src/test/regress/citus_tests/common.py b/src/test/regress/citus_tests/common.py index 53c9c7944..40c727189 100644 --- a/src/test/regress/citus_tests/common.py +++ b/src/test/regress/citus_tests/common.py @@ -581,6 +581,14 @@ class QueryRunner(ABC): with self.cur(**kwargs) as cur: cur.execute(query, params=params) + def sql_prepared(self, query, params=None, **kwargs): + """Run an SQL query, with prepare=True + + This opens a new connection and closes it once the query is done + """ + with self.cur(**kwargs) as cur: + cur.execute(query, params=params, prepare=True) + def sql_row(self, query, params=None, allow_empty_result=False, **kwargs): """Run an SQL query that returns a single row and returns this row diff --git a/src/test/regress/citus_tests/test/test_prepared_statements.py b/src/test/regress/citus_tests/test/test_prepared_statements.py new file mode 100644 index 000000000..761ecc30c --- /dev/null +++ b/src/test/regress/citus_tests/test/test_prepared_statements.py @@ -0,0 +1,30 @@ +def test_call_param(cluster): + # create a distributed table and an associated distributed procedure + # to ensure parameterized CALL succeed, even when the param is the + # distribution key. 
+ coord = cluster.coordinator + coord.sql("CREATE TABLE test(i int)") + coord.sql( + """ + CREATE PROCEDURE p(_i INT) LANGUAGE plpgsql AS $$ + BEGIN + INSERT INTO test(i) VALUES (_i); + END; $$ + """ + ) + sql = "CALL p(%s)" + + # prepare/exec before distributing + coord.sql_prepared(sql, (1,)) + + coord.sql("SELECT create_distributed_table('test', 'i')") + coord.sql( + "SELECT create_distributed_function('p(int)', distribution_arg_name := '_i', colocate_with := 'test')" + ) + + # prepare/exec after distribution + coord.sql_prepared(sql, (2,)) + + sum_i = coord.sql_value("select sum(i) from test;") + + assert sum_i == 3
From b47c8b3fb077ea7eb2047801cceb47ebb61539fa Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Thu, 2 Nov 2023 13:15:43 +0100 Subject: [PATCH 15/22] Fix flaky insert_select_connection_leak (#7302) Sometimes in CI insert_select_connection_leak would fail like this: ```diff END; SELECT worker_connection_count(:worker_1_port) - :pre_xact_worker_1_connections AS leaked_worker_1_connections, worker_connection_count(:worker_2_port) - :pre_xact_worker_2_connections AS leaked_worker_2_connections; leaked_worker_1_connections | leaked_worker_2_connections -----------------------------+----------------------------- - 0 | 0 + -1 | 0 (1 row) -- ROLLBACK BEGIN; INSERT INTO target_table SELECT * FROM source_table; INSERT INTO target_table SELECT * FROM source_table; ROLLBACK; SELECT worker_connection_count(:worker_1_port) - :pre_xact_worker_1_connections AS leaked_worker_1_connections, worker_connection_count(:worker_2_port) - :pre_xact_worker_2_connections AS leaked_worker_2_connections; leaked_worker_1_connections | leaked_worker_2_connections -----------------------------+----------------------------- - 0 | 0 + -1 | 0 (1 row) \set VERBOSITY TERSE -- Error on constraint failure BEGIN; INSERT INTO target_table SELECT * FROM source_table; SELECT worker_connection_count(:worker_1_port) AS worker_1_connections, worker_connection_count(:worker_2_port) AS worker_2_connections \gset SAVEPOINT s1; INSERT INTO target_table SELECT a, CASE WHEN a < 50 THEN b ELSE null END FROM source_table; @@ -89,15 +89,15 @@ leaked_worker_1_connections | leaked_worker_2_connections -----------------------------+----------------------------- 0 | 0 (1 row) END; SELECT worker_connection_count(:worker_1_port) - :pre_xact_worker_1_connections AS leaked_worker_1_connections, worker_connection_count(:worker_2_port) - :pre_xact_worker_2_connections AS leaked_worker_2_connections; leaked_worker_1_connections | leaked_worker_2_connections -----------------------------+----------------------------- - 0 | 0 + -1 | 0 (1 row) ``` Source: https://github.com/citusdata/citus/actions/runs/6718401194/attempts/1#summary-18258258387 A negative amount of leaked connections is obviously not possible. For some reason a connection that was open when we checked the initial connection count got closed afterwards. This could be from the maintenance daemon or maybe from the previous test not having fully closed its connections just yet. The change in this PR doesn't actually fix the cause of the negative connection count, but it simply considers it fine as well, by changing the result to zero for negative values. With this fix we might sometimes miss a leak, because the negative number can cancel out the leak and still result in a 0. But since the negative number only occurs sometimes, we'll still find the leak often enough.
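The clamp is nothing more than GREATEST applied to the connection delta, so a stray baseline connection that closes mid-test can no longer push the reported leak count below zero, while genuine leaks still show up. A tiny illustration with made-up numbers:

```sql
-- A connection counted at baseline but gone at check time makes the raw delta
-- negative; GREATEST(0, ...) reports that as "no leak" instead of -1.
SELECT GREATEST(0, 5 - 6) AS clamped_to_zero,
       GREATEST(0, 7 - 5) AS real_leak_still_reported;
```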
--- .../insert_select_connection_leak.out | 20 +++++++++---------- .../sql/insert_select_connection_leak.sql | 20 +++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/test/regress/expected/insert_select_connection_leak.out b/src/test/regress/expected/insert_select_connection_leak.out index 8a983acd5..b342ecde1 100644 --- a/src/test/regress/expected/insert_select_connection_leak.out +++ b/src/test/regress/expected/insert_select_connection_leak.out @@ -47,16 +47,16 @@ INSERT INTO target_table SELECT * FROM source_table; INSERT INTO target_table SELECT * FROM source_table; INSERT INTO target_table SELECT * FROM source_table; INSERT INTO target_table SELECT * FROM source_table; -SELECT worker_connection_count(:worker_1_port) - :worker_1_connections AS leaked_worker_1_connections, - worker_connection_count(:worker_2_port) - :worker_2_connections AS leaked_worker_2_connections; +SELECT GREATEST(0, worker_connection_count(:worker_1_port) - :worker_1_connections) AS leaked_worker_1_connections, + GREATEST(0, worker_connection_count(:worker_2_port) - :worker_2_connections) AS leaked_worker_2_connections; leaked_worker_1_connections | leaked_worker_2_connections --------------------------------------------------------------------- 0 | 0 (1 row) END; -SELECT worker_connection_count(:worker_1_port) - :pre_xact_worker_1_connections AS leaked_worker_1_connections, - worker_connection_count(:worker_2_port) - :pre_xact_worker_2_connections AS leaked_worker_2_connections; +SELECT GREATEST(0, worker_connection_count(:worker_1_port) - :pre_xact_worker_1_connections) AS leaked_worker_1_connections, + GREATEST(0, worker_connection_count(:worker_2_port) - :pre_xact_worker_2_connections) AS leaked_worker_2_connections; leaked_worker_1_connections | leaked_worker_2_connections --------------------------------------------------------------------- 0 | 0 @@ -67,8 +67,8 @@ BEGIN; INSERT INTO target_table SELECT * FROM source_table; INSERT INTO target_table SELECT * FROM source_table; ROLLBACK; -SELECT worker_connection_count(:worker_1_port) - :pre_xact_worker_1_connections AS leaked_worker_1_connections, - worker_connection_count(:worker_2_port) - :pre_xact_worker_2_connections AS leaked_worker_2_connections; +SELECT GREATEST(0, worker_connection_count(:worker_1_port) - :pre_xact_worker_1_connections) AS leaked_worker_1_connections, + GREATEST(0, worker_connection_count(:worker_2_port) - :pre_xact_worker_2_connections) AS leaked_worker_2_connections; leaked_worker_1_connections | leaked_worker_2_connections --------------------------------------------------------------------- 0 | 0 @@ -84,16 +84,16 @@ SAVEPOINT s1; INSERT INTO target_table SELECT a, CASE WHEN a < 50 THEN b ELSE null END FROM source_table; ERROR: null value in column "b" violates not-null constraint ROLLBACK TO SAVEPOINT s1; -SELECT worker_connection_count(:worker_1_port) - :worker_1_connections AS leaked_worker_1_connections, - worker_connection_count(:worker_2_port) - :worker_2_connections AS leaked_worker_2_connections; +SELECT GREATEST(0, worker_connection_count(:worker_1_port) - :worker_1_connections) AS leaked_worker_1_connections, + GREATEST(0, worker_connection_count(:worker_2_port) - :worker_2_connections) AS leaked_worker_2_connections; leaked_worker_1_connections | leaked_worker_2_connections --------------------------------------------------------------------- 0 | 0 (1 row) END; -SELECT worker_connection_count(:worker_1_port) - :pre_xact_worker_1_connections AS leaked_worker_1_connections, - 
worker_connection_count(:worker_2_port) - :pre_xact_worker_2_connections AS leaked_worker_2_connections; +SELECT GREATEST(0, worker_connection_count(:worker_1_port) - :pre_xact_worker_1_connections) AS leaked_worker_1_connections, + GREATEST(0, worker_connection_count(:worker_2_port) - :pre_xact_worker_2_connections) AS leaked_worker_2_connections; leaked_worker_1_connections | leaked_worker_2_connections --------------------------------------------------------------------- 0 | 0 diff --git a/src/test/regress/sql/insert_select_connection_leak.sql b/src/test/regress/sql/insert_select_connection_leak.sql index 05afb10a0..e138f6c4d 100644 --- a/src/test/regress/sql/insert_select_connection_leak.sql +++ b/src/test/regress/sql/insert_select_connection_leak.sql @@ -33,12 +33,12 @@ INSERT INTO target_table SELECT * FROM source_table; INSERT INTO target_table SELECT * FROM source_table; INSERT INTO target_table SELECT * FROM source_table; INSERT INTO target_table SELECT * FROM source_table; -SELECT worker_connection_count(:worker_1_port) - :worker_1_connections AS leaked_worker_1_connections, - worker_connection_count(:worker_2_port) - :worker_2_connections AS leaked_worker_2_connections; +SELECT GREATEST(0, worker_connection_count(:worker_1_port) - :worker_1_connections) AS leaked_worker_1_connections, + GREATEST(0, worker_connection_count(:worker_2_port) - :worker_2_connections) AS leaked_worker_2_connections; END; -SELECT worker_connection_count(:worker_1_port) - :pre_xact_worker_1_connections AS leaked_worker_1_connections, - worker_connection_count(:worker_2_port) - :pre_xact_worker_2_connections AS leaked_worker_2_connections; +SELECT GREATEST(0, worker_connection_count(:worker_1_port) - :pre_xact_worker_1_connections) AS leaked_worker_1_connections, + GREATEST(0, worker_connection_count(:worker_2_port) - :pre_xact_worker_2_connections) AS leaked_worker_2_connections; -- ROLLBACK BEGIN; @@ -46,8 +46,8 @@ INSERT INTO target_table SELECT * FROM source_table; INSERT INTO target_table SELECT * FROM source_table; ROLLBACK; -SELECT worker_connection_count(:worker_1_port) - :pre_xact_worker_1_connections AS leaked_worker_1_connections, - worker_connection_count(:worker_2_port) - :pre_xact_worker_2_connections AS leaked_worker_2_connections; +SELECT GREATEST(0, worker_connection_count(:worker_1_port) - :pre_xact_worker_1_connections) AS leaked_worker_1_connections, + GREATEST(0, worker_connection_count(:worker_2_port) - :pre_xact_worker_2_connections) AS leaked_worker_2_connections; \set VERBOSITY TERSE @@ -59,12 +59,12 @@ SELECT worker_connection_count(:worker_1_port) AS worker_1_connections, SAVEPOINT s1; INSERT INTO target_table SELECT a, CASE WHEN a < 50 THEN b ELSE null END FROM source_table; ROLLBACK TO SAVEPOINT s1; -SELECT worker_connection_count(:worker_1_port) - :worker_1_connections AS leaked_worker_1_connections, - worker_connection_count(:worker_2_port) - :worker_2_connections AS leaked_worker_2_connections; +SELECT GREATEST(0, worker_connection_count(:worker_1_port) - :worker_1_connections) AS leaked_worker_1_connections, + GREATEST(0, worker_connection_count(:worker_2_port) - :worker_2_connections) AS leaked_worker_2_connections; END; -SELECT worker_connection_count(:worker_1_port) - :pre_xact_worker_1_connections AS leaked_worker_1_connections, - worker_connection_count(:worker_2_port) - :pre_xact_worker_2_connections AS leaked_worker_2_connections; +SELECT GREATEST(0, worker_connection_count(:worker_1_port) - :pre_xact_worker_1_connections) AS leaked_worker_1_connections, + 
GREATEST(0, worker_connection_count(:worker_2_port) - :pre_xact_worker_2_connections) AS leaked_worker_2_connections; SET client_min_messages TO WARNING; DROP SCHEMA insert_select_connection_leak CASCADE; From f171ec98fc58ac1ab2fdf808535c3259c4dfc3d1 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Thu, 2 Nov 2023 13:31:56 +0100 Subject: [PATCH 16/22] Fix flaky failure_distributed_results (#7307) Sometimes in CI we run into this failure: ```diff SELECT resultId, nodeport, rowcount, targetShardId, targetShardIndex FROM partition_task_list_results('test', $$ SELECT * FROM source_table $$, 'target_table') NATURAL JOIN pg_dist_node; -WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open +ERROR: connection to the remote node localhost:9060 failed with the following error: connection not open SELECT * FROM distributed_result_info ORDER BY resultId; - resultid | nodeport | rowcount | targetshardid | targetshardindex ---------------------------------------------------------------------- - test_from_100800_to_0 | 9060 | 22 | 100805 | 0 - test_from_100801_to_0 | 57637 | 2 | 100805 | 0 - test_from_100801_to_1 | 57637 | 15 | 100806 | 1 - test_from_100802_to_1 | 57637 | 10 | 100806 | 1 - test_from_100802_to_2 | 57637 | 5 | 100807 | 2 - test_from_100803_to_2 | 57637 | 18 | 100807 | 2 - test_from_100803_to_3 | 57637 | 4 | 100808 | 3 - test_from_100804_to_3 | 9060 | 24 | 100808 | 3 -(8 rows) - +ERROR: current transaction is aborted, commands ignored until end of transaction block -- fetch from worker 2 should fail SAVEPOINT s1; +ERROR: current transaction is aborted, commands ignored until end of transaction block SELECT fetch_intermediate_results('{test_from_100802_to_1,test_from_100802_to_2}'::text[], 'localhost', :worker_2_port) > 0 AS fetched; -ERROR: could not open file "base/pgsql_job_cache/xx_x_xxx/test_from_100802_to_1.data": No such file or directory -CONTEXT: while executing command on localhost:xxxxx +ERROR: current transaction is aborted, commands ignored until end of transaction block ROLLBACK TO SAVEPOINT s1; +ERROR: savepoint "s1" does not exist -- fetch from worker 1 should succeed SELECT fetch_intermediate_results('{test_from_100802_to_1,test_from_100802_to_2}'::text[], 'localhost', :worker_1_port) > 0 AS fetched; - fetched ---------------------------------------------------------------------- - t -(1 row) - +ERROR: current transaction is aborted, commands ignored until end of transaction block -- make sure the results read are same as the previous transaction block SELECT count(*), sum(x) FROM read_intermediate_results('{test_from_100802_to_1,test_from_100802_to_2}'::text[],'binary') AS res (x int); - count | sum ---------------------------------------------------------------------- - 15 | 863 -(1 row) - +ERROR: current transaction is aborted, commands ignored until end of transaction block ROLLBACk; ``` As outlined in the #7306 I created, the reason for this is related to only having a single connection open to the node. Finding and fixing the full cause is not trivial, so instead this PR starts working around this bug by forcing maximum parallelism. Preferably we'd want this workaround not to be necessary, but that requires spending time to fix this. For now having a less flaky CI is good enough. 
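As a sketch of the workaround, the only change needed in the test setup is a
session-level GUC (the same setting appears in the diff below); forcing
maximum parallelism makes Citus open a connection per shard placement instead
of reusing the single worker connection that triggers #7306:

```sql
-- Work around issue #7306: open a connection per shard placement rather than
-- funneling everything through one cached worker connection.
SET citus.force_max_query_parallelization TO true;
```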
---
 src/test/regress/expected/failure_distributed_results.out | 2 ++
 src/test/regress/sql/failure_distributed_results.sql | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/test/regress/expected/failure_distributed_results.out b/src/test/regress/expected/failure_distributed_results.out
index fc97c9af6..a316763e3 100644
--- a/src/test/regress/expected/failure_distributed_results.out
+++ b/src/test/regress/expected/failure_distributed_results.out
@@ -14,6 +14,8 @@ SELECT citus.mitmproxy('conn.allow()');
 (1 row)

 SET citus.next_shard_id TO 100800;
+-- Needed because of issue #7306
+SET citus.force_max_query_parallelization TO true;
 -- always try the 1st replica before the 2nd replica.
 SET citus.task_assignment_policy TO 'first-replica';
 --
diff --git a/src/test/regress/sql/failure_distributed_results.sql b/src/test/regress/sql/failure_distributed_results.sql
index 95e4d5513..93e4a9a33 100644
--- a/src/test/regress/sql/failure_distributed_results.sql
+++ b/src/test/regress/sql/failure_distributed_results.sql
@@ -15,6 +15,8 @@ SET client_min_messages TO WARNING;
 SELECT citus.mitmproxy('conn.allow()');

 SET citus.next_shard_id TO 100800;
+-- Needed because of issue #7306
+SET citus.force_max_query_parallelization TO true;
 -- always try the 1st replica before the 2nd replica.
 SET citus.task_assignment_policy TO 'first-replica';

From 85b997a0fb0cb2e4efaac2d3700bdc235adf7743 Mon Sep 17 00:00:00 2001
From: Jelte Fennema-Nio
Date: Thu, 2 Nov 2023 14:42:45 +0100
Subject: [PATCH 17/22] Fix flaky multi_alter_table_statements (#7321)

Sometimes multi_alter_table_statements would fail in CI like this:

```diff
 -- Verify that DROP NOT NULL works
 ALTER TABLE lineitem_alter ALTER COLUMN int_column2 DROP NOT NULL;
 SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='lineitem_alter'::regclass;
-     Column      |         Type          | Modifiers
----------------------------------------------------------------------
- l_orderkey      | bigint                | not null
- l_partkey       | integer               | not null
- l_suppkey       | integer               | not null
- l_linenumber    | integer               | not null
- l_quantity      | numeric(15,2)         | not null
- l_extendedprice | numeric(15,2)         | not null
- l_discount      | numeric(15,2)         | not null
- l_tax           | numeric(15,2)         | not null
- l_returnflag    | character(1)          | not null
- l_linestatus    | character(1)          | not null
- l_shipdate      | date                  | not null
- l_commitdate    | date                  | not null
- l_receiptdate   | date                  | not null
- l_shipinstruct  | character(25)         | not null
- l_shipmode      | character(10)         | not null
- l_comment       | character varying(44) | not null
- float_column    | double precision      | default 1
- date_column     | date                  |
- int_column1     | integer               |
- int_column2     | integer               |
- null_column     | integer               |
-(21 rows)
-
+ERROR: schema "alter_table_add_column" does not exist
 -- COPY should succeed now
 SELECT master_create_empty_shard('lineitem_alter') as shardid \gset
```

Reading from table_desc apparently has an issue: if the schema of one of
the items gets deleted while table_desc is being read, we get such an
error. This change fixes that by no longer running
multi_alter_table_statements in parallel with alter_table_add_column.
This is another instance of the same issue as in #7294 --- src/test/regress/multi_1_schedule | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/regress/multi_1_schedule b/src/test/regress/multi_1_schedule index 4e2b19795..10884c637 100644 --- a/src/test/regress/multi_1_schedule +++ b/src/test/regress/multi_1_schedule @@ -162,7 +162,8 @@ test: with_executors with_join with_partitioning with_transactions with_dml # Tests around DDL statements run on distributed tables # ---------- test: multi_index_statements -test: multi_alter_table_statements alter_table_add_column +test: multi_alter_table_statements +test: alter_table_add_column test: multi_alter_table_add_constraints test: multi_alter_table_add_constraints_without_name test: multi_alter_table_add_foreign_key_without_name From 5e2439a1179f1fa328a37ff31c8eeb52594cedcd Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Thu, 2 Nov 2023 18:32:56 +0300 Subject: [PATCH 18/22] Make some more tests re-runable (#7322) * multi_mx_create_table * multi_mx_function_table_reference * multi_mx_add_coordinator * create_role_propagation * metadata_sync_helpers * text_search https://github.com/citusdata/citus/pull/7278 requires this. --- src/test/regress/citus_tests/run_test.py | 25 +++++++++++++++- .../expected/create_role_propagation.out | 1 + .../expected/multi_mx_add_coordinator.out | 3 +- .../expected/multi_mx_create_table.out | 17 ++++++++--- src/test/regress/expected/text_search.out | 29 ++++++++++++------- .../regress/sql/create_role_propagation.sql | 2 ++ .../regress/sql/multi_mx_add_coordinator.sql | 3 +- .../regress/sql/multi_mx_create_table.sql | 19 +++++++++--- src/test/regress/sql/text_search.sql | 10 +++---- 9 files changed, 83 insertions(+), 26 deletions(-) diff --git a/src/test/regress/citus_tests/run_test.py b/src/test/regress/citus_tests/run_test.py index a3bdf368d..6ae17060f 100755 --- a/src/test/regress/citus_tests/run_test.py +++ b/src/test/regress/citus_tests/run_test.py @@ -125,7 +125,6 @@ DEPS = { "multi_mx_create_table": TestDeps( None, [ - "multi_test_helpers_superuser", "multi_mx_node_metadata", "multi_cluster_management", "multi_mx_function_table_reference", @@ -184,6 +183,30 @@ DEPS = { "multi_test_helpers_superuser", ], ), + "multi_mx_function_table_reference": TestDeps( + None, + [ + "multi_cluster_management", + "remove_coordinator_from_metadata", + ], + # because it queries node group id and it changes as we add / remove nodes + repeatable=False, + ), + "multi_mx_add_coordinator": TestDeps( + None, + [ + "multi_cluster_management", + "remove_coordinator_from_metadata", + "multi_mx_function_table_reference", + ], + ), + "metadata_sync_helpers": TestDeps( + None, + [ + "multi_mx_node_metadata", + "multi_cluster_management", + ], + ), } diff --git a/src/test/regress/expected/create_role_propagation.out b/src/test/regress/expected/create_role_propagation.out index 59f7948a1..16ef8b82a 100644 --- a/src/test/regress/expected/create_role_propagation.out +++ b/src/test/regress/expected/create_role_propagation.out @@ -696,3 +696,4 @@ SELECT rolname FROM pg_authid WHERE rolname LIKE '%existing%' ORDER BY 1; (0 rows) \c - - - :master_port +DROP ROLE nondist_cascade_1, nondist_cascade_2, nondist_cascade_3, dist_cascade; diff --git a/src/test/regress/expected/multi_mx_add_coordinator.out b/src/test/regress/expected/multi_mx_add_coordinator.out index e810b715e..66450826b 100644 --- a/src/test/regress/expected/multi_mx_add_coordinator.out +++ b/src/test/regress/expected/multi_mx_add_coordinator.out @@ -124,7 
+124,7 @@ NOTICE: executing the command locally: SELECT count(*) AS count FROM mx_add_coo 0 (1 row) --- test that distributed functions also use local execution +-- test that distributed functions also use sequential execution CREATE OR REPLACE FUNCTION my_group_id() RETURNS void LANGUAGE plpgsql @@ -365,5 +365,6 @@ SELECT verify_metadata('localhost', :worker_1_port), SET client_min_messages TO error; DROP SCHEMA mx_add_coordinator CASCADE; +DROP USER reprefuser; SET search_path TO DEFAULT; RESET client_min_messages; diff --git a/src/test/regress/expected/multi_mx_create_table.out b/src/test/regress/expected/multi_mx_create_table.out index ac7f90826..b9d3f7faa 100644 --- a/src/test/regress/expected/multi_mx_create_table.out +++ b/src/test/regress/expected/multi_mx_create_table.out @@ -3,6 +3,7 @@ -- ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1220000; ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART 1220000; +SET client_min_messages TO WARNING; SELECT start_metadata_sync_to_node('localhost', :worker_1_port); start_metadata_sync_to_node --------------------------------------------------------------------- @@ -15,6 +16,9 @@ SELECT start_metadata_sync_to_node('localhost', :worker_2_port); (1 row) +-- cannot drop them at the end of the test file as other tests depend on them +DROP SCHEMA IF EXISTS citus_mx_test_schema, citus_mx_test_schema_join_1, citus_mx_test_schema_join_2 CASCADE; +DROP TABLE IF EXISTS nation_hash, lineitem_mx, orders_mx, customer_mx, nation_mx, part_mx, supplier_mx, mx_ddl_table, limit_orders_mx, multiple_hash_mx, app_analytics_events_mx, researchers_mx, labs_mx, objects_mx, articles_hash_mx, articles_single_shard_hash_mx, company_employees_mx; -- create schema to test schema support CREATE SCHEMA citus_mx_test_schema; CREATE SCHEMA citus_mx_test_schema_join_1; @@ -42,7 +46,7 @@ BEGIN END; $$ LANGUAGE 'plpgsql' IMMUTABLE; -CREATE FUNCTION public.immutable_append_mx(old_values int[], new_value int) +CREATE OR REPLACE FUNCTION public.immutable_append_mx(old_values int[], new_value int) RETURNS int[] AS $$ SELECT old_values || new_value $$ LANGUAGE SQL IMMUTABLE; CREATE OPERATOR citus_mx_test_schema.=== ( LEFTARG = int, @@ -65,14 +69,16 @@ SELECT quote_ident(current_setting('lc_collate')) as current_locale \gset \endif CREATE COLLATION citus_mx_test_schema.english (LOCALE=:current_locale); CREATE TYPE citus_mx_test_schema.new_composite_type as (key1 text, key2 text); -CREATE TYPE order_side_mx AS ENUM ('buy', 'sell'); +CREATE TYPE citus_mx_test_schema.order_side_mx AS ENUM ('buy', 'sell'); -- now create required stuff in the worker 1 \c - - - :worker_1_port +SET client_min_messages TO WARNING; -- show that we do not support creating citus local tables from mx workers for now CREATE TABLE citus_local_table(a int); SELECT citus_add_local_table_to_metadata('citus_local_table'); ERROR: operation is not allowed on this node HINT: Connect to the coordinator and run it again. 
+DROP TABLE citus_local_table; SET search_path TO citus_mx_test_schema; -- create operator CREATE OPERATOR citus_mx_test_schema.=== ( @@ -85,6 +91,7 @@ CREATE OPERATOR citus_mx_test_schema.=== ( ); -- now create required stuff in the worker 2 \c - - - :worker_2_port +SET client_min_messages TO WARNING; SET search_path TO citus_mx_test_schema; -- create operator CREATE OPERATOR citus_mx_test_schema.=== ( @@ -97,6 +104,7 @@ CREATE OPERATOR citus_mx_test_schema.=== ( ); -- connect back to the master, and do some more tests \c - - - :master_port +SET client_min_messages TO WARNING; SET citus.shard_replication_factor TO 1; SET search_path TO public; CREATE TABLE nation_hash( @@ -315,7 +323,7 @@ CREATE TABLE limit_orders_mx ( symbol text NOT NULL, bidder_id bigint NOT NULL, placed_at timestamp NOT NULL, - kind order_side_mx NOT NULL, + kind citus_mx_test_schema.order_side_mx NOT NULL, limit_price decimal NOT NULL DEFAULT 0.00 CHECK (limit_price >= 0.00) ); SET citus.shard_count TO 2; @@ -473,6 +481,7 @@ ORDER BY table_name::text; (23 rows) \c - - - :worker_1_port +SET client_min_messages TO WARNING; SELECT table_name, citus_table_type, distribution_column, shard_count, table_owner FROM citus_tables ORDER BY table_name::text; @@ -978,6 +987,6 @@ SELECT shard_name, table_name, citus_table_type, shard_size FROM citus_shards OR (469 rows) -- Show that altering type name is not supported from worker node -ALTER TYPE order_side_mx RENAME TO temp_order_side_mx; +ALTER TYPE citus_mx_test_schema.order_side_mx RENAME TO temp_order_side_mx; ERROR: operation is not allowed on this node HINT: Connect to the coordinator and run it again. diff --git a/src/test/regress/expected/text_search.out b/src/test/regress/expected/text_search.out index b9934a1d4..6c5b387ba 100644 --- a/src/test/regress/expected/text_search.out +++ b/src/test/regress/expected/text_search.out @@ -374,12 +374,21 @@ SELECT * FROM run_command_on_workers($$ SELECT 'text_search.config3'::regconfig; (2 rows) -- verify they are all removed locally -SELECT 'text_search.config1'::regconfig; -ERROR: text search configuration "text_search.config1" does not exist -SELECT 'text_search.config2'::regconfig; -ERROR: text search configuration "text_search.config2" does not exist -SELECT 'text_search.config3'::regconfig; -ERROR: text search configuration "text_search.config3" does not exist +SELECT 1 FROM pg_ts_config WHERE cfgname = 'config1' AND cfgnamespace = 'text_search'::regnamespace; + ?column? +--------------------------------------------------------------------- +(0 rows) + +SELECT 1 FROM pg_ts_config WHERE cfgname = 'config2' AND cfgnamespace = 'text_search'::regnamespace; + ?column? +--------------------------------------------------------------------- +(0 rows) + +SELECT 1 FROM pg_ts_config WHERE cfgname = 'config3' AND cfgnamespace = 'text_search'::regnamespace; + ?column? 
+--------------------------------------------------------------------- +(0 rows) + -- verify that indexes created concurrently that would propagate a TEXT SEARCH CONFIGURATION object SET citus.enable_ddl_propagation TO off; CREATE TEXT SEARCH CONFIGURATION concurrent_index_config ( PARSER = default ); @@ -434,12 +443,12 @@ $$) ORDER BY 1,2; CREATE TEXT SEARCH CONFIGURATION text_search.manually_created_wrongly ( copy = french ); -- now we expect manually_created_wrongly(citus_backup_XXX) to show up when querying the configurations SELECT * FROM run_command_on_workers($$ - SELECT array_agg(cfgname) FROM pg_ts_config WHERE cfgname LIKE 'manually_created_wrongly%'; + SELECT array_agg(cfgname ORDER BY cfgname) FROM pg_ts_config WHERE cfgname LIKE 'manually_created_wrongly%'; $$) ORDER BY 1,2; nodename | nodeport | success | result --------------------------------------------------------------------- - localhost | 57637 | t | {manually_created_wrongly(citus_backup_0),manually_created_wrongly} - localhost | 57638 | t | {manually_created_wrongly(citus_backup_0),manually_created_wrongly} + localhost | 57637 | t | {manually_created_wrongly,manually_created_wrongly(citus_backup_0)} + localhost | 57638 | t | {manually_created_wrongly,manually_created_wrongly(citus_backup_0)} (2 rows) -- verify the objects get reused appropriately when the specification is the same @@ -458,7 +467,7 @@ CREATE TEXT SEARCH CONFIGURATION text_search.manually_created_correct ( copy = f -- now we don't expect manually_created_correct(citus_backup_XXX) to show up when querying the configurations as the -- original one is reused SELECT * FROM run_command_on_workers($$ - SELECT array_agg(cfgname) FROM pg_ts_config WHERE cfgname LIKE 'manually_created_correct%'; + SELECT array_agg(cfgname ORDER BY cfgname) FROM pg_ts_config WHERE cfgname LIKE 'manually_created_correct%'; $$) ORDER BY 1,2; nodename | nodeport | success | result --------------------------------------------------------------------- diff --git a/src/test/regress/sql/create_role_propagation.sql b/src/test/regress/sql/create_role_propagation.sql index 027e4f72e..8ac3c83d9 100644 --- a/src/test/regress/sql/create_role_propagation.sql +++ b/src/test/regress/sql/create_role_propagation.sql @@ -277,3 +277,5 @@ SELECT rolname FROM pg_authid WHERE rolname LIKE '%existing%' ORDER BY 1; \c - - - :worker_1_port SELECT rolname FROM pg_authid WHERE rolname LIKE '%existing%' ORDER BY 1; \c - - - :master_port + +DROP ROLE nondist_cascade_1, nondist_cascade_2, nondist_cascade_3, dist_cascade; diff --git a/src/test/regress/sql/multi_mx_add_coordinator.sql b/src/test/regress/sql/multi_mx_add_coordinator.sql index 47053cd28..56346f901 100644 --- a/src/test/regress/sql/multi_mx_add_coordinator.sql +++ b/src/test/regress/sql/multi_mx_add_coordinator.sql @@ -67,7 +67,7 @@ SET client_min_messages TO DEBUG; SELECT count(*) FROM ref; SELECT count(*) FROM ref; --- test that distributed functions also use local execution +-- test that distributed functions also use sequential execution CREATE OR REPLACE FUNCTION my_group_id() RETURNS void LANGUAGE plpgsql @@ -190,5 +190,6 @@ SELECT verify_metadata('localhost', :worker_1_port), SET client_min_messages TO error; DROP SCHEMA mx_add_coordinator CASCADE; +DROP USER reprefuser; SET search_path TO DEFAULT; RESET client_min_messages; diff --git a/src/test/regress/sql/multi_mx_create_table.sql b/src/test/regress/sql/multi_mx_create_table.sql index de3468415..4fb6eadbb 100644 --- a/src/test/regress/sql/multi_mx_create_table.sql +++ 
b/src/test/regress/sql/multi_mx_create_table.sql @@ -5,9 +5,15 @@ ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1220000; ALTER SEQUENCE pg_catalog.pg_dist_colocationid_seq RESTART 1220000; +SET client_min_messages TO WARNING; + SELECT start_metadata_sync_to_node('localhost', :worker_1_port); SELECT start_metadata_sync_to_node('localhost', :worker_2_port); +-- cannot drop them at the end of the test file as other tests depend on them +DROP SCHEMA IF EXISTS citus_mx_test_schema, citus_mx_test_schema_join_1, citus_mx_test_schema_join_2 CASCADE; +DROP TABLE IF EXISTS nation_hash, lineitem_mx, orders_mx, customer_mx, nation_mx, part_mx, supplier_mx, mx_ddl_table, limit_orders_mx, multiple_hash_mx, app_analytics_events_mx, researchers_mx, labs_mx, objects_mx, articles_hash_mx, articles_single_shard_hash_mx, company_employees_mx; + -- create schema to test schema support CREATE SCHEMA citus_mx_test_schema; CREATE SCHEMA citus_mx_test_schema_join_1; @@ -38,7 +44,7 @@ END; $$ LANGUAGE 'plpgsql' IMMUTABLE; -CREATE FUNCTION public.immutable_append_mx(old_values int[], new_value int) +CREATE OR REPLACE FUNCTION public.immutable_append_mx(old_values int[], new_value int) RETURNS int[] AS $$ SELECT old_values || new_value $$ LANGUAGE SQL IMMUTABLE; CREATE OPERATOR citus_mx_test_schema.=== ( @@ -67,14 +73,16 @@ SELECT quote_ident(current_setting('lc_collate')) as current_locale \gset CREATE COLLATION citus_mx_test_schema.english (LOCALE=:current_locale); CREATE TYPE citus_mx_test_schema.new_composite_type as (key1 text, key2 text); -CREATE TYPE order_side_mx AS ENUM ('buy', 'sell'); +CREATE TYPE citus_mx_test_schema.order_side_mx AS ENUM ('buy', 'sell'); -- now create required stuff in the worker 1 \c - - - :worker_1_port +SET client_min_messages TO WARNING; -- show that we do not support creating citus local tables from mx workers for now CREATE TABLE citus_local_table(a int); SELECT citus_add_local_table_to_metadata('citus_local_table'); +DROP TABLE citus_local_table; SET search_path TO citus_mx_test_schema; -- create operator @@ -89,6 +97,7 @@ CREATE OPERATOR citus_mx_test_schema.=== ( -- now create required stuff in the worker 2 \c - - - :worker_2_port +SET client_min_messages TO WARNING; SET search_path TO citus_mx_test_schema; @@ -104,6 +113,7 @@ CREATE OPERATOR citus_mx_test_schema.=== ( -- connect back to the master, and do some more tests \c - - - :master_port +SET client_min_messages TO WARNING; SET citus.shard_replication_factor TO 1; SET search_path TO public; @@ -308,7 +318,7 @@ CREATE TABLE limit_orders_mx ( symbol text NOT NULL, bidder_id bigint NOT NULL, placed_at timestamp NOT NULL, - kind order_side_mx NOT NULL, + kind citus_mx_test_schema.order_side_mx NOT NULL, limit_price decimal NOT NULL DEFAULT 0.00 CHECK (limit_price >= 0.00) ); @@ -386,6 +396,7 @@ FROM citus_tables ORDER BY table_name::text; \c - - - :worker_1_port +SET client_min_messages TO WARNING; SELECT table_name, citus_table_type, distribution_column, shard_count, table_owner FROM citus_tables @@ -394,4 +405,4 @@ ORDER BY table_name::text; SELECT shard_name, table_name, citus_table_type, shard_size FROM citus_shards ORDER BY shard_name::text; -- Show that altering type name is not supported from worker node -ALTER TYPE order_side_mx RENAME TO temp_order_side_mx; +ALTER TYPE citus_mx_test_schema.order_side_mx RENAME TO temp_order_side_mx; diff --git a/src/test/regress/sql/text_search.sql b/src/test/regress/sql/text_search.sql index d0d4b5a6f..4a65a5e1a 100644 --- a/src/test/regress/sql/text_search.sql +++ 
b/src/test/regress/sql/text_search.sql
@@ -199,9 +199,9 @@ SELECT * FROM run_command_on_workers($$ SELECT 'text_search.config1'::regconfig;
 SELECT * FROM run_command_on_workers($$ SELECT 'text_search.config2'::regconfig; $$) ORDER BY 1,2;
 SELECT * FROM run_command_on_workers($$ SELECT 'text_search.config3'::regconfig; $$) ORDER BY 1,2;
 -- verify they are all removed locally
-SELECT 'text_search.config1'::regconfig;
-SELECT 'text_search.config2'::regconfig;
-SELECT 'text_search.config3'::regconfig;
+SELECT 1 FROM pg_ts_config WHERE cfgname = 'config1' AND cfgnamespace = 'text_search'::regnamespace;
+SELECT 1 FROM pg_ts_config WHERE cfgname = 'config2' AND cfgnamespace = 'text_search'::regnamespace;
+SELECT 1 FROM pg_ts_config WHERE cfgname = 'config3' AND cfgnamespace = 'text_search'::regnamespace;
 -- verify that indexes created concurrently that would propagate a TEXT SEARCH CONFIGURATION object
 SET citus.enable_ddl_propagation TO off;
@@ -235,7 +235,7 @@ CREATE TEXT SEARCH CONFIGURATION text_search.manually_created_wrongly ( copy = f
 -- now we expect manually_created_wrongly(citus_backup_XXX) to show up when querying the configurations
 SELECT * FROM run_command_on_workers($$
-    SELECT array_agg(cfgname) FROM pg_ts_config WHERE cfgname LIKE 'manually_created_wrongly%';
+    SELECT array_agg(cfgname ORDER BY cfgname) FROM pg_ts_config WHERE cfgname LIKE 'manually_created_wrongly%';
 $$) ORDER BY 1,2;
 -- verify the objects get reused appropriately when the specification is the same
@@ -249,7 +249,7 @@ CREATE TEXT SEARCH CONFIGURATION text_search.manually_created_correct ( copy = f
 -- now we don't expect manually_created_correct(citus_backup_XXX) to show up when querying the configurations as the
 -- original one is reused
 SELECT * FROM run_command_on_workers($$
-    SELECT array_agg(cfgname) FROM pg_ts_config WHERE cfgname LIKE 'manually_created_correct%';
+    SELECT array_agg(cfgname ORDER BY cfgname) FROM pg_ts_config WHERE cfgname LIKE 'manually_created_correct%';
 $$) ORDER BY 1,2;
 CREATE SCHEMA "Text Search Requiring Quote's";
From 21646ca1e96175370be1472a14d5ab1baa55b471 Mon Sep 17 00:00:00 2001
From: Onur Tirtir
Date: Fri, 3 Nov 2023 11:00:32 +0300
Subject: [PATCH 19/22] Fix flaky isolation_get_all_active_transactions.spec test (#7323)

Fix the flaky test that results in the following diff by waiting, for up
to 5 seconds, until the backend that we want to terminate has really
terminated.
```diff --- /__w/citus/citus/src/test/regress/expected/isolation_get_all_active_transactions.out.modified 2023-11-01 16:30:57.648749795 +0000 +++ /__w/citus/citus/src/test/regress/results/isolation_get_all_active_transactions.out.modified 2023-11-01 16:30:57.656749877 +0000 @@ -114,13 +114,13 @@ -------------------- t (1 row) step s3-show-activity: SET ROLE postgres; select count(*) from get_all_active_transactions() where process_id IN (SELECT * FROM selected_pid); count ----- - 0 + 1 (1 row) ``` --- .../isolation_get_all_active_transactions.out | 26 +++++++++++++------ ...isolation_get_all_active_transactions.spec | 25 +++++++++++++++++- 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/src/test/regress/expected/isolation_get_all_active_transactions.out b/src/test/regress/expected/isolation_get_all_active_transactions.out index a9739a826..73610a455 100644 --- a/src/test/regress/expected/isolation_get_all_active_transactions.out +++ b/src/test/regress/expected/isolation_get_all_active_transactions.out @@ -94,7 +94,7 @@ step s2-commit: COMMIT; -starting permutation: s4-record-pid s3-show-activity s5-kill s3-show-activity +starting permutation: s4-record-pid s3-show-activity s5-kill s3-wait-backend-termination step s4-record-pid: SELECT pg_backend_pid() INTO selected_pid; @@ -115,12 +115,22 @@ pg_terminate_backend t (1 row) -step s3-show-activity: +step s3-wait-backend-termination: SET ROLE postgres; - select count(*) from get_all_active_transactions() where process_id IN (SELECT * FROM selected_pid); - -count ---------------------------------------------------------------------- - 0 -(1 row) + DO $$ + DECLARE + i int; + BEGIN + i := 0; + -- try for 5 sec then timeout + WHILE (select count(*) > 0 from get_all_active_transactions() where process_id IN (SELECT * FROM selected_pid)) + LOOP + PERFORM pg_sleep(0.1); + i := i + 1; + IF i > 50 THEN + RAISE EXCEPTION 'Timeout while waiting for backend to terminate'; + END IF; + END LOOP; + END; + $$; diff --git a/src/test/regress/spec/isolation_get_all_active_transactions.spec b/src/test/regress/spec/isolation_get_all_active_transactions.spec index 497b3a58a..8a2d5a5c6 100644 --- a/src/test/regress/spec/isolation_get_all_active_transactions.spec +++ b/src/test/regress/spec/isolation_get_all_active_transactions.spec @@ -107,6 +107,29 @@ step "s3-show-activity" select count(*) from get_all_active_transactions() where process_id IN (SELECT * FROM selected_pid); } +step "s3-wait-backend-termination" +{ + SET ROLE postgres; + + DO $$ + DECLARE + i int; + BEGIN + i := 0; + + -- try for 5 sec then timeout + WHILE (select count(*) > 0 from get_all_active_transactions() where process_id IN (SELECT * FROM selected_pid)) + LOOP + PERFORM pg_sleep(0.1); + i := i + 1; + IF i > 50 THEN + RAISE EXCEPTION 'Timeout while waiting for backend to terminate'; + END IF; + END LOOP; + END; + $$; +} + session "s4" step "s4-record-pid" @@ -123,4 +146,4 @@ step "s5-kill" permutation "s1-grant" "s1-begin-insert" "s2-begin-insert" "s3-as-admin" "s3-as-user-1" "s3-as-readonly" "s3-as-monitor" "s1-commit" "s2-commit" -permutation "s4-record-pid" "s3-show-activity" "s5-kill" "s3-show-activity" +permutation "s4-record-pid" "s3-show-activity" "s5-kill" "s3-wait-backend-termination" From e535f53ce5644b2ee339098a255ac877edc44c63 Mon Sep 17 00:00:00 2001 From: cvbhjkl Date: Fri, 3 Nov 2023 20:14:11 +0800 Subject: [PATCH 20/22] Fix typo in local_executor.c (#7324) Fix a typo 'remaning' -> 'remaining' in local_executor.c --- 
src/backend/distributed/executor/local_executor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/distributed/executor/local_executor.c b/src/backend/distributed/executor/local_executor.c index 5661403b9..73d93c055 100644 --- a/src/backend/distributed/executor/local_executor.c +++ b/src/backend/distributed/executor/local_executor.c @@ -567,7 +567,7 @@ LogLocalCommand(Task *task) * * One slightly different case is modifications to replicated tables * (e.g., reference tables) where a single task ends in two separate tasks - * and the local task is added to localTaskList and the remaning ones to + * and the local task is added to localTaskList and the remaining ones to * the remoteTaskList. */ void From 444e6cb7d6749919e8e43fdd89bd3a3a3ce07bcb Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Tue, 7 Nov 2023 16:39:08 +0300 Subject: [PATCH 21/22] Remove useless variables (#7327) To fix warnings observed when using different compiler versions. --- src/backend/distributed/commands/function.c | 2 -- src/backend/distributed/commands/publication.c | 6 ------ src/backend/distributed/commands/vacuum.c | 2 -- src/backend/distributed/operations/create_shards.c | 7 ------- 4 files changed, 17 deletions(-) diff --git a/src/backend/distributed/commands/function.c b/src/backend/distributed/commands/function.c index 01911677d..103d35a51 100644 --- a/src/backend/distributed/commands/function.c +++ b/src/backend/distributed/commands/function.c @@ -978,7 +978,6 @@ GetAggregateDDLCommand(const RegProcedure funcOid, bool useCreateOrReplace) char *argmodes = NULL; int insertorderbyat = -1; int argsprinted = 0; - int inputargno = 0; HeapTuple proctup = SearchSysCache1(PROCOID, funcOid); if (!HeapTupleIsValid(proctup)) @@ -1058,7 +1057,6 @@ GetAggregateDDLCommand(const RegProcedure funcOid, bool useCreateOrReplace) } } - inputargno++; /* this is a 1-based counter */ if (argsprinted == insertorderbyat) { appendStringInfoString(&buf, " ORDER BY "); diff --git a/src/backend/distributed/commands/publication.c b/src/backend/distributed/commands/publication.c index 581f7f874..f225b0fca 100644 --- a/src/backend/distributed/commands/publication.c +++ b/src/backend/distributed/commands/publication.c @@ -175,7 +175,6 @@ BuildCreatePublicationStmt(Oid publicationId) PUBLICATION_PART_ROOT : PUBLICATION_PART_LEAF); Oid relationId = InvalidOid; - int citusTableCount PG_USED_FOR_ASSERTS_ONLY = 0; /* mainly for consistent ordering in test output */ relationIds = SortList(relationIds, CompareOids); @@ -199,11 +198,6 @@ BuildCreatePublicationStmt(Oid publicationId) createPubStmt->tables = lappend(createPubStmt->tables, rangeVar); #endif - - if (IsCitusTable(relationId)) - { - citusTableCount++; - } } /* WITH (publish_via_partition_root = true) option */ diff --git a/src/backend/distributed/commands/vacuum.c b/src/backend/distributed/commands/vacuum.c index 21638ba7f..4201c9400 100644 --- a/src/backend/distributed/commands/vacuum.c +++ b/src/backend/distributed/commands/vacuum.c @@ -184,7 +184,6 @@ ExecuteVacuumOnDistributedTables(VacuumStmt *vacuumStmt, List *relationIdList, CitusVacuumParams vacuumParams) { int relationIndex = 0; - int executedVacuumCount = 0; Oid relationId = InvalidOid; foreach_oid(relationId, relationIdList) @@ -197,7 +196,6 @@ ExecuteVacuumOnDistributedTables(VacuumStmt *vacuumStmt, List *relationIdList, /* local execution is not implemented for VACUUM commands */ bool localExecutionSupported = false; ExecuteUtilityTaskList(taskList, localExecutionSupported); - 
executedVacuumCount++; } relationIndex++; } diff --git a/src/backend/distributed/operations/create_shards.c b/src/backend/distributed/operations/create_shards.c index d0fcc9612..8bc3b249f 100644 --- a/src/backend/distributed/operations/create_shards.c +++ b/src/backend/distributed/operations/create_shards.c @@ -158,13 +158,6 @@ CreateShardsWithRoundRobinPolicy(Oid distributedTableId, int32 shardCount, "replication factor."))); } - /* if we have enough nodes, add an extra placement attempt for backup */ - uint32 placementAttemptCount = (uint32) replicationFactor; - if (workerNodeCount > replicationFactor) - { - placementAttemptCount++; - } - /* set shard storage type according to relation type */ char shardStorageType = ShardStorageType(distributedTableId); From 0dc41ee5a07a6fad9e35fc7af9f279d627e159b2 Mon Sep 17 00:00:00 2001 From: Naisila Puka <37271756+naisila@users.noreply.github.com> Date: Wed, 8 Nov 2023 15:15:33 +0300 Subject: [PATCH 22/22] Fix flaky multi_mx_insert_select_repartition test (#7331) https://github.com/citusdata/citus/actions/runs/6745019678/attempts/1#summary-18336188930 ```diff insert into target_table SELECT a*2 FROM source_table RETURNING a; -NOTICE: executing the command locally: SELECT bytes FROM fetch_intermediate_results(ARRAY['repartitioned_results_xxxxx_from_4213582_to_0','repartitioned_results_xxxxx_from_4213584_to_0']::text[],'localhost',57638) bytes +NOTICE: executing the command locally: SELECT bytes FROM fetch_intermediate_results(ARRAY['repartitioned_results_3940758121873413_from_4213584_to_0','repartitioned_results_3940758121873413_from_4213582_to_0']::text[],'localhost',57638) bytes ``` The elements in the array passed to `fetch_intermediate_results` are the same, but in the opposite order than expected. To fix this flakiness, we can omit the `"SELECT bytes FROM fetch_intermediate_results..."` line. From the following logs, it is understandable that the intermediate results have been fetched. --- .../regress/expected/multi_mx_insert_select_repartition.out | 3 ++- .../regress/expected/multi_mx_insert_select_repartition_0.out | 3 ++- src/test/regress/sql/multi_mx_insert_select_repartition.sql | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/test/regress/expected/multi_mx_insert_select_repartition.out b/src/test/regress/expected/multi_mx_insert_select_repartition.out index 62f197c30..a3912ec8e 100644 --- a/src/test/regress/expected/multi_mx_insert_select_repartition.out +++ b/src/test/regress/expected/multi_mx_insert_select_repartition.out @@ -103,10 +103,11 @@ NOTICE: executing the command locally: SELECT count(*) AS count FROM multi_mx_i 4 (1 row) + -- we omit the "SELECT bytes FROM fetch_intermediate_results..." 
line since it is flaky + SET LOCAL citus.grep_remote_commands TO '%multi_mx_insert_select_repartition%'; insert into target_table SELECT a*2 FROM source_table RETURNING a; NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_4213581_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_4213581_to','SELECT (a OPERATOR(pg_catalog.*) 2) AS a FROM multi_mx_insert_select_repartition.source_table_4213581 source_table WHERE true',0,'hash','{-2147483648,-715827883,715827882}'::text[],'{-715827884,715827881,2147483647}'::text[],true) WHERE rows_written > 0 NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_4213583_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_4213583_to','SELECT (a OPERATOR(pg_catalog.*) 2) AS a FROM multi_mx_insert_select_repartition.source_table_4213583 source_table WHERE true',0,'hash','{-2147483648,-715827883,715827882}'::text[],'{-715827884,715827881,2147483647}'::text[],true) WHERE rows_written > 0 -NOTICE: executing the command locally: SELECT bytes FROM fetch_intermediate_results(ARRAY['repartitioned_results_xxxxx_from_4213582_to_0','repartitioned_results_xxxxx_from_4213584_to_0']::text[],'localhost',57638) bytes NOTICE: executing the command locally: INSERT INTO multi_mx_insert_select_repartition.target_table_4213585 AS citus_table_alias (a) SELECT intermediate_result.a FROM read_intermediate_results('{repartitioned_results_xxxxx_from_4213581_to_0,repartitioned_results_xxxxx_from_4213582_to_0,repartitioned_results_xxxxx_from_4213584_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(a integer) RETURNING citus_table_alias.a NOTICE: executing the command locally: INSERT INTO multi_mx_insert_select_repartition.target_table_4213587 AS citus_table_alias (a) SELECT intermediate_result.a FROM read_intermediate_results('{repartitioned_results_xxxxx_from_4213581_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(a integer) RETURNING citus_table_alias.a a diff --git a/src/test/regress/expected/multi_mx_insert_select_repartition_0.out b/src/test/regress/expected/multi_mx_insert_select_repartition_0.out index 15deba0c0..62271f9a7 100644 --- a/src/test/regress/expected/multi_mx_insert_select_repartition_0.out +++ b/src/test/regress/expected/multi_mx_insert_select_repartition_0.out @@ -103,10 +103,11 @@ NOTICE: executing the command locally: SELECT count(*) AS count FROM multi_mx_i 4 (1 row) + -- we omit the "SELECT bytes FROM fetch_intermediate_results..." 
line since it is flaky + SET LOCAL citus.grep_remote_commands TO '%multi_mx_insert_select_repartition%'; insert into target_table SELECT a*2 FROM source_table RETURNING a; NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_4213581_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_4213581_to','SELECT (a OPERATOR(pg_catalog.*) 2) AS a FROM multi_mx_insert_select_repartition.source_table_4213581 source_table WHERE true',0,'hash','{-2147483648,-715827883,715827882}'::text[],'{-715827884,715827881,2147483647}'::text[],true) WHERE rows_written > 0 NOTICE: executing the command locally: SELECT partition_index, 'repartitioned_results_xxxxx_from_4213583_to' || '_' || partition_index::text , rows_written FROM worker_partition_query_result('repartitioned_results_xxxxx_from_4213583_to','SELECT (a OPERATOR(pg_catalog.*) 2) AS a FROM multi_mx_insert_select_repartition.source_table_4213583 source_table WHERE true',0,'hash','{-2147483648,-715827883,715827882}'::text[],'{-715827884,715827881,2147483647}'::text[],true) WHERE rows_written > 0 -NOTICE: executing the command locally: SELECT bytes FROM fetch_intermediate_results(ARRAY['repartitioned_results_xxxxx_from_4213582_to_0','repartitioned_results_xxxxx_from_4213584_to_0']::text[],'localhost',57638) bytes NOTICE: executing the command locally: INSERT INTO multi_mx_insert_select_repartition.target_table_4213585 AS citus_table_alias (a) SELECT a FROM read_intermediate_results('{repartitioned_results_xxxxx_from_4213581_to_0,repartitioned_results_xxxxx_from_4213582_to_0,repartitioned_results_xxxxx_from_4213584_to_0}'::text[], 'binary'::citus_copy_format) intermediate_result(a integer) RETURNING citus_table_alias.a NOTICE: executing the command locally: INSERT INTO multi_mx_insert_select_repartition.target_table_4213587 AS citus_table_alias (a) SELECT a FROM read_intermediate_results('{repartitioned_results_xxxxx_from_4213581_to_2}'::text[], 'binary'::citus_copy_format) intermediate_result(a integer) RETURNING citus_table_alias.a a diff --git a/src/test/regress/sql/multi_mx_insert_select_repartition.sql b/src/test/regress/sql/multi_mx_insert_select_repartition.sql index 4a9c8c96f..b206c6e4e 100644 --- a/src/test/regress/sql/multi_mx_insert_select_repartition.sql +++ b/src/test/regress/sql/multi_mx_insert_select_repartition.sql @@ -55,6 +55,8 @@ SET citus.log_local_commands to on; -- INSERT .. SELECT via repartitioning with local execution BEGIN; select count(*) from source_table WHERE a = 1; + -- we omit the "SELECT bytes FROM fetch_intermediate_results..." line since it is flaky + SET LOCAL citus.grep_remote_commands TO '%multi_mx_insert_select_repartition%'; insert into target_table SELECT a*2 FROM source_table RETURNING a; ROLLBACK;