From 20ae42e7fa4bf8b29a028fa80b8905a9496f56dd Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 1 Nov 2023 11:12:06 +0100 Subject: [PATCH 1/2] Fix flaky multi_reference_table test (#7294) Sometimes multi_reference_table failed in CI like this: ```diff \c - - - :master_port DROP INDEX reference_schema.reference_index_2; \c - - - :worker_1_port SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='reference_schema.reference_table_ddl_1250019'::regclass; - Column | Type | Modifiers ---------------------------------------------------------------------- - value_2 | double precision | default 25.0 - value_3 | text | not null - value_4 | timestamp without time zone | - value_5 | double precision | -(4 rows) - +ERROR: schema "citus_local_table_queries" does not exist \di reference_schema.reference_index_2* List of relations Schema | Name | Type | Owner | Table ``` Source: https://github.com/citusdata/citus/actions/runs/6707535961/attempts/2#summary-18226879513 Reading from table_desc apparantly has an issue that if the schema gets deleted from one of the items, while it is being read that we get such an error. This change fixes that by not running multi_reference_table in parallel with citus_local_tables_queries anymore. --- src/test/regress/multi_1_schedule | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/regress/multi_1_schedule b/src/test/regress/multi_1_schedule index e824f5cba..4e2b19795 100644 --- a/src/test/regress/multi_1_schedule +++ b/src/test/regress/multi_1_schedule @@ -298,7 +298,8 @@ test: replicate_reference_tables_to_coordinator test: citus_local_tables test: mixed_relkind_tests test: multi_row_router_insert create_distributed_table_concurrently -test: multi_reference_table citus_local_tables_queries +test: multi_reference_table +test: citus_local_tables_queries test: citus_local_table_triggers test: coordinator_shouldhaveshards test: local_shard_utility_command_execution From 0d83ab57de8c266764fc47810afd115758da5034 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 1 Nov 2023 11:46:01 +0100 Subject: [PATCH 2/2] Fix flaky multi_cluster_management (#7295) One of our most flaky and most anoying tests is multi_cluster_management. It usually fails like this: ```diff SELECT citus_disable_node('localhost', :worker_2_port); citus_disable_node -------------------- (1 row) SELECT public.wait_until_metadata_sync(60000); +WARNING: waiting for metadata sync timed out wait_until_metadata_sync -------------------------- (1 row) ``` This tries to address that by hardening wait_until_metadata_sync. I believe the reason for this warning is that there is a race condition in wait_until_metadata_sync. It's possible for the pre-check to fail, then have the maintenance daemon send a notification. And only then have the backend start to listen. I tried to fix it in two ways: 1. First run LISTEN, and only then read do the pre-check. 2. If we time out, check again just to make sure that we did not miss the notification somehow. And don't show a warning if all metadata is synced after the timeout. It's hard to know for sure that this fixes it because the test is not repeatable and I could not reproduce it locally. Let's just hope for the best. --------- Co-authored-by: Onur Tirtir --- src/backend/distributed/test/metadata_sync.c | 48 +++++++++++-------- .../expected/multi_cluster_management.out | 4 +- .../regress/sql/multi_cluster_management.sql | 4 +- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/src/backend/distributed/test/metadata_sync.c b/src/backend/distributed/test/metadata_sync.c index 46d2303d6..8ad4b15f2 100644 --- a/src/backend/distributed/test/metadata_sync.c +++ b/src/backend/distributed/test/metadata_sync.c @@ -90,6 +90,28 @@ activate_node_snapshot(PG_FUNCTION_ARGS) } +/* + * IsMetadataSynced checks the workers to see if all workers with metadata are + * synced. + */ +static bool +IsMetadataSynced(void) +{ + List *workerList = ActivePrimaryNonCoordinatorNodeList(NoLock); + + WorkerNode *workerNode = NULL; + foreach_ptr(workerNode, workerList) + { + if (workerNode->hasMetadata && !workerNode->metadataSynced) + { + return false; + } + } + + return true; +} + + /* * wait_until_metadata_sync waits until the maintenance daemon does a metadata * sync, or times out. @@ -99,19 +121,10 @@ wait_until_metadata_sync(PG_FUNCTION_ARGS) { uint32 timeout = PG_GETARG_UINT32(0); - List *workerList = ActivePrimaryNonCoordinatorNodeList(NoLock); - bool waitNotifications = false; - - WorkerNode *workerNode = NULL; - foreach_ptr(workerNode, workerList) - { - /* if already has metadata, no need to do it again */ - if (workerNode->hasMetadata && !workerNode->metadataSynced) - { - waitNotifications = true; - break; - } - } + /* First we start listening. */ + MultiConnection *connection = GetNodeConnection(FORCE_NEW_CONNECTION, + LOCAL_HOST_NAME, PostPortNumber); + ExecuteCriticalRemoteCommand(connection, "LISTEN " METADATA_SYNC_CHANNEL); /* * If all the metadata nodes have already been synced, we should not wait. @@ -119,15 +132,12 @@ wait_until_metadata_sync(PG_FUNCTION_ARGS) * the notification and we'd wait unnecessarily here. Worse, the test outputs * might be inconsistent across executions due to the warning. */ - if (!waitNotifications) + if (IsMetadataSynced()) { + CloseConnection(connection); PG_RETURN_VOID(); } - MultiConnection *connection = GetNodeConnection(FORCE_NEW_CONNECTION, - LOCAL_HOST_NAME, PostPortNumber); - ExecuteCriticalRemoteCommand(connection, "LISTEN " METADATA_SYNC_CHANNEL); - int waitFlags = WL_SOCKET_READABLE | WL_TIMEOUT | WL_POSTMASTER_DEATH; int waitResult = WaitLatchOrSocket(NULL, waitFlags, PQsocket(connection->pgConn), timeout, 0); @@ -139,7 +149,7 @@ wait_until_metadata_sync(PG_FUNCTION_ARGS) { ClearResults(connection, true); } - else if (waitResult & WL_TIMEOUT) + else if (waitResult & WL_TIMEOUT && !IsMetadataSynced()) { elog(WARNING, "waiting for metadata sync timed out"); } diff --git a/src/test/regress/expected/multi_cluster_management.out b/src/test/regress/expected/multi_cluster_management.out index b92d8d136..3eb549ab5 100644 --- a/src/test/regress/expected/multi_cluster_management.out +++ b/src/test/regress/expected/multi_cluster_management.out @@ -90,7 +90,7 @@ SELECT citus_disable_node('localhost', :worker_2_port); (1 row) -SELECT public.wait_until_metadata_sync(60000); +SELECT public.wait_until_metadata_sync(20000); wait_until_metadata_sync --------------------------------------------------------------------- @@ -812,7 +812,7 @@ SELECT citus_disable_node('localhost', 9999); (1 row) -SELECT public.wait_until_metadata_sync(60000); +SELECT public.wait_until_metadata_sync(20000); wait_until_metadata_sync --------------------------------------------------------------------- diff --git a/src/test/regress/sql/multi_cluster_management.sql b/src/test/regress/sql/multi_cluster_management.sql index ab268939f..86fbd15b6 100644 --- a/src/test/regress/sql/multi_cluster_management.sql +++ b/src/test/regress/sql/multi_cluster_management.sql @@ -39,7 +39,7 @@ SELECT master_get_active_worker_nodes(); SELECT 1 FROM master_add_node('localhost', :worker_2_port); SELECT citus_disable_node('localhost', :worker_2_port); -SELECT public.wait_until_metadata_sync(60000); +SELECT public.wait_until_metadata_sync(20000); SELECT master_get_active_worker_nodes(); -- add some shard placements to the cluster @@ -328,7 +328,7 @@ SELECT 1 FROM master_add_inactive_node('localhost', 9996, groupid => :worker_2_g SELECT master_add_inactive_node('localhost', 9999, groupid => :worker_2_group, nodecluster => 'olap', noderole => 'secondary'); SELECT master_activate_node('localhost', 9999); SELECT citus_disable_node('localhost', 9999); -SELECT public.wait_until_metadata_sync(60000); +SELECT public.wait_until_metadata_sync(20000); SELECT master_remove_node('localhost', 9999); -- check that you can't manually add two primaries to a group