Do not acquire locks on reference tables when a node is removed/disabled

Before this commit, we acquired the metadata locks on the reference tables on all the MX nodes while removing/disabling a node. Although this has some marginal benefits, such as a concurrent modification blocking during remove/disable node instead of erroring out, the drawbacks seem worse. Neither citus_remove_node nor citus_disable_node is tolerant to multiple node failures. With this commit, we relax the locks. The implication is that while a node is being removed/disabled, users might see query errors. On the other hand, this change makes removing/disabling nodes more tolerant to multiple node failures.
2021-11-16 17:24:48 +01:00 · 2021-11-16 17:24:48 +01:00 · b4931f7345
parent 76b8006a9e
commit b4931f7345
3 changed files with 7 additions and 17 deletions
--- a/src/backend/distributed/utils/reference_table_utils.c
+++ b/src/backend/distributed/utils/reference_table_utils.c
@ -434,7 +434,6 @@ void
 DeleteAllReferenceTablePlacementsFromNodeGroup(int32 groupId)
 {
 	List *referenceTableList = CitusTableTypeIdList(REFERENCE_TABLE);
-	List *referenceShardIntervalList = NIL;

 	/* if there are no reference tables, we do not need to do anything */
 	if (list_length(referenceTableList) == 0)
@ -442,18 +441,6 @@ DeleteAllReferenceTablePlacementsFromNodeGroup(int32 groupId)
 		return;
 	}

-	/*
-	 * We sort the reference table list to prevent deadlocks in concurrent
-	 * DeleteAllReferenceTablePlacementsFromNodeGroup calls.
-	 */
-	referenceTableList = SortList(referenceTableList, CompareOids);
-	if (ClusterHasKnownMetadataWorkers())
-	{
-		referenceShardIntervalList = GetSortedReferenceShardIntervals(referenceTableList);
-
-		BlockWritesToShardList(referenceShardIntervalList);
-	}
-
 	StringInfo deletePlacementCommand = makeStringInfo();
 	Oid referenceTableId = InvalidOid;
 	foreach_oid(referenceTableId, referenceTableList)
--- a/src/test/regress/expected/multi_mx_node_metadata.out
+++ b/src/test/regress/expected/multi_mx_node_metadata.out
@ -682,9 +682,13 @@ SELECT wait_until_metadata_sync(30000);

 -- set metadatasynced so we try porpagating metadata changes
 UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid IN (:nodeid_1, :nodeid_2);
-- should error out
+-- should not error out, master_disable_node is tolerant for node failures
 SELECT 1 FROM master_disable_node('localhost', 1);
-ERROR:  Disabling localhost:xxxxx failed
+ ?column?
+---------------------------------------------------------------------
+        1
+(1 row)
+
 -- try again after stopping metadata sync
 SELECT stop_metadata_sync_to_node('localhost', 1);
 NOTICE:  dropping metadata on the node (localhost,1)
--- a/src/test/regress/sql/multi_mx_node_metadata.sql
+++ b/src/test/regress/sql/multi_mx_node_metadata.sql
@ -301,7 +301,7 @@ SELECT wait_until_metadata_sync(30000);
 -- set metadatasynced so we try porpagating metadata changes
 UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid IN (:nodeid_1, :nodeid_2);

-- should error out
+-- should not error out, master_disable_node is tolerant for node failures
 SELECT 1 FROM master_disable_node('localhost', 1);

 -- try again after stopping metadata sync
@ -316,7 +316,6 @@ SELECT wait_until_metadata_sync(30000);
 SELECT 1 FROM master_activate_node('localhost', :worker_2_port);
 SELECT verify_metadata('localhost', :worker_1_port);

-
 ------------------------------------------------------------------------------------
 -- Test master_disable_node() when the other node is down
 ------------------------------------------------------------------------------------