From b4931f7345acc3d55440c5af93ebc3e01661bbd3 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Tue, 16 Nov 2021 17:24:48 +0100 Subject: [PATCH] Do not acquire locks on reference tables when a node is removed/disabled Before this commit, we acquire the metadata locks on the reference tables while removing/disabling a node on all the MX nodes. Although it has some marginal benefits, such as a concurrent modification during remove/disable node blocks, instead of erroring out, the drawbacks seems worse. Both citus_remove_node and citus_disable_node are not tolerant to multiple node failures. With this commit, we relax the locks. The implication is that while a node is removed/disabled, users might see query errors. On the other hand, this change becomes removing/disabling nodes more tolerant to multiple node failures. --- .../distributed/utils/reference_table_utils.c | 13 ------------- .../regress/expected/multi_mx_node_metadata.out | 8 ++++++-- src/test/regress/sql/multi_mx_node_metadata.sql | 3 +-- 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/src/backend/distributed/utils/reference_table_utils.c b/src/backend/distributed/utils/reference_table_utils.c index 731c05484..ab890bd78 100644 --- a/src/backend/distributed/utils/reference_table_utils.c +++ b/src/backend/distributed/utils/reference_table_utils.c @@ -434,7 +434,6 @@ void DeleteAllReferenceTablePlacementsFromNodeGroup(int32 groupId) { List *referenceTableList = CitusTableTypeIdList(REFERENCE_TABLE); - List *referenceShardIntervalList = NIL; /* if there are no reference tables, we do not need to do anything */ if (list_length(referenceTableList) == 0) @@ -442,18 +441,6 @@ DeleteAllReferenceTablePlacementsFromNodeGroup(int32 groupId) return; } - /* - * We sort the reference table list to prevent deadlocks in concurrent - * DeleteAllReferenceTablePlacementsFromNodeGroup calls. - */ - referenceTableList = SortList(referenceTableList, CompareOids); - if (ClusterHasKnownMetadataWorkers()) - { - referenceShardIntervalList = GetSortedReferenceShardIntervals(referenceTableList); - - BlockWritesToShardList(referenceShardIntervalList); - } - StringInfo deletePlacementCommand = makeStringInfo(); Oid referenceTableId = InvalidOid; foreach_oid(referenceTableId, referenceTableList) diff --git a/src/test/regress/expected/multi_mx_node_metadata.out b/src/test/regress/expected/multi_mx_node_metadata.out index eea9cd5bb..4cc666784 100644 --- a/src/test/regress/expected/multi_mx_node_metadata.out +++ b/src/test/regress/expected/multi_mx_node_metadata.out @@ -682,9 +682,13 @@ SELECT wait_until_metadata_sync(30000); -- set metadatasynced so we try porpagating metadata changes UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid IN (:nodeid_1, :nodeid_2); --- should error out +-- should not error out, master_disable_node is tolerant for node failures SELECT 1 FROM master_disable_node('localhost', 1); -ERROR: Disabling localhost:xxxxx failed + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + -- try again after stopping metadata sync SELECT stop_metadata_sync_to_node('localhost', 1); NOTICE: dropping metadata on the node (localhost,1) diff --git a/src/test/regress/sql/multi_mx_node_metadata.sql b/src/test/regress/sql/multi_mx_node_metadata.sql index c5625a59e..bc701ff38 100644 --- a/src/test/regress/sql/multi_mx_node_metadata.sql +++ b/src/test/regress/sql/multi_mx_node_metadata.sql @@ -301,7 +301,7 @@ SELECT wait_until_metadata_sync(30000); -- set metadatasynced so we try porpagating metadata changes UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid IN (:nodeid_1, :nodeid_2); --- should error out +-- should not error out, master_disable_node is tolerant for node failures SELECT 1 FROM master_disable_node('localhost', 1); -- try again after stopping metadata sync @@ -316,7 +316,6 @@ SELECT wait_until_metadata_sync(30000); SELECT 1 FROM master_activate_node('localhost', :worker_2_port); SELECT verify_metadata('localhost', :worker_1_port); - ------------------------------------------------------------------------------------ -- Test master_disable_node() when the other node is down ------------------------------------------------------------------------------------