From 74dd5bb2810c809a9e457e5d712bfb799f126c09 Mon Sep 17 00:00:00 2001 From: Brian Cloutier Date: Thu, 20 Jul 2017 18:55:40 +0300 Subject: [PATCH] Fix crash when removing an inactive node --- .../distributed/utils/reference_table_utils.c | 20 ++++++----- .../multi_remove_node_reference_table.out | 33 +++++++++++++++---- .../sql/multi_remove_node_reference_table.sql | 7 ++++ 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/src/backend/distributed/utils/reference_table_utils.c b/src/backend/distributed/utils/reference_table_utils.c index 86d39118a..c87b7f31d 100644 --- a/src/backend/distributed/utils/reference_table_utils.c +++ b/src/backend/distributed/utils/reference_table_utils.c @@ -406,25 +406,29 @@ DeleteAllReferenceTablePlacementsFromNode(char *workerName, uint32 workerPort) referenceTableList = SortList(referenceTableList, CompareOids); foreach(referenceTableCell, referenceTableList) { + GroupShardPlacement *placement = NULL; + StringInfo deletePlacementCommand = makeStringInfo(); + uint32 workerGroup = GroupForNode(workerName, workerPort); Oid referenceTableId = lfirst_oid(referenceTableCell); List *placements = GroupShardPlacementsForTableOnGroup(referenceTableId, workerGroup); - GroupShardPlacement *placement = (GroupShardPlacement *) linitial(placements); + if (list_length(placements) == 0) + { + /* this happens if the node was previously disabled */ + continue; + } - uint64 shardId = placement->shardId; - uint64 placementId = placement->placementId; + placement = (GroupShardPlacement *) linitial(placements); - StringInfo deletePlacementCommand = makeStringInfo(); + LockShardDistributionMetadata(placement->shardId, ExclusiveLock); - LockShardDistributionMetadata(shardId, ExclusiveLock); - - DeleteShardPlacementRow(placementId); + DeleteShardPlacementRow(placement->placementId); appendStringInfo(deletePlacementCommand, "DELETE FROM pg_dist_placement WHERE placementid=%lu", - placementId); + placement->placementId); SendCommandToWorkers(WORKERS_WITH_METADATA, deletePlacementCommand->data); } } diff --git a/src/test/regress/expected/multi_remove_node_reference_table.out b/src/test/regress/expected/multi_remove_node_reference_table.out index 9896f7056..f2a1e7740 100644 --- a/src/test/regress/expected/multi_remove_node_reference_table.out +++ b/src/test/regress/expected/multi_remove_node_reference_table.out @@ -169,6 +169,27 @@ NOTICE: Replicating reference table "remove_node_reference_table" to the node l (1380001,1380001,localhost,57638,default,f,t) (1 row) +-- try to disable the node before removing it (this used to crash) +SELECT master_disable_node('localhost', :worker_2_port); + master_disable_node +--------------------- + +(1 row) + +SELECT master_remove_node('localhost', :worker_2_port); + master_remove_node +-------------------- + +(1 row) + +-- re-add the node for the next test +SELECT master_add_node('localhost', :worker_2_port); +NOTICE: Replicating reference table "remove_node_reference_table" to the node localhost:57638 + master_add_node +----------------------------------------------- + (1380002,1380002,localhost,57638,default,f,t) +(1 row) + -- remove node in a transaction and ROLLBACK -- status before master_remove_node SELECT COUNT(*) FROM pg_dist_node WHERE nodeport = :worker_2_port; @@ -389,7 +410,7 @@ SELECT master_add_node('localhost', :worker_2_port); NOTICE: Replicating reference table "remove_node_reference_table" to the node localhost:57638 master_add_node ----------------------------------------------- - (1380002,1380002,localhost,57638,default,f,t) + (1380003,1380003,localhost,57638,default,f,t) (1 row) -- test inserting a value then removing a node in a transaction @@ -518,7 +539,7 @@ SELECT master_add_node('localhost', :worker_2_port); NOTICE: Replicating reference table "remove_node_reference_table" to the node localhost:57638 master_add_node ----------------------------------------------- - (1380003,1380003,localhost,57638,default,f,t) + (1380004,1380004,localhost,57638,default,f,t) (1 row) -- test executing DDL command then removing a node in a transaction @@ -643,7 +664,7 @@ SELECT master_add_node('localhost', :worker_2_port); NOTICE: Replicating reference table "remove_node_reference_table" to the node localhost:57638 master_add_node ----------------------------------------------- - (1380004,1380004,localhost,57638,default,f,t) + (1380005,1380005,localhost,57638,default,f,t) (1 row) -- test DROP table after removing a node in a transaction @@ -711,7 +732,7 @@ SELECT * FROM pg_dist_colocation WHERE colocationid = 1380000; SELECT master_add_node('localhost', :worker_2_port); master_add_node ----------------------------------------------- - (1380005,1380005,localhost,57638,default,f,t) + (1380006,1380006,localhost,57638,default,f,t) (1 row) -- re-create remove_node_reference_table @@ -846,7 +867,7 @@ NOTICE: Replicating reference table "remove_node_reference_table" to the node l NOTICE: Replicating reference table "table1" to the node localhost:57638 master_add_node ----------------------------------------------- - (1380006,1380006,localhost,57638,default,f,t) + (1380007,1380007,localhost,57638,default,f,t) (1 row) -- test with master_disable_node @@ -963,7 +984,7 @@ NOTICE: Replicating reference table "remove_node_reference_table" to the node l NOTICE: Replicating reference table "table1" to the node localhost:57638 master_activate_node ----------------------------------------------- - (1380006,1380006,localhost,57638,default,f,t) + (1380007,1380007,localhost,57638,default,f,t) (1 row) -- DROP tables to clean workspace diff --git a/src/test/regress/sql/multi_remove_node_reference_table.sql b/src/test/regress/sql/multi_remove_node_reference_table.sql index 60f3584da..3f0eb9751 100644 --- a/src/test/regress/sql/multi_remove_node_reference_table.sql +++ b/src/test/regress/sql/multi_remove_node_reference_table.sql @@ -105,6 +105,13 @@ SELECT master_remove_node('localhost', :worker_2_port); -- re-add the node for next tests SELECT master_add_node('localhost', :worker_2_port); +-- try to disable the node before removing it (this used to crash) +SELECT master_disable_node('localhost', :worker_2_port); +SELECT master_remove_node('localhost', :worker_2_port); + +-- re-add the node for the next test +SELECT master_add_node('localhost', :worker_2_port); + -- remove node in a transaction and ROLLBACK -- status before master_remove_node