From f25de6a0e3e3bda8186506910205ee77e88c5f24 Mon Sep 17 00:00:00 2001 From: Marco Slot Date: Mon, 1 Mar 2021 15:45:16 +0100 Subject: [PATCH] Try to return earlier in idempotent master_add_node --- .../distributed/metadata/node_metadata.c | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/backend/distributed/metadata/node_metadata.c b/src/backend/distributed/metadata/node_metadata.c index c1f522ed1..3853cbe16 100644 --- a/src/backend/distributed/metadata/node_metadata.c +++ b/src/backend/distributed/metadata/node_metadata.c @@ -1384,16 +1384,34 @@ AddNodeMetadata(char *nodeName, int32 nodePort, *nodeAlreadyExists = false; /* - * Take an exclusive lock on pg_dist_node to serialize node changes. + * Prevent / wait for concurrent modification before checking whether + * the worker already exists in pg_dist_node. + */ + LockRelationOid(DistNodeRelationId(), RowShareLock); + + WorkerNode *workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort); + if (workerNode != NULL) + { + /* return early without holding locks when the node already exists */ + *nodeAlreadyExists = true; + + return workerNode->nodeId; + } + + /* + * We are going to change pg_dist_node, prevent any concurrent reads that + * are not tolerant to concurrent node addition by taking an exclusive + * lock (conflicts with all but AccessShareLock). + * * We may want to relax or have more fine-grained locking in the future * to allow users to add multiple nodes concurrently. */ LockRelationOid(DistNodeRelationId(), ExclusiveLock); - WorkerNode *workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort); + /* recheck in case 2 node additions pass the first check concurrently */ + workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort); if (workerNode != NULL) { - /* fill return data and return */ *nodeAlreadyExists = true; return workerNode->nodeId;