From 550a5578d8655404ee425a8cc6ad08b3cbe9ca56 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Thu, 17 Aug 2017 19:33:39 +0300 Subject: [PATCH] Skip deadlock detection on the workers Do not run distributed deadlock detection on the worker nodes to prevent erroneous decisions to kill the deadlocks. --- .../distributed_deadlock_detection.c | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/backend/distributed/transaction/distributed_deadlock_detection.c b/src/backend/distributed/transaction/distributed_deadlock_detection.c index 176a2c60b..6d8b108ed 100644 --- a/src/backend/distributed/transaction/distributed_deadlock_detection.c +++ b/src/backend/distributed/transaction/distributed_deadlock_detection.c @@ -101,12 +101,29 @@ check_distributed_deadlocks(PG_FUNCTION_ARGS) bool CheckForDistributedDeadlocks(void) { - WaitGraph *waitGraph = BuildGlobalWaitGraph(); - HTAB *adjacencyLists = BuildAdjacencyListsForWaitGraph(waitGraph); + WaitGraph *waitGraph = NULL; + HTAB *adjacencyLists = NULL; HASH_SEQ_STATUS status; TransactionNode *transactionNode = NULL; - int edgeCount = waitGraph->edgeCount; + int edgeCount = 0; int localGroupId = GetLocalGroupId(); + List *workerNodeList = ActiveReadableNodeList(); + + /* + * We don't need to do any distributed deadlock checking if there + * are no worker nodes. This might even be problematic for a non-mx + * worker node which has the same group id with its master (i.e., 0), + * which may erroneously decide to kill the deadlocks happening on it. + */ + if (list_length(workerNodeList) == 0) + { + return false; + } + + waitGraph = BuildGlobalWaitGraph(); + adjacencyLists = BuildAdjacencyListsForWaitGraph(waitGraph); + + edgeCount = waitGraph->edgeCount; /* * We iterate on transaction nodes and search for deadlocks where the