mirror of https://github.com/citusdata/citus.git
Merge pull request #1595 from citusdata/improve_deadlock_detection
Improve deadlock detectionpull/1598/head
commit
96391bea15
|
@ -101,12 +101,29 @@ check_distributed_deadlocks(PG_FUNCTION_ARGS)
|
||||||
bool
|
bool
|
||||||
CheckForDistributedDeadlocks(void)
|
CheckForDistributedDeadlocks(void)
|
||||||
{
|
{
|
||||||
WaitGraph *waitGraph = BuildGlobalWaitGraph();
|
WaitGraph *waitGraph = NULL;
|
||||||
HTAB *adjacencyLists = BuildAdjacencyListsForWaitGraph(waitGraph);
|
HTAB *adjacencyLists = NULL;
|
||||||
HASH_SEQ_STATUS status;
|
HASH_SEQ_STATUS status;
|
||||||
TransactionNode *transactionNode = NULL;
|
TransactionNode *transactionNode = NULL;
|
||||||
int edgeCount = waitGraph->edgeCount;
|
int edgeCount = 0;
|
||||||
int localGroupId = GetLocalGroupId();
|
int localGroupId = GetLocalGroupId();
|
||||||
|
List *workerNodeList = ActiveReadableNodeList();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We don't need to do any distributed deadlock checking if there
|
||||||
|
* are no worker nodes. This might even be problematic for a non-mx
|
||||||
|
* worker node which has the same group id with its master (i.e., 0),
|
||||||
|
* which may erroneously decide to kill the deadlocks happening on it.
|
||||||
|
*/
|
||||||
|
if (list_length(workerNodeList) == 0)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
waitGraph = BuildGlobalWaitGraph();
|
||||||
|
adjacencyLists = BuildAdjacencyListsForWaitGraph(waitGraph);
|
||||||
|
|
||||||
|
edgeCount = waitGraph->edgeCount;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We iterate on transaction nodes and search for deadlocks where the
|
* We iterate on transaction nodes and search for deadlocks where the
|
||||||
|
@ -135,8 +152,12 @@ CheckForDistributedDeadlocks(void)
|
||||||
TransactionNode *youngestTransaction = transactionNode;
|
TransactionNode *youngestTransaction = transactionNode;
|
||||||
ListCell *participantTransactionCell = NULL;
|
ListCell *participantTransactionCell = NULL;
|
||||||
|
|
||||||
/* there should be at least two transactions to get into a deadlock */
|
/*
|
||||||
Assert(list_length(deadlockPath) > 1);
|
* There should generally be at least two transactions to get into a
|
||||||
|
* deadlock. However, in case Citus gets into a self-deadlock, we may
|
||||||
|
* find a deadlock with a single transaction.
|
||||||
|
*/
|
||||||
|
Assert(list_length(deadlockPath) >= 1);
|
||||||
|
|
||||||
LogDistributedDeadlockDebugMessage("Distributed deadlock found among the "
|
LogDistributedDeadlockDebugMessage("Distributed deadlock found among the "
|
||||||
"following distributed transactions:");
|
"following distributed transactions:");
|
||||||
|
|
Loading…
Reference in New Issue