mirror of https://github.com/citusdata/citus.git
Prevent maintenance daemon crashes due to dead processes
If, after distributed deadlock detection decides to cancel a backend, that backend has already been terminated/killed/cancelled externally, we might dereference a NULL pointer. This commit prevents that case by ignoring the current distributed deadlock. (pull/1612/head)
parent
46f81d5531
commit
c7bb29b69e
|
@ -50,7 +50,7 @@ static void BuildDeadlockPathList(QueuedTransactionNode *cycledTransactionNode,
|
||||||
TransactionNode **transactionNodeStack,
|
TransactionNode **transactionNodeStack,
|
||||||
List **deadlockPath);
|
List **deadlockPath);
|
||||||
static void ResetVisitedFields(HTAB *adjacencyList);
|
static void ResetVisitedFields(HTAB *adjacencyList);
|
||||||
static void AssociateDistributedTransactionWithBackendProc(TransactionNode *
|
static bool AssociateDistributedTransactionWithBackendProc(TransactionNode *
|
||||||
transactionNode);
|
transactionNode);
|
||||||
static TransactionNode * GetOrCreateTransactionNode(HTAB *adjacencyList,
|
static TransactionNode * GetOrCreateTransactionNode(HTAB *adjacencyList,
|
||||||
DistributedTransactionId *
|
DistributedTransactionId *
|
||||||
|
@ -149,7 +149,7 @@ CheckForDistributedDeadlocks(void)
|
||||||
&deadlockPath);
|
&deadlockPath);
|
||||||
if (deadlockFound)
|
if (deadlockFound)
|
||||||
{
|
{
|
||||||
TransactionNode *youngestTransaction = transactionNode;
|
TransactionNode *youngestAliveTransaction = NULL;
|
||||||
ListCell *participantTransactionCell = NULL;
|
ListCell *participantTransactionCell = NULL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -175,34 +175,45 @@ CheckForDistributedDeadlocks(void)
|
||||||
{
|
{
|
||||||
TransactionNode *currentNode =
|
TransactionNode *currentNode =
|
||||||
(TransactionNode *) lfirst(participantTransactionCell);
|
(TransactionNode *) lfirst(participantTransactionCell);
|
||||||
|
bool transactionAssociatedWithProc =
|
||||||
TimestampTz youngestTimestamp =
|
|
||||||
youngestTransaction->transactionId.timestamp;
|
|
||||||
TimestampTz currentTimestamp = currentNode->transactionId.timestamp;
|
|
||||||
|
|
||||||
AssociateDistributedTransactionWithBackendProc(currentNode);
|
AssociateDistributedTransactionWithBackendProc(currentNode);
|
||||||
|
TimestampTz youngestTimestamp = 0;
|
||||||
|
TimestampTz currentTimestamp = 0;
|
||||||
|
|
||||||
LogTransactionNode(currentNode);
|
LogTransactionNode(currentNode);
|
||||||
|
|
||||||
if (currentNode->transactionId.initiatorNodeIdentifier ==
|
/* we couldn't find the backend process originated the transaction */
|
||||||
GetLocalGroupId() &&
|
if (!transactionAssociatedWithProc)
|
||||||
timestamptz_cmp_internal(currentTimestamp, youngestTimestamp) == 1)
|
|
||||||
{
|
{
|
||||||
youngestTransaction = currentNode;
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (youngestAliveTransaction == NULL)
|
||||||
|
{
|
||||||
|
youngestAliveTransaction = currentNode;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
youngestTimestamp = youngestAliveTransaction->transactionId.timestamp;
|
||||||
|
currentTimestamp = currentNode->transactionId.timestamp;
|
||||||
|
if (timestamptz_cmp_internal(currentTimestamp, youngestTimestamp) == 1)
|
||||||
|
{
|
||||||
|
youngestAliveTransaction = currentNode;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* we should find the backend */
|
/* we found the deadlock and its associated proc exists */
|
||||||
Assert(youngestTransaction->initiatorProc != NULL);
|
if (youngestAliveTransaction)
|
||||||
|
{
|
||||||
CancelTransactionDueToDeadlock(youngestTransaction->initiatorProc);
|
CancelTransactionDueToDeadlock(youngestAliveTransaction->initiatorProc);
|
||||||
LogCancellingBackend(youngestTransaction);
|
LogCancellingBackend(youngestAliveTransaction);
|
||||||
|
|
||||||
hash_seq_term(&status);
|
hash_seq_term(&status);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -349,8 +360,11 @@ ResetVisitedFields(HTAB *adjacencyList)
|
||||||
*
|
*
|
||||||
* The function goes over all the backends, checks for the backend with
|
* The function goes over all the backends, checks for the backend with
|
||||||
* the same transaction number as the given transaction node.
|
* the same transaction number as the given transaction node.
|
||||||
|
*
|
||||||
|
* If the transaction cannot be associated with a backend process, the function
|
||||||
|
* returns false. Otherwise, the function returns true.
|
||||||
*/
|
*/
|
||||||
static void
|
static bool
|
||||||
AssociateDistributedTransactionWithBackendProc(TransactionNode *transactionNode)
|
AssociateDistributedTransactionWithBackendProc(TransactionNode *transactionNode)
|
||||||
{
|
{
|
||||||
int backendIndex = 0;
|
int backendIndex = 0;
|
||||||
|
@ -394,8 +408,10 @@ AssociateDistributedTransactionWithBackendProc(TransactionNode *transactionNode)
|
||||||
|
|
||||||
transactionNode->initiatorProc = currentProc;
|
transactionNode->initiatorProc = currentProc;
|
||||||
|
|
||||||
break;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue