diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index e1ccc6cbb..bb56b8670 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -23,6 +23,7 @@ #include "distributed/citus_nodefuncs.h" #include "distributed/connection_management.h" #include "distributed/connection_management.h" +#include "distributed/distributed_deadlock_detection.h" #include "distributed/maintenanced.h" #include "distributed/master_metadata_utility.h" #include "distributed/master_protocol.h" @@ -361,6 +362,17 @@ RegisterCitusConfigVariables(void) 0, NULL, NULL, NULL); + DefineCustomBoolVariable( + "citus.log_distributed_deadlock_detection", + gettext_noop("Log distributed deadlock detection related processing in " + "the server log"), + NULL, + &LogDistributedDeadlockDetection, + false, + PGC_SIGHUP, + GUC_NO_SHOW_ALL, + NULL, NULL, NULL); + DefineCustomBoolVariable( "citus.explain_distributed_queries", gettext_noop("Enables Explain for distributed queries."), diff --git a/src/backend/distributed/test/distributed_deadlock_detection.c b/src/backend/distributed/test/distributed_deadlock_detection.c index dc3373dfc..6f10f55fc 100644 --- a/src/backend/distributed/test/distributed_deadlock_detection.c +++ b/src/backend/distributed/test/distributed_deadlock_detection.c @@ -26,9 +26,6 @@ #include "utils/timestamp.h" -static char * WaitsForToString(List *waitsFor); - - PG_FUNCTION_INFO_V1(get_adjacency_list_wait_graph); @@ -114,31 +111,3 @@ get_adjacency_list_wait_graph(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } - - -/* - * WaitsForToString is only intended for testing and debugging. It gets a - * waitsForList and returns the list of transaction nodes' transactionNumber - * in a string. - */ -static char * -WaitsForToString(List *waitsFor) -{ - StringInfo transactionIdStr = makeStringInfo(); - ListCell *waitsForCell = NULL; - - foreach(waitsForCell, waitsFor) - { - TransactionNode *waitingNode = (TransactionNode *) lfirst(waitsForCell); - - if (transactionIdStr->len != 0) - { - appendStringInfoString(transactionIdStr, ","); - } - - appendStringInfo(transactionIdStr, "%ld", - waitingNode->transactionId.transactionNumber); - } - - return transactionIdStr->data; -} diff --git a/src/backend/distributed/transaction/distributed_deadlock_detection.c b/src/backend/distributed/transaction/distributed_deadlock_detection.c index 2d61fd1b6..b231da2e0 100644 --- a/src/backend/distributed/transaction/distributed_deadlock_detection.c +++ b/src/backend/distributed/transaction/distributed_deadlock_detection.c @@ -10,7 +10,9 @@ */ #include "postgres.h" + #include "miscadmin.h" +#include "pgstat.h" #include "access/hash.h" #include "distributed/backend_data.h" @@ -55,7 +57,9 @@ static TransactionNode * GetOrCreateTransactionNode(HTAB *adjacencyList, transactionId); static uint32 DistributedTransactionIdHash(const void *key, Size keysize); static int DistributedTransactionIdCompare(const void *a, const void *b, Size keysize); - +static void LogCancellingBackend(TransactionNode *transactionNode); +static void LogTransactionNode(TransactionNode *transactionNode); +static void LogDistributedDeadlockDebugMessage(const char *errorMessage); PG_FUNCTION_INFO_V1(check_distributed_deadlocks); @@ -134,6 +138,9 @@ CheckForDistributedDeadlocks(void) /* there should be at least two transactions to get into a deadlock */ Assert(list_length(deadlockPath) > 1); + LogDistributedDeadlockDebugMessage("Distributed deadlock found among the " + "following distributed transactions:"); + /* * We search for the youngest participant for two reasons * (i) predictable results (ii) cancel the youngest transaction @@ -154,6 +161,8 @@ CheckForDistributedDeadlocks(void) AssociateDistributedTransactionWithBackendProc(currentNode); + LogTransactionNode(currentNode); + if (currentNode->transactionId.initiatorNodeIdentifier == GetLocalGroupId() && timestamptz_cmp_internal(currentTimestamp, youngestTimestamp) == 1) @@ -166,6 +175,7 @@ CheckForDistributedDeadlocks(void) Assert(youngestTransaction->initiatorProc != NULL); CancelTransactionDueToDeadlock(youngestTransaction->initiatorProc); + LogCancellingBackend(youngestTransaction); hash_seq_term(&status); @@ -522,3 +532,116 @@ DistributedTransactionIdCompare(const void *a, const void *b, Size keysize) return 0; } } + + +/* + * LogCancellingBackend should only be called when a distributed transaction's + * backend is cancelled due to distributed deadlocks. It sends which transaction + * is cancelled and its corresponding pid to the log. + */ +static void +LogCancellingBackend(TransactionNode *transactionNode) +{ + StringInfo logMessage = NULL; + + if (!LogDistributedDeadlockDetection) + { + return; + } + + logMessage = makeStringInfo(); + + appendStringInfo(logMessage, "Cancelling the following backend " + "to resolve distributed deadlock " + "(transaction numner = %ld, pid = %d)", + transactionNode->transactionId.transactionNumber, + transactionNode->initiatorProc->pid); + + LogDistributedDeadlockDebugMessage(logMessage->data); +} + + +/* + * LogTransactionNode converts the transaction node to a human readable form + * and sends to the logs via LogDistributedDeadlockDebugMessage(). + */ +static void +LogTransactionNode(TransactionNode *transactionNode) +{ + StringInfo logMessage = NULL; + DistributedTransactionId *transactionId = NULL; + + if (!LogDistributedDeadlockDetection) + { + return; + } + + logMessage = makeStringInfo(); + transactionId = &(transactionNode->transactionId); + + appendStringInfo(logMessage, "[DistributedTransactionId: (%d, %ld, %s)] = ", + transactionId->initiatorNodeIdentifier, + transactionId->transactionNumber, + timestamptz_to_str(transactionId->timestamp)); + + appendStringInfo(logMessage, "[WaitsFor transaction numbers: %s]", + WaitsForToString(transactionNode->waitsFor)); + + /* log the backend query if the proc is associated with the transaction */ + if (transactionNode->initiatorProc != NULL) + { + const char *backendQuery = + pgstat_get_backend_current_activity(transactionNode->initiatorProc->pid, + false); + + appendStringInfo(logMessage, "[Backend Query: %s]", backendQuery); + } + + LogDistributedDeadlockDebugMessage(logMessage->data); +} + + +/* + * LogDistributedDeadlockDebugMessage checks EnableDistributedDeadlockDebugging flag. If + * it is true, the input message is sent to the logs with LOG level. Also, current timestamp + * is prepanded to the message. + */ +static void +LogDistributedDeadlockDebugMessage(const char *errorMessage) +{ + if (!LogDistributedDeadlockDetection) + { + return; + } + + ereport(LOG, (errmsg("[%s] %s", timestamptz_to_str(GetCurrentTimestamp()), + errorMessage))); +} + + +/* + * WaitsForToString is only intended for testing and debugging. It gets a + * waitsForList and returns the list of transaction nodes' transactionNumber + * in a string. + */ +char * +WaitsForToString(List *waitsFor) +{ + StringInfo transactionIdStr = makeStringInfo(); + ListCell *waitsForCell = NULL; + + foreach(waitsForCell, waitsFor) + { + TransactionNode *waitingNode = (TransactionNode *) lfirst(waitsForCell); + + if (transactionIdStr->len != 0) + { + appendStringInfoString(transactionIdStr, ","); + } + + appendStringInfo(transactionIdStr, "%ld", + waitingNode->transactionId.transactionNumber); + } + + return transactionIdStr->data; +}