Very-WIP: Better error messages.

pull/1447/head
Andres Freund 2017-07-12 11:23:03 -07:00
parent d6ac345c23
commit edb8f4e2c5
6 changed files with 34 additions and 3 deletions

View File

@ -22,6 +22,7 @@
#include "distributed/citus_nodefuncs.h"
#include "distributed/connection_management.h"
#include "distributed/connection_management.h"
#include "distributed/deadlock.h"
#include "distributed/maintenanced.h"
#include "distributed/master_metadata_utility.h"
#include "distributed/master_protocol.h"
@ -166,6 +167,8 @@ _PG_init(void)
set_rel_pathlist_hook = multi_relation_restriction_hook;
set_join_pathlist_hook = multi_join_restriction_hook;
emit_log_hook = DeadlockLogHook;
InitializeMaintenanceDaemon();
/* organize that task tracker is started once server is up */

View File

@ -43,6 +43,7 @@
#include "access/hash.h"
#include "distributed/connection_management.h"
#include "distributed/deadlock.h"
#include "distributed/hash_helpers.h"
#include "distributed/metadata_cache.h"
#include "distributed/remote_commands.h"
@ -56,6 +57,7 @@ typedef struct LockDepNode
TmgmtTransactionId transactionId;
List *deps;
int initial_pid;
bool *deadlocked;
bool visited;
} LockDepNode;
@ -188,6 +190,7 @@ this_machine_kills_deadlocks(PG_FUNCTION_ARGS)
initialNode = LookupDepNode(lockDepNodeHash, &CurBackendData->transactionId);
initialNode->initial_pid = curProc->pid;
initialNode->deadlocked = &CurBackendData->deadlockKilled;
}
LWLockRelease(&TmgmtShmemControl->lock);
@ -265,8 +268,7 @@ this_machine_kills_deadlocks(PG_FUNCTION_ARGS)
if (visitNode == curNode)
{
elog(WARNING, "found deadlock, killing: %d", curNode->initial_pid);
kill(curNode->initial_pid, SIGINT);
pg_usleep(100000);
*curNode->deadlocked = true;
kill(curNode->initial_pid, SIGINT);
PG_RETURN_BOOL(true);
}
@ -924,3 +926,21 @@ LookupDepNode(HTAB *lockDepNodeHash, TmgmtTransactionId *transactionId)
return node;
}
/*
 * DeadlockLogHook is an emit_log_hook callback. When the report about to
 * be emitted is a query-cancel ERROR and this backend has been flagged as
 * killed by the distributed deadlock detector (deadlockKilled), the report
 * is rewritten in place into a "deadlock detected" error.
 *
 * edata: the error report being emitted; modified in place when rewritten.
 *
 * NOTE(review): message/detail are pointed at string literals rather than
 * palloc'd copies - confirm that no consumer of this ErrorData tries to
 * pfree() them.
 */
void
DeadlockLogHook(ErrorData *edata)
{
	/* the detector cancels its victim via SIGINT, so only cancel errors matter */
	if (edata->elevel != ERROR ||
		edata->sqlerrcode != ERRCODE_QUERY_CANCELED)
	{
		return;
	}

	/*
	 * The hook is installed globally, so it can fire in a process whose
	 * backend data has not been set up; there is nothing to rewrite then,
	 * and dereferencing the pointer while reporting an error would be fatal.
	 */
	if (!MyTmgmtBackendData)
	{
		return;
	}

	if (MyTmgmtBackendData->deadlockKilled)
	{
		edata->sqlerrcode = ERRCODE_T_R_DEADLOCK_DETECTED;
		edata->message = "deadlock detected";
		edata->detail = "Check server log for detail.";
	}
}

View File

@ -79,6 +79,7 @@ assign_distributed_transaction_id(PG_FUNCTION_ARGS)
MyTmgmtBackendData->transactionId.nodeId = PG_GETARG_INT64(0);
MyTmgmtBackendData->transactionId.transactionId = PG_GETARG_INT64(1);
MyTmgmtBackendData->transactionId.timestamp = PG_GETARG_TIMESTAMPTZ(2);
MyTmgmtBackendData->deadlockKilled = false;
PG_RETURN_VOID();
}
@ -93,6 +94,7 @@ UnsetDistributedTransactionId(void)
MyTmgmtBackendData->transactionId.nodeId = 0;
MyTmgmtBackendData->transactionId.transactionId = 0;
MyTmgmtBackendData->transactionId.timestamp = 0;
MyTmgmtBackendData->deadlockKilled = false;
}
}

View File

@ -249,7 +249,7 @@ CitusMaintenanceDaemonMain(Datum main_arg)
{
int rc;
int latchFlags = WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH;
int timeout = 10000; /* wake up at least every so often */
int timeout = 1000; /* wake up at least every so often */
CHECK_FOR_INTERRUPTS();

View File

@ -10,6 +10,11 @@
#ifndef DEADLOCK_H
#define DEADLOCK_H
#include "fmgr.h"
#include "utils/elog.h"
/*
 * Deadlock detector entry point using the fmgr calling convention
 * (PG_FUNCTION_ARGS in, Datum out); it returns a boolean Datum that is
 * true when a deadlock cycle was found and the offending backend signalled.
 */
extern Datum this_machine_kills_deadlocks(PG_FUNCTION_ARGS);
/*
 * Callback installed as emit_log_hook in _PG_init; rewrites a query-cancel
 * ERROR into a "deadlock detected" error when the current backend was
 * flagged as killed by the deadlock detector.
 */
extern void DeadlockLogHook(ErrorData *edata);
#endif /* DEADLOCK_H */

View File

@ -68,6 +68,7 @@ typedef struct TmgmtBackendData
{
Oid databaseId;
TmgmtTransactionId transactionId;
bool deadlockKilled;
} TmgmtBackendData;