mirror of https://github.com/citusdata/citus.git
Merge pull request #3756 from citusdata/fix-maintenanced-error-restart
maintenanced: use before_shmem_exit to clear workerPid
pull/3786/head
commit
2e5b1bfa41
|
@ -108,6 +108,7 @@ static void MaintenanceDaemonSigTermHandler(SIGNAL_ARGS);
|
||||||
static void MaintenanceDaemonSigHupHandler(SIGNAL_ARGS);
|
static void MaintenanceDaemonSigHupHandler(SIGNAL_ARGS);
|
||||||
static size_t MaintenanceDaemonShmemSize(void);
|
static size_t MaintenanceDaemonShmemSize(void);
|
||||||
static void MaintenanceDaemonShmemInit(void);
|
static void MaintenanceDaemonShmemInit(void);
|
||||||
|
static void MaintenanceDaemonShmemExit(int code, Datum arg);
|
||||||
static void MaintenanceDaemonErrorContext(void *arg);
|
static void MaintenanceDaemonErrorContext(void *arg);
|
||||||
static bool LockCitusExtension(void);
|
static bool LockCitusExtension(void);
|
||||||
static bool MetadataSyncTriggeredCheckAndReset(MaintenanceDaemonDBData *dbData);
|
static bool MetadataSyncTriggeredCheckAndReset(MaintenanceDaemonDBData *dbData);
|
||||||
|
@ -258,18 +259,22 @@ CitusMaintenanceDaemonMain(Datum main_arg)
|
||||||
MaintenanceDaemonDBData *myDbData = (MaintenanceDaemonDBData *)
|
MaintenanceDaemonDBData *myDbData = (MaintenanceDaemonDBData *)
|
||||||
hash_search(MaintenanceDaemonDBHash, &databaseOid,
|
hash_search(MaintenanceDaemonDBHash, &databaseOid,
|
||||||
HASH_FIND, NULL);
|
HASH_FIND, NULL);
|
||||||
if (!myDbData || myDbData->workerPid != 0)
|
if (!myDbData)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* When the database crashes, background workers are restarted, but
|
* When the database crashes, background workers are restarted, but
|
||||||
* the state in shared memory is lost. In that case, we exit and
|
* the state in shared memory is lost. In that case, we exit and
|
||||||
* wait for a session to call InitializeMaintenanceDaemonBackend
|
* wait for a session to call InitializeMaintenanceDaemonBackend
|
||||||
* to properly add it to the hash.
|
* to properly add it to the hash.
|
||||||
* Alternatively, don't continue if another worker exists.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
proc_exit(0);
|
proc_exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
before_shmem_exit(MaintenanceDaemonShmemExit, main_arg);
|
||||||
|
|
||||||
|
Assert(myDbData->workerPid == 0);
|
||||||
|
|
||||||
/* from this point, DROP DATABASE will attempt to kill the worker */
|
/* from this point, DROP DATABASE will attempt to kill the worker */
|
||||||
myDbData->workerPid = MyProcPid;
|
myDbData->workerPid = MyProcPid;
|
||||||
|
|
||||||
|
@ -307,7 +312,6 @@ CitusMaintenanceDaemonMain(Datum main_arg)
|
||||||
/* enter main loop */
|
/* enter main loop */
|
||||||
while (!got_SIGTERM)
|
while (!got_SIGTERM)
|
||||||
{
|
{
|
||||||
int rc;
|
|
||||||
int latchFlags = WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH;
|
int latchFlags = WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH;
|
||||||
double timeout = 10000.0; /* use this if the deadlock detection is disabled */
|
double timeout = 10000.0; /* use this if the deadlock detection is disabled */
|
||||||
bool foundDeadlock = false;
|
bool foundDeadlock = false;
|
||||||
|
@ -524,7 +528,7 @@ CitusMaintenanceDaemonMain(Datum main_arg)
|
||||||
* Wait until timeout, or until somebody wakes us up. Also cast the timeout to
|
* Wait until timeout, or until somebody wakes us up. Also cast the timeout to
|
||||||
* integer where we've calculated it using double for not losing the precision.
|
* integer where we've calculated it using double for not losing the precision.
|
||||||
*/
|
*/
|
||||||
rc = WaitLatch(MyLatch, latchFlags, (long) timeout, PG_WAIT_EXTENSION);
|
int rc = WaitLatch(MyLatch, latchFlags, (long) timeout, PG_WAIT_EXTENSION);
|
||||||
|
|
||||||
/* emergency bailout if postmaster has died */
|
/* emergency bailout if postmaster has died */
|
||||||
if (rc & WL_POSTMASTER_DEATH)
|
if (rc & WL_POSTMASTER_DEATH)
|
||||||
|
@ -647,6 +651,29 @@ MaintenanceDaemonShmemInit(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* MaintenanceDaemonShmemExit is the before_shmem_exit handler for cleaning up MaintenanceDaemonDBHash
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
MaintenanceDaemonShmemExit(int code, Datum arg)
|
||||||
|
{
|
||||||
|
Oid databaseOid = DatumGetObjectId(arg);
|
||||||
|
|
||||||
|
LWLockAcquire(&MaintenanceDaemonControl->lock, LW_EXCLUSIVE);
|
||||||
|
|
||||||
|
MaintenanceDaemonDBData *myDbData = (MaintenanceDaemonDBData *)
|
||||||
|
hash_search(MaintenanceDaemonDBHash, &databaseOid,
|
||||||
|
HASH_FIND, NULL);
|
||||||
|
if (myDbData && myDbData->workerPid == MyProcPid)
|
||||||
|
{
|
||||||
|
myDbData->daemonStarted = false;
|
||||||
|
myDbData->workerPid = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
LWLockRelease(&MaintenanceDaemonControl->lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* MaintenanceDaemonSigTermHandler calls proc_exit(0) */
|
/* MaintenanceDaemonSigTermHandler calls proc_exit(0) */
|
||||||
static void
|
static void
|
||||||
MaintenanceDaemonSigTermHandler(SIGNAL_ARGS)
|
MaintenanceDaemonSigTermHandler(SIGNAL_ARGS)
|
||||||
|
|
Loading…
Reference in New Issue