mirror of https://github.com/citusdata/citus.git
Fix a bug that could lead to multiple maintenance daemons
parent
18219843d0
commit
881e5df780
|
@ -109,6 +109,9 @@ static HTAB *MaintenanceDaemonDBHash;
|
|||
static volatile sig_atomic_t got_SIGHUP = false;
|
||||
static volatile sig_atomic_t got_SIGTERM = false;
|
||||
|
||||
/* set to true when becoming a maintenance daemon */
|
||||
static bool IsMaintenanceDaemon = false;
|
||||
|
||||
static void MaintenanceDaemonSigTermHandler(SIGNAL_ARGS);
|
||||
static void MaintenanceDaemonSigHupHandler(SIGNAL_ARGS);
|
||||
static size_t MaintenanceDaemonShmemSize(void);
|
||||
|
@ -165,15 +168,31 @@ InitializeMaintenanceDaemonBackend(void)
|
|||
return;
|
||||
}
|
||||
|
||||
/* maintenance daemon can ignore itself */
|
||||
if (dbData->workerPid == MyProcPid)
|
||||
if (!found)
|
||||
{
|
||||
/* ensure the values in MaintenanceDaemonDBData are zero */
|
||||
memset(((char *) dbData) + sizeof(Oid), 0,
|
||||
sizeof(MaintenanceDaemonDBData) - sizeof(Oid));
|
||||
}
|
||||
|
||||
if (IsMaintenanceDaemon)
|
||||
{
|
||||
/*
|
||||
* InitializeMaintenanceDaemonBackend is called by the maintenance daemon
|
||||
* itself. In that case, we clearly don't need to start another maintenance
|
||||
* daemon.
|
||||
*/
|
||||
Assert(found);
|
||||
Assert(dbData->workerPid == MyProcPid);
|
||||
|
||||
LWLockRelease(&MaintenanceDaemonControl->lock);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!found || !dbData->daemonStarted)
|
||||
{
|
||||
Assert(dbData->workerPid == 0);
|
||||
|
||||
BackgroundWorker worker;
|
||||
BackgroundWorkerHandle *handle = NULL;
|
||||
|
||||
|
@ -292,13 +311,33 @@ CitusMaintenanceDaemonMain(Datum main_arg)
|
|||
proc_exit(0);
|
||||
}
|
||||
|
||||
if (myDbData->workerPid != 0)
|
||||
{
|
||||
/*
|
||||
* Another maintenance daemon is running. This usually happens because
|
||||
* postgres restarts the daemon after a non-zero exit, and
|
||||
* InitializeMaintenanceDaemonBackend started one before postgres did.
|
||||
* In that case, the first one stays and the last one exits.
|
||||
*/
|
||||
|
||||
proc_exit(0);
|
||||
}
|
||||
|
||||
before_shmem_exit(MaintenanceDaemonShmemExit, main_arg);
|
||||
|
||||
Assert(myDbData->workerPid == 0);
|
||||
|
||||
/* from this point, DROP DATABASE will attempt to kill the worker */
|
||||
/*
|
||||
* Signal that I am the maintenance daemon now.
|
||||
*
|
||||
* From this point, DROP DATABASE/EXTENSION will send a SIGTERM to me.
|
||||
*/
|
||||
myDbData->workerPid = MyProcPid;
|
||||
|
||||
/*
|
||||
* Signal that we are running. This is mainly needed in case of restart after
|
||||
* an error, otherwise the daemonStarted flag is already true.
|
||||
*/
|
||||
myDbData->daemonStarted = true;
|
||||
|
||||
/* wire up signals */
|
||||
pqsignal(SIGTERM, MaintenanceDaemonSigTermHandler);
|
||||
pqsignal(SIGHUP, MaintenanceDaemonSigHupHandler);
|
||||
|
@ -306,6 +345,8 @@ CitusMaintenanceDaemonMain(Datum main_arg)
|
|||
|
||||
myDbData->latch = MyLatch;
|
||||
|
||||
IsMaintenanceDaemon = true;
|
||||
|
||||
LWLockRelease(&MaintenanceDaemonControl->lock);
|
||||
|
||||
/*
|
||||
|
@ -339,8 +380,6 @@ CitusMaintenanceDaemonMain(Datum main_arg)
|
|||
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
|
||||
Assert(myDbData->workerPid == MyProcPid);
|
||||
|
||||
CitusTableCacheFlushInvalidatedEntries();
|
||||
|
||||
/*
|
||||
|
@ -567,15 +606,6 @@ CitusMaintenanceDaemonMain(Datum main_arg)
|
|||
/* check for changed configuration */
|
||||
if (myDbData->userOid != GetSessionUserId())
|
||||
{
|
||||
/*
|
||||
* Reset myDbData->daemonStarted so InitializeMaintenanceDaemonBackend()
|
||||
* notices this is a restart.
|
||||
*/
|
||||
LWLockAcquire(&MaintenanceDaemonControl->lock, LW_EXCLUSIVE);
|
||||
myDbData->daemonStarted = false;
|
||||
myDbData->workerPid = 0;
|
||||
LWLockRelease(&MaintenanceDaemonControl->lock);
|
||||
|
||||
/* return code of 1 requests worker restart */
|
||||
proc_exit(1);
|
||||
}
|
||||
|
@ -687,8 +717,15 @@ MaintenanceDaemonShmemExit(int code, Datum arg)
|
|||
MaintenanceDaemonDBData *myDbData = (MaintenanceDaemonDBData *)
|
||||
hash_search(MaintenanceDaemonDBHash, &databaseOid,
|
||||
HASH_FIND, NULL);
|
||||
if (myDbData && myDbData->workerPid == MyProcPid)
|
||||
|
||||
/* myDbData is NULL after StopMaintenanceDaemon */
|
||||
if (myDbData != NULL)
|
||||
{
|
||||
/*
|
||||
* Confirm that I am still the registered maintenance daemon before exiting.
|
||||
*/
|
||||
Assert(myDbData->workerPid == MyProcPid);
|
||||
|
||||
myDbData->daemonStarted = false;
|
||||
myDbData->workerPid = 0;
|
||||
}
|
||||
|
|
|
@ -765,6 +765,13 @@ SELECT pg_cancel_backend(pid) FROM pg_stat_activity WHERE application_name = 'Ci
|
|||
|
||||
-- reconnect
|
||||
\c - - - :master_port
|
||||
-- run something that goes through planner hook and therefore kicks of maintenance daemon
|
||||
SELECT 1;
|
||||
?column?
|
||||
---------------------------------------------------------------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
-- wait for maintenance daemon restart
|
||||
SELECT datname, current_database(),
|
||||
usename, (SELECT extowner::regrole::text FROM pg_extension WHERE extname = 'citus')
|
||||
|
|
|
@ -470,6 +470,8 @@ SELECT pg_cancel_backend(pid) FROM pg_stat_activity WHERE application_name = 'Ci
|
|||
|
||||
-- reconnect
|
||||
\c - - - :master_port
|
||||
-- run something that goes through planner hook and therefore kicks of maintenance daemon
|
||||
SELECT 1;
|
||||
|
||||
-- wait for maintenance daemon restart
|
||||
SELECT datname, current_database(),
|
||||
|
|
Loading…
Reference in New Issue