mirror of https://github.com/citusdata/citus.git
Merge pull request #4222 from citusdata/fix/multiple-maintenanced
commit
fd40605745
|
@ -109,6 +109,9 @@ static HTAB *MaintenanceDaemonDBHash;
|
||||||
static volatile sig_atomic_t got_SIGHUP = false;
|
static volatile sig_atomic_t got_SIGHUP = false;
|
||||||
static volatile sig_atomic_t got_SIGTERM = false;
|
static volatile sig_atomic_t got_SIGTERM = false;
|
||||||
|
|
||||||
|
/* set to true when becoming a maintenance daemon */
|
||||||
|
static bool IsMaintenanceDaemon = false;
|
||||||
|
|
||||||
static void MaintenanceDaemonSigTermHandler(SIGNAL_ARGS);
|
static void MaintenanceDaemonSigTermHandler(SIGNAL_ARGS);
|
||||||
static void MaintenanceDaemonSigHupHandler(SIGNAL_ARGS);
|
static void MaintenanceDaemonSigHupHandler(SIGNAL_ARGS);
|
||||||
static size_t MaintenanceDaemonShmemSize(void);
|
static size_t MaintenanceDaemonShmemSize(void);
|
||||||
|
@ -165,15 +168,31 @@ InitializeMaintenanceDaemonBackend(void)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* maintenance daemon can ignore itself */
|
if (!found)
|
||||||
if (dbData->workerPid == MyProcPid)
|
|
||||||
{
|
{
|
||||||
|
/* ensure the values in MaintenanceDaemonDBData are zero */
|
||||||
|
memset(((char *) dbData) + sizeof(Oid), 0,
|
||||||
|
sizeof(MaintenanceDaemonDBData) - sizeof(Oid));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (IsMaintenanceDaemon)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* InitializeMaintenanceDaemonBackend is called by the maintenance daemon
|
||||||
|
* itself. In that case, we clearly don't need to start another maintenance
|
||||||
|
* daemon.
|
||||||
|
*/
|
||||||
|
Assert(found);
|
||||||
|
Assert(dbData->workerPid == MyProcPid);
|
||||||
|
|
||||||
LWLockRelease(&MaintenanceDaemonControl->lock);
|
LWLockRelease(&MaintenanceDaemonControl->lock);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!found || !dbData->daemonStarted)
|
if (!found || !dbData->daemonStarted)
|
||||||
{
|
{
|
||||||
|
Assert(dbData->workerPid == 0);
|
||||||
|
|
||||||
BackgroundWorker worker;
|
BackgroundWorker worker;
|
||||||
BackgroundWorkerHandle *handle = NULL;
|
BackgroundWorkerHandle *handle = NULL;
|
||||||
|
|
||||||
|
@ -292,13 +311,33 @@ CitusMaintenanceDaemonMain(Datum main_arg)
|
||||||
proc_exit(0);
|
proc_exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (myDbData->workerPid != 0)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Another maintenance daemon is running. This usually happens because
|
||||||
|
* postgres restarts the daemon after a non-zero exit, and
|
||||||
|
* InitializeMaintenanceDaemonBackend started one before postgres did.
|
||||||
|
* In that case, the first one stays and the last one exits.
|
||||||
|
*/
|
||||||
|
|
||||||
|
proc_exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
before_shmem_exit(MaintenanceDaemonShmemExit, main_arg);
|
before_shmem_exit(MaintenanceDaemonShmemExit, main_arg);
|
||||||
|
|
||||||
Assert(myDbData->workerPid == 0);
|
/*
|
||||||
|
* Signal that I am the maintenance daemon now.
|
||||||
/* from this point, DROP DATABASE will attempt to kill the worker */
|
*
|
||||||
|
* From this point, DROP DATABASE/EXTENSION will send a SIGTERM to me.
|
||||||
|
*/
|
||||||
myDbData->workerPid = MyProcPid;
|
myDbData->workerPid = MyProcPid;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Signal that we are running. This is mainly needed in case of restart after
|
||||||
|
* an error, otherwise the daemonStarted flag is already true.
|
||||||
|
*/
|
||||||
|
myDbData->daemonStarted = true;
|
||||||
|
|
||||||
/* wire up signals */
|
/* wire up signals */
|
||||||
pqsignal(SIGTERM, MaintenanceDaemonSigTermHandler);
|
pqsignal(SIGTERM, MaintenanceDaemonSigTermHandler);
|
||||||
pqsignal(SIGHUP, MaintenanceDaemonSigHupHandler);
|
pqsignal(SIGHUP, MaintenanceDaemonSigHupHandler);
|
||||||
|
@ -306,6 +345,8 @@ CitusMaintenanceDaemonMain(Datum main_arg)
|
||||||
|
|
||||||
myDbData->latch = MyLatch;
|
myDbData->latch = MyLatch;
|
||||||
|
|
||||||
|
IsMaintenanceDaemon = true;
|
||||||
|
|
||||||
LWLockRelease(&MaintenanceDaemonControl->lock);
|
LWLockRelease(&MaintenanceDaemonControl->lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -339,8 +380,6 @@ CitusMaintenanceDaemonMain(Datum main_arg)
|
||||||
|
|
||||||
CHECK_FOR_INTERRUPTS();
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
|
||||||
Assert(myDbData->workerPid == MyProcPid);
|
|
||||||
|
|
||||||
CitusTableCacheFlushInvalidatedEntries();
|
CitusTableCacheFlushInvalidatedEntries();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -567,15 +606,6 @@ CitusMaintenanceDaemonMain(Datum main_arg)
|
||||||
/* check for changed configuration */
|
/* check for changed configuration */
|
||||||
if (myDbData->userOid != GetSessionUserId())
|
if (myDbData->userOid != GetSessionUserId())
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
* Reset myDbData->daemonStarted so InitializeMaintenanceDaemonBackend()
|
|
||||||
* notices this is a restart.
|
|
||||||
*/
|
|
||||||
LWLockAcquire(&MaintenanceDaemonControl->lock, LW_EXCLUSIVE);
|
|
||||||
myDbData->daemonStarted = false;
|
|
||||||
myDbData->workerPid = 0;
|
|
||||||
LWLockRelease(&MaintenanceDaemonControl->lock);
|
|
||||||
|
|
||||||
/* return code of 1 requests worker restart */
|
/* return code of 1 requests worker restart */
|
||||||
proc_exit(1);
|
proc_exit(1);
|
||||||
}
|
}
|
||||||
|
@ -687,8 +717,15 @@ MaintenanceDaemonShmemExit(int code, Datum arg)
|
||||||
MaintenanceDaemonDBData *myDbData = (MaintenanceDaemonDBData *)
|
MaintenanceDaemonDBData *myDbData = (MaintenanceDaemonDBData *)
|
||||||
hash_search(MaintenanceDaemonDBHash, &databaseOid,
|
hash_search(MaintenanceDaemonDBHash, &databaseOid,
|
||||||
HASH_FIND, NULL);
|
HASH_FIND, NULL);
|
||||||
if (myDbData && myDbData->workerPid == MyProcPid)
|
|
||||||
|
/* myDbData is NULL after StopMaintenanceDaemon */
|
||||||
|
if (myDbData != NULL)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* Confirm that I am still the registered maintenance daemon before exiting.
|
||||||
|
*/
|
||||||
|
Assert(myDbData->workerPid == MyProcPid);
|
||||||
|
|
||||||
myDbData->daemonStarted = false;
|
myDbData->daemonStarted = false;
|
||||||
myDbData->workerPid = 0;
|
myDbData->workerPid = 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -723,3 +723,69 @@ CONTEXT: PL/pgSQL function inline_code_block line 6 at RAISE
|
||||||
DROP DATABASE another;
|
DROP DATABASE another;
|
||||||
\c - - - :worker_1_port
|
\c - - - :worker_1_port
|
||||||
DROP DATABASE another;
|
DROP DATABASE another;
|
||||||
|
\c - - - :master_port
|
||||||
|
-- only the regression database should have a maintenance daemon
|
||||||
|
SELECT count(*) FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon';
|
||||||
|
count
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
1
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- recreate the extension immediately after the maintenance daemon errors
|
||||||
|
SELECT pg_cancel_backend(pid) FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon';
|
||||||
|
pg_cancel_backend
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
DROP EXTENSION citus;
|
||||||
|
CREATE EXTENSION citus;
|
||||||
|
-- wait for maintenance daemon restart
|
||||||
|
SELECT datname, current_database(),
|
||||||
|
usename, (SELECT extowner::regrole::text FROM pg_extension WHERE extname = 'citus')
|
||||||
|
FROM test.maintenance_worker();
|
||||||
|
datname | current_database | usename | extowner
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
regression | regression | postgres | postgres
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- confirm that there is only one maintenance daemon
|
||||||
|
SELECT count(*) FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon';
|
||||||
|
count
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
1
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- kill the maintenance daemon
|
||||||
|
SELECT pg_cancel_backend(pid) FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon';
|
||||||
|
pg_cancel_backend
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- reconnect
|
||||||
|
\c - - - :master_port
|
||||||
|
-- run something that goes through planner hook and therefore kicks off the maintenance daemon
|
||||||
|
SELECT 1;
|
||||||
|
?column?
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
1
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- wait for maintenance daemon restart
|
||||||
|
SELECT datname, current_database(),
|
||||||
|
usename, (SELECT extowner::regrole::text FROM pg_extension WHERE extname = 'citus')
|
||||||
|
FROM test.maintenance_worker();
|
||||||
|
datname | current_database | usename | extowner
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
regression | regression | postgres | postgres
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- confirm that there is only one maintenance daemon
|
||||||
|
SELECT count(*) FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon';
|
||||||
|
count
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
1
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
DROP TABLE version_mismatch_table;
|
||||||
|
|
|
@ -448,3 +448,37 @@ DROP DATABASE another;
|
||||||
\c - - - :worker_1_port
|
\c - - - :worker_1_port
|
||||||
DROP DATABASE another;
|
DROP DATABASE another;
|
||||||
|
|
||||||
|
\c - - - :master_port
|
||||||
|
-- only the regression database should have a maintenance daemon
|
||||||
|
SELECT count(*) FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon';
|
||||||
|
|
||||||
|
-- recreate the extension immediately after the maintenance daemon errors
|
||||||
|
SELECT pg_cancel_backend(pid) FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon';
|
||||||
|
DROP EXTENSION citus;
|
||||||
|
CREATE EXTENSION citus;
|
||||||
|
|
||||||
|
-- wait for maintenance daemon restart
|
||||||
|
SELECT datname, current_database(),
|
||||||
|
usename, (SELECT extowner::regrole::text FROM pg_extension WHERE extname = 'citus')
|
||||||
|
FROM test.maintenance_worker();
|
||||||
|
|
||||||
|
-- confirm that there is only one maintenance daemon
|
||||||
|
SELECT count(*) FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon';
|
||||||
|
|
||||||
|
-- kill the maintenance daemon
|
||||||
|
SELECT pg_cancel_backend(pid) FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon';
|
||||||
|
|
||||||
|
-- reconnect
|
||||||
|
\c - - - :master_port
|
||||||
|
-- run something that goes through planner hook and therefore kicks off the maintenance daemon
|
||||||
|
SELECT 1;
|
||||||
|
|
||||||
|
-- wait for maintenance daemon restart
|
||||||
|
SELECT datname, current_database(),
|
||||||
|
usename, (SELECT extowner::regrole::text FROM pg_extension WHERE extname = 'citus')
|
||||||
|
FROM test.maintenance_worker();
|
||||||
|
|
||||||
|
-- confirm that there is only one maintenance daemon
|
||||||
|
SELECT count(*) FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon';
|
||||||
|
|
||||||
|
DROP TABLE version_mismatch_table;
|
||||||
|
|
Loading…
Reference in New Issue