diff --git a/src/backend/distributed/commands/extension.c b/src/backend/distributed/commands/extension.c index 85fd0bbe9..521d5458e 100644 --- a/src/backend/distributed/commands/extension.c +++ b/src/backend/distributed/commands/extension.c @@ -37,7 +37,6 @@ static List * ExtensionNameListToObjectAddressList(List *extensionObjectList); static void MarkExistingObjectDependenciesDistributedIfSupported(void); static void EnsureSequentialModeForExtensionDDL(void); static bool ShouldPropagateExtensionCommand(Node *parseTree); -static bool IsDropCitusStmt(Node *parseTree); static bool IsAlterExtensionSetSchemaCitus(Node *parseTree); static Node * RecreateExtensionStmt(Oid extensionOid); @@ -727,7 +726,7 @@ IsCreateAlterExtensionUpdateCitusStmt(Node *parseTree) * IsDropCitusStmt iterates the objects to be dropped in a drop statement * and try to find citus there. */ -static bool +bool IsDropCitusStmt(Node *parseTree) { ListCell *objectCell = NULL; diff --git a/src/backend/distributed/commands/utility_hook.c b/src/backend/distributed/commands/utility_hook.c index a566f1afb..e2bc74439 100644 --- a/src/backend/distributed/commands/utility_hook.c +++ b/src/backend/distributed/commands/utility_hook.c @@ -452,6 +452,11 @@ multi_ProcessUtility(PlannedStmt *pstmt, } } + if (IsDropCitusStmt(parsetree)) + { + StopMaintenanceDaemon(MyDatabaseId); + } + pstmt->utilityStmt = parsetree; PG_TRY(); @@ -633,11 +638,14 @@ multi_ProcessUtility(PlannedStmt *pstmt, PostprocessVacuumStmt(vacuumStmt, queryString); } - /* - * Ensure value is valid, we can't do some checks during CREATE - * EXTENSION. This is important to register some invalidation callbacks. - */ - CitusHasBeenLoaded(); + if (!IsDropCitusStmt(parsetree) && !IsA(parsetree, DropdbStmt)) + { + /* + * Ensure value is valid, we can't do some checks during CREATE + * EXTENSION. This is important to register some invalidation callbacks. + */ + CitusHasBeenLoaded(); + } } diff --git a/src/backend/distributed/utils/maintenanced.c b/src/backend/distributed/utils/maintenanced.c index ddb9f9830..efcf6c4ea 100644 --- a/src/backend/distributed/utils/maintenanced.c +++ b/src/backend/distributed/utils/maintenanced.c @@ -77,8 +77,8 @@ typedef struct MaintenanceDaemonDBData /* information: which user to use */ Oid userOid; - bool daemonStarted; pid_t workerPid; + bool daemonStarted; bool triggerMetadataSync; Latch *latch; /* pointer to the background worker's latch */ } MaintenanceDaemonDBData; @@ -102,6 +102,7 @@ static HTAB *MaintenanceDaemonDBHash; static volatile sig_atomic_t got_SIGHUP = false; +static void MaintenanceDaemonSigTermHandler(SIGNAL_ARGS); static void MaintenanceDaemonSigHupHandler(SIGNAL_ARGS); static size_t MaintenanceDaemonShmemSize(void); static void MaintenanceDaemonShmemInit(void); @@ -154,11 +155,17 @@ InitializeMaintenanceDaemonBackend(void) ereport(ERROR, (errmsg("ran out of database slots"))); } + /* maintenance daemon can ignore itself */ + if (dbData->workerPid == MyProcPid) + { + LWLockRelease(&MaintenanceDaemonControl->lock); + return; + } + if (!found || !dbData->daemonStarted) { BackgroundWorker worker; BackgroundWorkerHandle *handle = NULL; - int pid = 0; dbData->userOid = extensionOwner; @@ -173,13 +180,11 @@ InitializeMaintenanceDaemonBackend(void) /* * No point in getting started before able to run query, but we do - * want to get started on Hot-Stanby standbys. + * want to get started on Hot-Standby. */ worker.bgw_start_time = BgWorkerStart_ConsistentState; - /* - * Restart after a bit after errors, but don't bog the system. - */ + /* Restart after a bit after errors, but don't bog the system. */ worker.bgw_restart_time = 5; sprintf(worker.bgw_library_name, "citus"); sprintf(worker.bgw_function_name, "CitusMaintenanceDaemonMain"); @@ -198,7 +203,10 @@ InitializeMaintenanceDaemonBackend(void) dbData->triggerMetadataSync = false; LWLockRelease(&MaintenanceDaemonControl->lock); + pid_t pid; WaitForBackgroundWorkerStartup(handle, &pid); + + pfree(handle); } else { @@ -245,13 +253,14 @@ CitusMaintenanceDaemonMain(Datum main_arg) MaintenanceDaemonDBData *myDbData = (MaintenanceDaemonDBData *) hash_search(MaintenanceDaemonDBHash, &databaseOid, HASH_FIND, NULL); - if (!myDbData) + if (!myDbData || myDbData->workerPid != 0) { /* * When the database crashes, background workers are restarted, but * the state in shared memory is lost. In that case, we exit and * wait for a session to call InitializeMaintenanceDaemonBackend * to properly add it to the hash. + * Alternatively, don't continue if another worker exists. */ proc_exit(0); } @@ -260,7 +269,7 @@ CitusMaintenanceDaemonMain(Datum main_arg) myDbData->workerPid = MyProcPid; /* wire up signals */ - pqsignal(SIGTERM, die); + pqsignal(SIGTERM, MaintenanceDaemonSigTermHandler); pqsignal(SIGHUP, MaintenanceDaemonSigHupHandler); BackgroundWorkerUnblockSignals(); @@ -300,6 +309,8 @@ CitusMaintenanceDaemonMain(Datum main_arg) CHECK_FOR_INTERRUPTS(); + Assert(myDbData->workerPid == MyProcPid); + /* * XXX: Each task should clear the metadata cache before every iteration * by calling InvalidateMetadataSystemCache(), because otherwise it @@ -530,6 +541,7 @@ CitusMaintenanceDaemonMain(Datum main_arg) */ LWLockAcquire(&MaintenanceDaemonControl->lock, LW_EXCLUSIVE); myDbData->daemonStarted = false; + myDbData->workerPid = 0; LWLockRelease(&MaintenanceDaemonControl->lock); /* return code of 1 requests worker restart */ @@ -630,6 +642,14 @@ MaintenanceDaemonShmemInit(void) } +/* MaintenanceDaemonSigTermHandler calls proc_exit(0) */ +static void +MaintenanceDaemonSigTermHandler(SIGNAL_ARGS) +{ + proc_exit(0); +} + + /* * MaintenanceDaemonSigHupHandler set a flag to re-read config file at next * convenient time. @@ -709,6 +729,7 @@ StopMaintenanceDaemon(Oid databaseId) MaintenanceDaemonDBHash, &databaseId, HASH_REMOVE, &found); + if (found) { workerPid = dbData->workerPid; diff --git a/src/include/distributed/commands.h b/src/include/distributed/commands.h index ba00a5f13..3477487f3 100644 --- a/src/include/distributed/commands.h +++ b/src/include/distributed/commands.h @@ -71,6 +71,7 @@ extern ObjectAddress DefineCollationStmtObjectAddress(Node *stmt, bool missing_o extern List * PostprocessDefineCollationStmt(Node *stmt, const char *queryString); /* extension.c - forward declarations */ +extern bool IsDropCitusStmt(Node *parsetree); extern bool IsCreateAlterExtensionUpdateCitusStmt(Node *parsetree); extern void ErrorIfUnstableCreateOrAlterExtensionStmt(Node *parsetree); extern List * PostprocessCreateExtensionStmt(Node *stmt, const char *queryString); diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index a5cdcccb3..293ca723d 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -6,7 +6,7 @@ -- It'd be nice to script generation of this file, but alas, that's -- not done yet. SET citus.next_shard_id TO 580000; -CREATE SCHEMA test; +SELECT $definition$ CREATE OR REPLACE FUNCTION test.maintenance_worker(p_dbname text DEFAULT current_database()) RETURNS pg_stat_activity LANGUAGE plpgsql @@ -14,18 +14,23 @@ AS $$ DECLARE activity record; BEGIN + DO 'BEGIN END'; -- Force maintenance daemon to start LOOP + PERFORM pg_stat_clear_snapshot(); SELECT * INTO activity FROM pg_stat_activity - WHERE application_name = 'Citus Maintenance Daemon' AND datname = p_dbname; + WHERE application_name = 'Citus Maintenance Daemon' AND datname = p_dbname; IF activity.pid IS NOT NULL THEN RETURN activity; ELSE PERFORM pg_sleep(0.1); - PERFORM pg_stat_clear_snapshot(); END IF ; END LOOP; END; $$; +$definition$ create_function_test_maintenance_worker +\gset +CREATE SCHEMA test; +:create_function_test_maintenance_worker -- check maintenance daemon is started SELECT datname, current_database(), usename, (SELECT extowner::regrole::text FROM pg_extension WHERE extname = 'citus') @@ -221,6 +226,45 @@ ALTER EXTENSION citus UPDATE; (0 rows) \c - - - :master_port +-- test https://github.com/citusdata/citus/issues/3409 +CREATE USER testuser2 SUPERUSER; +NOTICE: not propagating CREATE ROLE/USER commands to worker nodes +HINT: Connect to worker nodes directly to manually create all necessary users and roles. +SET ROLE testuser2; +DROP EXTENSION Citus; +-- Loop until we see there's no maintenance daemon running +DO $$begin + for i in 0 .. 100 loop + if i = 100 then raise 'Waited too long'; end if; + PERFORM pg_stat_clear_snapshot(); + perform * from pg_stat_activity where application_name = 'Citus Maintenance Daemon'; + if not found then exit; end if; + perform pg_sleep(0.1); + end loop; +end$$; +SELECT datid, datname, usename FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon'; + datid | datname | usename +--------------------------------------------------------------------- +(0 rows) + +CREATE EXTENSION Citus; +-- Loop until we there's a maintenance daemon running +DO $$begin + for i in 0 .. 100 loop + if i = 100 then raise 'Waited too long'; end if; + PERFORM pg_stat_clear_snapshot(); + perform * from pg_stat_activity where application_name = 'Citus Maintenance Daemon'; + if found then exit; end if; + perform pg_sleep(0.1); + end loop; +end$$; +SELECT datid, datname, usename FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon'; + datid | datname | usename +--------------------------------------------------------------------- + 16384 | regression | testuser2 +(1 row) + +RESET ROLE; -- check that maintenance daemon gets (re-)started for the right user DROP EXTENSION citus; CREATE USER testuser SUPERUSER; @@ -246,26 +290,8 @@ HINT: You can manually create a database and its extensions on workers. \c another CREATE EXTENSION citus; CREATE SCHEMA test; -CREATE OR REPLACE FUNCTION test.maintenance_worker(p_dbname text DEFAULT current_database()) - RETURNS pg_stat_activity - LANGUAGE plpgsql -AS $$ -DECLARE - activity record; -BEGIN - LOOP - SELECT * INTO activity FROM pg_stat_activity - WHERE application_name = 'Citus Maintenance Daemon' AND datname = p_dbname; - IF activity.pid IS NOT NULL THEN - RETURN activity; - ELSE - PERFORM pg_sleep(0.1); - PERFORM pg_stat_clear_snapshot(); - END IF ; - END LOOP; -END; -$$; --- see that the deamon started +:create_function_test_maintenance_worker +-- see that the daemon started SELECT datname, current_database(), usename, (SELECT extowner::regrole::text FROM pg_extension WHERE extname = 'citus') FROM test.maintenance_worker(); @@ -276,12 +302,12 @@ FROM test.maintenance_worker(); -- Test that database with active worker can be dropped. \c regression -CREATE SCHEMA test_deamon; +CREATE SCHEMA test_daemon; -- we create a similar function on the regression database -- note that this function checks for the existence of the daemon -- when not found, returns true else tries for 5 times and -- returns false -CREATE OR REPLACE FUNCTION test_deamon.maintenance_deamon_died(p_dbname text) +CREATE OR REPLACE FUNCTION test_daemon.maintenance_daemon_died(p_dbname text) RETURNS boolean LANGUAGE plpgsql AS $$ @@ -289,31 +315,29 @@ DECLARE activity record; BEGIN PERFORM pg_stat_clear_snapshot(); - LOOP - SELECT * INTO activity FROM pg_stat_activity - WHERE application_name = 'Citus Maintenance Daemon' AND datname = p_dbname; - IF activity.pid IS NULL THEN - RETURN true; - ELSE - RETURN false; - END IF; - END LOOP; + SELECT * INTO activity FROM pg_stat_activity + WHERE application_name = 'Citus Maintenance Daemon' AND datname = p_dbname; + IF activity.pid IS NULL THEN + RETURN true; + ELSE + RETURN false; + END IF; END; $$; --- drop the database and see that the deamon is dead +-- drop the database and see that the daemon is dead DROP DATABASE another; SELECT * FROM - test_deamon.maintenance_deamon_died('another'); - maintenance_deamon_died + test_daemon.maintenance_daemon_died('another'); + maintenance_daemon_died --------------------------------------------------------------------- t (1 row) -- we don't need the schema and the function anymore -DROP SCHEMA test_deamon CASCADE; -NOTICE: drop cascades to function test_deamon.maintenance_deamon_died(text) +DROP SCHEMA test_daemon CASCADE; +NOTICE: drop cascades to function test_daemon.maintenance_daemon_died(text) -- verify citus does not crash while creating a table when run against an older worker -- create_distributed_table piggybacks multiple commands into single one, if one worker -- did not have the required UDF it should fail instead of crash. diff --git a/src/test/regress/sql/multi_extension.sql b/src/test/regress/sql/multi_extension.sql index 19704af0e..6a58d72e0 100644 --- a/src/test/regress/sql/multi_extension.sql +++ b/src/test/regress/sql/multi_extension.sql @@ -9,8 +9,7 @@ SET citus.next_shard_id TO 580000; -CREATE SCHEMA test; - +SELECT $definition$ CREATE OR REPLACE FUNCTION test.maintenance_worker(p_dbname text DEFAULT current_database()) RETURNS pg_stat_activity LANGUAGE plpgsql @@ -18,18 +17,24 @@ AS $$ DECLARE activity record; BEGIN + DO 'BEGIN END'; -- Force maintenance daemon to start LOOP + PERFORM pg_stat_clear_snapshot(); SELECT * INTO activity FROM pg_stat_activity - WHERE application_name = 'Citus Maintenance Daemon' AND datname = p_dbname; + WHERE application_name = 'Citus Maintenance Daemon' AND datname = p_dbname; IF activity.pid IS NOT NULL THEN RETURN activity; ELSE PERFORM pg_sleep(0.1); - PERFORM pg_stat_clear_snapshot(); END IF ; END LOOP; END; $$; +$definition$ create_function_test_maintenance_worker +\gset + +CREATE SCHEMA test; +:create_function_test_maintenance_worker -- check maintenance daemon is started SELECT datname, current_database(), @@ -207,6 +212,35 @@ ALTER EXTENSION citus UPDATE; \c - - - :master_port +-- test https://github.com/citusdata/citus/issues/3409 +CREATE USER testuser2 SUPERUSER; +SET ROLE testuser2; +DROP EXTENSION Citus; +-- Loop until we see there's no maintenance daemon running +DO $$begin + for i in 0 .. 100 loop + if i = 100 then raise 'Waited too long'; end if; + PERFORM pg_stat_clear_snapshot(); + perform * from pg_stat_activity where application_name = 'Citus Maintenance Daemon'; + if not found then exit; end if; + perform pg_sleep(0.1); + end loop; +end$$; +SELECT datid, datname, usename FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon'; +CREATE EXTENSION Citus; +-- Loop until we there's a maintenance daemon running +DO $$begin + for i in 0 .. 100 loop + if i = 100 then raise 'Waited too long'; end if; + PERFORM pg_stat_clear_snapshot(); + perform * from pg_stat_activity where application_name = 'Citus Maintenance Daemon'; + if found then exit; end if; + perform pg_sleep(0.1); + end loop; +end$$; +SELECT datid, datname, usename FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon'; +RESET ROLE; + -- check that maintenance daemon gets (re-)started for the right user DROP EXTENSION citus; CREATE USER testuser SUPERUSER; @@ -229,28 +263,9 @@ CREATE DATABASE another; CREATE EXTENSION citus; CREATE SCHEMA test; +:create_function_test_maintenance_worker -CREATE OR REPLACE FUNCTION test.maintenance_worker(p_dbname text DEFAULT current_database()) - RETURNS pg_stat_activity - LANGUAGE plpgsql -AS $$ -DECLARE - activity record; -BEGIN - LOOP - SELECT * INTO activity FROM pg_stat_activity - WHERE application_name = 'Citus Maintenance Daemon' AND datname = p_dbname; - IF activity.pid IS NOT NULL THEN - RETURN activity; - ELSE - PERFORM pg_sleep(0.1); - PERFORM pg_stat_clear_snapshot(); - END IF ; - END LOOP; -END; -$$; - --- see that the deamon started +-- see that the daemon started SELECT datname, current_database(), usename, (SELECT extowner::regrole::text FROM pg_extension WHERE extname = 'citus') FROM test.maintenance_worker(); @@ -258,13 +273,13 @@ FROM test.maintenance_worker(); -- Test that database with active worker can be dropped. \c regression -CREATE SCHEMA test_deamon; +CREATE SCHEMA test_daemon; -- we create a similar function on the regression database -- note that this function checks for the existence of the daemon -- when not found, returns true else tries for 5 times and -- returns false -CREATE OR REPLACE FUNCTION test_deamon.maintenance_deamon_died(p_dbname text) +CREATE OR REPLACE FUNCTION test_daemon.maintenance_daemon_died(p_dbname text) RETURNS boolean LANGUAGE plpgsql AS $$ @@ -272,27 +287,25 @@ DECLARE activity record; BEGIN PERFORM pg_stat_clear_snapshot(); - LOOP - SELECT * INTO activity FROM pg_stat_activity - WHERE application_name = 'Citus Maintenance Daemon' AND datname = p_dbname; - IF activity.pid IS NULL THEN - RETURN true; - ELSE - RETURN false; - END IF; - END LOOP; + SELECT * INTO activity FROM pg_stat_activity + WHERE application_name = 'Citus Maintenance Daemon' AND datname = p_dbname; + IF activity.pid IS NULL THEN + RETURN true; + ELSE + RETURN false; + END IF; END; $$; --- drop the database and see that the deamon is dead +-- drop the database and see that the daemon is dead DROP DATABASE another; SELECT * FROM - test_deamon.maintenance_deamon_died('another'); + test_daemon.maintenance_daemon_died('another'); -- we don't need the schema and the function anymore -DROP SCHEMA test_deamon CASCADE; +DROP SCHEMA test_daemon CASCADE; -- verify citus does not crash while creating a table when run against an older worker