Merge branch 'master' into improve_deadlock_detection

pull/1595/head
Önder Kalacı 2017-08-18 13:07:18 +03:00 committed by GitHub
commit b82f886ad3
3 changed files with 64 additions and 6 deletions

View File

@ -21,6 +21,8 @@
#include "pgstat.h"
#include "access/xact.h"
#include "catalog/pg_extension.h"
#include "commands/extension.h"
#include "libpq/pqsignal.h"
#include "distributed/distributed_deadlock_detection.h"
#include "distributed/maintenanced.h"
@ -29,6 +31,7 @@
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/latch.h"
#include "storage/lmgr.h"
#include "storage/lwlock.h"
#include "tcop/tcopprot.h"
@ -85,6 +88,8 @@ static void MaintenanceDaemonSigHupHandler(SIGNAL_ARGS);
static size_t MaintenanceDaemonShmemSize(void);
static void MaintenanceDaemonShmemInit(void);
static void MaintenanceDaemonErrorContext(void *arg);
static bool LockCitusExtension(void);
/*
* InitializeMaintenanceDaemon, called at server start, is responsible for
@ -261,6 +266,15 @@ CitusMaintenanceDaemonMain(Datum main_arg)
CHECK_FOR_INTERRUPTS();
/*
* XXX: We clear the metadata cache before every iteration because otherwise
* it might contain stale OIDs. It appears that in some cases invalidation
* messages for a DROP EXTENSION may arrive during deadlock detection and
* this causes us to cache a stale pg_dist_node OID. We'd actually expect
* all invalidations to arrive after obtaining a lock in LockCitusExtension.
*/
ClearMetadataOIDCache();
/*
* Perform Work. If a specific task needs to be called sooner than
* timeout indicates, it's ok to lower it to that value. Expensive
@ -272,11 +286,12 @@ CitusMaintenanceDaemonMain(Datum main_arg)
{
StartTransactionCommand();
/*
* We don't want to run the deadlock checks if there exists
* any version mismatch.
*/
if (CheckCitusVersion(DEBUG1))
if (!LockCitusExtension())
{
ereport(DEBUG1, (errmsg("could not lock the citus extension, "
"skipping deadlock detection")));
}
else if (CheckCitusVersion(DEBUG1) && CitusHasBeenLoaded())
{
foundDeadlock = CheckForDistributedDeadlocks();
}
@ -472,3 +487,35 @@ MaintenanceDaemonErrorContext(void *arg)
errcontext("Citus maintenance daemon for database %u user %u",
myDbData->databaseOid, myDbData->userOid);
}
/*
 * LockCitusExtension takes an AccessShareLock on the "citus" extension
 * object and reports whether the extension is still the one we looked up.
 *
 * Returns false when no extension named "citus" can be found, or when the
 * name resolves to a different OID once the lock has been obtained (i.e. the
 * extension was dropped, and possibly recreated, in between). Returns true
 * otherwise.
 */
static bool
LockCitusExtension(void)
{
	Oid oidBeforeLock = get_extension_oid("citus", true);
	Oid oidAfterLock = InvalidOid;

	/* nothing to lock when the citus extension does not exist */
	if (oidBeforeLock == InvalidOid)
	{
		return false;
	}

	LockDatabaseObject(ExtensionRelationId, oidBeforeLock, 0, AccessShareLock);

	/*
	 * Between the first lookup and acquiring the lock the extension could
	 * have been dropped (and even recreated). Look the name up again and
	 * only report success when it still maps to the OID we locked.
	 */
	oidAfterLock = get_extension_oid("citus", true);

	return oidAfterLock == oidBeforeLock;
}

View File

@ -2694,11 +2694,21 @@ InvalidateDistRelationCacheCallback(Datum argument, Oid relationId)
*/
if (relationId != InvalidOid && relationId == MetadataCache.distPartitionRelationId)
{
memset(&MetadataCache, 0, sizeof(MetadataCache));
ClearMetadataOIDCache();
}
}
/*
 * ClearMetadataOIDCache wipes the whole MetadataCache structure by filling
 * it with zero bytes, which resets every cached OID together with the
 * extensionLoaded flag.
 */
void
ClearMetadataOIDCache(void)
{
	memset(&MetadataCache, 0, sizeof MetadataCache);
}
/*
* DistTableOidList iterates over the pg_dist_partition table and returns
* a list that consists of the logicalrelids.

View File

@ -86,6 +86,7 @@ extern List * DistTableOidList(void);
extern List * ShardPlacementList(uint64 shardId);
extern void CitusInvalidateRelcacheByRelid(Oid relationId);
extern void CitusInvalidateRelcacheByShardId(int64 shardId);
extern void ClearMetadataOIDCache(void);
extern bool CitusHasBeenLoaded(void);
extern bool CheckCitusVersion(int elevel);