mirror of https://github.com/citusdata/citus.git
932 lines
25 KiB
C
932 lines
25 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* metadata_cache.c
|
|
* Distributed table metadata cache
|
|
*
|
|
* Copyright (c) 2012-2015, Citus Data, Inc.
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "access/genam.h"
|
|
#include "access/heapam.h"
|
|
#include "access/htup_details.h"
|
|
#include "catalog/indexing.h"
|
|
#include "catalog/pg_namespace.h"
|
|
#include "catalog/pg_type.h"
|
|
#include "commands/extension.h"
|
|
#include "commands/trigger.h"
|
|
#include "distributed/master_metadata_utility.h"
|
|
#include "distributed/metadata_cache.h"
|
|
#include "distributed/pg_dist_partition.h"
|
|
#include "distributed/pg_dist_shard.h"
|
|
#include "parser/parse_func.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/catcache.h"
|
|
#include "utils/datum.h"
|
|
#include "utils/hsearch.h"
|
|
#include "utils/inval.h"
|
|
#include "utils/fmgroids.h"
|
|
#include "utils/lsyscache.h"
|
|
#include "utils/rel.h"
|
|
#include "utils/relfilenodemap.h"
|
|
#include "utils/relmapper.h"
|
|
#include "utils/syscache.h"
|
|
|
|
|
|
/* Hash table for informations about each partition */
|
|
static HTAB *DistTableCacheHash = NULL;
|
|
|
|
/* built first time through in InitializePartitionCache */
|
|
static ScanKeyData DistPartitionScanKey[1];
|
|
static ScanKeyData DistShardScanKey[1];
|
|
|
|
|
|
/* local function forward declarations */
|
|
static DistTableCacheEntry * LookupDistTableCacheEntry(Oid relationId);
|
|
static void InitializeDistTableCache(void);
|
|
static void ResetDistTableCacheEntry(DistTableCacheEntry *cacheEntry);
|
|
static void InvalidateDistRelationCacheCallback(Datum argument, Oid relationId);
|
|
static HeapTuple LookupDistPartitionTuple(Oid relationId);
|
|
static List * LookupDistShardTuples(Oid relationId);
|
|
static void GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod,
|
|
Oid *intervalTypeId, int32 *intervalTypeMod);
|
|
static ShardInterval * TupleToShardInterval(HeapTuple heapTuple,
|
|
TupleDesc tupleDescriptor, Oid intervalTypeId,
|
|
int32 intervalTypeMod);
|
|
static void CachedRelationLookup(const char *relationName, Oid *cachedOid);
|
|
|
|
|
|
/* exports for SQL callable functions */
|
|
PG_FUNCTION_INFO_V1(master_dist_partition_cache_invalidate);
|
|
PG_FUNCTION_INFO_V1(master_dist_shard_cache_invalidate);
|
|
|
|
|
|
/*
|
|
* IsDistributedTable returns whether relationId is a distributed relation or
|
|
* not.
|
|
*/
|
|
bool
|
|
IsDistributedTable(Oid relationId)
|
|
{
|
|
DistTableCacheEntry *cacheEntry = NULL;
|
|
|
|
/*
|
|
* Can't be a distributed relation if the extension hasn't been loaded
|
|
* yet. As we can't do lookups in nonexistent tables, directly return
|
|
* false.
|
|
*/
|
|
if (!CitusHasBeenLoaded())
|
|
{
|
|
return false;
|
|
}
|
|
|
|
cacheEntry = LookupDistTableCacheEntry(relationId);
|
|
|
|
return cacheEntry->isDistributedTable;
|
|
}
|
|
|
|
|
|
/*
|
|
* LoadShardInterval reads shard metadata for given shardId from pg_dist_shard,
|
|
* and converts min/max values in these metadata to their properly typed datum
|
|
* representations. The function then allocates a structure that stores the read
|
|
* and converted values, and returns this structure.
|
|
*/
|
|
ShardInterval *
|
|
LoadShardInterval(uint64 shardId)
|
|
{
|
|
ShardInterval *shardInterval;
|
|
SysScanDesc scanDescriptor = NULL;
|
|
ScanKeyData scanKey[1];
|
|
int scanKeyCount = 1;
|
|
HeapTuple heapTuple = NULL;
|
|
Form_pg_dist_shard shardForm = NULL;
|
|
DistTableCacheEntry *partitionEntry;
|
|
Oid intervalTypeId = InvalidOid;
|
|
int32 intervalTypeMod = -1;
|
|
|
|
Relation pgDistShard = heap_open(DistShardRelationId(), AccessShareLock);
|
|
TupleDesc tupleDescriptor = RelationGetDescr(pgDistShard);
|
|
|
|
ScanKeyInit(&scanKey[0], Anum_pg_dist_shard_shardid,
|
|
BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(shardId));
|
|
|
|
scanDescriptor = systable_beginscan(pgDistShard,
|
|
DistShardShardidIndexId(), true,
|
|
NULL, scanKeyCount, scanKey);
|
|
|
|
heapTuple = systable_getnext(scanDescriptor);
|
|
if (!HeapTupleIsValid(heapTuple))
|
|
{
|
|
ereport(ERROR, (errmsg("could not find valid entry for shard "
|
|
UINT64_FORMAT, shardId)));
|
|
}
|
|
|
|
shardForm = (Form_pg_dist_shard) GETSTRUCT(heapTuple);
|
|
partitionEntry = DistributedTableCacheEntry(shardForm->logicalrelid);
|
|
|
|
GetPartitionTypeInputInfo(partitionEntry->partitionKeyString,
|
|
partitionEntry->partitionMethod, &intervalTypeId,
|
|
&intervalTypeMod);
|
|
|
|
shardInterval = TupleToShardInterval(heapTuple, tupleDescriptor, intervalTypeId,
|
|
intervalTypeMod);
|
|
|
|
systable_endscan(scanDescriptor);
|
|
heap_close(pgDistShard, AccessShareLock);
|
|
|
|
return shardInterval;
|
|
}
|
|
|
|
|
|
/*
|
|
* DistributedTableCacheEntry looks up a pg_dist_partition entry for a
|
|
* relation.
|
|
*
|
|
* Errors out if no relation matching the criteria could be found.
|
|
*/
|
|
DistTableCacheEntry *
|
|
DistributedTableCacheEntry(Oid distributedRelationId)
|
|
{
|
|
DistTableCacheEntry *cacheEntry = NULL;
|
|
|
|
/*
|
|
* Can't be a distributed relation if the extension hasn't been loaded
|
|
* yet. As we can't do lookups in nonexistent tables, directly return NULL
|
|
* here.
|
|
*/
|
|
if (!CitusHasBeenLoaded())
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
cacheEntry = LookupDistTableCacheEntry(distributedRelationId);
|
|
|
|
if (cacheEntry->isDistributedTable)
|
|
{
|
|
return cacheEntry;
|
|
}
|
|
else
|
|
{
|
|
ereport(ERROR, (errmsg("relation %u is not distributed",
|
|
distributedRelationId)));
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* LookupDistTableCacheEntry returns the distributed table metadata for the
|
|
* passed relationId. For efficiency it caches lookups.
|
|
*/
|
|
static DistTableCacheEntry *
|
|
LookupDistTableCacheEntry(Oid relationId)
|
|
{
|
|
DistTableCacheEntry *cacheEntry = NULL;
|
|
bool foundInCache = false;
|
|
HeapTuple distPartitionTuple = NULL;
|
|
char *partitionKeyString = NULL;
|
|
char partitionMethod = 0;
|
|
List *distShardTupleList = NIL;
|
|
int shardIntervalArrayLength = 0;
|
|
ShardInterval *shardIntervalArray = NULL;
|
|
void *hashKey = (void *) &relationId;
|
|
|
|
if (DistTableCacheHash == NULL)
|
|
{
|
|
InitializeDistTableCache();
|
|
}
|
|
|
|
cacheEntry = hash_search(DistTableCacheHash, hashKey, HASH_FIND, &foundInCache);
|
|
|
|
/* return valid matches */
|
|
if ((cacheEntry != NULL) && (cacheEntry->isValid))
|
|
{
|
|
return cacheEntry;
|
|
}
|
|
|
|
/* free the content of old, invalid, entries */
|
|
if (cacheEntry != NULL)
|
|
{
|
|
ResetDistTableCacheEntry(cacheEntry);
|
|
}
|
|
|
|
distPartitionTuple = LookupDistPartitionTuple(relationId);
|
|
if (distPartitionTuple != NULL)
|
|
{
|
|
Form_pg_dist_partition partitionForm =
|
|
(Form_pg_dist_partition) GETSTRUCT(distPartitionTuple);
|
|
Datum partitionKeyDatum = PointerGetDatum(&partitionForm->partkey);
|
|
|
|
MemoryContext oldContext = MemoryContextSwitchTo(CacheMemoryContext);
|
|
|
|
partitionKeyString = TextDatumGetCString(partitionKeyDatum);
|
|
partitionMethod = partitionForm->partmethod;
|
|
|
|
MemoryContextSwitchTo(oldContext);
|
|
|
|
heap_freetuple(distPartitionTuple);
|
|
}
|
|
|
|
distShardTupleList = LookupDistShardTuples(relationId);
|
|
shardIntervalArrayLength = list_length(distShardTupleList);
|
|
if (shardIntervalArrayLength > 0)
|
|
{
|
|
Relation distShardRelation = heap_open(DistShardRelationId(), AccessShareLock);
|
|
TupleDesc distShardTupleDesc = RelationGetDescr(distShardRelation);
|
|
ListCell *distShardTupleCell = NULL;
|
|
int arrayIndex = 0;
|
|
Oid intervalTypeId = InvalidOid;
|
|
int32 intervalTypeMod = -1;
|
|
|
|
GetPartitionTypeInputInfo(partitionKeyString, partitionMethod, &intervalTypeId,
|
|
&intervalTypeMod);
|
|
|
|
shardIntervalArray = MemoryContextAllocZero(CacheMemoryContext,
|
|
shardIntervalArrayLength *
|
|
sizeof(ShardInterval));
|
|
|
|
foreach(distShardTupleCell, distShardTupleList)
|
|
{
|
|
HeapTuple shardTuple = lfirst(distShardTupleCell);
|
|
ShardInterval *shardInterval = TupleToShardInterval(shardTuple,
|
|
distShardTupleDesc,
|
|
intervalTypeId,
|
|
intervalTypeMod);
|
|
MemoryContext oldContext = MemoryContextSwitchTo(CacheMemoryContext);
|
|
|
|
CopyShardInterval(shardInterval, &shardIntervalArray[arrayIndex]);
|
|
|
|
MemoryContextSwitchTo(oldContext);
|
|
|
|
heap_freetuple(shardTuple);
|
|
|
|
arrayIndex++;
|
|
}
|
|
|
|
heap_close(distShardRelation, AccessShareLock);
|
|
}
|
|
|
|
cacheEntry = hash_search(DistTableCacheHash, hashKey, HASH_ENTER, NULL);
|
|
|
|
/* zero out entry, but not the key part */
|
|
memset(((char *) cacheEntry) + sizeof(Oid), 0,
|
|
sizeof(DistTableCacheEntry) - sizeof(Oid));
|
|
|
|
if (distPartitionTuple == NULL)
|
|
{
|
|
cacheEntry->isValid = true;
|
|
cacheEntry->isDistributedTable = false;
|
|
}
|
|
else
|
|
{
|
|
cacheEntry->isValid = true;
|
|
cacheEntry->isDistributedTable = true;
|
|
cacheEntry->partitionKeyString = partitionKeyString;
|
|
cacheEntry->partitionMethod = partitionMethod;
|
|
cacheEntry->shardIntervalArrayLength = shardIntervalArrayLength;
|
|
cacheEntry->shardIntervalArray = shardIntervalArray;
|
|
}
|
|
|
|
return cacheEntry;
|
|
}
|
|
|
|
|
|
/*
|
|
* CitusHasBeenLoaded returns true if the citus extension has been created
|
|
* in the current database and the extension script has been executed. Otherwise,
|
|
* it returns false. The result is cached as this is called very frequently.
|
|
*
|
|
* NB: The way this is cached means the result will be wrong after the
|
|
* extension is dropped. A reconnect fixes that though, so that seems
|
|
* acceptable.
|
|
*/
|
|
bool
|
|
CitusHasBeenLoaded(void)
|
|
{
|
|
static bool extensionLoaded = false;
|
|
|
|
/* recheck presence until citus has been loaded */
|
|
if (!extensionLoaded)
|
|
{
|
|
bool extensionPresent = false;
|
|
bool extensionScriptExecuted = true;
|
|
|
|
Oid extensionOid = get_extension_oid("citus", true);
|
|
if (extensionOid != InvalidOid)
|
|
{
|
|
extensionPresent = true;
|
|
}
|
|
|
|
if (extensionPresent)
|
|
{
|
|
/* check if Citus extension objects are still being created */
|
|
if (creating_extension && CurrentExtensionObject == extensionOid)
|
|
{
|
|
extensionScriptExecuted = false;
|
|
}
|
|
}
|
|
|
|
extensionLoaded = extensionPresent && extensionScriptExecuted;
|
|
}
|
|
|
|
return extensionLoaded;
|
|
}
|
|
|
|
|
|
/* return oid of pg_dist_shard relation */
|
|
Oid
|
|
DistShardRelationId(void)
|
|
{
|
|
static Oid cachedOid = InvalidOid;
|
|
|
|
CachedRelationLookup("pg_dist_shard", &cachedOid);
|
|
|
|
return cachedOid;
|
|
}
|
|
|
|
|
|
/* return oid of pg_dist_shard_placement relation */
|
|
Oid
|
|
DistShardPlacementRelationId(void)
|
|
{
|
|
static Oid cachedOid = InvalidOid;
|
|
|
|
CachedRelationLookup("pg_dist_shard_placement", &cachedOid);
|
|
|
|
return cachedOid;
|
|
}
|
|
|
|
|
|
/* return oid of pg_dist_partition relation */
|
|
Oid
|
|
DistPartitionRelationId(void)
|
|
{
|
|
static Oid cachedOid = InvalidOid;
|
|
|
|
CachedRelationLookup("pg_dist_partition", &cachedOid);
|
|
|
|
return cachedOid;
|
|
}
|
|
|
|
|
|
/* return oid of pg_dist_partition_logical_relid_index index */
|
|
Oid
|
|
DistPartitionLogicalRelidIndexId(void)
|
|
{
|
|
static Oid cachedOid = InvalidOid;
|
|
|
|
CachedRelationLookup("pg_dist_partition_logical_relid_index", &cachedOid);
|
|
|
|
return cachedOid;
|
|
}
|
|
|
|
|
|
/* return oid of pg_dist_shard_logical_relid_index index */
|
|
Oid
|
|
DistShardLogicalRelidIndexId(void)
|
|
{
|
|
static Oid cachedOid = InvalidOid;
|
|
|
|
CachedRelationLookup("pg_dist_shard_logical_relid_index", &cachedOid);
|
|
|
|
return cachedOid;
|
|
}
|
|
|
|
|
|
/* return oid of pg_dist_shard_shardid_index index */
|
|
Oid
|
|
DistShardShardidIndexId(void)
|
|
{
|
|
static Oid cachedOid = InvalidOid;
|
|
|
|
CachedRelationLookup("pg_dist_shard_shardid_index", &cachedOid);
|
|
|
|
return cachedOid;
|
|
}
|
|
|
|
|
|
/* return oid of pg_dist_shard_placement_shardid_index */
|
|
Oid
|
|
DistShardPlacementShardidIndexId(void)
|
|
{
|
|
static Oid cachedOid = InvalidOid;
|
|
|
|
CachedRelationLookup("pg_dist_shard_placement_shardid_index", &cachedOid);
|
|
|
|
return cachedOid;
|
|
}
|
|
|
|
|
|
/* return oid of the citus_extradata_container(internal) function */
|
|
Oid
|
|
CitusExtraDataContainerFuncId(void)
|
|
{
|
|
static Oid cachedOid = 0;
|
|
List *nameList = NIL;
|
|
Oid paramOids[1] = { INTERNALOID };
|
|
|
|
if (cachedOid == InvalidOid)
|
|
{
|
|
nameList = list_make2(makeString("pg_catalog"),
|
|
makeString("citus_extradata_container"));
|
|
cachedOid = LookupFuncName(nameList, 1, paramOids, false);
|
|
}
|
|
|
|
return cachedOid;
|
|
}
|
|
|
|
|
|
/*
|
|
* master_dist_partition_cache_invalidate is a trigger function that performs
|
|
* relcache invalidations when the contents of pg_dist_partition are changed
|
|
* on the SQL level.
|
|
*/
|
|
Datum
|
|
master_dist_partition_cache_invalidate(PG_FUNCTION_ARGS)
|
|
{
|
|
TriggerData *triggerData = (TriggerData *) fcinfo->context;
|
|
HeapTuple newTuple = NULL;
|
|
HeapTuple oldTuple = NULL;
|
|
Oid oldLogicalRelationId = InvalidOid;
|
|
Oid newLogicalRelationId = InvalidOid;
|
|
|
|
if (!CALLED_AS_TRIGGER(fcinfo))
|
|
{
|
|
ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
|
|
errmsg("must be called as trigger")));
|
|
}
|
|
|
|
newTuple = triggerData->tg_newtuple;
|
|
oldTuple = triggerData->tg_trigtuple;
|
|
|
|
/* collect logicalrelid for OLD and NEW tuple */
|
|
if (oldTuple != NULL)
|
|
{
|
|
Form_pg_dist_partition distPart = (Form_pg_dist_partition) GETSTRUCT(oldTuple);
|
|
|
|
oldLogicalRelationId = distPart->logicalrelid;
|
|
}
|
|
|
|
if (newTuple != NULL)
|
|
{
|
|
Form_pg_dist_partition distPart = (Form_pg_dist_partition) GETSTRUCT(newTuple);
|
|
|
|
newLogicalRelationId = distPart->logicalrelid;
|
|
}
|
|
|
|
/*
|
|
* Invalidate relcache for the relevant relation(s). In theory
|
|
* logicalrelid should never change, but it doesn't hurt to be
|
|
* paranoid. We ignore the case that there's no corresponding pg_class
|
|
* entry - that happens if the pg_dist_partition tuple is deleted after
|
|
* the relation has been dropped.
|
|
*/
|
|
if (oldLogicalRelationId != InvalidOid &&
|
|
oldLogicalRelationId != newLogicalRelationId)
|
|
{
|
|
HeapTuple oldClassTuple =
|
|
SearchSysCache1(RELOID, ObjectIdGetDatum(oldLogicalRelationId));
|
|
|
|
if (HeapTupleIsValid(oldClassTuple))
|
|
{
|
|
CacheInvalidateRelcacheByTuple(oldClassTuple);
|
|
ReleaseSysCache(oldClassTuple);
|
|
}
|
|
}
|
|
|
|
if (newLogicalRelationId != InvalidOid)
|
|
{
|
|
HeapTuple newClassTuple =
|
|
SearchSysCache1(RELOID, ObjectIdGetDatum(newLogicalRelationId));
|
|
|
|
if (HeapTupleIsValid(newClassTuple))
|
|
{
|
|
CacheInvalidateRelcacheByTuple(newClassTuple);
|
|
ReleaseSysCache(newClassTuple);
|
|
}
|
|
}
|
|
|
|
PG_RETURN_DATUM(PointerGetDatum(NULL));
|
|
}
|
|
|
|
|
|
/*
|
|
* master_dist_shard_cache_invalidate is a trigger function that performs
|
|
* relcache invalidations when the contents of pg_dist_shard are changed
|
|
* on the SQL level.
|
|
*/
|
|
Datum
|
|
master_dist_shard_cache_invalidate(PG_FUNCTION_ARGS)
|
|
{
|
|
TriggerData *triggerData = (TriggerData *) fcinfo->context;
|
|
HeapTuple newTuple = NULL;
|
|
HeapTuple oldTuple = NULL;
|
|
Oid oldLogicalRelationId = InvalidOid;
|
|
Oid newLogicalRelationId = InvalidOid;
|
|
|
|
if (!CALLED_AS_TRIGGER(fcinfo))
|
|
{
|
|
ereport(ERROR, (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
|
|
errmsg("must be called as trigger")));
|
|
}
|
|
|
|
newTuple = triggerData->tg_newtuple;
|
|
oldTuple = triggerData->tg_trigtuple;
|
|
|
|
/* collect logicalrelid for OLD and NEW tuple */
|
|
if (oldTuple != NULL)
|
|
{
|
|
Form_pg_dist_shard distShard = (Form_pg_dist_shard) GETSTRUCT(oldTuple);
|
|
|
|
oldLogicalRelationId = distShard->logicalrelid;
|
|
}
|
|
|
|
if (newTuple != NULL)
|
|
{
|
|
Form_pg_dist_shard distShard = (Form_pg_dist_shard) GETSTRUCT(newTuple);
|
|
|
|
newLogicalRelationId = distShard->logicalrelid;
|
|
}
|
|
|
|
/*
|
|
* Invalidate relcache for the relevant relation(s). In theory
|
|
* logicalrelid should never change, but it doesn't hurt to be
|
|
* paranoid. We ignore the case that there's no corresponding pg_class
|
|
* entry - that happens if the pg_dist_shard tuple is deleted after
|
|
* the relation has been dropped.
|
|
*/
|
|
if (oldLogicalRelationId != InvalidOid &&
|
|
oldLogicalRelationId != newLogicalRelationId)
|
|
{
|
|
HeapTuple oldClassTuple =
|
|
SearchSysCache1(RELOID, ObjectIdGetDatum(oldLogicalRelationId));
|
|
|
|
if (HeapTupleIsValid(oldClassTuple))
|
|
{
|
|
CacheInvalidateRelcacheByTuple(oldClassTuple);
|
|
ReleaseSysCache(oldClassTuple);
|
|
}
|
|
}
|
|
|
|
if (newLogicalRelationId != InvalidOid)
|
|
{
|
|
HeapTuple newClassTuple =
|
|
SearchSysCache1(RELOID, ObjectIdGetDatum(newLogicalRelationId));
|
|
|
|
if (HeapTupleIsValid(newClassTuple))
|
|
{
|
|
CacheInvalidateRelcacheByTuple(newClassTuple);
|
|
ReleaseSysCache(newClassTuple);
|
|
}
|
|
}
|
|
|
|
PG_RETURN_DATUM(PointerGetDatum(NULL));
|
|
}
|
|
|
|
|
|
/* initialize the infrastructure for the metadata cache */
|
|
static void
|
|
InitializeDistTableCache(void)
|
|
{
|
|
HASHCTL info;
|
|
|
|
/* make sure we've initialized CacheMemoryContext */
|
|
if (CacheMemoryContext == NULL)
|
|
{
|
|
CreateCacheMemoryContext();
|
|
}
|
|
|
|
/* build initial scan keys, copied for every relation scan */
|
|
memset(&DistPartitionScanKey, 0, sizeof(DistPartitionScanKey));
|
|
|
|
fmgr_info_cxt(F_OIDEQ,
|
|
&DistPartitionScanKey[0].sk_func,
|
|
CacheMemoryContext);
|
|
DistPartitionScanKey[0].sk_strategy = BTEqualStrategyNumber;
|
|
DistPartitionScanKey[0].sk_subtype = InvalidOid;
|
|
DistPartitionScanKey[0].sk_collation = InvalidOid;
|
|
DistPartitionScanKey[0].sk_attno = Anum_pg_dist_partition_logicalrelid;
|
|
|
|
memset(&DistShardScanKey, 0, sizeof(DistShardScanKey));
|
|
|
|
fmgr_info_cxt(F_OIDEQ,
|
|
&DistShardScanKey[0].sk_func,
|
|
CacheMemoryContext);
|
|
DistShardScanKey[0].sk_strategy = BTEqualStrategyNumber;
|
|
DistShardScanKey[0].sk_subtype = InvalidOid;
|
|
DistShardScanKey[0].sk_collation = InvalidOid;
|
|
DistShardScanKey[0].sk_attno = Anum_pg_dist_shard_logicalrelid;
|
|
|
|
/* initialize the hash table */
|
|
MemSet(&info, 0, sizeof(info));
|
|
info.keysize = sizeof(Oid);
|
|
info.entrysize = sizeof(DistTableCacheEntry);
|
|
info.hash = tag_hash;
|
|
DistTableCacheHash =
|
|
hash_create("Distributed Relation Cache", 32, &info,
|
|
HASH_ELEM | HASH_FUNCTION);
|
|
|
|
/* Watch for invalidation events. */
|
|
CacheRegisterRelcacheCallback(InvalidateDistRelationCacheCallback,
|
|
(Datum) 0);
|
|
}
|
|
|
|
|
|
/*
|
|
* ResetDistTableCacheEntry frees any out-of-band memory used by a cache entry,
|
|
* but does not free the entry itself.
|
|
*/
|
|
void
|
|
ResetDistTableCacheEntry(DistTableCacheEntry *cacheEntry)
|
|
{
|
|
if (cacheEntry->partitionKeyString != NULL)
|
|
{
|
|
pfree(cacheEntry->partitionKeyString);
|
|
cacheEntry->partitionKeyString = NULL;
|
|
}
|
|
|
|
if (cacheEntry->shardIntervalArrayLength > 0)
|
|
{
|
|
int i = 0;
|
|
|
|
for (i = 0; i < cacheEntry->shardIntervalArrayLength; i++)
|
|
{
|
|
ShardInterval *shardInterval = &cacheEntry->shardIntervalArray[i];
|
|
bool valueByVal = shardInterval->valueByVal;
|
|
|
|
if (!valueByVal)
|
|
{
|
|
if (shardInterval->minValueExists)
|
|
{
|
|
pfree(DatumGetPointer(shardInterval->minValue));
|
|
}
|
|
|
|
if (shardInterval->maxValueExists)
|
|
{
|
|
pfree(DatumGetPointer(shardInterval->maxValue));
|
|
}
|
|
}
|
|
}
|
|
|
|
pfree(cacheEntry->shardIntervalArray);
|
|
cacheEntry->shardIntervalArray = NULL;
|
|
cacheEntry->shardIntervalArrayLength = 0;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* InvalidateDistRelationCacheCallback flushes cache entries when a relation
|
|
* is updated (or flushes the entire cache).
|
|
*/
|
|
static void
|
|
InvalidateDistRelationCacheCallback(Datum argument, Oid relationId)
|
|
{
|
|
/* invalidate either entire cache or a specific entry */
|
|
if (relationId == InvalidOid)
|
|
{
|
|
DistTableCacheEntry *cacheEntry = NULL;
|
|
HASH_SEQ_STATUS status;
|
|
|
|
hash_seq_init(&status, DistTableCacheHash);
|
|
|
|
while ((cacheEntry = (DistTableCacheEntry *) hash_seq_search(&status)) != NULL)
|
|
{
|
|
cacheEntry->isValid = false;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
void *hashKey = (void *) &relationId;
|
|
bool foundInCache = false;
|
|
|
|
DistTableCacheEntry *cacheEntry = hash_search(DistTableCacheHash, hashKey,
|
|
HASH_FIND, &foundInCache);
|
|
if (foundInCache)
|
|
{
|
|
cacheEntry->isValid = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* LookupDistPartitionTuple searches pg_dist_partition for relationId's entry
|
|
* and returns that or, if no matching entry was found, NULL.
|
|
*/
|
|
static HeapTuple
|
|
LookupDistPartitionTuple(Oid relationId)
|
|
{
|
|
Relation pgDistPartition = NULL;
|
|
HeapTuple distPartitionTuple = NULL;
|
|
HeapTuple currentPartitionTuple = NULL;
|
|
SysScanDesc scanDescriptor;
|
|
ScanKeyData scanKey[1];
|
|
|
|
pgDistPartition = heap_open(DistPartitionRelationId(), AccessShareLock);
|
|
|
|
/* copy scankey to local copy, it will be modified during the scan */
|
|
memcpy(scanKey, DistPartitionScanKey, sizeof(DistPartitionScanKey));
|
|
|
|
/* set scan arguments */
|
|
scanKey[0].sk_argument = ObjectIdGetDatum(relationId);
|
|
|
|
scanDescriptor = systable_beginscan(pgDistPartition,
|
|
DistPartitionLogicalRelidIndexId(),
|
|
true, NULL, 1, scanKey);
|
|
|
|
currentPartitionTuple = systable_getnext(scanDescriptor);
|
|
if (HeapTupleIsValid(currentPartitionTuple))
|
|
{
|
|
Assert(!HeapTupleHasNulls(currentPartitionTuple));
|
|
|
|
distPartitionTuple = heap_copytuple(currentPartitionTuple);
|
|
}
|
|
|
|
systable_endscan(scanDescriptor);
|
|
|
|
heap_close(pgDistPartition, NoLock);
|
|
|
|
return distPartitionTuple;
|
|
}
|
|
|
|
|
|
/*
|
|
* LookupDistShardTuples returns a list of all dist_shard tuples for the
|
|
* specified relation.
|
|
*/
|
|
static List *
|
|
LookupDistShardTuples(Oid relationId)
|
|
{
|
|
Relation pgDistShard = NULL;
|
|
List *distShardTupleList = NIL;
|
|
HeapTuple currentShardTuple = NULL;
|
|
SysScanDesc scanDescriptor;
|
|
ScanKeyData scanKey[1];
|
|
|
|
pgDistShard = heap_open(DistShardRelationId(), AccessShareLock);
|
|
|
|
/* copy scankey to local copy, it will be modified during the scan */
|
|
memcpy(scanKey, DistShardScanKey, sizeof(DistShardScanKey));
|
|
|
|
/* set scan arguments */
|
|
scanKey[0].sk_argument = ObjectIdGetDatum(relationId);
|
|
|
|
scanDescriptor = systable_beginscan(pgDistShard, DistShardLogicalRelidIndexId(), true,
|
|
NULL, 1, scanKey);
|
|
|
|
currentShardTuple = systable_getnext(scanDescriptor);
|
|
while (HeapTupleIsValid(currentShardTuple))
|
|
{
|
|
HeapTuple shardTupleCopy = heap_copytuple(currentShardTuple);
|
|
distShardTupleList = lappend(distShardTupleList, shardTupleCopy);
|
|
|
|
currentShardTuple = systable_getnext(scanDescriptor);
|
|
}
|
|
|
|
systable_endscan(scanDescriptor);
|
|
heap_close(pgDistShard, AccessShareLock);
|
|
|
|
return distShardTupleList;
|
|
}
|
|
|
|
|
|
/*
|
|
* GetPartitionTypeInputInfo populates output parameters with the interval type
|
|
* identifier and modifier for the specified partition key/method combination.
|
|
*/
|
|
static void
|
|
GetPartitionTypeInputInfo(char *partitionKeyString, char partitionMethod,
|
|
Oid *intervalTypeId, int32 *intervalTypeMod)
|
|
{
|
|
*intervalTypeId = InvalidOid;
|
|
*intervalTypeMod = -1;
|
|
|
|
switch (partitionMethod)
|
|
{
|
|
case DISTRIBUTE_BY_APPEND:
|
|
case DISTRIBUTE_BY_RANGE:
|
|
{
|
|
Node *partitionNode = stringToNode(partitionKeyString);
|
|
Var *partitionColumn = (Var *) partitionNode;
|
|
Assert(IsA(partitionNode, Var));
|
|
|
|
*intervalTypeId = partitionColumn->vartype;
|
|
*intervalTypeMod = partitionColumn->vartypmod;
|
|
break;
|
|
}
|
|
|
|
case DISTRIBUTE_BY_HASH:
|
|
{
|
|
*intervalTypeId = INT4OID;
|
|
break;
|
|
}
|
|
|
|
default:
|
|
{
|
|
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
errmsg("unsupported table partition type: %c",
|
|
partitionMethod)));
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* TupleToShardInterval transforms the specified dist_shard tuple into a new
|
|
* ShardInterval using the provided descriptor and partition type information.
|
|
*/
|
|
static ShardInterval *
|
|
TupleToShardInterval(HeapTuple heapTuple, TupleDesc tupleDescriptor, Oid intervalTypeId,
|
|
int32 intervalTypeMod)
|
|
{
|
|
ShardInterval *shardInterval = NULL;
|
|
bool isNull = false;
|
|
bool minValueNull = false;
|
|
bool maxValueNull = false;
|
|
Oid inputFunctionId = InvalidOid;
|
|
Oid typeIoParam = InvalidOid;
|
|
Datum relationIdDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_logicalrelid,
|
|
tupleDescriptor, &isNull);
|
|
Datum shardIdDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardid,
|
|
tupleDescriptor, &isNull);
|
|
Datum storageTypeDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardstorage,
|
|
tupleDescriptor, &isNull);
|
|
|
|
Datum minValueTextDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardminvalue,
|
|
tupleDescriptor, &minValueNull);
|
|
Datum maxValueTextDatum = heap_getattr(heapTuple, Anum_pg_dist_shard_shardmaxvalue,
|
|
tupleDescriptor, &maxValueNull);
|
|
|
|
Oid relationId = DatumGetObjectId(relationIdDatum);
|
|
int64 shardId = DatumGetInt64(shardIdDatum);
|
|
char storageType = DatumGetChar(storageTypeDatum);
|
|
Datum minValue = 0;
|
|
Datum maxValue = 0;
|
|
bool minValueExists = false;
|
|
bool maxValueExists = false;
|
|
int16 intervalTypeLen = 0;
|
|
bool intervalByVal = false;
|
|
char intervalAlign = '0';
|
|
char intervalDelim = '0';
|
|
|
|
if (!minValueNull && !maxValueNull)
|
|
{
|
|
char *minValueString = TextDatumGetCString(minValueTextDatum);
|
|
char *maxValueString = TextDatumGetCString(maxValueTextDatum);
|
|
|
|
/* TODO: move this up the call stack to avoid per-tuple invocation? */
|
|
get_type_io_data(intervalTypeId, IOFunc_input, &intervalTypeLen, &intervalByVal,
|
|
&intervalAlign, &intervalDelim, &typeIoParam, &inputFunctionId);
|
|
|
|
/* finally convert min/max values to their actual types */
|
|
minValue = OidInputFunctionCall(inputFunctionId, minValueString,
|
|
typeIoParam, intervalTypeMod);
|
|
maxValue = OidInputFunctionCall(inputFunctionId, maxValueString,
|
|
typeIoParam, intervalTypeMod);
|
|
|
|
minValueExists = true;
|
|
maxValueExists = true;
|
|
}
|
|
|
|
shardInterval = CitusMakeNode(ShardInterval);
|
|
shardInterval->relationId = relationId;
|
|
shardInterval->storageType = storageType;
|
|
shardInterval->valueTypeId = intervalTypeId;
|
|
shardInterval->valueTypeLen = intervalTypeLen;
|
|
shardInterval->valueByVal = intervalByVal;
|
|
shardInterval->minValueExists = minValueExists;
|
|
shardInterval->maxValueExists = maxValueExists;
|
|
shardInterval->minValue = minValue;
|
|
shardInterval->maxValue = maxValue;
|
|
shardInterval->shardId = shardId;
|
|
|
|
return shardInterval;
|
|
}
|
|
|
|
|
|
/*
|
|
* CachedRelationLookup performs a cached lookup for the relation
|
|
* relationName, with the result cached in *cachedOid.
|
|
*
|
|
* NB: The way this is cached means the result will be wrong after the
|
|
* extension is dropped and reconnect. A reconnect fixes that though, so that
|
|
* seems acceptable.
|
|
*/
|
|
static void
|
|
CachedRelationLookup(const char *relationName, Oid *cachedOid)
|
|
{
|
|
if (*cachedOid == InvalidOid)
|
|
{
|
|
*cachedOid = get_relname_relid(relationName, PG_CATALOG_NAMESPACE);
|
|
|
|
if (*cachedOid == InvalidOid)
|
|
{
|
|
ereport(ERROR, (errmsg("cache lookup failed for %s, called to early?",
|
|
relationName)));
|
|
}
|
|
}
|
|
}
|