citus/src/backend/distributed/utils/multi_partitioning_utils.c

1328 lines
40 KiB
C

/*
* multi_partitioning_utils.c
* Utility functions for declarative partitioning
*
* Copyright (c) Citus Data, Inc.
*/
#include "postgres.h"
#include "pg_version_constants.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "catalog/partition.h"
#include "catalog/pg_class.h"
#include "catalog/pg_constraint.h"
#include "catalog/pg_inherits.h"
#include "commands/tablecmds.h"
#include "common/string.h"
#include "distributed/citus_nodes.h"
#include "distributed/adaptive_executor.h"
#include "distributed/citus_ruleutils.h"
#include "distributed/colocation_utils.h"
#include "distributed/commands.h"
#include "distributed/coordinator_protocol.h"
#include "distributed/deparse_shard_query.h"
#include "distributed/listutils.h"
#include "distributed/metadata_utility.h"
#include "distributed/multi_executor.h"
#include "distributed/multi_partitioning_utils.h"
#include "distributed/multi_physical_planner.h"
#include "distributed/relay_utility.h"
#include "distributed/resource_lock.h"
#include "distributed/shardinterval_utils.h"
#include "distributed/version_compat.h"
#include "distributed/worker_protocol.h"
#include "lib/stringinfo.h"
#include "nodes/makefuncs.h"
#include "nodes/pg_list.h"
#include "pgstat.h"
#include "partitioning/partdesc.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/syscache.h"
#include "utils/varlena.h"
/* forward declarations of the file-local (static) helper functions */
static char * PartitionBound(Oid partitionId);
static Relation try_relation_open_nolock(Oid relationId);
static List * CreateFixPartitionConstraintsTaskList(Oid relationId);
static List * WorkerFixPartitionConstraintCommandList(Oid relationId, uint64 shardId,
List *checkConstraintList);
static void CreateFixPartitionShardIndexNames(Oid parentRelationId,
Oid partitionRelationId,
Oid parentIndexOid);
static List * WorkerFixPartitionShardIndexNamesCommandList(uint64 parentShardId,
List *indexIdList,
Oid partitionRelationId);
static List * WorkerFixPartitionShardIndexNamesCommandListForParentShardIndex(
char *qualifiedParentShardIndexName, Oid parentIndexId, Oid partitionRelationId);
static List * WorkerFixPartitionShardIndexNamesCommandListForPartitionIndex(Oid
partitionIndexId,
char *
qualifiedParentShardIndexName,
Oid
partitionId);
static List * CheckConstraintNameListForRelation(Oid relationId);
static bool RelationHasConstraint(Oid relationId, char *constraintName);
static char * RenameConstraintCommand(Oid relationId, char *constraintName,
char *newConstraintName);
/* function-manager (version 1) declarations for the UDFs defined in this file */
PG_FUNCTION_INFO_V1(fix_pre_citus10_partitioned_table_constraint_names);
PG_FUNCTION_INFO_V1(worker_fix_pre_citus10_partitioned_table_constraint_names);
PG_FUNCTION_INFO_V1(fix_partition_shard_index_names);
PG_FUNCTION_INFO_V1(worker_fix_partition_shard_index_names);
/*
 * fix_pre_citus10_partitioned_table_constraint_names is the coordinator-side
 * UDF that repairs the constraint names on the shards of a distributed
 * partitioned table.
 *
 * On shards of partitioned tables, only unique and foreign key constraints
 * are supposed to carry a shardId suffix. Prior to Citus 10,
 * ALTER TABLE dist_part_table ADD CONSTRAINT .. mistakenly appended the
 * shardId to other constraint names as well. This function builds one task
 * per shard that checks for such names and renames them back to the original
 * constraint name.
 *
 * Errors out when the input relation is not a partitioned table or is not a
 * Citus table.
 */
Datum
fix_pre_citus10_partitioned_table_constraint_names(PG_FUNCTION_ARGS)
{
	Oid partitionedTableId = PG_GETARG_OID(0);

	EnsureCoordinator();

	if (!PartitionedTable(partitionedTableId))
	{
		ereport(ERROR, (errmsg("could not fix partition constraints: "
							   "relation does not exist or is not partitioned")));
	}

	if (!IsCitusTable(partitionedTableId))
	{
		ereport(ERROR, (errmsg("fix_pre_citus10_partitioned_table_constraint_names can "
							   "only be called for distributed partitioned tables")));
	}

	List *fixTaskList = CreateFixPartitionConstraintsTaskList(partitionedTableId);
	if (fixTaskList == NIL)
	{
		/* no tasks were generated, so there is nothing to fix */
		PG_RETURN_VOID();
	}

	const bool localExecutionSupported = true;
	ExecuteUtilityTaskList(fixTaskList, localExecutionSupported);

	PG_RETURN_VOID();
}
/*
 * worker_fix_pre_citus10_partitioned_table_constraint_names renames a single
 * constraint of the given relation back to its original name when the shard
 * id was appended to it.
 *
 * Arguments:
 *   0 - oid of the relation to inspect (must be a partitioned table)
 *   1 - shard id that may have been appended to the constraint name
 *   2 - the original (un-suffixed) constraint name
 *
 * Errors out if the relation does not exist or is not partitioned. Does
 * nothing when the relation has no constraint named with the shard id suffix.
 */
Datum
worker_fix_pre_citus10_partitioned_table_constraint_names(PG_FUNCTION_ARGS)
{
	Oid relationId = PG_GETARG_OID(0);

	/*
	 * The coordinator builds this call from a uint64 shard id, so read the
	 * whole 64-bit argument instead of truncating it to 32 bits.
	 * NOTE(review): assumes the SQL-level argument is declared as bigint;
	 * confirm against the extension's SQL definition.
	 */
	int64 shardId = PG_GETARG_INT64(1);
	text *constraintNameText = PG_GETARG_TEXT_P(2);

	if (!PartitionedTable(relationId))
	{
		ereport(ERROR, (errmsg("could not fix partition constraints: "
							   "relation does not exist or is not partitioned")));
	}

	char *constraintName = text_to_cstring(constraintNameText);

	/* work on a copy, AppendShardIdToName modifies the name it is given */
	char *shardIdAppendedConstraintName = pstrdup(constraintName);
	AppendShardIdToName(&shardIdAppendedConstraintName, shardId);

	/* if shardId was appended to the constraint name, rename back to original */
	if (RelationHasConstraint(relationId, shardIdAppendedConstraintName))
	{
		char *renameConstraintDDLCommand =
			RenameConstraintCommand(relationId, shardIdAppendedConstraintName,
									constraintName);
		ExecuteAndLogUtilityCommand(renameConstraintDDLCommand);
	}

	PG_RETURN_VOID();
}
/*
 * fix_partition_shard_index_names is the coordinator-side UDF that fixes the
 * index names on the shards of partitions of a partitioned table. When the
 * input is itself a partition, only the shards of that partition are fixed.
 *
 * Background: CREATE INDEX on a parent table is not explicitly propagated to
 * every partition. Postgres creates the partition indexes on its own, both on
 * the coordinator and on the workers, and auto-generates their names. When a
 * generated name is too long it is truncated and given an _idx postfix, and
 * the truncation can drop the shardId of the partition shard. As a result the
 * partition shell table and one of its shards may end up using the same index
 * name. See https://github.com/citusdata/citus/issues/4962 for the details.
 *
 * To avoid that, the indexes on partition shards are renamed so that they end
 * with the shardId, no matter whether the generated name was long or short.
 * Afterwards no such index name ends in _idx; all of them end in _{shardid}.
 *
 * Outline of the algorithm:
 *   foreach parentShard in shardListOfParentTableId:
 *     foreach parentIndex on parent:
 *       generate qualifiedParentShardIndexName -> parentShardIndex
 *       foreach inheritedPartitionIndex on parentIndex:
 *         get table relation of inheritedPartitionIndex -> partitionId
 *         foreach partitionShard in shardListOfPartitionid:
 *           generate qualifiedPartitionShardName -> partitionShard
 *           generate newPartitionShardIndexName
 *           (the remaining steps run on the worker node)
 *           foreach inheritedPartitionShardIndex on parentShardIndex:
 *             if table relation of inheritedPartitionShardIndex is partitionShard:
 *               if inheritedPartitionShardIndex does not have proper name:
 *                 Rename(inheritedPartitionShardIndex, newPartitionShardIndexName)
 *               break
 */
Datum
fix_partition_shard_index_names(PG_FUNCTION_ARGS)
{
	CheckCitusVersion(ERROR);
	EnsureCoordinator();

	Oid inputRelationId = PG_GETARG_OID(0);

	if (!IsCitusTable(inputRelationId))
	{
		ereport(ERROR, (errmsg("fix_partition_shard_index_names can only be called "
							   "for Citus tables")));
	}

	EnsureTableOwner(inputRelationId);

	/* passing InvalidOid as the parent index means "fix all the indexes" */
	FixPartitionShardIndexNames(inputRelationId, InvalidOid);

	/*
	 * fix_all_partition_shard_index_names() calls this UDF once per
	 * partitioned table, so keeping the distributed table metadata of the
	 * input table around until the transaction ends would serve no purpose.
	 */
	CitusTableCacheFlushInvalidatedEntries();

	PG_RETURN_VOID();
}
/*
 * worker_fix_partition_shard_index_names renames, if needed, the index that
 * belongs to the given partition shard and is a child of the given parent
 * shard index.
 *
 * Arguments:
 *   0 - oid of the parent index, expected to be an index on a shard of a
 *       distributed partitioned table
 *   1 - (optionally schema-qualified) name of the partition shard
 *   2 - name the child index should have after the fix
 *
 * The function returns silently when the partition shard cannot be found and
 * errors out when the parent index has no children.
 */
Datum
worker_fix_partition_shard_index_names(PG_FUNCTION_ARGS)
{
	Oid parentShardIndexId = PG_GETARG_OID(0);
	text *partitionShardNameText = PG_GETARG_TEXT_P(1);

	/* turn the (schema, name) text into a RangeVar we can look up */
	List *qualifiedNameList = textToQualifiedNameList(partitionShardNameText);
	RangeVar *partitionShardRangeVar = makeRangeVarFromNameList(qualifiedNameList);

	/* resolve the oid without taking a lock; a missing relation is not an error */
	bool missingOk = true;
	Oid partitionShardId = RangeVarGetRelid(partitionShardRangeVar, NoLock,
											missingOk);
	if (!OidIsValid(partitionShardId))
	{
		PG_RETURN_VOID();
	}

	CheckCitusVersion(ERROR);
	EnsureTableOwner(partitionShardId);

	text *targetIndexNameText = PG_GETARG_TEXT_P(2);
	char *targetIndexName = text_to_cstring(targetIndexNameText);

	if (!has_subclass(parentShardIndexId))
	{
		ereport(ERROR, (errmsg("could not fix child index names: "
							   "index is not partitioned")));
	}

	List *childIndexIdList = find_inheritance_children(parentShardIndexId,
													   ShareRowExclusiveLock);

	Oid childIndexId = InvalidOid;
	foreach_oid(childIndexId, childIndexIdList)
	{
		/* skip the child indexes that belong to other partition shards */
		if (IndexGetRelation(childIndexId, false) != partitionShardId)
		{
			continue;
		}

		char *currentIndexName = get_rel_name(childIndexId);

		/*
		 * ExtractShardIdFromTableName yields INVALID_SHARD_ID when the name
		 * does not end in _shardid. Only such indexes need a rename; the new
		 * name ends in _shardid so that the index can be located through the
		 * conventional Citus naming scheme.
		 */
		bool nameEndsWithShardId =
			ExtractShardIdFromTableName(currentIndexName, missingOk) !=
			INVALID_SHARD_ID;

		if (!nameEndsWithShardId)
		{
			char *indexSchemaName =
				get_namespace_name(get_rel_namespace(childIndexId));

			RenameStmt *renameStmt = makeNode(RenameStmt);
			renameStmt->renameType = OBJECT_INDEX;
			renameStmt->missing_ok = false;
			renameStmt->relation = makeRangeVar(indexSchemaName, currentIndexName, -1);
			renameStmt->newname = targetIndexName;

			RenameRelation(renameStmt);
		}

		/* the index of the partition shard has been handled, stop searching */
		break;
	}

	PG_RETURN_VOID();
}
/*
 * FixPartitionShardIndexNames gets a relationId. The input relationId should be
 * either a parent (partitioned) table or a partition. If it is a parent table,
 * the index names on the shards of all its partitions are fixed. If it is a
 * partition, only that specific partition is fixed.
 *
 * The second parameter, parentIndexOid, is optional. When a valid Oid is
 * provided, only that specific index is fixed; InvalidOid means all indexes.
 *
 * Emits a NOTICE and returns when the relation no longer exists, and errors
 * out when the relation is neither a partitioned table nor a partition.
 */
void
FixPartitionShardIndexNames(Oid relationId, Oid parentIndexOid)
{
	Relation relation = try_relation_open(relationId, AccessShareLock);

	if (relation == NULL)
	{
		ereport(NOTICE, (errmsg("relation with OID %u does not exist, skipping",
								relationId)));
		return;
	}

	/* at this point, we should only be dealing with Citus tables */
	Assert(IsCitusTable(relationId));

	Oid parentRelationId = InvalidOid;
	Oid partitionRelationId = InvalidOid;

	if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		parentRelationId = relationId;
	}
	else if (PartitionTable(relationId))
	{
		parentRelationId = PartitionParentOid(relationId);
		partitionRelationId = relationId;
	}
	else
	{
		/*
		 * Copy the name before closing: the relation must not be
		 * dereferenced once our reference to it has been released.
		 */
		char *relationName = pstrdup(RelationGetRelationName(relation));

		relation_close(relation, NoLock);

		ereport(ERROR, (errmsg("Fixing shard index names is only applicable to "
							   "partitioned tables or partitions, "
							   "and \"%s\" is neither",
							   relationName)));
	}

	CreateFixPartitionShardIndexNames(parentRelationId,
									  partitionRelationId,
									  parentIndexOid);

	/* close the relation but keep the lock */
	relation_close(relation, NoLock);
}
/*
 * CreateFixPartitionConstraintsTaskList builds one DDL task per shard of the
 * given distributed partitioned table. Each task carries the queries that run
 * the worker_fix_pre_citus10_partitioned_table_constraint_names UDF for every
 * check constraint of the relation.
 *
 * Returns NIL when the relation has no check constraints.
 */
static List *
CreateFixPartitionConstraintsTaskList(Oid relationId)
{
	List *checkConstraintNames = CheckConstraintNameListForRelation(relationId);
	if (checkConstraintNames == NIL)
	{
		/* without check constraints there is nothing to fix */
		return NIL;
	}

	List *shardIntervals = LoadShardIntervalList(relationId);

	/* the metadata has to be locked before the placement lists are fetched */
	LockShardListMetadata(shardIntervals, ShareLock);

	List *fixTaskList = NIL;

	/* tasks are numbered starting from 1, in the order they are appended */
	int nextTaskId = 1;

	ShardInterval *currentShard = NULL;
	foreach_ptr(currentShard, shardIntervals)
	{
		uint64 currentShardId = currentShard->shardId;

		List *shardCommands =
			WorkerFixPartitionConstraintCommandList(relationId, currentShardId,
													checkConstraintNames);

		Task *fixTask = CitusMakeNode(Task);
		fixTask->jobId = INVALID_JOB_ID;
		fixTask->taskId = nextTaskId;
		nextTaskId++;
		fixTask->taskType = DDL_TASK;
		SetTaskQueryStringList(fixTask, shardCommands);
		fixTask->dependentTaskList = NULL;
		fixTask->replicationModel = REPLICATION_MODEL_INVALID;
		fixTask->anchorShardId = currentShardId;
		fixTask->taskPlacementList = ActiveShardPlacementList(currentShardId);

		fixTaskList = lappend(fixTaskList, fixTask);
	}

	return fixTaskList;
}
/*
 * CheckConstraintNameListForRelation scans pg_constraint and returns the
 * names of all CHECK constraints defined on the given relation. The returned
 * list holds palloc'd copies of the names.
 */
static List *
CheckConstraintNameListForRelation(Oid relationId)
{
	ScanKeyData scanKey[2];
	const int scanKeyCount = 2;
	const bool useIndex = false;

	Relation pgConstraint = table_open(ConstraintRelationId, AccessShareLock);

	/* conrelid = relationId AND contype = CONSTRAINT_CHECK */
	ScanKeyInit(&scanKey[0], Anum_pg_constraint_conrelid,
				BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relationId));
	ScanKeyInit(&scanKey[1], Anum_pg_constraint_contype,
				BTEqualStrategyNumber, F_CHAREQ, CharGetDatum(CONSTRAINT_CHECK));

	SysScanDesc scan = systable_beginscan(pgConstraint, InvalidOid, useIndex,
										  NULL, scanKeyCount, scanKey);

	List *checkConstraintNames = NIL;

	for (HeapTuple tuple = systable_getnext(scan);
		 HeapTupleIsValid(tuple);
		 tuple = systable_getnext(scan))
	{
		Form_pg_constraint constraintForm = (Form_pg_constraint) GETSTRUCT(tuple);

		/* copy the name, the tuple is only valid during the scan */
		char *nameCopy = pstrdup(NameStr(constraintForm->conname));
		checkConstraintNames = lappend(checkConstraintNames, nameCopy);
	}

	systable_endscan(scan);
	table_close(pgConstraint, NoLock);

	return checkConstraintNames;
}
/*
 * WorkerFixPartitionConstraintCommandList returns the list of query strings
 * that fix the check constraint names of one shard. There is one
 * worker_fix_pre_citus10_partitioned_table_constraint_names call per
 * constraint name in checkConstraintList.
 */
static List *
WorkerFixPartitionConstraintCommandList(Oid relationId, uint64 shardId,
										List *checkConstraintList)
{
	/* derive the schema-qualified, quoted name of the shard relation */
	char *schemaName = get_namespace_name(get_rel_namespace(relationId));
	char *shardName = pstrdup(get_rel_name(relationId));
	AppendShardIdToName(&shardName, shardId);
	char *qualifiedShardName = quote_qualified_identifier(schemaName, shardName);

	List *fixCommands = NIL;

	char *checkConstraintName = NULL;
	foreach_ptr(checkConstraintName, checkConstraintList)
	{
		char *shardNameLiteral = quote_literal_cstr(qualifiedShardName);
		char *constraintNameLiteral = quote_literal_cstr(checkConstraintName);

		StringInfo fixCommand = makeStringInfo();
		appendStringInfo(fixCommand,
						 "SELECT worker_fix_pre_citus10_partitioned_table_constraint_names(%s::regclass, "
						 UINT64_FORMAT ", %s::text)",
						 shardNameLiteral, shardId, constraintNameLiteral);

		fixCommands = lappend(fixCommands, fixCommand->data);
	}

	return fixCommands;
}
/*
 * CreateFixPartitionShardIndexNames goes over all the indexes of a
 * distributed partitioned table unless parentIndexOid is valid. If it is
 * valid, only the given index is processed.
 *
 * The function creates and immediately executes the tasks that run
 * worker_fix_partition_shard_index_names() on worker nodes.
 *
 * When the partitionRelationId is a valid Oid, the function only operates on
 * the given partition. Otherwise, the function creates tasks for all the
 * partitions.
 *
 * So, for example, if a new partition is created, we only need to fix the new
 * partition, hence partitionRelationId should be a valid Oid. However, if a
 * new index/constraint is created on the parent, we should fix all the
 * partitions, hence partitionRelationId should be InvalidOid.
 *
 * As a reflection of the above, we create at most one task per shard of the
 * parent table (shards that yield no commands get no task). When we need to
 * fix all the partitions, each task has parent_indexes_count times
 * partition_count query strings. When we need to fix a single partition, each
 * task has parent_indexes_count query strings. When we need to fix a single
 * index, parent_indexes_count becomes 1.
 */
static void
CreateFixPartitionShardIndexNames(Oid parentRelationId, Oid partitionRelationId,
								  Oid parentIndexOid)
{
	List *partitionList = PartitionList(parentRelationId);
	if (partitionList == NIL)
	{
		/* early exit if the parent relation does not have any partitions */
		return;
	}

	Relation parentRelation = RelationIdGetRelation(parentRelationId);
	if (!RelationIsValid(parentRelation))
	{
		ereport(ERROR, (errmsg("could not open relation with OID %u", parentRelationId)));
	}

	List *parentIndexIdList = NIL;
	if (parentIndexOid != InvalidOid)
	{
		parentIndexIdList = list_make1_oid(parentIndexOid);
	}
	else
	{
		parentIndexIdList = RelationGetIndexList(parentRelation);
	}

	if (parentIndexIdList == NIL)
	{
		/* early exit if the parent relation does not have any indexes */
		RelationClose(parentRelation);
		return;
	}

	/*
	 * Lock shard metadata, if a specific partition is provided, lock that. Otherwise,
	 * lock all partitions.
	 */
	if (OidIsValid(partitionRelationId))
	{
		/* if a partition was provided we only need to lock that partition's metadata */
		List *partitionShardIntervalList = LoadShardIntervalList(partitionRelationId);
		LockShardListMetadata(partitionShardIntervalList, ShareLock);
	}
	else
	{
		Oid partitionId = InvalidOid;
		foreach_oid(partitionId, partitionList)
		{
			List *partitionShardIntervalList = LoadShardIntervalList(partitionId);
			LockShardListMetadata(partitionShardIntervalList, ShareLock);
		}
	}

	List *parentShardIntervalList = LoadShardIntervalList(parentRelationId);

	/* lock metadata before getting placement lists */
	LockShardListMetadata(parentShardIntervalList, ShareLock);

	/*
	 * Everything allocated while building and executing a task lives in a
	 * dedicated context, so that it can be thrown away after each shard.
	 */
	MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext,
													   "CreateFixPartitionShardIndexNames",
													   ALLOCSET_DEFAULT_SIZES);
	MemoryContext oldContext = MemoryContextSwitchTo(localContext);

	int taskId = 1;

	ShardInterval *parentShardInterval = NULL;
	foreach_ptr(parentShardInterval, parentShardIntervalList)
	{
		uint64 parentShardId = parentShardInterval->shardId;

		List *queryStringList =
			WorkerFixPartitionShardIndexNamesCommandList(parentShardId,
														 parentIndexIdList,
														 partitionRelationId);
		if (queryStringList != NIL)
		{
			Task *task = CitusMakeNode(Task);
			task->jobId = INVALID_JOB_ID;
			task->taskId = taskId++;
			task->taskType = DDL_TASK;

			/* all the commands of a shard are sent as a single query string */
			char *prefix = "SELECT pg_catalog.citus_run_local_command($$";
			char *postfix = "$$)";
			char *string = StringJoinParams(queryStringList, ';', prefix, postfix);

			SetTaskQueryString(task, string);

			task->dependentTaskList = NULL;
			task->replicationModel = REPLICATION_MODEL_INVALID;
			task->anchorShardId = parentShardId;
			task->taskPlacementList = ActiveShardPlacementList(parentShardId);

			bool localExecutionSupported = true;
			ExecuteUtilityTaskList(list_make1(task), localExecutionSupported);
		}

		/* after every iteration, clean-up all the memory associated with it */
		MemoryContextReset(localContext);
	}

	MemoryContextSwitchTo(oldContext);

	/*
	 * Nothing allocated in the local context is referenced from here on, so
	 * release the context itself instead of leaving it attached to the
	 * caller's context.
	 */
	MemoryContextDelete(localContext);

	RelationClose(parentRelation);
}
/*
 * WorkerFixPartitionShardIndexNamesCommandList returns the queries that fix
 * the child index names for every index in parentIndexIdList, on the parent
 * shard identified by parentShardId. Indexes without any children are skipped.
 */
static List *
WorkerFixPartitionShardIndexNamesCommandList(uint64 parentShardId,
											 List *parentIndexIdList,
											 Oid partitionRelationId)
{
	List *allCommands = NIL;

	Oid currentParentIndexId = InvalidOid;
	foreach_oid(currentParentIndexId, parentIndexIdList)
	{
		bool indexHasChildren = has_subclass(currentParentIndexId);
		if (!indexHasChildren)
		{
			continue;
		}

		/*
		 * Build the qualified name that the given parent index has on the
		 * parent shard: <schema>.<index name>_<parentShardId>
		 */
		char *shardIndexName = pstrdup(get_rel_name(currentParentIndexId));
		AppendShardIdToName(&shardIndexName, parentShardId);

		char *indexSchemaName =
			get_namespace_name(get_rel_namespace(currentParentIndexId));
		char *qualifiedShardIndexName =
			quote_qualified_identifier(indexSchemaName, shardIndexName);

		List *indexCommands =
			WorkerFixPartitionShardIndexNamesCommandListForParentShardIndex(
				qualifiedShardIndexName, currentParentIndexId, partitionRelationId);

		allCommands = list_concat(allCommands, indexCommands);
	}

	return allCommands;
}
/*
 * WorkerFixPartitionShardIndexNamesCommandListForParentShardIndex returns the
 * queries that fix the names of the child indexes of the given parent index,
 * for the parent shard index named qualifiedParentShardIndexName.
 *
 * When partitionRelationId is a valid Oid, only the child indexes that are
 * defined on that partition are considered. With InvalidOid, the child
 * indexes of all the partitions are included.
 */
static List *
WorkerFixPartitionShardIndexNamesCommandListForParentShardIndex(
	char *qualifiedParentShardIndexName, Oid parentIndexId, Oid partitionRelationId)
{
	bool singlePartitionOnly = (partitionRelationId != InvalidOid);

	/* all the partition indexes that inherit from the index on the parent */
	List *childIndexIdList =
		find_inheritance_children(parentIndexId, ShareRowExclusiveLock);

	List *allCommands = NIL;

	Oid childIndexId = InvalidOid;
	foreach_oid(childIndexId, childIndexIdList)
	{
		Oid owningPartitionId = IndexGetRelation(childIndexId, false);

		if (singlePartitionOnly && owningPartitionId != partitionRelationId)
		{
			/* this index belongs to a partition we were not asked to fix */
			continue;
		}

		List *partitionCommands =
			WorkerFixPartitionShardIndexNamesCommandListForPartitionIndex(
				childIndexId, qualifiedParentShardIndexName, owningPartitionId);

		allCommands = list_concat(allCommands, partitionCommands);
	}

	return allCommands;
}
/*
 * WorkerFixPartitionShardIndexNamesCommandListForPartitionIndex returns one
 * worker_fix_partition_shard_index_names query per shard of the partition
 * (partitionId) that the given partition index (partitionIndexId) is defined
 * on. Each query points the worker at the parent shard index, the partition
 * shard and the name the child index should end up with.
 */
static List *
WorkerFixPartitionShardIndexNamesCommandListForPartitionIndex(Oid partitionIndexId,
															  char *
															  qualifiedParentShardIndexName,
															  Oid partitionId)
{
	/* names that stay the same for every shard of the partition */
	char *indexNameOnPartition = get_rel_name(partitionIndexId);
	char *partitionRelationName = get_rel_name(partitionId);
	char *partitionSchema = get_namespace_name(get_rel_namespace(partitionId));

	List *shardIntervals = LoadShardIntervalList(partitionId);
	List *fixCommands = NIL;

	ShardInterval *shardInterval = NULL;
	foreach_ptr(shardInterval, shardIntervals)
	{
		uint64 shardId = shardInterval->shardId;

		/* <schema>.<partition name>_<shardId> */
		char *shardRelationName = pstrdup(partitionRelationName);
		AppendShardIdToName(&shardRelationName, shardId);
		char *qualifiedShardRelationName =
			quote_qualified_identifier(partitionSchema, shardRelationName);

		/* the name the index should have: <partition index name>_<shardId> */
		char *fixedIndexName = pstrdup(indexNameOnPartition);
		AppendShardIdToName(&fixedIndexName, shardId);

		/* assemble the worker_fix_partition_shard_index_names call */
		StringInfo fixCommand = makeStringInfo();
		appendStringInfo(fixCommand,
						 "SELECT worker_fix_partition_shard_index_names(%s::regclass, %s, %s)",
						 quote_literal_cstr(qualifiedParentShardIndexName),
						 quote_literal_cstr(qualifiedShardRelationName),
						 quote_literal_cstr(fixedIndexName));

		fixCommands = lappend(fixCommands, fixCommand->data);
	}

	return fixCommands;
}
/*
 * RelationHasConstraint returns true when pg_constraint contains a
 * constraint with the given name that is defined on the given relation.
 */
static bool
RelationHasConstraint(Oid relationId, char *constraintName)
{
	ScanKeyData scanKey[2];
	const int scanKeyCount = 2;
	const bool useIndex = false;

	Relation pgConstraint = table_open(ConstraintRelationId, AccessShareLock);

	/* conrelid = relationId AND conname = constraintName */
	ScanKeyInit(&scanKey[0], Anum_pg_constraint_conrelid,
				BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relationId));
	ScanKeyInit(&scanKey[1], Anum_pg_constraint_conname,
				BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(constraintName));

	SysScanDesc scan = systable_beginscan(pgConstraint, InvalidOid, useIndex,
										  NULL, scanKeyCount, scanKey);

	/* a single matching tuple is enough to answer the question */
	HeapTuple firstMatch = systable_getnext(scan);
	bool constraintExists = HeapTupleIsValid(firstMatch) ? true : false;

	systable_endscan(scan);
	table_close(pgConstraint, NoLock);

	return constraintExists;
}
/*
 * RenameConstraintCommand builds and returns the
 * "ALTER TABLE .. RENAME CONSTRAINT .. TO .." command that renames
 * constraintName of the given relation to newConstraintName. The relation
 * name is schema-qualified and both constraint names are quoted.
 */
static char *
RenameConstraintCommand(Oid relationId, char *constraintName, char *newConstraintName)
{
	char *tableName = generate_qualified_relation_name(relationId);
	const char *oldNameQuoted = quote_identifier(constraintName);
	const char *newNameQuoted = quote_identifier(newConstraintName);

	StringInfo command = makeStringInfo();
	appendStringInfo(command, "ALTER TABLE %s RENAME CONSTRAINT %s TO %s",
					 tableName, oldNameQuoted, newNameQuoted);

	return command->data;
}
/*
 * PartitionedTable returns true if the given relation is a partitioned
 * table. It returns false for relations that no longer exist. The
 * AccessShareLock acquired on the relation is kept after returning.
 */
bool
PartitionedTable(Oid relationId)
{
	Relation relation = try_relation_open(relationId, AccessShareLock);
	if (relation == NULL)
	{
		/* the table was dropped, which is not an error for our purposes */
		return false;
	}

	bool isPartitioned = (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);

	/* close the relation, but hold on to the lock */
	table_close(relation, NoLock);

	return isPartitioned;
}
/*
 * PartitionedTableNoLock returns true if the given relation is a partitioned
 * table, and false for relations that no longer exist. Unlike
 * PartitionedTable(), it does not acquire any lock on the relation, so the
 * caller is responsible for holding the appropriate locks.
 */
bool
PartitionedTableNoLock(Oid relationId)
{
	Relation relation = try_relation_open_nolock(relationId);
	if (relation == NULL)
	{
		/* the table was dropped, which is not an error for our purposes */
		return false;
	}

	bool isPartitioned = false;
	if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
	{
		isPartitioned = true;
	}

	/* no lock was acquired by this function, so there is none to release */
	table_close(relation, NoLock);

	return isPartitioned;
}
/*
 * PartitionTable returns true if the given relation is a partition of
 * another table, and false for relations that no longer exist. The
 * AccessShareLock acquired on the relation is kept after returning.
 */
bool
PartitionTable(Oid relationId)
{
	Relation relation = try_relation_open(relationId, AccessShareLock);
	if (relation == NULL)
	{
		/* the table was dropped, which is not an error for our purposes */
		return false;
	}

	bool isPartition = relation->rd_rel->relispartition;

	/* close the relation, but hold on to the lock */
	table_close(relation, NoLock);

	return isPartition;
}
/*
 * PartitionTableNoLock returns true if the given relation is a partition of
 * another table, and false for relations that no longer exist. Unlike
 * PartitionTable(), it does not acquire any lock on the relation, so the
 * caller is responsible for holding the appropriate locks.
 */
bool
PartitionTableNoLock(Oid relationId)
{
	Relation relation = try_relation_open_nolock(relationId);
	if (relation == NULL)
	{
		/* the table was dropped, which is not an error for our purposes */
		return false;
	}

	bool isPartition = relation->rd_rel->relispartition;

	/* no lock was acquired by this function, so there is none to release */
	table_close(relation, NoLock);

	return isPartition;
}
/*
 * try_relation_open_nolock opens the relation with the given relationId
 * without acquiring any lock, and returns NULL when the relation does not
 * exist. PostgreSQL's try_relation_open() asserts that the caller already
 * holds a lock on the relation, which is not always the case for us.
 *
 * ATTENTION:
 *   1. Keep this in sync with try_relation_open(). It did not change across
 *      releases 10 to 12, though.
 *   2. This should go away once the locking/distributed deadlock issues with
 *      MX Truncate are fixed. See
 *      https://github.com/citusdata/citus/pull/2894 for more discussion.
 */
static Relation
try_relation_open_nolock(Oid relationId)
{
	bool relationExists = SearchSysCacheExists1(RELOID, ObjectIdGetDatum(relationId));
	if (!relationExists)
	{
		return NULL;
	}

	Relation openedRelation = RelationIdGetRelation(relationId);
	if (RelationIsValid(openedRelation))
	{
		pgstat_init_relation(openedRelation);
		return openedRelation;
	}

	return NULL;
}
/*
 * IsChildTable returns true if the given table inherits from another table,
 * that is, if it appears as inhrelid in pg_inherits. Partitions also have
 * such an entry, but this function returns false for them.
 */
bool
IsChildTable(Oid relationId)
{
	ScanKeyData scanKey[1];
	bool hasInheritanceParent = false;

	Relation pgInherits = table_open(InheritsRelationId, AccessShareLock);

	ScanKeyInit(&scanKey[0], Anum_pg_inherits_inhrelid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(relationId));

	SysScanDesc scanDescriptor = systable_beginscan(pgInherits, InvalidOid, false,
													NULL, 1, scanKey);

	for (HeapTuple tuple = systable_getnext(scanDescriptor);
		 tuple != NULL;
		 tuple = systable_getnext(scanDescriptor))
	{
		Form_pg_inherits inheritsForm = (Form_pg_inherits) GETSTRUCT(tuple);

		if (inheritsForm->inhrelid == relationId)
		{
			hasInheritanceParent = true;
			break;
		}
	}

	systable_endscan(scanDescriptor);
	table_close(pgInherits, AccessShareLock);

	if (!hasInheritanceParent)
	{
		return false;
	}

	/* partitions have a pg_inherits entry too, but they do not count */
	return !PartitionTable(relationId);
}
/*
 * IsParentTable returns true if other tables inherit from the given table,
 * that is, if it appears as inhparent in pg_inherits. Partitioned tables
 * also have such entries, but this function returns false for them.
 *
 * Errors out when the relation does not exist.
 */
bool
IsParentTable(Oid relationId)
{
	ScanKeyData scanKey[1];

	Relation pgInherits = table_open(InheritsRelationId, AccessShareLock);

	ScanKeyInit(&scanKey[0], Anum_pg_inherits_inhparent,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(relationId));

	SysScanDesc scanDescriptor = systable_beginscan(pgInherits, InheritsParentIndexId,
													true, NULL, 1, scanKey);

	/* one pg_inherits row is enough to know that the table has children */
	bool hasChildren = (systable_getnext(scanDescriptor) != NULL);

	systable_endscan(scanDescriptor);
	table_close(pgInherits, AccessShareLock);

	Relation relation = try_relation_open(relationId, AccessShareLock);
	if (relation == NULL)
	{
		ereport(ERROR, (errmsg("relation with OID %u does not exist", relationId)));
	}

	/* the relation is locked above, so the lock-free check can be used */
	bool isInheritanceParent = hasChildren && !PartitionedTableNoLock(relationId);

	relation_close(relation, AccessShareLock);

	return isInheritanceParent;
}
/*
 * PartitionParentOid is a thin wrapper around get_partition_parent() and
 * returns the oid of the parent of the given partition.
 *
 * Note: the relation whose OID is passed in is assumed to have precisely one
 * parent, so only call this when the relation is known to be a partition.
 */
Oid
PartitionParentOid(Oid partitionOid)
{
	return get_partition_parent(partitionOid, false);
}
/*
 * PartitionWithLongestNameRelationId returns the oid of the partition of the
 * given parent whose name has the most characters. When several partitions
 * share the longest length, the first one in the partition list is returned.
 * InvalidOid is returned when no partition has a non-empty name.
 */
Oid
PartitionWithLongestNameRelationId(Oid parentRelationId)
{
	Oid winnerPartitionId = InvalidOid;
	int winnerNameLength = 0;

	List *partitionIdList = PartitionList(parentRelationId);

	Oid candidateId = InvalidOid;
	foreach_oid(candidateId, partitionIdList)
	{
		char *candidateName = get_rel_name(candidateId);
		int candidateNameLength = strnlen(candidateName, NAMEDATALEN);

		if (candidateNameLength <= winnerNameLength)
		{
			/* not strictly longer than what we have seen so far */
			continue;
		}

		winnerPartitionId = candidateId;
		winnerNameLength = candidateNameLength;
	}

	return winnerPartitionId;
}
/*
 * PartitionList takes a partitioned (parent) relation and returns the Oid
 * list of its partitions. The function errors out if the given relation is
 * not a partitioned table. The AccessShareLock acquired on the parent is kept
 * after returning.
 */
List *
PartitionList(Oid parentRelationId)
{
	Relation parentRelation = table_open(parentRelationId, AccessShareLock);

	if (!PartitionedTable(parentRelationId))
	{
		char *relationName = get_rel_name(parentRelationId);

		ereport(ERROR, (errmsg("\"%s\" is not a parent table", relationName)));
	}

	PartitionDesc partitionDesc = RelationGetPartitionDesc(parentRelation, true);
	Assert(partitionDesc != NULL);

	/* copy the partition oids out of the descriptor, in descriptor order */
	List *partitionIdList = NIL;
	for (int i = 0; i < partitionDesc->nparts; i++)
	{
		Oid partitionId = partitionDesc->oids[i];
		partitionIdList = lappend_oid(partitionIdList, partitionId);
	}

	/* close the relation, but hold on to the lock */
	table_close(parentRelation, NoLock);

	return partitionIdList;
}
/*
 * GenerateDetachPartitionCommand takes a partition and returns the
 * "ALTER TABLE IF EXISTS parent_table DETACH PARTITION partition_name;"
 * command, with both relation names schema-qualified. It errors out if the
 * given relation is not a partition.
 */
char *
GenerateDetachPartitionCommand(Oid partitionTableId)
{
	if (!PartitionTable(partitionTableId))
	{
		char *relationName = get_rel_name(partitionTableId);

		ereport(ERROR, (errmsg("\"%s\" is not a partition", relationName)));
	}

	Oid parentTableId = get_partition_parent(partitionTableId, false);

	char *partitionQualifiedName = generate_qualified_relation_name(partitionTableId);
	char *parentQualifiedName = generate_qualified_relation_name(parentTableId);

	StringInfo command = makeStringInfo();
	appendStringInfo(command,
					 "ALTER TABLE IF EXISTS %s DETACH PARTITION %s;",
					 parentQualifiedName, partitionQualifiedName);

	return command->data;
}
/*
 * GenerateDetachPartitionCommandRelationIdList builds one detach command per
 * relation in the given oid list (see GenerateDetachPartitionCommand) and
 * returns them as a list, in the same order as the input. Every relation in
 * the list is expected to be a partition.
 */
List *
GenerateDetachPartitionCommandRelationIdList(List *relationIds)
{
	List *commandList = NIL;

	Oid partitionId = InvalidOid;
	foreach_oid(partitionId, relationIds)
	{
		Assert(PartitionTable(partitionId));

		commandList = lappend(commandList,
							  GenerateDetachPartitionCommand(partitionId));
	}

	return commandList;
}
/*
 * GeneratePartitioningInformation returns the partitioning type and partition
 * column(s) of the given parent table, i.e. the text produced by
 * pg_get_partkeydef(), which has the form
 * "PARTITION TYPE (partitioning column(s)/expression(s))".
 *
 * An error is raised if the given relation is not a partitioned table.
 */
char *
GeneratePartitioningInformation(Oid parentTableId)
{
	if (!PartitionedTable(parentTableId))
	{
		ereport(ERROR, (errmsg("\"%s\" is not a parent table",
							   get_rel_name(parentTableId))));
	}

	Datum partitionKeyDefDatum = DirectFunctionCall1(pg_get_partkeydef,
													 ObjectIdGetDatum(parentTableId));

	return TextDatumGetCString(partitionKeyDefDatum);
}
/*
 * GenerateAttachShardPartitionCommand generates the command that attaches the
 * given shard of a child table to the matching shard of its parent in a
 * partitioning hierarchy.
 *
 * The result is WORKER_APPLY_INTER_SHARD_DDL_COMMAND filled in with the parent
 * shard id and schema, the child shard id and schema, and the ALTER TABLE ..
 * ATTACH PARTITION command of the child relation. The parent shard is the one
 * ColocatedShardIdInRelation() returns for the child shard's index.
 */
char *
GenerateAttachShardPartitionCommand(ShardInterval *shardInterval)
{
	Oid partitionRelationId = shardInterval->relationId;

	/* schema of the partition, quoted as a literal */
	char *partitionSchemaName =
		get_namespace_name(get_rel_namespace(partitionRelationId));
	char *quotedPartitionSchemaName = quote_literal_cstr(partitionSchemaName);

	/* relation-level attach command, quoted as a literal */
	char *attachCommand =
		GenerateAlterTableAttachPartitionCommand(partitionRelationId);
	char *quotedAttachCommand = quote_literal_cstr(attachCommand);

	int shardIndex = ShardIndex(shardInterval);

	Oid parentRelationId = PartitionParentOid(partitionRelationId);
	if (parentRelationId == InvalidOid)
	{
		ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						errmsg("cannot attach partition"),
						errdetail("Referenced relation cannot be found.")));
	}

	/* schema of the parent, quoted as a literal */
	char *parentSchemaName =
		get_namespace_name(get_rel_namespace(parentRelationId));
	char *quotedParentSchemaName = quote_literal_cstr(parentSchemaName);

	/* the parent's shard that sits at the same index as the given shard */
	uint64 parentShardId = ColocatedShardIdInRelation(parentRelationId, shardIndex);

	StringInfo workerCommand = makeStringInfo();
	appendStringInfo(workerCommand,
					 WORKER_APPLY_INTER_SHARD_DDL_COMMAND, parentShardId,
					 quotedParentSchemaName, shardInterval->shardId,
					 quotedPartitionSchemaName, quotedAttachCommand);

	return workerCommand->data;
}
/*
 * GenerateAlterTableAttachPartitionCommand takes a partition and returns the
 * command
 *   "ALTER TABLE <parent> ATTACH PARTITION <partition> <partition bound>;"
 * where the relation names come from generate_qualified_relation_name() and
 * the bound text comes from PartitionBound().
 *
 * An error is raised if the given relation is not a partition.
 */
char *
GenerateAlterTableAttachPartitionCommand(Oid partitionTableId)
{
	if (!PartitionTable(partitionTableId))
	{
		ereport(ERROR, (errmsg("\"%s\" is not a partition",
							   get_rel_name(partitionTableId))));
	}

	Oid parentTableId = get_partition_parent(partitionTableId, false);

	char *qualifiedPartitionName = generate_qualified_relation_name(partitionTableId);
	char *qualifiedParentName = generate_qualified_relation_name(parentTableId);
	char *boundText = PartitionBound(partitionTableId);

	StringInfo commandBuffer = makeStringInfo();
	appendStringInfo(commandBuffer, "ALTER TABLE %s ATTACH PARTITION %s %s;",
					 qualifiedParentName, qualifiedPartitionName,
					 boundText);

	return commandBuffer->data;
}
/*
 * GenerateAttachPartitionCommandRelationIdList builds one attach command per
 * relation in the given oid list (see GenerateAlterTableAttachPartitionCommand)
 * and returns them as a list, in the same order as the input.
 */
List *
GenerateAttachPartitionCommandRelationIdList(List *relationIds)
{
	List *commandList = NIL;

	Oid partitionId = InvalidOid;
	foreach_oid(partitionId, relationIds)
	{
		commandList = lappend(commandList,
							  GenerateAlterTableAttachPartitionCommand(partitionId));
	}

	return commandList;
}
/*
 * PartitionBound is heavily inspired by RelationBuildPartitionDesc(), which
 * is available in src/backend/catalog/partition.c.
 *
 * The function looks up the pg_class entry of the given relation, reads its
 * relpartbound attribute and returns it converted to text via pg_get_expr().
 * An empty string is returned when the entry is not marked as a partition.
 */
static char *
PartitionBound(Oid partitionId)
{
	HeapTuple classTuple = SearchSysCache1(RELOID, partitionId);
	if (!HeapTupleIsValid(classTuple))
	{
		elog(ERROR, "cache lookup failed for relation %u", partitionId);
	}

	/*
	 * The pg_class tuple of a partition may not have been updated yet to set
	 * its relpartbound field. The only case where this happens is when the
	 * parent relation is opened to check, using its partition descriptor,
	 * that a new partition's bound does not overlap some existing partition.
	 */
	Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTuple);
	if (!classForm->relispartition)
	{
		ReleaseSysCache(classTuple);
		return "";
	}

	bool boundIsNull = false;
	Datum boundDatum = SysCacheGetAttr(RELOID, classTuple,
									   Anum_pg_class_relpartbound,
									   &boundIsNull);
	Assert(!boundIsNull);

	/* turn the stored bound into text, then into a C string of its own */
	Datum boundTextDatum =
		DirectFunctionCall2(pg_get_expr, boundDatum, ObjectIdGetDatum(partitionId));
	char *boundString = TextDatumGetCString(boundTextDatum);

	/* the cache entry is released only after the conversion is done */
	ReleaseSysCache(classTuple);

	return boundString;
}
/*
 * ListShardsUnderParentRelation returns a list of ShardInterval for every
 * shard under the given relation: the relation's own shards followed by,
 * when the relation is a partitioned table, the shards of each of its
 * partitions, collected recursively through the partitioning hierarchy.
 */
List *
ListShardsUnderParentRelation(Oid relationId)
{
	List *allShards = LoadShardIntervalList(relationId);

	/* a relation without partitions contributes only its own shards */
	if (!PartitionedTable(relationId))
	{
		return allShards;
	}

	List *childRelationIds = PartitionList(relationId);
	Oid childRelationId = InvalidOid;
	foreach_oid(childRelationId, childRelationIds)
	{
		allShards = list_concat(allShards,
								ListShardsUnderParentRelation(childRelationId));
	}

	return allShards;
}