From fa6743d43629253ed581dac5389ab32f77125105 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Mon, 15 Apr 2024 12:28:11 +0200 Subject: [PATCH] Speed up EnsureSequenceTypeSupported (#7575) DESCRIPTION: Fix performance issue when creating distributed tables and many already exist EnsureSequenceTypeSupported was doing an O(number of distributed tables) operation. This can become very slow with lots of Citus tables, which now happens much more frequently in practice due to schema based sharding. Partially addresses #7022 (cherry picked from commit 381f31756e6de0e0522aeaa489fe671df0ddf731) --- .../commands/create_distributed_table.c | 73 +++++++--------- .../distributed/metadata/metadata_sync.c | 84 +++++++++++++++++++ src/include/distributed/metadata_sync.h | 4 + 3 files changed, 119 insertions(+), 42 deletions(-) diff --git a/src/backend/distributed/commands/create_distributed_table.c b/src/backend/distributed/commands/create_distributed_table.c index 9f3975a1e..fced0fe3a 100644 --- a/src/backend/distributed/commands/create_distributed_table.c +++ b/src/backend/distributed/commands/create_distributed_table.c @@ -22,6 +22,7 @@ #include "catalog/dependency.h" #include "catalog/index.h" #include "catalog/pg_am.h" +#include "catalog/pg_attrdef.h" #include "catalog/pg_attribute.h" #include "catalog/pg_enum.h" #include "catalog/pg_extension.h" @@ -50,6 +51,7 @@ #include "tcop/pquery.h" #include "tcop/tcopprot.h" #include "utils/builtins.h" +#include "utils/fmgroids.h" #include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/memutils.h" @@ -1699,52 +1701,39 @@ PropagatePrerequisiteObjectsForDistributedTable(Oid relationId) void EnsureSequenceTypeSupported(Oid seqOid, Oid attributeTypeId, Oid ownerRelationId) { - List *citusTableIdList = CitusTableTypeIdList(ANY_CITUS_TABLE_TYPE); - citusTableIdList = list_append_unique_oid(citusTableIdList, ownerRelationId); + Oid attrDefOid; + List *attrDefOids = GetAttrDefsFromSequence(seqOid); - Oid citusTableId = InvalidOid; - foreach_oid(citusTableId, citusTableIdList) + foreach_oid(attrDefOid, attrDefOids) { - List *seqInfoList = NIL; - GetDependentSequencesWithRelation(citusTableId, &seqInfoList, 0, DEPENDENCY_AUTO); + ObjectAddress columnAddress = GetAttrDefaultColumnAddress(attrDefOid); - SequenceInfo *seqInfo = NULL; - foreach_ptr(seqInfo, seqInfoList) + /* + * If another distributed table is using the same sequence + * in one of its column defaults, make sure the types of the + * columns match. + * + * We skip non-distributed tables, but we need to check the current + * table as it might reference the same sequence multiple times. + */ + if (columnAddress.objectId != ownerRelationId && + !IsCitusTable(columnAddress.objectId)) { - AttrNumber currentAttnum = seqInfo->attributeNumber; - Oid currentSeqOid = seqInfo->sequenceOid; - - if (!seqInfo->isNextValDefault) - { - /* - * If a sequence is not on the nextval, we don't need any check. - * This is a dependent sequence via ALTER SEQUENCE .. OWNED BY col - */ - continue; - } - - /* - * If another distributed table is using the same sequence - * in one of its column defaults, make sure the types of the - * columns match - */ - if (currentSeqOid == seqOid) - { - Oid currentAttributeTypId = GetAttributeTypeOid(citusTableId, - currentAttnum); - if (attributeTypeId != currentAttributeTypId) - { - char *sequenceName = generate_qualified_relation_name( - seqOid); - char *citusTableName = - generate_qualified_relation_name(citusTableId); - ereport(ERROR, (errmsg( - "The sequence %s is already used for a different" - " type in column %d of the table %s", - sequenceName, currentAttnum, - citusTableName))); - } - } + continue; + } + Oid currentAttributeTypId = GetAttributeTypeOid(columnAddress.objectId, + columnAddress.objectSubId); + if (attributeTypeId != currentAttributeTypId) + { + char *sequenceName = generate_qualified_relation_name( + seqOid); + char *citusTableName = + generate_qualified_relation_name(columnAddress.objectId); + ereport(ERROR, (errmsg( + "The sequence %s is already used for a different" + " type in column %d of the table %s", + sequenceName, columnAddress.objectSubId, + citusTableName))); } } } diff --git a/src/backend/distributed/metadata/metadata_sync.c b/src/backend/distributed/metadata/metadata_sync.c index 0492e7b55..511cc45a9 100644 --- a/src/backend/distributed/metadata/metadata_sync.c +++ b/src/backend/distributed/metadata/metadata_sync.c @@ -1671,6 +1671,90 @@ GetSequencesFromAttrDef(Oid attrdefOid) } +#if PG_VERSION_NUM < PG_VERSION_15 + +/* + * Given a pg_attrdef OID, return the relation OID and column number of + * the owning column (represented as an ObjectAddress for convenience). + * + * Returns InvalidObjectAddress if there is no such pg_attrdef entry. + */ +ObjectAddress +GetAttrDefaultColumnAddress(Oid attrdefoid) +{ + ObjectAddress result = InvalidObjectAddress; + ScanKeyData skey[1]; + HeapTuple tup; + + Relation attrdef = table_open(AttrDefaultRelationId, AccessShareLock); + ScanKeyInit(&skey[0], + Anum_pg_attrdef_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(attrdefoid)); + SysScanDesc scan = systable_beginscan(attrdef, AttrDefaultOidIndexId, true, + NULL, 1, skey); + + if (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_attrdef atdform = (Form_pg_attrdef) GETSTRUCT(tup); + + result.classId = RelationRelationId; + result.objectId = atdform->adrelid; + result.objectSubId = atdform->adnum; + } + + systable_endscan(scan); + table_close(attrdef, AccessShareLock); + + return result; +} + + +#endif + + +/* + * GetAttrDefsFromSequence returns a list of attrdef OIDs that have + * a dependency on the given sequence + */ +List * +GetAttrDefsFromSequence(Oid seqOid) +{ + List *attrDefsResult = NIL; + ScanKeyData key[2]; + HeapTuple tup; + + Relation depRel = table_open(DependRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_depend_refclassid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationRelationId)); + ScanKeyInit(&key[1], + Anum_pg_depend_refobjid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(seqOid)); + SysScanDesc scan = systable_beginscan(depRel, DependReferenceIndexId, true, + NULL, lengthof(key), key); + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_depend deprec = (Form_pg_depend) GETSTRUCT(tup); + + if (deprec->classid == AttrDefaultRelationId && + deprec->deptype == DEPENDENCY_NORMAL) + { + attrDefsResult = lappend_oid(attrDefsResult, deprec->objid); + } + } + + systable_endscan(scan); + + table_close(depRel, AccessShareLock); + + return attrDefsResult; +} + + /* * GetDependentFunctionsWithRelation returns the dependent functions for the * given relation id. diff --git a/src/include/distributed/metadata_sync.h b/src/include/distributed/metadata_sync.h index 4e190ec39..01929b0c5 100644 --- a/src/include/distributed/metadata_sync.h +++ b/src/include/distributed/metadata_sync.h @@ -128,6 +128,10 @@ extern List * IdentitySequenceDependencyCommandList(Oid targetRelationId); extern List * DDLCommandsForSequence(Oid sequenceOid, char *ownerName); extern List * GetSequencesFromAttrDef(Oid attrdefOid); +#if PG_VERSION_NUM < PG_VERSION_15 +ObjectAddress GetAttrDefaultColumnAddress(Oid attrdefoid); +#endif +extern List * GetAttrDefsFromSequence(Oid seqOid); extern void GetDependentSequencesWithRelation(Oid relationId, List **seqInfoList, AttrNumber attnum, char depType); extern List * GetDependentFunctionsWithRelation(Oid relationId);