From e6160ad131207a99fc5e38b0410eba3502b93007 Mon Sep 17 00:00:00 2001
From: Hanefi Onaldi
Date: Wed, 10 Nov 2021 17:10:07 +0300
Subject: [PATCH 1/2] Document failing tests for issue 5099

---
 src/test/regress/expected/issue_5099.out | 17 +++++++++++++++++
 src/test/regress/multi_schedule          |  2 +-
 src/test/regress/sql/issue_5099.sql      | 11 +++++++++++
 3 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 src/test/regress/expected/issue_5099.out
 create mode 100644 src/test/regress/sql/issue_5099.sql

diff --git a/src/test/regress/expected/issue_5099.out b/src/test/regress/expected/issue_5099.out
new file mode 100644
index 000000000..00ab863da
--- /dev/null
+++ b/src/test/regress/expected/issue_5099.out
@@ -0,0 +1,17 @@
+CREATE SCHEMA issue_5099;
+SET search_path to 'issue_5099';
+CREATE TYPE comp_type AS (
+    int_field_1 BIGINT,
+    int_field_2 BIGINT
+);
+CREATE TABLE range_dist_table_2 (dist_col comp_type);
+SELECT create_distributed_table('range_dist_table_2', 'dist_col', 'range');
+ create_distributed_table
+---------------------------------------------------------------------
+ 
+(1 row)
+
+\set VERBOSITY TERSE
+DROP SCHEMA issue_5099 CASCADE;
+NOTICE:  drop cascades to 2 other objects
+ERROR:  cache lookup failed for type 17048
diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule
index 715b032a5..2e13902fb 100644
--- a/src/test/regress/multi_schedule
+++ b/src/test/regress/multi_schedule
@@ -95,7 +95,7 @@ test: multi_dropped_column_aliases foreign_key_restriction_enforcement
 test: binary_protocol
 test: alter_table_set_access_method
 test: alter_distributed_table
-test: issue_5248
+test: issue_5248 issue_5099
 
 test: object_propagation_debug
 
diff --git a/src/test/regress/sql/issue_5099.sql b/src/test/regress/sql/issue_5099.sql
new file mode 100644
index 000000000..265800851
--- /dev/null
+++ b/src/test/regress/sql/issue_5099.sql
@@ -0,0 +1,11 @@
+CREATE SCHEMA issue_5099;
+SET search_path to 'issue_5099';
+CREATE TYPE comp_type AS (
+    int_field_1 BIGINT,
+    int_field_2 BIGINT
+);
+
+CREATE TABLE range_dist_table_2 (dist_col comp_type);
+SELECT create_distributed_table('range_dist_table_2', 'dist_col', 'range');
+\set VERBOSITY TERSE
+DROP SCHEMA issue_5099 CASCADE;

From c0d43d490509d1dcfd8e14f0a5fa7ea1358a3fb4 Mon Sep 17 00:00:00 2001
From: Hanefi Onaldi
Date: Thu, 18 Nov 2021 20:24:51 +0300
Subject: [PATCH 2/2] Prevent cache usage on citus_drop_trigger codepaths

---
 .../commands/drop_distributed_table.c         |  6 +--
 .../distributed/metadata/metadata_cache.c     | 48 +++++++++++++++++-
 .../distributed/metadata/metadata_sync.c      | 50 ++++++++++++++++---
 .../distributed/metadata/metadata_utility.c   | 38 ++++++++++++++
 .../distributed/operations/delete_protocol.c  |  4 +-
 src/include/distributed/metadata_cache.h      |  2 +
 src/include/distributed/metadata_sync.h       |  1 +
 src/include/distributed/metadata_utility.h    |  1 +
 src/test/regress/expected/issue_5099.out      |  1 -
 9 files changed, 137 insertions(+), 14 deletions(-)

diff --git a/src/backend/distributed/commands/drop_distributed_table.c b/src/backend/distributed/commands/drop_distributed_table.c
index 79adf02a9..99c2cc2ab 100644
--- a/src/backend/distributed/commands/drop_distributed_table.c
+++ b/src/backend/distributed/commands/drop_distributed_table.c
@@ -74,7 +74,7 @@ master_remove_partition_metadata(PG_FUNCTION_ARGS)
 	 * user-friendly, but this function is really only meant to be called
 	 * from the trigger.
 	 */
-	if (!IsCitusTable(relationId) || !EnableDDLPropagation)
+	if (!IsCitusTableViaCatalog(relationId) || !EnableDDLPropagation)
 	{
 		PG_RETURN_VOID();
 	}
@@ -134,14 +134,14 @@ MasterRemoveDistributedTableMetadataFromWorkers(Oid relationId, char *schemaName
 	 * user-friendly, but this function is really only meant to be called
 	 * from the trigger.
 	 */
-	if (!IsCitusTable(relationId) || !EnableDDLPropagation)
+	if (!IsCitusTableViaCatalog(relationId) || !EnableDDLPropagation)
 	{
 		return;
 	}
 
 	EnsureCoordinator();
 
-	if (!ShouldSyncTableMetadata(relationId))
+	if (!ShouldSyncTableMetadataViaCatalog(relationId))
 	{
 		return;
 	}
diff --git a/src/backend/distributed/metadata/metadata_cache.c b/src/backend/distributed/metadata/metadata_cache.c
index 88906ee5f..52c4d258e 100644
--- a/src/backend/distributed/metadata/metadata_cache.c
+++ b/src/backend/distributed/metadata/metadata_cache.c
@@ -207,7 +207,6 @@ static ScanKeyData DistObjectScanKey[3];
 
 
 /* local function forward declarations */
-static bool IsCitusTableViaCatalog(Oid relationId);
 static HeapTuple PgDistPartitionTupleViaCatalog(Oid relationId);
 static ShardIdCacheEntry * LookupShardIdCacheEntry(int64 shardId);
 static CitusTableCacheEntry * BuildCitusTableCacheEntry(Oid relationId);
@@ -484,7 +483,7 @@ IsCitusTable(Oid relationId)
  * offset and the corresponding index. If we ever come close to changing
  * that, we'll have to work a bit harder.
  */
-static bool
+bool
 IsCitusTableViaCatalog(Oid relationId)
 {
 	HeapTuple partitionTuple = PgDistPartitionTupleViaCatalog(relationId);
@@ -538,6 +537,51 @@ PartitionMethodViaCatalog(Oid relationId)
 }
 
 
+/*
+ * PartitionColumnViaCatalog gets a relationId and returns the partition
+ * key column from pg_dist_partition via reading from catalog.
+ */
+Var *
+PartitionColumnViaCatalog(Oid relationId)
+{
+	HeapTuple partitionTuple = PgDistPartitionTupleViaCatalog(relationId);
+	if (!HeapTupleIsValid(partitionTuple))
+	{
+		return NULL;
+	}
+
+	Datum datumArray[Natts_pg_dist_partition];
+	bool isNullArray[Natts_pg_dist_partition];
+
+	Relation pgDistPartition = table_open(DistPartitionRelationId(), AccessShareLock);
+
+	TupleDesc tupleDescriptor = RelationGetDescr(pgDistPartition);
+	heap_deform_tuple(partitionTuple, tupleDescriptor, datumArray, isNullArray);
+
+	if (isNullArray[Anum_pg_dist_partition_partkey - 1])
+	{
+		/* partition key cannot be NULL, still let's make sure */
+		heap_freetuple(partitionTuple);
+		table_close(pgDistPartition, NoLock);
+		return NULL;
+	}
+
+	Datum partitionKeyDatum = datumArray[Anum_pg_dist_partition_partkey - 1];
+	char *partitionKeyString = TextDatumGetCString(partitionKeyDatum);
+
+	/* convert the string to a Node and ensure it is a Var */
+	Node *partitionNode = stringToNode(partitionKeyString);
+	Assert(IsA(partitionNode, Var));
+
+	Var *partitionColumn = (Var *) partitionNode;
+
+	heap_freetuple(partitionTuple);
+	table_close(pgDistPartition, NoLock);
+
+	return partitionColumn;
+}
+
+
 /*
  * PgDistPartitionTupleViaCatalog is a helper function that searches
  * pg_dist_partition for the given relationId. The caller is responsible
diff --git a/src/backend/distributed/metadata/metadata_sync.c b/src/backend/distributed/metadata/metadata_sync.c
index 2a96eb329..56360b3a7 100644
--- a/src/backend/distributed/metadata/metadata_sync.c
+++ b/src/backend/distributed/metadata/metadata_sync.c
@@ -88,6 +88,8 @@ static char * TruncateTriggerCreateCommand(Oid relationId);
 static char * SchemaOwnerName(Oid objectId);
 static bool HasMetadataWorkers(void);
 static List * DetachPartitionCommandList(void);
+static bool ShouldSyncTableMetadataInternal(bool hashDistributed,
+                                            bool citusTableWithNoDistKey);
 static bool SyncMetadataSnapshotToNode(WorkerNode *workerNode, bool raiseOnError);
 static void DropMetadataSnapshotOnNode(WorkerNode *workerNode);
 static char * CreateSequenceDependencyCommand(Oid relationId, Oid sequenceId,
@@ -380,15 +382,51 @@ ShouldSyncTableMetadata(Oid relationId)
 
 	CitusTableCacheEntry *tableEntry = GetCitusTableCacheEntry(relationId);
 
-	if (IsCitusTableTypeCacheEntry(tableEntry, HASH_DISTRIBUTED) ||
-		IsCitusTableTypeCacheEntry(tableEntry, CITUS_TABLE_WITH_NO_DIST_KEY))
-	{
-		return true;
-	}
-	else
+	bool hashDistributed = IsCitusTableTypeCacheEntry(tableEntry, HASH_DISTRIBUTED);
+	bool citusTableWithNoDistKey =
+		IsCitusTableTypeCacheEntry(tableEntry, CITUS_TABLE_WITH_NO_DIST_KEY);
+
+	return ShouldSyncTableMetadataInternal(hashDistributed, citusTableWithNoDistKey);
+}
+
+
+/*
+ * ShouldSyncTableMetadataViaCatalog checks if the metadata of a distributed table should
+ * be propagated to metadata workers, i.e. the table is an MX table or reference table.
+ * Tables with streaming replication model (which means RF=1) and hash distribution are
+ * considered MX tables while tables with no distribution key are reference tables.
+ *
+ * ShouldSyncTableMetadataViaCatalog does not use the CitusTableCache and instead reads
+ * from catalog tables directly.
+ */
+bool
+ShouldSyncTableMetadataViaCatalog(Oid relationId)
+{
+	if (!OidIsValid(relationId) || !IsCitusTableViaCatalog(relationId))
 	{
 		return false;
 	}
+
+	char partitionMethod = PartitionMethodViaCatalog(relationId);
+	bool hashDistributed = partitionMethod == DISTRIBUTE_BY_HASH;
+	bool citusTableWithNoDistKey = partitionMethod == DISTRIBUTE_BY_NONE;
+
+	return ShouldSyncTableMetadataInternal(hashDistributed, citusTableWithNoDistKey);
+}
+
+
+/*
+ * ShouldSyncTableMetadataInternal decides whether we should sync the metadata for a table
+ * based on whether it is a hash distributed table, or a citus table with no distribution
+ * key.
+ *
+ * This function is here to make sure that ShouldSyncTableMetadata and
+ * ShouldSyncTableMetadataViaCatalog behave the same way.
+ */
+static bool
+ShouldSyncTableMetadataInternal(bool hashDistributed, bool citusTableWithNoDistKey)
+{
+	return hashDistributed || citusTableWithNoDistKey;
 }
 
 
diff --git a/src/backend/distributed/metadata/metadata_utility.c b/src/backend/distributed/metadata/metadata_utility.c
index 3d5d52754..dda2aa34d 100644
--- a/src/backend/distributed/metadata/metadata_utility.c
+++ b/src/backend/distributed/metadata/metadata_utility.c
@@ -1143,6 +1143,44 @@ LoadShardIntervalList(Oid relationId)
 }
 
 
+/*
+ * LoadUnsortedShardIntervalListViaCatalog returns a list of shard intervals for a
+ * given distributed table. The function returns an empty list if no shards can be found
+ * for the given relation.
+ *
+ * This function does not use CitusTableCache and instead reads from catalog tables
+ * directly.
+ */
+List *
+LoadUnsortedShardIntervalListViaCatalog(Oid relationId)
+{
+	List *shardIntervalList = NIL;
+	List *distShardTuples = LookupDistShardTuples(relationId);
+	Relation distShardRelation = table_open(DistShardRelationId(), AccessShareLock);
+	TupleDesc distShardTupleDesc = RelationGetDescr(distShardRelation);
+	Oid intervalTypeId = InvalidOid;
+	int32 intervalTypeMod = -1;
+
+	char partitionMethod = PartitionMethodViaCatalog(relationId);
+	Var *partitionColumn = PartitionColumnViaCatalog(relationId);
+	GetIntervalTypeInfo(partitionMethod, partitionColumn, &intervalTypeId,
+	                    &intervalTypeMod);
+
+	HeapTuple distShardTuple = NULL;
+	foreach_ptr(distShardTuple, distShardTuples)
+	{
+		ShardInterval *interval = TupleToShardInterval(distShardTuple,
+		                                               distShardTupleDesc,
+		                                               intervalTypeId,
+		                                               intervalTypeMod);
+		shardIntervalList = lappend(shardIntervalList, interval);
+	}
+	table_close(distShardRelation, AccessShareLock);
+
+	return shardIntervalList;
+}
+
+
 /*
  * LoadShardIntervalWithLongestShardName is a utility function that returns
 * the shard interaval with the largest shardId for the given relationId. Note
diff --git a/src/backend/distributed/operations/delete_protocol.c b/src/backend/distributed/operations/delete_protocol.c
index 32fe617f0..84a22737b 100644
--- a/src/backend/distributed/operations/delete_protocol.c
+++ b/src/backend/distributed/operations/delete_protocol.c
@@ -123,7 +123,7 @@ citus_drop_all_shards(PG_FUNCTION_ARGS)
 	 * The SQL_DROP trigger calls this function even for tables that are
 	 * not distributed. In that case, silently ignore and return -1.
 	 */
-	if (!IsCitusTable(relationId) || !EnableDDLPropagation)
+	if (!IsCitusTableViaCatalog(relationId) || !EnableDDLPropagation)
 	{
 		PG_RETURN_INT32(-1);
 	}
@@ -139,7 +139,7 @@ citus_drop_all_shards(PG_FUNCTION_ARGS)
 	 */
 	LockRelationOid(relationId, AccessExclusiveLock);
 
-	List *shardIntervalList = LoadShardIntervalList(relationId);
+	List *shardIntervalList = LoadUnsortedShardIntervalListViaCatalog(relationId);
 	int droppedShardCount = DropShards(relationId, schemaName, relationName,
 	                                   shardIntervalList, dropShardsMetadataOnly);
 
diff --git a/src/include/distributed/metadata_cache.h b/src/include/distributed/metadata_cache.h
index c87db457d..4461cb1e9 100644
--- a/src/include/distributed/metadata_cache.h
+++ b/src/include/distributed/metadata_cache.h
@@ -144,9 +144,11 @@ extern bool IsCitusTableTypeCacheEntry(CitusTableCacheEntry *tableEtnry,
                                        CitusTableType tableType);
 extern bool IsCitusTable(Oid relationId);
+extern bool IsCitusTableViaCatalog(Oid relationId);
 extern char PgDistPartitionViaCatalog(Oid relationId);
 extern List * LookupDistShardTuples(Oid relationId);
 extern char PartitionMethodViaCatalog(Oid relationId);
+extern Var * PartitionColumnViaCatalog(Oid relationId);
 extern bool IsCitusLocalTableByDistParams(char partitionMethod, char replicationModel);
 extern List * CitusTableList(void);
 extern ShardInterval * LoadShardInterval(uint64 shardId);
diff --git a/src/include/distributed/metadata_sync.h b/src/include/distributed/metadata_sync.h
index 77acc2b82..a615d9cbe 100644
--- a/src/include/distributed/metadata_sync.h
+++ b/src/include/distributed/metadata_sync.h
@@ -31,6 +31,7 @@ typedef enum
 extern void StartMetadataSyncToNode(const char *nodeNameString, int32 nodePort);
 extern bool ClusterHasKnownMetadataWorkers(void);
 extern bool ShouldSyncTableMetadata(Oid relationId);
+extern bool ShouldSyncTableMetadataViaCatalog(Oid relationId);
 extern List * MetadataCreateCommands(void);
 extern List * MetadataDropCommands(void);
 extern char * DistributionCreateCommand(CitusTableCacheEntry *cacheEntry);
diff --git a/src/include/distributed/metadata_utility.h b/src/include/distributed/metadata_utility.h
index 1f0ab2bc7..4e7f0a743 100644
--- a/src/include/distributed/metadata_utility.h
+++ b/src/include/distributed/metadata_utility.h
@@ -202,6 +202,7 @@ extern Datum citus_relation_size(PG_FUNCTION_ARGS);
 /* Function declarations to read shard and shard placement data */
 extern uint32 TableShardReplicationFactor(Oid relationId);
 extern List * LoadShardIntervalList(Oid relationId);
+extern List * LoadUnsortedShardIntervalListViaCatalog(Oid relationId);
 extern ShardInterval * LoadShardIntervalWithLongestShardName(Oid relationId);
 extern int ShardIntervalCount(Oid relationId);
 extern List * LoadShardList(Oid relationId);
diff --git a/src/test/regress/expected/issue_5099.out b/src/test/regress/expected/issue_5099.out
index 00ab863da..bbd7a8727 100644
--- a/src/test/regress/expected/issue_5099.out
+++ b/src/test/regress/expected/issue_5099.out
@@ -14,4 +14,3 @@ SELECT create_distributed_table('range_dist_table_2', 'dist_col', 'range');
 \set VERBOSITY TERSE
 DROP SCHEMA issue_5099 CASCADE;
 NOTICE:  drop cascades to 2 other objects
-ERROR:  cache lookup failed for type 17048
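
Reproduction note (illustration only, not part of either patch): the scenario in patch
1/2 comes straight from the new regression test. On a build without patch 2/2, the final
DROP SCHEMA aborts inside the citus_drop_trigger code paths with "cache lookup failed
for type ...", presumably because the CASCADE has already dropped the composite type
used as the distribution column by the time the sql_drop event trigger tries to build
the table's cache entry. With patch 2/2 applied, those code paths read pg_dist_partition
and pg_dist_shard directly, and the drop completes with only the cascade NOTICE (see the
expected-output change in the last hunk). A minimal sketch for trying this by hand,
assuming a database with the Citus extension installed; the type OID in the recorded
output (17048) is installation-specific:

    CREATE SCHEMA issue_5099;
    SET search_path TO issue_5099;
    CREATE TYPE comp_type AS (
        int_field_1 BIGINT,
        int_field_2 BIGINT
    );
    CREATE TABLE range_dist_table_2 (dist_col comp_type);
    SELECT create_distributed_table('range_dist_table_2', 'dist_col', 'range');
    -- without patch 2/2: ERROR:  cache lookup failed for type ...
    -- with patch 2/2:    NOTICE:  drop cascades to 2 other objects, and the drop succeeds
    DROP SCHEMA issue_5099 CASCADE;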