diff --git a/src/backend/distributed/commands/truncate.c b/src/backend/distributed/commands/truncate.c index 558688598..a1a2d1c5f 100644 --- a/src/backend/distributed/commands/truncate.c +++ b/src/backend/distributed/commands/truncate.c @@ -48,9 +48,32 @@ static void AcquireDistributedLockOnRelations(List *relationIdList, LOCKMODE loc void ProcessTruncateStatement(TruncateStmt *truncateStatement) { + ListCell *rangeVarCell = NULL; + ListCell *relationCell = NULL; + List *lockedRelations = NIL; + + /* + * Lock relations while calling next three functions, because they expect + * relations to be locked. We release them then to avoid distributed + * deadlock in MX. + */ + foreach(rangeVarCell, truncateStatement->relations) + { + RangeVar *rangeVar = (RangeVar *) lfirst(rangeVarCell); + Relation relation = heap_openrv(rangeVar, AccessShareLock); + lockedRelations = lappend(lockedRelations, relation); + } + ErrorIfUnsupportedTruncateStmt(truncateStatement); EnsurePartitionTableNotReplicatedForTruncate(truncateStatement); ExecuteTruncateStmtSequentialIfNecessary(truncateStatement); + + foreach(relationCell, lockedRelations) + { + Relation relation = (Relation) lfirst(relationCell); + heap_close(relation, AccessShareLock); + } + LockTruncatedRelationMetadataInWorkers(truncateStatement); } @@ -67,7 +90,7 @@ ErrorIfUnsupportedTruncateStmt(TruncateStmt *truncateStatement) foreach(relationCell, relationList) { RangeVar *rangeVar = (RangeVar *) lfirst(relationCell); - Oid relationId = RangeVarGetRelid(rangeVar, NoLock, true); + Oid relationId = RangeVarGetRelid(rangeVar, NoLock, false); char relationKind = get_rel_relkind(relationId); if (IsDistributedTable(relationId) && relationKind == RELKIND_FOREIGN_TABLE) @@ -93,19 +116,15 @@ EnsurePartitionTableNotReplicatedForTruncate(TruncateStmt *truncateStatement) foreach(relationCell, truncateStatement->relations) { - RangeVar *relationRV = (RangeVar *) lfirst(relationCell); - Relation relation = heap_openrv(relationRV, NoLock); - Oid relationId = RelationGetRelid(relation); + RangeVar *rangeVar = (RangeVar *) lfirst(relationCell); + Oid relationId = RangeVarGetRelid(rangeVar, NoLock, false); if (!IsDistributedTable(relationId)) { - heap_close(relation, NoLock); continue; } EnsurePartitionTableNotReplicated(relationId); - - heap_close(relation, NoLock); } } @@ -181,22 +200,19 @@ LockTruncatedRelationMetadataInWorkers(TruncateStmt *truncateStatement) foreach(relationCell, truncateStatement->relations) { - RangeVar *relationRV = (RangeVar *) lfirst(relationCell); - Relation relation = heap_openrv(relationRV, NoLock); - Oid relationId = RelationGetRelid(relation); + RangeVar *rangeVar = (RangeVar *) lfirst(relationCell); + Oid relationId = RangeVarGetRelid(rangeVar, NoLock, false); DistTableCacheEntry *cacheEntry = NULL; List *referencingTableList = NIL; ListCell *referencingTableCell = NULL; if (!IsDistributedTable(relationId)) { - heap_close(relation, NoLock); continue; } if (list_member_oid(distributedRelationList, relationId)) { - heap_close(relation, NoLock); continue; } @@ -212,8 +228,6 @@ LockTruncatedRelationMetadataInWorkers(TruncateStmt *truncateStatement) distributedRelationList = list_append_unique_oid(distributedRelationList, referencingRelationId); } - - heap_close(relation, NoLock); } if (distributedRelationList != NIL) diff --git a/src/backend/distributed/master/master_stage_protocol.c b/src/backend/distributed/master/master_stage_protocol.c index 49d3f7bd4..698a584fc 100644 --- a/src/backend/distributed/master/master_stage_protocol.c +++ b/src/backend/distributed/master/master_stage_protocol.c @@ -259,6 +259,10 @@ master_append_table_to_shard(PG_FUNCTION_ARGS) shardInterval = LoadShardInterval(shardId); relationId = shardInterval->relationId; + + /* don't allow the table to be dropped */ + LockRelationOid(relationId, AccessShareLock); + cstoreTable = CStoreTable(relationId); storageType = shardInterval->storageType; diff --git a/src/backend/distributed/transaction/relation_access_tracking.c b/src/backend/distributed/transaction/relation_access_tracking.c index 88fb500ce..21f080aaa 100644 --- a/src/backend/distributed/transaction/relation_access_tracking.c +++ b/src/backend/distributed/transaction/relation_access_tracking.c @@ -222,46 +222,15 @@ PlacementAccessTypeToText(ShardPlacementAccessType accessType) static void RecordRelationAccessBase(Oid relationId, ShardPlacementAccessType accessType) { + /* + * We call this only for reference tables, and we don't support partitioned + * reference tables. + */ + Assert(!PartitionedTable(relationId) && !PartitionTable(relationId)); + /* make sure that this is not a conflicting access */ CheckConflictingRelationAccesses(relationId, accessType); - /* - * If a relation is partitioned, record accesses to all of its partitions as well. - * We prefer to use PartitionedTableNoLock() because at this point the necessary - * locks on the relation has already been acquired. - */ - if (PartitionedTableNoLock(relationId)) - { - List *partitionList = PartitionList(relationId); - ListCell *partitionCell = NULL; - - foreach(partitionCell, partitionList) - { - Oid partitionOid = lfirst_oid(partitionCell); - - /* - * During create_distributed_table, the partitions may not - * have been created yet and so there are no placements yet. - * We're already going to register them when we distribute - * the partitions. - */ - if (!IsDistributedTable(partitionOid)) - { - continue; - } - - /* recursively call the function to cover multi-level partitioned tables */ - RecordRelationAccessBase(partitionOid, accessType); - } - } - else if (PartitionTableNoLock(relationId)) - { - Oid parentOid = PartitionParentOid(relationId); - - /* record the parent */ - RecordPlacementAccessToCache(parentOid, accessType); - } - /* always record the relation that is being considered */ RecordPlacementAccessToCache(relationId, accessType); } @@ -521,12 +490,8 @@ RecordParallelRelationAccess(Oid relationId, ShardPlacementAccessType placementA /* act accordingly if it's a conflicting access */ CheckConflictingParallelRelationAccesses(relationId, placementAccess); - /* - * If a relation is partitioned, record accesses to all of its partitions as well. - * We prefer to use PartitionedTableNoLock() because at this point the necessary - * locks on the relation has already been acquired. - */ - if (PartitionedTableNoLock(relationId)) + /* If a relation is partitioned, record accesses to all of its partitions as well. */ + if (PartitionedTable(relationId)) { List *partitionList = PartitionList(relationId); ListCell *partitionCell = NULL; @@ -539,7 +504,7 @@ RecordParallelRelationAccess(Oid relationId, ShardPlacementAccessType placementA RecordParallelRelationAccess(partitionOid, placementAccess); } } - else if (PartitionTableNoLock(relationId)) + else if (PartitionTable(relationId)) { Oid parentOid = PartitionParentOid(relationId); diff --git a/src/backend/distributed/utils/colocation_utils.c b/src/backend/distributed/utils/colocation_utils.c index 89e3cebb7..e73c4bbcb 100644 --- a/src/backend/distributed/utils/colocation_utils.c +++ b/src/backend/distributed/utils/colocation_utils.c @@ -1059,10 +1059,18 @@ DeleteColocationGroup(uint32 colocationId) heapTuple = systable_getnext(scanDescriptor); if (HeapTupleIsValid(heapTuple)) { + /* + * simple_heap_delete() expects that the caller has at least an + * AccessShareLock on replica identity index. + */ + Relation replicaIndex = + index_open(RelationGetReplicaIndex(pgDistColocation), + AccessShareLock); simple_heap_delete(pgDistColocation, &(heapTuple->t_self)); CitusInvalidateRelcacheByRelid(DistColocationRelationId()); CommandCounterIncrement(); + heap_close(replicaIndex, AccessShareLock); } systable_endscan(scanDescriptor); diff --git a/src/backend/distributed/utils/multi_partitioning_utils.c b/src/backend/distributed/utils/multi_partitioning_utils.c index b03a6cf08..f5a2124a9 100644 --- a/src/backend/distributed/utils/multi_partitioning_utils.c +++ b/src/backend/distributed/utils/multi_partitioning_utils.c @@ -40,9 +40,15 @@ static char * PartitionBound(Oid partitionId); bool PartitionedTable(Oid relationId) { - Relation rel = heap_open(relationId, AccessShareLock); + Relation rel = try_relation_open(relationId, AccessShareLock); bool partitionedTable = false; + /* don't error out for tables that are dropped */ + if (rel == NULL) + { + return false; + } + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { partitionedTable = true; @@ -90,9 +96,15 @@ PartitionedTableNoLock(Oid relationId) bool PartitionTable(Oid relationId) { - Relation rel = heap_open(relationId, AccessShareLock); + Relation rel = try_relation_open(relationId, AccessShareLock); bool partitionTable = false; + /* don't error out for tables that are dropped */ + if (rel == NULL) + { + return false; + } + partitionTable = rel->rd_rel->relispartition; /* keep the lock */ diff --git a/src/backend/distributed/utils/node_metadata.c b/src/backend/distributed/utils/node_metadata.c index aec183d33..acf18f225 100644 --- a/src/backend/distributed/utils/node_metadata.c +++ b/src/backend/distributed/utils/node_metadata.c @@ -1321,9 +1321,15 @@ DeleteNodeRow(char *nodeName, int32 nodePort) HeapTuple heapTuple = NULL; SysScanDesc heapScan = NULL; ScanKeyData scanKey[2]; - Relation pgDistNode = heap_open(DistNodeRelationId(), RowExclusiveLock); + /* + * simple_heap_delete() expects that the caller has at least an + * AccessShareLock on replica identity index. + */ + Relation replicaIndex = index_open(RelationGetReplicaIndex(pgDistNode), + AccessShareLock); + ScanKeyInit(&scanKey[0], Anum_pg_dist_node_nodename, BTEqualStrategyNumber, F_TEXTEQ, CStringGetTextDatum(nodeName)); ScanKeyInit(&scanKey[1], Anum_pg_dist_node_nodeport, @@ -1350,6 +1356,7 @@ DeleteNodeRow(char *nodeName, int32 nodePort) /* increment the counter so that next command won't see the row */ CommandCounterIncrement(); + heap_close(replicaIndex, AccessShareLock); heap_close(pgDistNode, NoLock); } diff --git a/src/test/regress/expected/multi_partitioning.out b/src/test/regress/expected/multi_partitioning.out index 7d8e57c6f..9089d942c 100644 --- a/src/test/regress/expected/multi_partitioning.out +++ b/src/test/regress/expected/multi_partitioning.out @@ -1409,8 +1409,10 @@ SELECT relation::regclass, locktype, mode FROM pg_locks WHERE relation::regclass partitioning_locks | relation | AccessExclusiveLock partitioning_locks | relation | AccessShareLock partitioning_locks_2009 | relation | AccessExclusiveLock + partitioning_locks_2009 | relation | AccessShareLock partitioning_locks_2010 | relation | AccessExclusiveLock -(4 rows) + partitioning_locks_2010 | relation | AccessShareLock +(6 rows) COMMIT; -- test locks on TRUNCATE diff --git a/src/test/regress/expected/multi_partitioning_0.out b/src/test/regress/expected/multi_partitioning_0.out index 51a76ac90..9eaf20f3a 100644 --- a/src/test/regress/expected/multi_partitioning_0.out +++ b/src/test/regress/expected/multi_partitioning_0.out @@ -1381,8 +1381,10 @@ SELECT relation::regclass, locktype, mode FROM pg_locks WHERE relation::regclass partitioning_locks | relation | AccessExclusiveLock partitioning_locks | relation | AccessShareLock partitioning_locks_2009 | relation | AccessExclusiveLock + partitioning_locks_2009 | relation | AccessShareLock partitioning_locks_2010 | relation | AccessExclusiveLock -(4 rows) + partitioning_locks_2010 | relation | AccessShareLock +(6 rows) COMMIT; -- test locks on TRUNCATE @@ -1394,10 +1396,12 @@ SELECT relation::regclass, locktype, mode FROM pg_locks WHERE relation::regclass partitioning_locks | relation | AccessExclusiveLock partitioning_locks | relation | AccessShareLock partitioning_locks_2009 | relation | AccessExclusiveLock + partitioning_locks_2009 | relation | AccessShareLock partitioning_locks_2009 | relation | ShareLock partitioning_locks_2010 | relation | AccessExclusiveLock + partitioning_locks_2010 | relation | AccessShareLock partitioning_locks_2010 | relation | ShareLock -(6 rows) +(8 rows) COMMIT; -- test shard resource locks with multi-shard UPDATE diff --git a/src/test/regress/expected/multi_partitioning_1.out b/src/test/regress/expected/multi_partitioning_1.out index 6f46bd6f9..72ea95550 100644 --- a/src/test/regress/expected/multi_partitioning_1.out +++ b/src/test/regress/expected/multi_partitioning_1.out @@ -1409,8 +1409,10 @@ SELECT relation::regclass, locktype, mode FROM pg_locks WHERE relation::regclass partitioning_locks | relation | AccessExclusiveLock partitioning_locks | relation | AccessShareLock partitioning_locks_2009 | relation | AccessExclusiveLock + partitioning_locks_2009 | relation | AccessShareLock partitioning_locks_2010 | relation | AccessExclusiveLock -(4 rows) + partitioning_locks_2010 | relation | AccessShareLock +(6 rows) COMMIT; -- test locks on TRUNCATE @@ -1422,10 +1424,12 @@ SELECT relation::regclass, locktype, mode FROM pg_locks WHERE relation::regclass partitioning_locks | relation | AccessExclusiveLock partitioning_locks | relation | AccessShareLock partitioning_locks_2009 | relation | AccessExclusiveLock + partitioning_locks_2009 | relation | AccessShareLock partitioning_locks_2009 | relation | ShareLock partitioning_locks_2010 | relation | AccessExclusiveLock + partitioning_locks_2010 | relation | AccessShareLock partitioning_locks_2010 | relation | ShareLock -(6 rows) +(8 rows) COMMIT; -- test shard resource locks with multi-shard UPDATE