Fix distributed deadlock for TRUNCATE

pull/2894/head
Hadi Moshayedi 2019-08-16 11:02:01 -07:00 committed by Hadi Moshayedi
parent 036b4216a8
commit 6be1bacddd
3 changed files with 41 additions and 25 deletions

View File

@ -48,32 +48,9 @@ static void AcquireDistributedLockOnRelations(List *relationIdList, LOCKMODE loc
void
ProcessTruncateStatement(TruncateStmt *truncateStatement)
{
ListCell *rangeVarCell = NULL;
ListCell *relationCell = NULL;
List *lockedRelations = NIL;
/*
* Lock relations while calling next three functions, because they expect
* relations to be locked. We release them then to avoid distributed
* deadlock in MX.
*/
foreach(rangeVarCell, truncateStatement->relations)
{
RangeVar *rangeVar = (RangeVar *) lfirst(rangeVarCell);
Relation relation = heap_openrv(rangeVar, AccessShareLock);
lockedRelations = lappend(lockedRelations, relation);
}
ErrorIfUnsupportedTruncateStmt(truncateStatement);
EnsurePartitionTableNotReplicatedForTruncate(truncateStatement);
ExecuteTruncateStmtSequentialIfNecessary(truncateStatement);
foreach(relationCell, lockedRelations)
{
Relation relation = (Relation) lfirst(relationCell);
heap_close(relation, AccessShareLock);
}
LockTruncatedRelationMetadataInWorkers(truncateStatement);
}

View File

@ -34,6 +34,7 @@
#include "lib/stringinfo.h"
#include "nodes/pg_list.h"
#include "storage/lock.h"
#include "storage/lmgr.h"
#include "utils/builtins.h"
#include "utils/elog.h"
#include "utils/errcodes.h"
@ -231,6 +232,9 @@ RepairShardPlacement(int64 shardId, char *sourceNodeName, int32 sourceNodePort,
List *placementList = NIL;
ShardPlacement *placement = NULL;
/* prevent table from being dropped */
LockRelationOid(distributedTableId, AccessShareLock);
EnsureTableOwner(distributedTableId);
/*

View File

@ -24,6 +24,7 @@
#include "distributed/shardinterval_utils.h"
#include "lib/stringinfo.h"
#include "nodes/pg_list.h"
#include "pgstat.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
@ -32,6 +33,7 @@
static char * PartitionBound(Oid partitionId);
static Relation try_relation_open_nolock(Oid relationId);
/*
@ -69,7 +71,7 @@ PartitionedTable(Oid relationId)
bool
PartitionedTableNoLock(Oid relationId)
{
Relation rel = try_relation_open(relationId, NoLock);
Relation rel = try_relation_open_nolock(relationId);
bool partitionedTable = false;
/* don't error out for tables that are dropped */
@ -122,7 +124,7 @@ PartitionTable(Oid relationId)
bool
PartitionTableNoLock(Oid relationId)
{
Relation rel = try_relation_open(relationId, NoLock);
Relation rel = try_relation_open_nolock(relationId);
bool partitionTable = false;
/* don't error out for tables that are dropped */
@ -140,6 +142,39 @@ PartitionTableNoLock(Oid relationId)
}
/*
* try_relation_open_nolock opens a relation with given relationId without
* acquiring locks. PostgreSQL's try_relation_open() asserts that caller
* has already acquired a lock on the relation, which we don't always do.
*
* ATTENTION:
* 1. Sync this with try_relation_open(). It hasn't changed for 10 to 12
* releases though.
* 2. We should remove this after we fix the locking/distributed deadlock
* issues with MX Truncate. See https://github.com/citusdata/citus/pull/2894
* for more discussion.
*/
static Relation
try_relation_open_nolock(Oid relationId)
{
Relation relation = NULL;
if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(relationId)))
{
return NULL;
}
relation = RelationIdGetRelation(relationId);
if (!RelationIsValid(relation))
{
return NULL;
}
pgstat_initstats(relation);
return relation;
}
/*
* IsChildTable returns true if the table is inherited. Note that
* partition tables inherites by default. However, this function