mirror of https://github.com/citusdata/citus.git
Unify distributed execution logic for single replicated tables
Citus does not acquire any executor locks when the shard replication factor is 1. With this commit, we unify this decision and exit early.
parent 9bfff4ba8d
commit 0c79558cc9
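In essence, the commit folds two separate locking paths into a single decision and returns before taking any executor locks when they cannot matter. Below is a condensed sketch of that decision; the helper itself is hypothetical, but list_length, ShouldRunTasksSequentially and AnyAnchorTableIsReplicated are the names used in the diff that follows, and the snippet assumes the surrounding adaptive executor context.

/*
 * Hypothetical condensation of the early exit introduced below: executor
 * locks are only needed when some anchor table is replicated or when the
 * tasks could actually run in parallel.
 */
static bool
ExecutorShardLocksNeeded(List *taskList)
{
    bool parallelExecutionNotPossible =
        list_length(taskList) == 1 || ShouldRunTasksSequentially(taskList);

    /* reference tables and replication factor > 1 both count as replicated */
    bool anyAnchorTableIsReplicated = AnyAnchorTableIsReplicated(taskList);

    return anyAnchorTableIsReplicated || !parallelExecutionNotPossible;
}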
@@ -632,6 +632,7 @@ static void CleanUpSessions(DistributedExecution *execution);
 static void LockPartitionsForDistributedPlan(DistributedPlan *distributedPlan);
 static void AcquireExecutorShardLocksForExecution(DistributedExecution *execution);
+static bool AnyAnchorTableIsReplicated(List *taskList);
 static bool DistributedExecutionModifiesDatabase(DistributedExecution *execution);
 static bool TaskListModifiesDatabase(RowModifyLevel modLevel, List *taskList);
 static bool DistributedExecutionRequiresRollback(List *taskList);
@@ -1577,22 +1578,135 @@ AcquireExecutorShardLocksForExecution(DistributedExecution *execution)
         return;
     }

     /*
      * When executing in sequential mode or only executing a single task, we
      * do not need multi-shard locks.
      */
-    if (list_length(taskList) == 1 || ShouldRunTasksSequentially(taskList))
-    {
-        Task *task = NULL;
-        foreach_ptr(task, taskList)
-        {
-            AcquireExecutorShardLocks(task, modLevel);
-        }
-    }
-    else if (list_length(taskList) > 1)
-    {
-        AcquireExecutorMultiShardLocks(taskList);
-    }
+    bool parallelExecutionNotPossible =
+        list_length(taskList) == 1 || ShouldRunTasksSequentially(taskList);
+
+    bool anyAnchorTableIsReplicated = AnyAnchorTableIsReplicated(taskList);
+    if (!anyAnchorTableIsReplicated && parallelExecutionNotPossible)
+    {
+        /*
+         * When a distributed query runs on tables with replication
+         * factor == 1, we rely on Postgres to handle the serialization
+         * of the concurrent operations on the workers.
+         *
+         * For reference tables, even if their placements are replicated
+         * only once (e.g., single node), we acquire the distributed
+         * execution locks to be consistent when new node(s) are added.
+         */
+        return;
+    }
+
+    /*
+     * We first assume that all the remaining modifications are going to
+     * be serialized. So, start with an ExclusiveLock and lower the lock level
+     * as much as possible.
+     */
+    int lockMode = ExclusiveLock;
+
+    if (!anyAnchorTableIsReplicated && IsCoordinator())
+    {
+        /*
+         * When all writes are commutative then we only need to prevent multi-shard
+         * commands from running concurrently with each other and with commands
+         * that are explicitly non-commutative. When there is no replication then
+         * we only need to prevent concurrent multi-shard commands.
+         *
+         * In either case, ShareUpdateExclusive has the desired effect, since
+         * it conflicts with itself and ExclusiveLock (taken by non-commutative
+         * writes).
+         *
+         * However, some users find this too restrictive, so we allow them to
+         * reduce to a RowExclusiveLock when citus.enable_deadlock_prevention
+         * is enabled, which lets multi-shard modifications run in parallel as
+         * long as they all disable the GUC.
+         *
+         * We also skip taking a heavy-weight lock when running multi-shard
+         * commands from workers, since we currently do not prevent concurrency
+         * across workers anyway.
+         */
+        lockMode =
+            EnableDeadlockPrevention ? ShareUpdateExclusiveLock : RowExclusiveLock;
+    }
+
+    if (AllModificationsCommutative ||
+        (parallelExecutionNotPossible && modLevel < ROW_MODIFY_NONCOMMUTATIVE))
+    {
+        /*
+         * If either the user allows it via a GUC or the commands are
+         * single-shard commutative commands (e.g., INSERT), we lower the
+         * level to RowExclusiveLock to allow concurrency among the tasks.
+         */
+        lockMode = RowExclusiveLock;
+    }
+
+    /* now, iterate on the tasks and acquire the executor locks on the shards */
+    Task *task = NULL;
+    foreach_ptr(task, taskList)
+    {
+        /*
+         * If we are dealing with a partition, we also take locks on the parent
+         * table to prevent deadlocks on concurrent operations on a partition
+         * and its parent.
+         */
+        LockParentShardResourceIfPartition(task->anchorShardId, lockMode);
+
+        ShardInterval *anchorShardInterval = LoadShardInterval(task->anchorShardId);
+        SerializeNonCommutativeWrites(list_make1(anchorShardInterval), lockMode);
+
+        /* Acquire additional locks for SELECT .. FOR UPDATE on reference tables */
+        AcquireExecutorShardLocksForRelationRowLockList(task->relationRowLockList);
+
+        /*
+         * If the task has a subselect, then we may need to lock the shards from which
+         * the query selects as well to prevent the subselects from seeing different
+         * results on different replicas.
+         */
+        if (RequiresConsistentSnapshot(task))
+        {
+            /*
+             * ExclusiveLock conflicts with all lock types used by modifications
+             * and therefore prevents other modifications from running
+             * concurrently.
+             */
+            LockRelationShardResources(task->relationShardList, ExclusiveLock);
+        }
+    }
 }
+
+
+/*
+ * AnyAnchorTableIsReplicated iterates over the task list and returns true
+ * if any task's anchor shard belongs to a replicated table. We qualify
+ * replicated tables as any reference table or any distributed table with
+ * replication factor > 1.
+ */
+static bool
+AnyAnchorTableIsReplicated(List *taskList)
+{
+    Task *task = NULL;
+    foreach_ptr(task, taskList)
+    {
+        int64 shardId = task->anchorShardId;
+
+        if (shardId == INVALID_SHARD_ID)
+        {
+            continue;
+        }
+
+        if (ReferenceTableShardId(shardId))
+        {
+            return true;
+        }
+
+        Oid relationId = RelationIdForShard(shardId);
+        if (!SingleReplicatedTable(relationId))
+        {
+            return true;
+        }
+    }
+
+    return false;
+}
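For readability, the lock-mode cascade introduced above can be condensed into one hypothetical helper. It is not part of the commit; EnableDeadlockPrevention, AllModificationsCommutative and the ROW_MODIFY_* levels are the names used in the hunk above.

/*
 * Hypothetical condensation of the lock-mode selection above: start
 * pessimistic with ExclusiveLock and lower the level whenever the
 * conditions in the hunk allow it.
 */
static LOCKMODE
ExecutorShardLockMode(bool anyAnchorTableIsReplicated,
                      bool parallelExecutionNotPossible,
                      RowModifyLevel modLevel)
{
    LOCKMODE lockMode = ExclusiveLock;

    if (!anyAnchorTableIsReplicated && IsCoordinator())
    {
        /* multi-shard commands only need to conflict with each other */
        lockMode = EnableDeadlockPrevention ?
                   ShareUpdateExclusiveLock : RowExclusiveLock;
    }

    if (AllModificationsCommutative ||
        (parallelExecutionNotPossible && modLevel < ROW_MODIFY_NONCOMMUTATIVE))
    {
        /* commutative or single-task commands may run concurrently */
        lockMode = RowExclusiveLock;
    }

    return lockMode;
}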
@@ -19,9 +19,7 @@
 #include "distributed/transaction_management.h"


-static bool RequiresConsistentSnapshot(Task *task);
 static void AcquireExecutorShardLockForRowModify(Task *task, RowModifyLevel modLevel);
-static void AcquireExecutorShardLocksForRelationRowLockList(List *relationRowLockList);


 /*
@@ -88,84 +86,7 @@ AcquireExecutorShardLocks(Task *task, RowModifyLevel modLevel)
  */
 void
 AcquireExecutorMultiShardLocks(List *taskList)
-{
-    Task *task = NULL;
-    foreach_ptr(task, taskList)
-    {
-        LOCKMODE lockMode = NoLock;
-
-        if (task->anchorShardId == INVALID_SHARD_ID)
-        {
-            /* no shard locks to take if the task is not anchored to a shard */
-            continue;
-        }
-
-        if (AllModificationsCommutative || list_length(task->taskPlacementList) == 1)
-        {
-            /*
-             * When all writes are commutative then we only need to prevent multi-shard
-             * commands from running concurrently with each other and with commands
-             * that are explicitly non-commutative. When there is no replication then
-             * we only need to prevent concurrent multi-shard commands.
-             *
-             * In either case, ShareUpdateExclusive has the desired effect, since
-             * it conflicts with itself and ExclusiveLock (taken by non-commutative
-             * writes).
-             *
-             * However, some users find this too restrictive, so we allow them to
-             * reduce to a RowExclusiveLock when citus.enable_deadlock_prevention
-             * is enabled, which lets multi-shard modifications run in parallel as
-             * long as they all disable the GUC.
-             *
-             * We also skip taking a heavy-weight lock when running a multi-shard
-             * commands from workers, since we cannot prevent concurrency across
-             * workers anyway.
-             */
-
-            if (EnableDeadlockPrevention && IsCoordinator())
-            {
-                lockMode = ShareUpdateExclusiveLock;
-            }
-            else
-            {
-                lockMode = RowExclusiveLock;
-            }
-        }
-        else
-        {
-            /*
-             * When there is replication, prevent all concurrent writes to the same
-             * shards to ensure the writes are ordered.
-             */
-
-            lockMode = ExclusiveLock;
-        }
-
-        /*
-         * If we are dealing with a partition we are also taking locks on parent table
-         * to prevent deadlocks on concurrent operations on a partition and its parent.
-         */
-        LockParentShardResourceIfPartition(task->anchorShardId, lockMode);
-        LockShardResource(task->anchorShardId, lockMode);
-
-        /*
-         * If the task has a subselect, then we may need to lock the shards from which
-         * the query selects as well to prevent the subselects from seeing different
-         * results on different replicas.
-         */
-
-        if (RequiresConsistentSnapshot(task))
-        {
-            /*
-             * ExclusiveLock conflicts with all lock types used by modifications
-             * and therefore prevents other modifications from running
-             * concurrently.
-             */
-
-            LockRelationShardResources(task->relationShardList, ExclusiveLock);
-        }
-    }
-}
+{ }


 /*
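The comments in this file lean on PostgreSQL's standard heavyweight-lock conflict matrix; for reference, the modes involved relate as follows (standard Postgres semantics, not something introduced by this commit):

/*
 * RowExclusiveLock         - does not conflict with itself or with
 *                            ShareUpdateExclusiveLock, so commutative writes
 *                            can interleave; it does conflict with ExclusiveLock.
 * ShareUpdateExclusiveLock - conflicts with itself and with ExclusiveLock,
 *                            which is what serializes multi-shard commands.
 * ExclusiveLock            - conflicts with all of the modes above and
 *                            therefore serializes every concurrent modification.
 */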
@@ -173,7 +94,7 @@ AcquireExecutorMultiShardLocks(List *taskList)
  * the necessary locks to ensure that a subquery in the modify query
  * returns the same output for all task placements.
  */
-static bool
+bool
 RequiresConsistentSnapshot(Task *task)
 {
     bool requiresIsolation = false;
@@ -248,111 +169,10 @@ AcquireMetadataLocks(List *taskList)

 static void
 AcquireExecutorShardLockForRowModify(Task *task, RowModifyLevel modLevel)
-{
-    LOCKMODE lockMode = NoLock;
-    int64 shardId = task->anchorShardId;
-
-    if (shardId == INVALID_SHARD_ID)
-    {
-        return;
-    }
-
-    if (modLevel <= ROW_MODIFY_READONLY)
-    {
-        /*
-         * The executor shard lock is used to maintain consistency between
-         * replicas and therefore no lock is required for read-only queries
-         * or in general when there is only one replica.
-         */
-
-        lockMode = NoLock;
-    }
-    else if (list_length(task->taskPlacementList) == 1)
-    {
-        if (task->replicationModel == REPLICATION_MODEL_2PC)
-        {
-            /*
-             * While we don't need a lock to ensure writes are applied in
-             * a consistent order when there is a single replica. We also use
-             * shard resource locks as a crude implementation of SELECT..
-             * FOR UPDATE on reference tables, so we should always take
-             * a lock that conflicts with the FOR UPDATE/SHARE locks.
-             */
-            lockMode = RowExclusiveLock;
-        }
-        else
-        {
-            /*
-             * When there is no replication, the worker itself can decide on
-             * on the order in which writes are applied.
-             */
-            lockMode = NoLock;
-        }
-    }
-    else if (AllModificationsCommutative)
-    {
-        /*
-         * Bypass commutativity checks when citus.all_modifications_commutative
-         * is enabled.
-         *
-         * A RowExclusiveLock does not conflict with itself and therefore allows
-         * multiple commutative commands to proceed concurrently. It does
-         * conflict with ExclusiveLock, which may still be obtained by another
-         * session that executes an UPDATE/DELETE/UPSERT command with
-         * citus.all_modifications_commutative disabled.
-         */
-
-        lockMode = RowExclusiveLock;
-    }
-    else if (modLevel < ROW_MODIFY_NONCOMMUTATIVE)
-    {
-        /*
-         * An INSERT commutes with other INSERT commands, since performing them
-         * out-of-order only affects the table order on disk, but not the
-         * contents.
-         *
-         * When a unique constraint exists, INSERTs are not strictly commutative,
-         * but whichever INSERT comes last will error out and thus has no effect.
-         * INSERT is not commutative with UPDATE/DELETE/UPSERT, since the
-         * UPDATE/DELETE/UPSERT may consider the INSERT, depending on execution
-         * order.
-         *
-         * A RowExclusiveLock does not conflict with itself and therefore allows
-         * multiple INSERT commands to proceed concurrently. It conflicts with
-         * ExclusiveLock obtained by UPDATE/DELETE/UPSERT, ensuring those do
-         * not run concurrently with INSERT.
-         */
-
-        lockMode = RowExclusiveLock;
-    }
-    else
-    {
-        /*
-         * UPDATE/DELETE/UPSERT commands do not commute with other modifications
-         * since the rows modified by one command may be affected by the outcome
-         * of another command.
-         *
-         * We need to handle upsert before INSERT, because PostgreSQL models
-         * upsert commands as INSERT with an ON CONFLICT section.
-         *
-         * ExclusiveLock conflicts with all lock types used by modifications
-         * and therefore prevents other modifications from running
-         * concurrently.
-         */
-
-        lockMode = ExclusiveLock;
-    }
-
-    if (lockMode != NoLock)
-    {
-        ShardInterval *shardInterval = LoadShardInterval(shardId);
-
-        SerializeNonCommutativeWrites(list_make1(shardInterval), lockMode);
-    }
-}
+{ }


-static void
+void
 AcquireExecutorShardLocksForRelationRowLockList(List *relationRowLockList)
 {
     LOCKMODE rowLockMode = NoLock;
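The removed AcquireExecutorShardLockForRowModify encoded a commutativity ladder from NoLock up to ExclusiveLock. A hypothetical condensation of that mapping (illustrative only; the names mirror the code removed above):

/*
 * Hypothetical condensation of the removed per-modification lock choice.
 */
static LOCKMODE
RowModifyLockMode(Task *task, RowModifyLevel modLevel)
{
    if (modLevel <= ROW_MODIFY_READONLY)
    {
        /* reads never need executor shard locks */
        return NoLock;
    }
    else if (list_length(task->taskPlacementList) == 1)
    {
        /*
         * Single placement: only reference tables (2PC) need a lock,
         * as a crude SELECT .. FOR UPDATE implementation.
         */
        return task->replicationModel == REPLICATION_MODEL_2PC ?
               RowExclusiveLock : NoLock;
    }
    else if (AllModificationsCommutative || modLevel < ROW_MODIFY_NONCOMMUTATIVE)
    {
        /* commutative writes (e.g., INSERT) may interleave */
        return RowExclusiveLock;
    }

    /* UPDATE/DELETE/UPSERT must be serialized across replicas */
    return ExclusiveLock;
}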
@@ -17,7 +17,9 @@
 #include "distributed/multi_physical_planner.h"

 extern void AcquireExecutorShardLocks(Task *task, RowModifyLevel modLevel);
+extern void AcquireExecutorShardLocksForRelationRowLockList(List *relationRowLockList);
 extern void AcquireExecutorMultiShardLocks(List *taskList);
+extern bool RequiresConsistentSnapshot(Task *task);
 extern void AcquireMetadataLocks(List *taskList);
 extern void LockPartitionsInRelationList(List *relationIdList, LOCKMODE lockmode);
 extern void LockPartitionRelations(Oid relationId, LOCKMODE lockMode);