diff --git a/src/backend/distributed/master/master_create_shards.c b/src/backend/distributed/master/master_create_shards.c index 19f334092..76126c165 100644 --- a/src/backend/distributed/master/master_create_shards.c +++ b/src/backend/distributed/master/master_create_shards.c @@ -162,6 +162,9 @@ CreateShardsWithRoundRobinPolicy(Oid distributedTableId, int32 shardCount, /* calculate the split of the hash space */ hashTokenIncrement = HASH_TOKEN_COUNT / shardCount; + /* don't allow concurrent node list changes that require an exclusive lock */ + LockRelationOid(DistNodeRelationId(), RowShareLock); + /* load and sort the worker node list for deterministic placement */ workerNodeList = ActivePrimaryNodeList(); workerNodeList = SortList(workerNodeList, CompareWorkerNodes); diff --git a/src/backend/distributed/master/master_stage_protocol.c b/src/backend/distributed/master/master_stage_protocol.c index ac60b5f4d..60f39103a 100644 --- a/src/backend/distributed/master/master_stage_protocol.c +++ b/src/backend/distributed/master/master_stage_protocol.c @@ -44,6 +44,7 @@ #include "distributed/transaction_management.h" #include "distributed/worker_manager.h" #include "distributed/worker_protocol.h" +#include "storage/lmgr.h" #include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/inval.h" @@ -94,6 +95,12 @@ master_create_empty_shard(PG_FUNCTION_ARGS) EnsureTablePermissions(relationId, ACL_INSERT); CheckDistributedTable(relationId); + /* don't allow the table to be dropped */ + LockRelationOid(relationId, AccessShareLock); + + /* don't allow concurrent node list changes that require an exclusive lock */ + LockRelationOid(DistNodeRelationId(), RowShareLock); + /* * We check whether the table is a foreign table or not. If it is, we set * storage type as foreign also. Only exception is if foreign table is a diff --git a/src/backend/distributed/utils/node_metadata.c b/src/backend/distributed/utils/node_metadata.c index 2ecff7329..e966c4124 100644 --- a/src/backend/distributed/utils/node_metadata.c +++ b/src/backend/distributed/utils/node_metadata.c @@ -219,6 +219,9 @@ master_disable_node(PG_FUNCTION_ARGS) CheckCitusVersion(ERROR); + /* take an exclusive lock on pg_dist_node to serialize pg_dist_node changes */ + LockRelationOid(DistNodeRelationId(), ExclusiveLock); + workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort); if (workerNode == NULL) { @@ -351,28 +354,12 @@ PrimaryNodeForGroup(uint32 groupId, bool *groupContainsNodes) static Datum ActivateNode(char *nodeName, int nodePort) { - Relation pgDistNode = heap_open(DistNodeRelationId(), RowExclusiveLock); - HeapTuple heapTuple = GetNodeTuple(nodeName, nodePort); - CommandId commandId = GetCurrentCommandId(true); - LockTupleMode lockTupleMode = LockTupleExclusive; - LockWaitPolicy lockWaitPolicy = LockWaitError; - bool followUpdates = false; - Buffer buffer = 0; - HeapUpdateFailureData heapUpdateFailureData; - WorkerNode *workerNode = NULL; bool isActive = true; Datum nodeRecord = 0; - if (heapTuple == NULL) - { - ereport(ERROR, (errmsg("could not find valid entry for node \"%s:%d\"", - nodeName, nodePort))); - } - - heap_lock_tuple(pgDistNode, heapTuple, commandId, lockTupleMode, lockWaitPolicy, - followUpdates, &buffer, &heapUpdateFailureData); - ReleaseBuffer(buffer); + /* take an exclusive lock on pg_dist_node to serialize pg_dist_node changes */ + LockRelationOid(DistNodeRelationId(), ExclusiveLock); SetNodeState(nodeName, nodePort, isActive); @@ -385,8 +372,6 @@ ActivateNode(char *nodeName, int nodePort) nodeRecord = GenerateNodeTuple(workerNode); - heap_close(pgDistNode, NoLock); - return nodeRecord; } @@ -400,7 +385,7 @@ Datum master_initialize_node_metadata(PG_FUNCTION_ARGS) { ListCell *workerNodeCell = NULL; - List *workerNodes = NULL; + List *workerNodes = NIL; bool nodeAlreadyExists = false; /* nodeRole and nodeCluster don't exist when this function is caled */ @@ -409,7 +394,15 @@ master_initialize_node_metadata(PG_FUNCTION_ARGS) CheckCitusVersion(ERROR); + /* + * This function should only ever be called from the create extension + * script, but just to be sure, take an exclusive lock on pg_dist_node + * to prevent concurrent calls. + */ + LockRelationOid(DistNodeRelationId(), ExclusiveLock); + workerNodes = ParseWorkerNodeFileAndRename(); + foreach(workerNodeCell, workerNodes) { WorkerNode *workerNode = (WorkerNode *) lfirst(workerNodeCell); @@ -629,6 +622,9 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort) EnsureCoordinator(); EnsureSuperUser(); + /* take an exclusive lock on pg_dist_node to serialize pg_dist_node changes */ + LockRelationOid(DistNodeRelationId(), ExclusiveLock); + workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort); if (workerNode == NULL) { @@ -734,17 +730,19 @@ AddNodeMetadata(char *nodeName, int32 nodePort, int32 groupId, char *nodeRack, *nodeAlreadyExists = false; /* - * Acquire a lock so that no one can do this concurrently. Specifically, - * pg_dist_node_trigger_func also takes out a ShareRowExclusiveLock. This lets us - * ensure there is only one primary per node group. + * Take an exclusive lock on pg_dist_node to serialize node changes. + * We may want to relax or have more fine-grained locking in the future + * to allow users to add multiple nodes concurrently. */ - LockRelationOid(DistNodeRelationId(), ShareRowExclusiveLock); + LockRelationOid(DistNodeRelationId(), ExclusiveLock); workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort); if (workerNode != NULL) { - *nodeAlreadyExists = true; + /* fill return data and return */ returnData = GenerateNodeTuple(workerNode); + *nodeAlreadyExists = true; + return returnData; } @@ -923,7 +921,6 @@ GenerateNodeTuple(WorkerNode *workerNode) values[Anum_pg_dist_node_noderole - 1] = ObjectIdGetDatum(workerNode->nodeRole); values[Anum_pg_dist_node_nodecluster - 1] = nodeClusterNameDatum; - /* open shard relation and generate new tuple */ pgDistNode = heap_open(DistNodeRelationId(), AccessShareLock); /* generate the tuple */ @@ -931,7 +928,6 @@ GenerateNodeTuple(WorkerNode *workerNode) heapTuple = heap_form_tuple(tupleDescriptor, values, isNulls); nodeDatum = HeapTupleGetDatum(heapTuple); - /* close the relation */ heap_close(pgDistNode, NoLock); return nodeDatum; @@ -1088,7 +1084,6 @@ InsertNodeRow(int nodeid, char *nodeName, int32 nodePort, uint32 groupId, char * values[Anum_pg_dist_node_noderole - 1] = ObjectIdGetDatum(nodeRole); values[Anum_pg_dist_node_nodecluster - 1] = nodeClusterNameDatum; - /* open shard relation and insert new tuple */ pgDistNode = heap_open(DistNodeRelationId(), RowExclusiveLock); tupleDescriptor = RelationGetDescr(pgDistNode); @@ -1096,13 +1091,13 @@ InsertNodeRow(int nodeid, char *nodeName, int32 nodePort, uint32 groupId, char * CatalogTupleInsert(pgDistNode, heapTuple); - /* close relation and invalidate previous cache entry */ - heap_close(pgDistNode, NoLock); - CitusInvalidateRelcacheByRelid(DistNodeRelationId()); /* increment the counter so that next command can see the row */ CommandCounterIncrement(); + + /* close relation */ + heap_close(pgDistNode, NoLock); } @@ -1140,13 +1135,14 @@ DeleteNodeRow(char *nodeName, int32 nodePort) simple_heap_delete(pgDistNode, &(heapTuple->t_self)); systable_endscan(heapScan); - heap_close(pgDistNode, NoLock); /* ensure future commands don't use the node we just removed */ CitusInvalidateRelcacheByRelid(DistNodeRelationId()); /* increment the counter so that next command won't see the row */ CommandCounterIncrement(); + + heap_close(pgDistNode, NoLock); } diff --git a/src/backend/distributed/utils/reference_table_utils.c b/src/backend/distributed/utils/reference_table_utils.c index d0362c88e..fc139ea89 100644 --- a/src/backend/distributed/utils/reference_table_utils.c +++ b/src/backend/distributed/utils/reference_table_utils.c @@ -230,6 +230,9 @@ ReplicateShardToAllWorkers(ShardInterval *shardInterval) List *workerNodeList = NULL; ListCell *workerNodeCell = NULL; + /* prevent concurrent pg_dist_node changes */ + LockRelationOid(DistNodeRelationId(), RowShareLock); + workerNodeList = ActivePrimaryNodeList(); /* diff --git a/src/backend/distributed/utils/resource_lock.c b/src/backend/distributed/utils/resource_lock.c index 296508e9d..d4da891ae 100644 --- a/src/backend/distributed/utils/resource_lock.c +++ b/src/backend/distributed/utils/resource_lock.c @@ -316,18 +316,3 @@ LockRelationShardResources(List *relationShardList, LOCKMODE lockMode) } } } - - -/* - * LockMetadataSnapshot acquires a lock needed to serialize changes to pg_dist_node - * and all other metadata changes. Operations that modify pg_dist_node should acquire - * AccessExclusiveLock. All other metadata changes should acquire AccessShareLock. Any locks - * acquired using this method are released at transaction end. - */ -void -LockMetadataSnapshot(LOCKMODE lockMode) -{ - Assert(lockMode == AccessExclusiveLock || lockMode == AccessShareLock); - - (void) LockRelationOid(DistNodeRelationId(), lockMode); -} diff --git a/src/include/distributed/resource_lock.h b/src/include/distributed/resource_lock.h index 31dfff855..896d1fbb0 100644 --- a/src/include/distributed/resource_lock.h +++ b/src/include/distributed/resource_lock.h @@ -80,6 +80,5 @@ extern void LockShardListMetadata(List *shardIntervalList, LOCKMODE lockMode); extern void LockShardListResources(List *shardIntervalList, LOCKMODE lockMode); extern void LockRelationShardResources(List *relationShardList, LOCKMODE lockMode); -extern void LockMetadataSnapshot(LOCKMODE lockMode); #endif /* RESOURCE_LOCK_H */ diff --git a/src/test/regress/expected/isolation_add_remove_node.out b/src/test/regress/expected/isolation_add_remove_node.out new file mode 100644 index 000000000..e6d03b1a4 --- /dev/null +++ b/src/test/regress/expected/isolation_add_remove_node.out @@ -0,0 +1,634 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-begin s1-add-node-1 s2-remove-node-1 s1-commit s1-show-nodes +?column? + +1 +step s1-begin: + BEGIN; + +step s1-add-node-1: + SELECT 1 FROM master_add_node('localhost', 57637); + +?column? + +1 +step s2-remove-node-1: + SELECT * FROM master_remove_node('localhost', 57637); + +step s1-commit: + COMMIT; + +step s2-remove-node-1: <... completed> +master_remove_node + + +step s1-show-nodes: + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; + +nodename nodeport isactive + +master_remove_node + + +starting permutation: s1-begin s1-add-node-1 s2-add-node-2 s1-commit s1-show-nodes +?column? + +1 +step s1-begin: + BEGIN; + +step s1-add-node-1: + SELECT 1 FROM master_add_node('localhost', 57637); + +?column? + +1 +step s2-add-node-2: + SELECT 1 FROM master_add_node('localhost', 57638); + +step s1-commit: + COMMIT; + +step s2-add-node-2: <... completed> +?column? + +1 +step s1-show-nodes: + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; + +nodename nodeport isactive + +localhost 57637 t +localhost 57638 t +master_remove_node + + + + +starting permutation: s1-begin s1-add-node-1 s2-add-node-1 s1-commit s1-show-nodes +?column? + +1 +step s1-begin: + BEGIN; + +step s1-add-node-1: + SELECT 1 FROM master_add_node('localhost', 57637); + +?column? + +1 +step s2-add-node-1: + SELECT 1 FROM master_add_node('localhost', 57637); + +step s1-commit: + COMMIT; + +step s2-add-node-1: <... completed> +?column? + +1 +step s1-show-nodes: + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; + +nodename nodeport isactive + +localhost 57637 t +master_remove_node + + + +starting permutation: s1-begin s1-add-node-1 s2-add-node-2 s1-abort s1-show-nodes +?column? + +1 +step s1-begin: + BEGIN; + +step s1-add-node-1: + SELECT 1 FROM master_add_node('localhost', 57637); + +?column? + +1 +step s2-add-node-2: + SELECT 1 FROM master_add_node('localhost', 57638); + +step s1-abort: + ABORT; + +step s2-add-node-2: <... completed> +?column? + +1 +step s1-show-nodes: + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; + +nodename nodeport isactive + +localhost 57638 t +master_remove_node + + + +starting permutation: s1-begin s1-add-node-1 s2-add-node-1 s1-abort s1-show-nodes +?column? + +1 +step s1-begin: + BEGIN; + +step s1-add-node-1: + SELECT 1 FROM master_add_node('localhost', 57637); + +?column? + +1 +step s2-add-node-1: + SELECT 1 FROM master_add_node('localhost', 57637); + +step s1-abort: + ABORT; + +step s2-add-node-1: <... completed> +?column? + +1 +step s1-show-nodes: + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; + +nodename nodeport isactive + +localhost 57637 t +master_remove_node + + + +starting permutation: s1-add-node-1 s1-add-node-2 s1-begin s1-remove-node-1 s2-remove-node-2 s1-commit s1-show-nodes +?column? + +1 +step s1-add-node-1: + SELECT 1 FROM master_add_node('localhost', 57637); + +?column? + +1 +step s1-add-node-2: + SELECT 1 FROM master_add_node('localhost', 57638); + +?column? + +1 +step s1-begin: + BEGIN; + +step s1-remove-node-1: + SELECT * FROM master_remove_node('localhost', 57637); + +master_remove_node + + +step s2-remove-node-2: + SELECT * FROM master_remove_node('localhost', 57638); + +step s1-commit: + COMMIT; + +step s2-remove-node-2: <... completed> +master_remove_node + + +step s1-show-nodes: + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; + +nodename nodeport isactive + +master_remove_node + + +starting permutation: s1-add-node-1 s1-begin s1-remove-node-1 s2-remove-node-1 s1-commit s1-show-nodes +?column? + +1 +step s1-add-node-1: + SELECT 1 FROM master_add_node('localhost', 57637); + +?column? + +1 +step s1-begin: + BEGIN; + +step s1-remove-node-1: + SELECT * FROM master_remove_node('localhost', 57637); + +master_remove_node + + +step s2-remove-node-1: + SELECT * FROM master_remove_node('localhost', 57637); + +step s1-commit: + COMMIT; + +step s2-remove-node-1: <... completed> +error in steps s1-commit s2-remove-node-1: ERROR: node at "localhost:57637" does not exist +step s1-show-nodes: + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; + +nodename nodeport isactive + +master_remove_node + + +starting permutation: s1-add-node-1 s1-begin s1-activate-node-1 s2-activate-node-1 s1-commit s1-show-nodes +?column? + +1 +step s1-add-node-1: + SELECT 1 FROM master_add_node('localhost', 57637); + +?column? + +1 +step s1-begin: + BEGIN; + +step s1-activate-node-1: + SELECT 1 FROM master_activate_node('localhost', 57637); + +?column? + +1 +step s2-activate-node-1: + SELECT 1 FROM master_activate_node('localhost', 57637); + +step s1-commit: + COMMIT; + +step s2-activate-node-1: <... completed> +?column? + +1 +step s1-show-nodes: + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; + +nodename nodeport isactive + +localhost 57637 t +master_remove_node + + + +starting permutation: s1-add-node-1 s1-begin s1-disable-node-1 s2-disable-node-1 s1-commit s1-show-nodes +?column? + +1 +step s1-add-node-1: + SELECT 1 FROM master_add_node('localhost', 57637); + +?column? + +1 +step s1-begin: + BEGIN; + +step s1-disable-node-1: + SELECT 1 FROM master_disable_node('localhost', 57637); + +?column? + +1 +step s2-disable-node-1: + SELECT 1 FROM master_disable_node('localhost', 57637); + +step s1-commit: + COMMIT; + +step s2-disable-node-1: <... completed> +?column? + +1 +step s1-show-nodes: + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; + +nodename nodeport isactive + +localhost 57637 f +master_remove_node + + + +starting permutation: s1-add-inactive-1 s1-begin s1-activate-node-1 s2-activate-node-1 s1-commit s1-show-nodes +?column? + +1 +step s1-add-inactive-1: + SELECT 1 FROM master_add_inactive_node('localhost', 57637); + +?column? + +1 +step s1-begin: + BEGIN; + +step s1-activate-node-1: + SELECT 1 FROM master_activate_node('localhost', 57637); + +?column? + +1 +step s2-activate-node-1: + SELECT 1 FROM master_activate_node('localhost', 57637); + +step s1-commit: + COMMIT; + +step s2-activate-node-1: <... completed> +?column? + +1 +step s1-show-nodes: + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; + +nodename nodeport isactive + +localhost 57637 t +master_remove_node + + + +starting permutation: s1-add-inactive-1 s1-begin s1-disable-node-1 s2-disable-node-1 s1-commit s1-show-nodes +?column? + +1 +step s1-add-inactive-1: + SELECT 1 FROM master_add_inactive_node('localhost', 57637); + +?column? + +1 +step s1-begin: + BEGIN; + +step s1-disable-node-1: + SELECT 1 FROM master_disable_node('localhost', 57637); + +?column? + +1 +step s2-disable-node-1: + SELECT 1 FROM master_disable_node('localhost', 57637); + +step s1-commit: + COMMIT; + +step s2-disable-node-1: <... completed> +?column? + +1 +step s1-show-nodes: + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; + +nodename nodeport isactive + +localhost 57637 f +master_remove_node + + + +starting permutation: s1-add-node-1 s1-begin s1-disable-node-1 s2-activate-node-1 s1-commit s1-show-nodes +?column? + +1 +step s1-add-node-1: + SELECT 1 FROM master_add_node('localhost', 57637); + +?column? + +1 +step s1-begin: + BEGIN; + +step s1-disable-node-1: + SELECT 1 FROM master_disable_node('localhost', 57637); + +?column? + +1 +step s2-activate-node-1: + SELECT 1 FROM master_activate_node('localhost', 57637); + +step s1-commit: + COMMIT; + +step s2-activate-node-1: <... completed> +?column? + +1 +step s1-show-nodes: + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; + +nodename nodeport isactive + +localhost 57637 t +master_remove_node + + + +starting permutation: s1-add-node-1 s1-begin s1-activate-node-1 s2-disable-node-1 s1-commit s1-show-nodes +?column? + +1 +step s1-add-node-1: + SELECT 1 FROM master_add_node('localhost', 57637); + +?column? + +1 +step s1-begin: + BEGIN; + +step s1-activate-node-1: + SELECT 1 FROM master_activate_node('localhost', 57637); + +?column? + +1 +step s2-disable-node-1: + SELECT 1 FROM master_disable_node('localhost', 57637); + +step s1-commit: + COMMIT; + +step s2-disable-node-1: <... completed> +?column? + +1 +step s1-show-nodes: + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; + +nodename nodeport isactive + +localhost 57637 f +master_remove_node + + + +starting permutation: s1-add-inactive-1 s1-begin s1-disable-node-1 s2-activate-node-1 s1-commit s1-show-nodes +?column? + +1 +step s1-add-inactive-1: + SELECT 1 FROM master_add_inactive_node('localhost', 57637); + +?column? + +1 +step s1-begin: + BEGIN; + +step s1-disable-node-1: + SELECT 1 FROM master_disable_node('localhost', 57637); + +?column? + +1 +step s2-activate-node-1: + SELECT 1 FROM master_activate_node('localhost', 57637); + +step s1-commit: + COMMIT; + +step s2-activate-node-1: <... completed> +?column? + +1 +step s1-show-nodes: + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; + +nodename nodeport isactive + +localhost 57637 t +master_remove_node + + + +starting permutation: s1-add-inactive-1 s1-begin s1-activate-node-1 s2-disable-node-1 s1-commit s1-show-nodes +?column? + +1 +step s1-add-inactive-1: + SELECT 1 FROM master_add_inactive_node('localhost', 57637); + +?column? + +1 +step s1-begin: + BEGIN; + +step s1-activate-node-1: + SELECT 1 FROM master_activate_node('localhost', 57637); + +?column? + +1 +step s2-disable-node-1: + SELECT 1 FROM master_disable_node('localhost', 57637); + +step s1-commit: + COMMIT; + +step s2-disable-node-1: <... completed> +?column? + +1 +step s1-show-nodes: + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; + +nodename nodeport isactive + +localhost 57637 f +master_remove_node + + + +starting permutation: s1-add-inactive-1 s1-begin s1-activate-node-1 s2-disable-node-1 s1-abort s1-show-nodes +?column? + +1 +step s1-add-inactive-1: + SELECT 1 FROM master_add_inactive_node('localhost', 57637); + +?column? + +1 +step s1-begin: + BEGIN; + +step s1-activate-node-1: + SELECT 1 FROM master_activate_node('localhost', 57637); + +?column? + +1 +step s2-disable-node-1: + SELECT 1 FROM master_disable_node('localhost', 57637); + +step s1-abort: + ABORT; + +step s2-disable-node-1: <... completed> +?column? + +1 +step s1-show-nodes: + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; + +nodename nodeport isactive + +localhost 57637 f +master_remove_node + + + +starting permutation: s1-add-node-1 s1-begin s1-disable-node-1 s2-disable-node-1 s1-abort s1-show-nodes +?column? + +1 +step s1-add-node-1: + SELECT 1 FROM master_add_node('localhost', 57637); + +?column? + +1 +step s1-begin: + BEGIN; + +step s1-disable-node-1: + SELECT 1 FROM master_disable_node('localhost', 57637); + +?column? + +1 +step s2-disable-node-1: + SELECT 1 FROM master_disable_node('localhost', 57637); + +step s1-abort: + ABORT; + +step s2-disable-node-1: <... completed> +?column? + +1 +step s1-show-nodes: + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; + +nodename nodeport isactive + +localhost 57637 f +master_remove_node + + diff --git a/src/test/regress/expected/isolation_create_table_vs_add_remove_node.out b/src/test/regress/expected/isolation_create_table_vs_add_remove_node.out new file mode 100644 index 000000000..023e320c6 --- /dev/null +++ b/src/test/regress/expected/isolation_create_table_vs_add_remove_node.out @@ -0,0 +1,493 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-begin s1-add-node-2 s2-create-table-1 s1-commit s1-show-placements s2-select +node_name node_port + +localhost 57637 +step s1-begin: + BEGIN; + +step s1-add-node-2: + SELECT 1 FROM master_add_node('localhost', 57638); + +?column? + +1 +step s2-create-table-1: + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); + +step s1-commit: + COMMIT; + +step s2-create-table-1: <... completed> +create_distributed_table + + +step s1-show-placements: + SELECT + nodename, nodeport + FROM + pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) + WHERE + logicalrelid = 'dist_table'::regclass + ORDER BY + nodename, nodeport; + +nodename nodeport + +localhost 57637 +localhost 57637 +localhost 57638 +localhost 57638 +step s2-select: + SELECT * FROM dist_table; + +x y + +master_remove_node + + + + +starting permutation: s1-begin s1-add-node-2 s2-create-table-1 s1-abort s1-show-placements s2-select +node_name node_port + +localhost 57637 +step s1-begin: + BEGIN; + +step s1-add-node-2: + SELECT 1 FROM master_add_node('localhost', 57638); + +?column? + +1 +step s2-create-table-1: + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); + +step s1-abort: + ABORT; + +step s2-create-table-1: <... completed> +create_distributed_table + + +step s1-show-placements: + SELECT + nodename, nodeport + FROM + pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) + WHERE + logicalrelid = 'dist_table'::regclass + ORDER BY + nodename, nodeport; + +nodename nodeport + +localhost 57637 +localhost 57637 +localhost 57637 +localhost 57637 +step s2-select: + SELECT * FROM dist_table; + +x y + +master_remove_node + + + +starting permutation: s2-begin s2-create-table-1 s1-add-node-2 s2-commit s1-show-placements s2-select +node_name node_port + +localhost 57637 +step s2-begin: + BEGIN; + +step s2-create-table-1: + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); + +create_distributed_table + + +step s1-add-node-2: + SELECT 1 FROM master_add_node('localhost', 57638); + +step s2-commit: + COMMIT; + +step s1-add-node-2: <... completed> +?column? + +1 +step s1-show-placements: + SELECT + nodename, nodeport + FROM + pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) + WHERE + logicalrelid = 'dist_table'::regclass + ORDER BY + nodename, nodeport; + +nodename nodeport + +localhost 57637 +localhost 57637 +localhost 57637 +localhost 57637 +step s2-select: + SELECT * FROM dist_table; + +x y + +master_remove_node + + + + +starting permutation: s1-add-node-2 s1-begin s1-remove-node-2 s2-create-table-1 s1-commit s1-show-placements s2-select +node_name node_port + +localhost 57637 +step s1-add-node-2: + SELECT 1 FROM master_add_node('localhost', 57638); + +?column? + +1 +step s1-begin: + BEGIN; + +step s1-remove-node-2: + SELECT * FROM master_remove_node('localhost', 57638); + +master_remove_node + + +step s2-create-table-1: + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); + +step s1-commit: + COMMIT; + +step s2-create-table-1: <... completed> +create_distributed_table + + +step s1-show-placements: + SELECT + nodename, nodeport + FROM + pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) + WHERE + logicalrelid = 'dist_table'::regclass + ORDER BY + nodename, nodeport; + +nodename nodeport + +localhost 57637 +localhost 57637 +localhost 57637 +localhost 57637 +step s2-select: + SELECT * FROM dist_table; + +x y + +master_remove_node + + + +starting permutation: s1-add-node-2 s1-begin s1-remove-node-2 s2-create-table-1 s1-abort s1-show-placements s2-select +node_name node_port + +localhost 57637 +step s1-add-node-2: + SELECT 1 FROM master_add_node('localhost', 57638); + +?column? + +1 +step s1-begin: + BEGIN; + +step s1-remove-node-2: + SELECT * FROM master_remove_node('localhost', 57638); + +master_remove_node + + +step s2-create-table-1: + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); + +step s1-abort: + ABORT; + +step s2-create-table-1: <... completed> +create_distributed_table + + +step s1-show-placements: + SELECT + nodename, nodeport + FROM + pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) + WHERE + logicalrelid = 'dist_table'::regclass + ORDER BY + nodename, nodeport; + +nodename nodeport + +localhost 57637 +localhost 57637 +localhost 57638 +localhost 57638 +step s2-select: + SELECT * FROM dist_table; + +x y + +master_remove_node + + + + +starting permutation: s1-add-node-2 s2-begin s2-create-table-1 s1-remove-node-2 s2-commit s1-show-placements s2-select +node_name node_port + +localhost 57637 +step s1-add-node-2: + SELECT 1 FROM master_add_node('localhost', 57638); + +?column? + +1 +step s2-begin: + BEGIN; + +step s2-create-table-1: + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); + +create_distributed_table + + +step s1-remove-node-2: + SELECT * FROM master_remove_node('localhost', 57638); + +step s2-commit: + COMMIT; + +step s1-remove-node-2: <... completed> +error in steps s2-commit s1-remove-node-2: ERROR: you cannot remove the primary node of a node group which has shard placements +step s1-show-placements: + SELECT + nodename, nodeport + FROM + pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) + WHERE + logicalrelid = 'dist_table'::regclass + ORDER BY + nodename, nodeport; + +nodename nodeport + +localhost 57637 +localhost 57637 +localhost 57638 +localhost 57638 +step s2-select: + SELECT * FROM dist_table; + +x y + +master_remove_node + + + + +starting permutation: s1-add-node-2 s1-begin s1-remove-node-2 s2-create-table-2 s1-commit s2-select +node_name node_port + +localhost 57637 +step s1-add-node-2: + SELECT 1 FROM master_add_node('localhost', 57638); + +?column? + +1 +step s1-begin: + BEGIN; + +step s1-remove-node-2: + SELECT * FROM master_remove_node('localhost', 57638); + +master_remove_node + + +step s2-create-table-2: + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 2; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); + +step s1-commit: + COMMIT; + +step s2-create-table-2: <... completed> +error in steps s1-commit s2-create-table-2: ERROR: replication_factor (2) exceeds number of worker nodes (1) +step s2-select: + SELECT * FROM dist_table; + +ERROR: relation "dist_table" does not exist +master_remove_node + + + +starting permutation: s1-add-node-2 s2-begin s2-create-table-2 s1-remove-node-2 s2-commit s2-select +node_name node_port + +localhost 57637 +step s1-add-node-2: + SELECT 1 FROM master_add_node('localhost', 57638); + +?column? + +1 +step s2-begin: + BEGIN; + +step s2-create-table-2: + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 2; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); + +create_distributed_table + + +step s1-remove-node-2: + SELECT * FROM master_remove_node('localhost', 57638); + +step s2-commit: + COMMIT; + +step s1-remove-node-2: <... completed> +error in steps s2-commit s1-remove-node-2: ERROR: you cannot remove the primary node of a node group which has shard placements +step s2-select: + SELECT * FROM dist_table; + +x y + +master_remove_node + + + + +starting permutation: s1-add-node-2 s1-begin s1-remove-node-2 s2-create-append-table s1-commit s2-select +node_name node_port + +localhost 57637 +step s1-add-node-2: + SELECT 1 FROM master_add_node('localhost', 57638); + +?column? + +1 +step s1-begin: + BEGIN; + +step s1-remove-node-2: + SELECT * FROM master_remove_node('localhost', 57638); + +master_remove_node + + +step s2-create-append-table: + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x', 'append'); + SELECT 1 FROM master_create_empty_shard('dist_table'); + +step s1-commit: + COMMIT; + +step s2-create-append-table: <... completed> +create_distributed_table + + +?column? + +1 +step s2-select: + SELECT * FROM dist_table; + +x y + +master_remove_node + + + +starting permutation: s1-add-node-2 s2-begin s2-create-append-table s1-remove-node-2 s2-commit s2-select +node_name node_port + +localhost 57637 +step s1-add-node-2: + SELECT 1 FROM master_add_node('localhost', 57638); + +?column? + +1 +step s2-begin: + BEGIN; + +step s2-create-append-table: + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x', 'append'); + SELECT 1 FROM master_create_empty_shard('dist_table'); + +create_distributed_table + + +?column? + +1 +step s1-remove-node-2: + SELECT * FROM master_remove_node('localhost', 57638); + +step s2-commit: + COMMIT; + +step s1-remove-node-2: <... completed> +master_remove_node + + +step s2-select: + SELECT * FROM dist_table; + +x y + +master_remove_node + + diff --git a/src/test/regress/expected/isolation_distributed_transaction_id.out b/src/test/regress/expected/isolation_distributed_transaction_id.out index 823b9b049..41806ad2e 100644 --- a/src/test/regress/expected/isolation_distributed_transaction_id.out +++ b/src/test/regress/expected/isolation_distributed_transaction_id.out @@ -98,7 +98,7 @@ step s1-get-current-transaction-id: row -(0,189) +(0,290) step s2-get-first-worker-active-transactions: SELECT * FROM run_command_on_workers('SELECT row(initiator_node_identifier, transaction_number) FROM @@ -109,4 +109,4 @@ step s2-get-first-worker-active-transactions: nodename nodeport success result -localhost 57637 t (0,189) +localhost 57637 t (0,290) diff --git a/src/test/regress/expected/isolation_dump_global_wait_edges.out b/src/test/regress/expected/isolation_dump_global_wait_edges.out index 16b50ba93..6365b7719 100644 --- a/src/test/regress/expected/isolation_dump_global_wait_edges.out +++ b/src/test/regress/expected/isolation_dump_global_wait_edges.out @@ -29,11 +29,11 @@ step detector-dump-wait-edges: waiting_transaction_numblocking_transaction_numblocking_transaction_waiting -192 191 f +293 292 f transactionnumberwaitingtransactionnumbers -191 -192 191 +292 +293 292 step s1-abort: ABORT; @@ -77,14 +77,14 @@ step detector-dump-wait-edges: waiting_transaction_numblocking_transaction_numblocking_transaction_waiting -196 195 f -197 195 f -197 196 t +297 296 f +298 296 f +298 297 t transactionnumberwaitingtransactionnumbers -195 -196 195 -197 195,196 +296 +297 296 +298 296,297 step s1-abort: ABORT; diff --git a/src/test/regress/expected/isolation_replace_wait_function.out b/src/test/regress/expected/isolation_replace_wait_function.out index 265a66afb..2b0cbd87c 100644 --- a/src/test/regress/expected/isolation_replace_wait_function.out +++ b/src/test/regress/expected/isolation_replace_wait_function.out @@ -16,7 +16,7 @@ step s1-finish: COMMIT; step s2-insert: <... completed> -error in steps s1-finish s2-insert: ERROR: duplicate key value violates unique constraint "test_locking_a_key_102751" +error in steps s1-finish s2-insert: ERROR: duplicate key value violates unique constraint "test_locking_a_key_102781" step s2-finish: COMMIT; diff --git a/src/test/regress/isolation_schedule b/src/test/regress/isolation_schedule index f7a665fd7..135fbad55 100644 --- a/src/test/regress/isolation_schedule +++ b/src/test/regress/isolation_schedule @@ -1,4 +1,6 @@ +test: isolation_add_remove_node test: isolation_add_node_vs_reference_table_operations +test: isolation_create_table_vs_add_remove_node # tests that change node metadata should precede # isolation_cluster_management such that tests diff --git a/src/test/regress/specs/isolation_add_remove_node.spec b/src/test/regress/specs/isolation_add_remove_node.spec new file mode 100644 index 000000000..348553a08 --- /dev/null +++ b/src/test/regress/specs/isolation_add_remove_node.spec @@ -0,0 +1,149 @@ +setup +{ + SELECT 1; +} + +teardown +{ + SELECT master_remove_node(nodename, nodeport) FROM pg_dist_node; +} + +session "s1" + +step "s1-begin" +{ + BEGIN; +} + +step "s1-add-node-1" +{ + SELECT 1 FROM master_add_node('localhost', 57637); +} + +step "s1-add-node-2" +{ + SELECT 1 FROM master_add_node('localhost', 57638); +} + +step "s1-add-inactive-1" +{ + SELECT 1 FROM master_add_inactive_node('localhost', 57637); +} + +step "s1-activate-node-1" +{ + SELECT 1 FROM master_activate_node('localhost', 57637); +} + +step "s1-disable-node-1" +{ + SELECT 1 FROM master_disable_node('localhost', 57637); +} + +step "s1-remove-node-1" +{ + SELECT * FROM master_remove_node('localhost', 57637); +} + +step "s1-remove-node-2" +{ + SELECT * FROM master_remove_node('localhost', 57638); +} + +step "s1-abort" +{ + ABORT; +} + +step "s1-commit" +{ + COMMIT; +} + +step "s1-show-nodes" +{ + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; +} + +session "s2" + +step "s2-begin" +{ + BEGIN; +} + +step "s2-add-node-1" +{ + SELECT 1 FROM master_add_node('localhost', 57637); +} + +step "s2-add-node-2" +{ + SELECT 1 FROM master_add_node('localhost', 57638); +} + +step "s2-activate-node-1" +{ + SELECT 1 FROM master_activate_node('localhost', 57637); +} + +step "s2-disable-node-1" +{ + SELECT 1 FROM master_disable_node('localhost', 57637); +} + +step "s2-remove-node-1" +{ + SELECT * FROM master_remove_node('localhost', 57637); +} + +step "s2-remove-node-2" +{ + SELECT * FROM master_remove_node('localhost', 57638); +} + +step "s2-commit" +{ + COMMIT; +} + +# session 1 adds a node, session 2 removes it, should be ok +permutation "s1-begin" "s1-add-node-1" "s2-remove-node-1" "s1-commit" "s1-show-nodes" +# add a different node from 2 sessions, should be ok +permutation "s1-begin" "s1-add-node-1" "s2-add-node-2" "s1-commit" "s1-show-nodes" +# add the same node from 2 sessions, should be ok (idempotent) +permutation "s1-begin" "s1-add-node-1" "s2-add-node-1" "s1-commit" "s1-show-nodes" +# add a different node from 2 sessions, one aborts +permutation "s1-begin" "s1-add-node-1" "s2-add-node-2" "s1-abort" "s1-show-nodes" +# add the same node from 2 sessions, one aborts +permutation "s1-begin" "s1-add-node-1" "s2-add-node-1" "s1-abort" "s1-show-nodes" + +# remove a different node from 2 transactions, should be ok +permutation "s1-add-node-1" "s1-add-node-2" "s1-begin" "s1-remove-node-1" "s2-remove-node-2" "s1-commit" "s1-show-nodes" +# remove the same node from 2 transactions, should be ok (idempotent) +permutation "s1-add-node-1" "s1-begin" "s1-remove-node-1" "s2-remove-node-1" "s1-commit" "s1-show-nodes" + +# activate an active node from 2 transactions, should be ok +permutation "s1-add-node-1" "s1-begin" "s1-activate-node-1" "s2-activate-node-1" "s1-commit" "s1-show-nodes" +# disable an active node from 2 transactions, should be ok +permutation "s1-add-node-1" "s1-begin" "s1-disable-node-1" "s2-disable-node-1" "s1-commit" "s1-show-nodes" + +# activate an inactive node from 2 transactions, should be ok +permutation "s1-add-inactive-1" "s1-begin" "s1-activate-node-1" "s2-activate-node-1" "s1-commit" "s1-show-nodes" +# disable an inactive node from 2 transactions, should be ok +permutation "s1-add-inactive-1" "s1-begin" "s1-disable-node-1" "s2-disable-node-1" "s1-commit" "s1-show-nodes" + +# disable and activate an active node from 2 transactions, should be ok +permutation "s1-add-node-1" "s1-begin" "s1-disable-node-1" "s2-activate-node-1" "s1-commit" "s1-show-nodes" +# activate and disable an active node node from 2 transactions, should be ok +permutation "s1-add-node-1" "s1-begin" "s1-activate-node-1" "s2-disable-node-1" "s1-commit" "s1-show-nodes" + +# disable and activate an inactive node from 2 transactions, should be ok +permutation "s1-add-inactive-1" "s1-begin" "s1-disable-node-1" "s2-activate-node-1" "s1-commit" "s1-show-nodes" +# activate and disable an inactive node node from 2 transactions, should be ok +permutation "s1-add-inactive-1" "s1-begin" "s1-activate-node-1" "s2-disable-node-1" "s1-commit" "s1-show-nodes" + +# activate and disable an inactive node from 2 transactions, one aborts +permutation "s1-add-inactive-1" "s1-begin" "s1-activate-node-1" "s2-disable-node-1" "s1-abort" "s1-show-nodes" +# disable an active node from 2 transactions, one aborts +permutation "s1-add-node-1" "s1-begin" "s1-disable-node-1" "s2-disable-node-1" "s1-abort" "s1-show-nodes" diff --git a/src/test/regress/specs/isolation_create_table_vs_add_remove_node.spec b/src/test/regress/specs/isolation_create_table_vs_add_remove_node.spec new file mode 100644 index 000000000..4986beee3 --- /dev/null +++ b/src/test/regress/specs/isolation_create_table_vs_add_remove_node.spec @@ -0,0 +1,120 @@ +setup +{ + SELECT 1 FROM master_add_node('localhost', 57637); + SELECT * FROM master_get_active_worker_nodes() ORDER BY node_name, node_port; +} + +teardown +{ + DROP TABLE IF EXISTS dist_table; + + SELECT master_remove_node(nodename, nodeport) FROM pg_dist_node; +} + +session "s1" + +step "s1-begin" +{ + BEGIN; +} + +step "s1-add-node-2" +{ + SELECT 1 FROM master_add_node('localhost', 57638); +} + +step "s1-remove-node-2" +{ + SELECT * FROM master_remove_node('localhost', 57638); +} + +step "s1-abort" +{ + ABORT; +} + +step "s1-commit" +{ + COMMIT; +} + +step "s1-query-table" +{ + SELECT * FROM dist_table; +} + +step "s1-show-nodes" +{ + SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport; +} + +step "s1-show-placements" +{ + SELECT + nodename, nodeport + FROM + pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) + WHERE + logicalrelid = 'dist_table'::regclass + ORDER BY + nodename, nodeport; +} + +session "s2" + +step "s2-begin" +{ + BEGIN; +} + +step "s2-create-table-1" +{ + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); +} + +step "s2-create-table-2" +{ + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 2; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); +} + +step "s2-create-append-table" +{ + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x', 'append'); + SELECT 1 FROM master_create_empty_shard('dist_table'); +} + +step "s2-select" +{ + SELECT * FROM dist_table; +} + +step "s2-commit" +{ + COMMIT; +} + +# session 1 adds a node, session 2 creates a distributed table +permutation "s1-begin" "s1-add-node-2" "s2-create-table-1" "s1-commit" "s1-show-placements" "s2-select" +permutation "s1-begin" "s1-add-node-2" "s2-create-table-1" "s1-abort" "s1-show-placements" "s2-select" +permutation "s2-begin" "s2-create-table-1" "s1-add-node-2" "s2-commit" "s1-show-placements" "s2-select" + +# session 1 removes a node, session 2 creates a distributed table +permutation "s1-add-node-2" "s1-begin" "s1-remove-node-2" "s2-create-table-1" "s1-commit" "s1-show-placements" "s2-select" +permutation "s1-add-node-2" "s1-begin" "s1-remove-node-2" "s2-create-table-1" "s1-abort" "s1-show-placements" "s2-select" +permutation "s1-add-node-2" "s2-begin" "s2-create-table-1" "s1-remove-node-2" "s2-commit" "s1-show-placements" "s2-select" + +# session 1 removes a node, session 2 creates a distributed table with replication factor 2, should throw a sane error +permutation "s1-add-node-2" "s1-begin" "s1-remove-node-2" "s2-create-table-2" "s1-commit" "s2-select" +permutation "s1-add-node-2" "s2-begin" "s2-create-table-2" "s1-remove-node-2" "s2-commit" "s2-select" + +# session 1 removes a node, session 2 creates a shard in an append-distributed table +permutation "s1-add-node-2" "s1-begin" "s1-remove-node-2" "s2-create-append-table" "s1-commit" "s2-select" +permutation "s1-add-node-2" "s2-begin" "s2-create-append-table" "s1-remove-node-2" "s2-commit" "s2-select"