Merge pull request #1407 from citusdata/fix_node_locking

Fix pg_dist_node locking
pull/1542/head
Marco Slot 2017-08-09 16:32:28 +04:00 committed by GitHub
commit 3f338a3fc6
14 changed files with 1452 additions and 61 deletions

View File

@ -162,6 +162,9 @@ CreateShardsWithRoundRobinPolicy(Oid distributedTableId, int32 shardCount,
/* calculate the split of the hash space */
hashTokenIncrement = HASH_TOKEN_COUNT / shardCount;
/* don't allow concurrent node list changes that require an exclusive lock */
LockRelationOid(DistNodeRelationId(), RowShareLock);
/* load and sort the worker node list for deterministic placement */
workerNodeList = ActivePrimaryNodeList();
workerNodeList = SortList(workerNodeList, CompareWorkerNodes);

View File

@ -44,6 +44,7 @@
#include "distributed/transaction_management.h"
#include "distributed/worker_manager.h"
#include "distributed/worker_protocol.h"
#include "storage/lmgr.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/inval.h"
@ -94,6 +95,12 @@ master_create_empty_shard(PG_FUNCTION_ARGS)
EnsureTablePermissions(relationId, ACL_INSERT);
CheckDistributedTable(relationId);
/* don't allow the table to be dropped */
LockRelationOid(relationId, AccessShareLock);
/* don't allow concurrent node list changes that require an exclusive lock */
LockRelationOid(DistNodeRelationId(), RowShareLock);
/*
* We check whether the table is a foreign table or not. If it is, we set
* storage type as foreign also. Only exception is if foreign table is a

View File

@ -219,6 +219,9 @@ master_disable_node(PG_FUNCTION_ARGS)
CheckCitusVersion(ERROR);
/* take an exclusive lock on pg_dist_node to serialize pg_dist_node changes */
LockRelationOid(DistNodeRelationId(), ExclusiveLock);
workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort);
if (workerNode == NULL)
{
@ -351,28 +354,12 @@ PrimaryNodeForGroup(uint32 groupId, bool *groupContainsNodes)
static Datum
ActivateNode(char *nodeName, int nodePort)
{
Relation pgDistNode = heap_open(DistNodeRelationId(), RowExclusiveLock);
HeapTuple heapTuple = GetNodeTuple(nodeName, nodePort);
CommandId commandId = GetCurrentCommandId(true);
LockTupleMode lockTupleMode = LockTupleExclusive;
LockWaitPolicy lockWaitPolicy = LockWaitError;
bool followUpdates = false;
Buffer buffer = 0;
HeapUpdateFailureData heapUpdateFailureData;
WorkerNode *workerNode = NULL;
bool isActive = true;
Datum nodeRecord = 0;
if (heapTuple == NULL)
{
ereport(ERROR, (errmsg("could not find valid entry for node \"%s:%d\"",
nodeName, nodePort)));
}
heap_lock_tuple(pgDistNode, heapTuple, commandId, lockTupleMode, lockWaitPolicy,
followUpdates, &buffer, &heapUpdateFailureData);
ReleaseBuffer(buffer);
/* take an exclusive lock on pg_dist_node to serialize pg_dist_node changes */
LockRelationOid(DistNodeRelationId(), ExclusiveLock);
SetNodeState(nodeName, nodePort, isActive);
@ -385,8 +372,6 @@ ActivateNode(char *nodeName, int nodePort)
nodeRecord = GenerateNodeTuple(workerNode);
heap_close(pgDistNode, NoLock);
return nodeRecord;
}
@ -400,7 +385,7 @@ Datum
master_initialize_node_metadata(PG_FUNCTION_ARGS)
{
ListCell *workerNodeCell = NULL;
List *workerNodes = NULL;
List *workerNodes = NIL;
bool nodeAlreadyExists = false;
/* nodeRole and nodeCluster don't exist when this function is caled */
@ -409,7 +394,15 @@ master_initialize_node_metadata(PG_FUNCTION_ARGS)
CheckCitusVersion(ERROR);
/*
* This function should only ever be called from the create extension
* script, but just to be sure, take an exclusive lock on pg_dist_node
* to prevent concurrent calls.
*/
LockRelationOid(DistNodeRelationId(), ExclusiveLock);
workerNodes = ParseWorkerNodeFileAndRename();
foreach(workerNodeCell, workerNodes)
{
WorkerNode *workerNode = (WorkerNode *) lfirst(workerNodeCell);
@ -629,6 +622,9 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort)
EnsureCoordinator();
EnsureSuperUser();
/* take an exclusive lock on pg_dist_node to serialize pg_dist_node changes */
LockRelationOid(DistNodeRelationId(), ExclusiveLock);
workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort);
if (workerNode == NULL)
{
@ -734,17 +730,19 @@ AddNodeMetadata(char *nodeName, int32 nodePort, int32 groupId, char *nodeRack,
*nodeAlreadyExists = false;
/*
* Acquire a lock so that no one can do this concurrently. Specifically,
* pg_dist_node_trigger_func also takes out a ShareRowExclusiveLock. This lets us
* ensure there is only one primary per node group.
* Take an exclusive lock on pg_dist_node to serialize node changes.
* We may want to relax or have more fine-grained locking in the future
* to allow users to add multiple nodes concurrently.
*/
LockRelationOid(DistNodeRelationId(), ShareRowExclusiveLock);
LockRelationOid(DistNodeRelationId(), ExclusiveLock);
workerNode = FindWorkerNodeAnyCluster(nodeName, nodePort);
if (workerNode != NULL)
{
*nodeAlreadyExists = true;
/* fill return data and return */
returnData = GenerateNodeTuple(workerNode);
*nodeAlreadyExists = true;
return returnData;
}
@ -923,7 +921,6 @@ GenerateNodeTuple(WorkerNode *workerNode)
values[Anum_pg_dist_node_noderole - 1] = ObjectIdGetDatum(workerNode->nodeRole);
values[Anum_pg_dist_node_nodecluster - 1] = nodeClusterNameDatum;
/* open shard relation and generate new tuple */
pgDistNode = heap_open(DistNodeRelationId(), AccessShareLock);
/* generate the tuple */
@ -931,7 +928,6 @@ GenerateNodeTuple(WorkerNode *workerNode)
heapTuple = heap_form_tuple(tupleDescriptor, values, isNulls);
nodeDatum = HeapTupleGetDatum(heapTuple);
/* close the relation */
heap_close(pgDistNode, NoLock);
return nodeDatum;
@ -1088,7 +1084,6 @@ InsertNodeRow(int nodeid, char *nodeName, int32 nodePort, uint32 groupId, char *
values[Anum_pg_dist_node_noderole - 1] = ObjectIdGetDatum(nodeRole);
values[Anum_pg_dist_node_nodecluster - 1] = nodeClusterNameDatum;
/* open shard relation and insert new tuple */
pgDistNode = heap_open(DistNodeRelationId(), RowExclusiveLock);
tupleDescriptor = RelationGetDescr(pgDistNode);
@ -1096,13 +1091,13 @@ InsertNodeRow(int nodeid, char *nodeName, int32 nodePort, uint32 groupId, char *
CatalogTupleInsert(pgDistNode, heapTuple);
/* close relation and invalidate previous cache entry */
heap_close(pgDistNode, NoLock);
CitusInvalidateRelcacheByRelid(DistNodeRelationId());
/* increment the counter so that next command can see the row */
CommandCounterIncrement();
/* close relation */
heap_close(pgDistNode, NoLock);
}
@ -1140,13 +1135,14 @@ DeleteNodeRow(char *nodeName, int32 nodePort)
simple_heap_delete(pgDistNode, &(heapTuple->t_self));
systable_endscan(heapScan);
heap_close(pgDistNode, NoLock);
/* ensure future commands don't use the node we just removed */
CitusInvalidateRelcacheByRelid(DistNodeRelationId());
/* increment the counter so that next command won't see the row */
CommandCounterIncrement();
heap_close(pgDistNode, NoLock);
}

View File

@ -230,6 +230,9 @@ ReplicateShardToAllWorkers(ShardInterval *shardInterval)
List *workerNodeList = NULL;
ListCell *workerNodeCell = NULL;
/* prevent concurrent pg_dist_node changes */
LockRelationOid(DistNodeRelationId(), RowShareLock);
workerNodeList = ActivePrimaryNodeList();
/*

View File

@ -316,18 +316,3 @@ LockRelationShardResources(List *relationShardList, LOCKMODE lockMode)
}
}
}
/*
* LockMetadataSnapshot acquires a lock needed to serialize changes to pg_dist_node
* and all other metadata changes. Operations that modify pg_dist_node should acquire
* AccessExclusiveLock. All other metadata changes should acquire AccessShareLock. Any locks
* acquired using this method are released at transaction end.
*/
void
LockMetadataSnapshot(LOCKMODE lockMode)
{
Assert(lockMode == AccessExclusiveLock || lockMode == AccessShareLock);
(void) LockRelationOid(DistNodeRelationId(), lockMode);
}

View File

@ -80,6 +80,5 @@ extern void LockShardListMetadata(List *shardIntervalList, LOCKMODE lockMode);
extern void LockShardListResources(List *shardIntervalList, LOCKMODE lockMode);
extern void LockRelationShardResources(List *relationShardList, LOCKMODE lockMode);
extern void LockMetadataSnapshot(LOCKMODE lockMode);
#endif /* RESOURCE_LOCK_H */

View File

@ -0,0 +1,634 @@
Parsed test spec with 2 sessions
starting permutation: s1-begin s1-add-node-1 s2-remove-node-1 s1-commit s1-show-nodes
?column?
1
step s1-begin:
BEGIN;
step s1-add-node-1:
SELECT 1 FROM master_add_node('localhost', 57637);
?column?
1
step s2-remove-node-1:
SELECT * FROM master_remove_node('localhost', 57637);
<waiting ...>
step s1-commit:
COMMIT;
step s2-remove-node-1: <... completed>
master_remove_node
step s1-show-nodes:
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
nodename nodeport isactive
master_remove_node
starting permutation: s1-begin s1-add-node-1 s2-add-node-2 s1-commit s1-show-nodes
?column?
1
step s1-begin:
BEGIN;
step s1-add-node-1:
SELECT 1 FROM master_add_node('localhost', 57637);
?column?
1
step s2-add-node-2:
SELECT 1 FROM master_add_node('localhost', 57638);
<waiting ...>
step s1-commit:
COMMIT;
step s2-add-node-2: <... completed>
?column?
1
step s1-show-nodes:
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
nodename nodeport isactive
localhost 57637 t
localhost 57638 t
master_remove_node
starting permutation: s1-begin s1-add-node-1 s2-add-node-1 s1-commit s1-show-nodes
?column?
1
step s1-begin:
BEGIN;
step s1-add-node-1:
SELECT 1 FROM master_add_node('localhost', 57637);
?column?
1
step s2-add-node-1:
SELECT 1 FROM master_add_node('localhost', 57637);
<waiting ...>
step s1-commit:
COMMIT;
step s2-add-node-1: <... completed>
?column?
1
step s1-show-nodes:
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
nodename nodeport isactive
localhost 57637 t
master_remove_node
starting permutation: s1-begin s1-add-node-1 s2-add-node-2 s1-abort s1-show-nodes
?column?
1
step s1-begin:
BEGIN;
step s1-add-node-1:
SELECT 1 FROM master_add_node('localhost', 57637);
?column?
1
step s2-add-node-2:
SELECT 1 FROM master_add_node('localhost', 57638);
<waiting ...>
step s1-abort:
ABORT;
step s2-add-node-2: <... completed>
?column?
1
step s1-show-nodes:
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
nodename nodeport isactive
localhost 57638 t
master_remove_node
starting permutation: s1-begin s1-add-node-1 s2-add-node-1 s1-abort s1-show-nodes
?column?
1
step s1-begin:
BEGIN;
step s1-add-node-1:
SELECT 1 FROM master_add_node('localhost', 57637);
?column?
1
step s2-add-node-1:
SELECT 1 FROM master_add_node('localhost', 57637);
<waiting ...>
step s1-abort:
ABORT;
step s2-add-node-1: <... completed>
?column?
1
step s1-show-nodes:
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
nodename nodeport isactive
localhost 57637 t
master_remove_node
starting permutation: s1-add-node-1 s1-add-node-2 s1-begin s1-remove-node-1 s2-remove-node-2 s1-commit s1-show-nodes
?column?
1
step s1-add-node-1:
SELECT 1 FROM master_add_node('localhost', 57637);
?column?
1
step s1-add-node-2:
SELECT 1 FROM master_add_node('localhost', 57638);
?column?
1
step s1-begin:
BEGIN;
step s1-remove-node-1:
SELECT * FROM master_remove_node('localhost', 57637);
master_remove_node
step s2-remove-node-2:
SELECT * FROM master_remove_node('localhost', 57638);
<waiting ...>
step s1-commit:
COMMIT;
step s2-remove-node-2: <... completed>
master_remove_node
step s1-show-nodes:
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
nodename nodeport isactive
master_remove_node
starting permutation: s1-add-node-1 s1-begin s1-remove-node-1 s2-remove-node-1 s1-commit s1-show-nodes
?column?
1
step s1-add-node-1:
SELECT 1 FROM master_add_node('localhost', 57637);
?column?
1
step s1-begin:
BEGIN;
step s1-remove-node-1:
SELECT * FROM master_remove_node('localhost', 57637);
master_remove_node
step s2-remove-node-1:
SELECT * FROM master_remove_node('localhost', 57637);
<waiting ...>
step s1-commit:
COMMIT;
step s2-remove-node-1: <... completed>
error in steps s1-commit s2-remove-node-1: ERROR: node at "localhost:57637" does not exist
step s1-show-nodes:
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
nodename nodeport isactive
master_remove_node
starting permutation: s1-add-node-1 s1-begin s1-activate-node-1 s2-activate-node-1 s1-commit s1-show-nodes
?column?
1
step s1-add-node-1:
SELECT 1 FROM master_add_node('localhost', 57637);
?column?
1
step s1-begin:
BEGIN;
step s1-activate-node-1:
SELECT 1 FROM master_activate_node('localhost', 57637);
?column?
1
step s2-activate-node-1:
SELECT 1 FROM master_activate_node('localhost', 57637);
<waiting ...>
step s1-commit:
COMMIT;
step s2-activate-node-1: <... completed>
?column?
1
step s1-show-nodes:
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
nodename nodeport isactive
localhost 57637 t
master_remove_node
starting permutation: s1-add-node-1 s1-begin s1-disable-node-1 s2-disable-node-1 s1-commit s1-show-nodes
?column?
1
step s1-add-node-1:
SELECT 1 FROM master_add_node('localhost', 57637);
?column?
1
step s1-begin:
BEGIN;
step s1-disable-node-1:
SELECT 1 FROM master_disable_node('localhost', 57637);
?column?
1
step s2-disable-node-1:
SELECT 1 FROM master_disable_node('localhost', 57637);
<waiting ...>
step s1-commit:
COMMIT;
step s2-disable-node-1: <... completed>
?column?
1
step s1-show-nodes:
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
nodename nodeport isactive
localhost 57637 f
master_remove_node
starting permutation: s1-add-inactive-1 s1-begin s1-activate-node-1 s2-activate-node-1 s1-commit s1-show-nodes
?column?
1
step s1-add-inactive-1:
SELECT 1 FROM master_add_inactive_node('localhost', 57637);
?column?
1
step s1-begin:
BEGIN;
step s1-activate-node-1:
SELECT 1 FROM master_activate_node('localhost', 57637);
?column?
1
step s2-activate-node-1:
SELECT 1 FROM master_activate_node('localhost', 57637);
<waiting ...>
step s1-commit:
COMMIT;
step s2-activate-node-1: <... completed>
?column?
1
step s1-show-nodes:
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
nodename nodeport isactive
localhost 57637 t
master_remove_node
starting permutation: s1-add-inactive-1 s1-begin s1-disable-node-1 s2-disable-node-1 s1-commit s1-show-nodes
?column?
1
step s1-add-inactive-1:
SELECT 1 FROM master_add_inactive_node('localhost', 57637);
?column?
1
step s1-begin:
BEGIN;
step s1-disable-node-1:
SELECT 1 FROM master_disable_node('localhost', 57637);
?column?
1
step s2-disable-node-1:
SELECT 1 FROM master_disable_node('localhost', 57637);
<waiting ...>
step s1-commit:
COMMIT;
step s2-disable-node-1: <... completed>
?column?
1
step s1-show-nodes:
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
nodename nodeport isactive
localhost 57637 f
master_remove_node
starting permutation: s1-add-node-1 s1-begin s1-disable-node-1 s2-activate-node-1 s1-commit s1-show-nodes
?column?
1
step s1-add-node-1:
SELECT 1 FROM master_add_node('localhost', 57637);
?column?
1
step s1-begin:
BEGIN;
step s1-disable-node-1:
SELECT 1 FROM master_disable_node('localhost', 57637);
?column?
1
step s2-activate-node-1:
SELECT 1 FROM master_activate_node('localhost', 57637);
<waiting ...>
step s1-commit:
COMMIT;
step s2-activate-node-1: <... completed>
?column?
1
step s1-show-nodes:
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
nodename nodeport isactive
localhost 57637 t
master_remove_node
starting permutation: s1-add-node-1 s1-begin s1-activate-node-1 s2-disable-node-1 s1-commit s1-show-nodes
?column?
1
step s1-add-node-1:
SELECT 1 FROM master_add_node('localhost', 57637);
?column?
1
step s1-begin:
BEGIN;
step s1-activate-node-1:
SELECT 1 FROM master_activate_node('localhost', 57637);
?column?
1
step s2-disable-node-1:
SELECT 1 FROM master_disable_node('localhost', 57637);
<waiting ...>
step s1-commit:
COMMIT;
step s2-disable-node-1: <... completed>
?column?
1
step s1-show-nodes:
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
nodename nodeport isactive
localhost 57637 f
master_remove_node
starting permutation: s1-add-inactive-1 s1-begin s1-disable-node-1 s2-activate-node-1 s1-commit s1-show-nodes
?column?
1
step s1-add-inactive-1:
SELECT 1 FROM master_add_inactive_node('localhost', 57637);
?column?
1
step s1-begin:
BEGIN;
step s1-disable-node-1:
SELECT 1 FROM master_disable_node('localhost', 57637);
?column?
1
step s2-activate-node-1:
SELECT 1 FROM master_activate_node('localhost', 57637);
<waiting ...>
step s1-commit:
COMMIT;
step s2-activate-node-1: <... completed>
?column?
1
step s1-show-nodes:
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
nodename nodeport isactive
localhost 57637 t
master_remove_node
starting permutation: s1-add-inactive-1 s1-begin s1-activate-node-1 s2-disable-node-1 s1-commit s1-show-nodes
?column?
1
step s1-add-inactive-1:
SELECT 1 FROM master_add_inactive_node('localhost', 57637);
?column?
1
step s1-begin:
BEGIN;
step s1-activate-node-1:
SELECT 1 FROM master_activate_node('localhost', 57637);
?column?
1
step s2-disable-node-1:
SELECT 1 FROM master_disable_node('localhost', 57637);
<waiting ...>
step s1-commit:
COMMIT;
step s2-disable-node-1: <... completed>
?column?
1
step s1-show-nodes:
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
nodename nodeport isactive
localhost 57637 f
master_remove_node
starting permutation: s1-add-inactive-1 s1-begin s1-activate-node-1 s2-disable-node-1 s1-abort s1-show-nodes
?column?
1
step s1-add-inactive-1:
SELECT 1 FROM master_add_inactive_node('localhost', 57637);
?column?
1
step s1-begin:
BEGIN;
step s1-activate-node-1:
SELECT 1 FROM master_activate_node('localhost', 57637);
?column?
1
step s2-disable-node-1:
SELECT 1 FROM master_disable_node('localhost', 57637);
<waiting ...>
step s1-abort:
ABORT;
step s2-disable-node-1: <... completed>
?column?
1
step s1-show-nodes:
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
nodename nodeport isactive
localhost 57637 f
master_remove_node
starting permutation: s1-add-node-1 s1-begin s1-disable-node-1 s2-disable-node-1 s1-abort s1-show-nodes
?column?
1
step s1-add-node-1:
SELECT 1 FROM master_add_node('localhost', 57637);
?column?
1
step s1-begin:
BEGIN;
step s1-disable-node-1:
SELECT 1 FROM master_disable_node('localhost', 57637);
?column?
1
step s2-disable-node-1:
SELECT 1 FROM master_disable_node('localhost', 57637);
<waiting ...>
step s1-abort:
ABORT;
step s2-disable-node-1: <... completed>
?column?
1
step s1-show-nodes:
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
nodename nodeport isactive
localhost 57637 f
master_remove_node

View File

@ -0,0 +1,493 @@
Parsed test spec with 2 sessions
starting permutation: s1-begin s1-add-node-2 s2-create-table-1 s1-commit s1-show-placements s2-select
node_name node_port
localhost 57637
step s1-begin:
BEGIN;
step s1-add-node-2:
SELECT 1 FROM master_add_node('localhost', 57638);
?column?
1
step s2-create-table-1:
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
CREATE TABLE dist_table (x int, y int);
SELECT create_distributed_table('dist_table', 'x');
<waiting ...>
step s1-commit:
COMMIT;
step s2-create-table-1: <... completed>
create_distributed_table
step s1-show-placements:
SELECT
nodename, nodeport
FROM
pg_dist_shard_placement JOIN pg_dist_shard USING (shardid)
WHERE
logicalrelid = 'dist_table'::regclass
ORDER BY
nodename, nodeport;
nodename nodeport
localhost 57637
localhost 57637
localhost 57638
localhost 57638
step s2-select:
SELECT * FROM dist_table;
x y
master_remove_node
starting permutation: s1-begin s1-add-node-2 s2-create-table-1 s1-abort s1-show-placements s2-select
node_name node_port
localhost 57637
step s1-begin:
BEGIN;
step s1-add-node-2:
SELECT 1 FROM master_add_node('localhost', 57638);
?column?
1
step s2-create-table-1:
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
CREATE TABLE dist_table (x int, y int);
SELECT create_distributed_table('dist_table', 'x');
<waiting ...>
step s1-abort:
ABORT;
step s2-create-table-1: <... completed>
create_distributed_table
step s1-show-placements:
SELECT
nodename, nodeport
FROM
pg_dist_shard_placement JOIN pg_dist_shard USING (shardid)
WHERE
logicalrelid = 'dist_table'::regclass
ORDER BY
nodename, nodeport;
nodename nodeport
localhost 57637
localhost 57637
localhost 57637
localhost 57637
step s2-select:
SELECT * FROM dist_table;
x y
master_remove_node
starting permutation: s2-begin s2-create-table-1 s1-add-node-2 s2-commit s1-show-placements s2-select
node_name node_port
localhost 57637
step s2-begin:
BEGIN;
step s2-create-table-1:
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
CREATE TABLE dist_table (x int, y int);
SELECT create_distributed_table('dist_table', 'x');
create_distributed_table
step s1-add-node-2:
SELECT 1 FROM master_add_node('localhost', 57638);
<waiting ...>
step s2-commit:
COMMIT;
step s1-add-node-2: <... completed>
?column?
1
step s1-show-placements:
SELECT
nodename, nodeport
FROM
pg_dist_shard_placement JOIN pg_dist_shard USING (shardid)
WHERE
logicalrelid = 'dist_table'::regclass
ORDER BY
nodename, nodeport;
nodename nodeport
localhost 57637
localhost 57637
localhost 57637
localhost 57637
step s2-select:
SELECT * FROM dist_table;
x y
master_remove_node
starting permutation: s1-add-node-2 s1-begin s1-remove-node-2 s2-create-table-1 s1-commit s1-show-placements s2-select
node_name node_port
localhost 57637
step s1-add-node-2:
SELECT 1 FROM master_add_node('localhost', 57638);
?column?
1
step s1-begin:
BEGIN;
step s1-remove-node-2:
SELECT * FROM master_remove_node('localhost', 57638);
master_remove_node
step s2-create-table-1:
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
CREATE TABLE dist_table (x int, y int);
SELECT create_distributed_table('dist_table', 'x');
<waiting ...>
step s1-commit:
COMMIT;
step s2-create-table-1: <... completed>
create_distributed_table
step s1-show-placements:
SELECT
nodename, nodeport
FROM
pg_dist_shard_placement JOIN pg_dist_shard USING (shardid)
WHERE
logicalrelid = 'dist_table'::regclass
ORDER BY
nodename, nodeport;
nodename nodeport
localhost 57637
localhost 57637
localhost 57637
localhost 57637
step s2-select:
SELECT * FROM dist_table;
x y
master_remove_node
starting permutation: s1-add-node-2 s1-begin s1-remove-node-2 s2-create-table-1 s1-abort s1-show-placements s2-select
node_name node_port
localhost 57637
step s1-add-node-2:
SELECT 1 FROM master_add_node('localhost', 57638);
?column?
1
step s1-begin:
BEGIN;
step s1-remove-node-2:
SELECT * FROM master_remove_node('localhost', 57638);
master_remove_node
step s2-create-table-1:
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
CREATE TABLE dist_table (x int, y int);
SELECT create_distributed_table('dist_table', 'x');
<waiting ...>
step s1-abort:
ABORT;
step s2-create-table-1: <... completed>
create_distributed_table
step s1-show-placements:
SELECT
nodename, nodeport
FROM
pg_dist_shard_placement JOIN pg_dist_shard USING (shardid)
WHERE
logicalrelid = 'dist_table'::regclass
ORDER BY
nodename, nodeport;
nodename nodeport
localhost 57637
localhost 57637
localhost 57638
localhost 57638
step s2-select:
SELECT * FROM dist_table;
x y
master_remove_node
starting permutation: s1-add-node-2 s2-begin s2-create-table-1 s1-remove-node-2 s2-commit s1-show-placements s2-select
node_name node_port
localhost 57637
step s1-add-node-2:
SELECT 1 FROM master_add_node('localhost', 57638);
?column?
1
step s2-begin:
BEGIN;
step s2-create-table-1:
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
CREATE TABLE dist_table (x int, y int);
SELECT create_distributed_table('dist_table', 'x');
create_distributed_table
step s1-remove-node-2:
SELECT * FROM master_remove_node('localhost', 57638);
<waiting ...>
step s2-commit:
COMMIT;
step s1-remove-node-2: <... completed>
error in steps s2-commit s1-remove-node-2: ERROR: you cannot remove the primary node of a node group which has shard placements
step s1-show-placements:
SELECT
nodename, nodeport
FROM
pg_dist_shard_placement JOIN pg_dist_shard USING (shardid)
WHERE
logicalrelid = 'dist_table'::regclass
ORDER BY
nodename, nodeport;
nodename nodeport
localhost 57637
localhost 57637
localhost 57638
localhost 57638
step s2-select:
SELECT * FROM dist_table;
x y
master_remove_node
starting permutation: s1-add-node-2 s1-begin s1-remove-node-2 s2-create-table-2 s1-commit s2-select
node_name node_port
localhost 57637
step s1-add-node-2:
SELECT 1 FROM master_add_node('localhost', 57638);
?column?
1
step s1-begin:
BEGIN;
step s1-remove-node-2:
SELECT * FROM master_remove_node('localhost', 57638);
master_remove_node
step s2-create-table-2:
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 2;
CREATE TABLE dist_table (x int, y int);
SELECT create_distributed_table('dist_table', 'x');
<waiting ...>
step s1-commit:
COMMIT;
step s2-create-table-2: <... completed>
error in steps s1-commit s2-create-table-2: ERROR: replication_factor (2) exceeds number of worker nodes (1)
step s2-select:
SELECT * FROM dist_table;
ERROR: relation "dist_table" does not exist
master_remove_node
starting permutation: s1-add-node-2 s2-begin s2-create-table-2 s1-remove-node-2 s2-commit s2-select
node_name node_port
localhost 57637
step s1-add-node-2:
SELECT 1 FROM master_add_node('localhost', 57638);
?column?
1
step s2-begin:
BEGIN;
step s2-create-table-2:
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 2;
CREATE TABLE dist_table (x int, y int);
SELECT create_distributed_table('dist_table', 'x');
create_distributed_table
step s1-remove-node-2:
SELECT * FROM master_remove_node('localhost', 57638);
<waiting ...>
step s2-commit:
COMMIT;
step s1-remove-node-2: <... completed>
error in steps s2-commit s1-remove-node-2: ERROR: you cannot remove the primary node of a node group which has shard placements
step s2-select:
SELECT * FROM dist_table;
x y
master_remove_node
starting permutation: s1-add-node-2 s1-begin s1-remove-node-2 s2-create-append-table s1-commit s2-select
node_name node_port
localhost 57637
step s1-add-node-2:
SELECT 1 FROM master_add_node('localhost', 57638);
?column?
1
step s1-begin:
BEGIN;
step s1-remove-node-2:
SELECT * FROM master_remove_node('localhost', 57638);
master_remove_node
step s2-create-append-table:
SET citus.shard_replication_factor TO 1;
CREATE TABLE dist_table (x int, y int);
SELECT create_distributed_table('dist_table', 'x', 'append');
SELECT 1 FROM master_create_empty_shard('dist_table');
<waiting ...>
step s1-commit:
COMMIT;
step s2-create-append-table: <... completed>
create_distributed_table
?column?
1
step s2-select:
SELECT * FROM dist_table;
x y
master_remove_node
starting permutation: s1-add-node-2 s2-begin s2-create-append-table s1-remove-node-2 s2-commit s2-select
node_name node_port
localhost 57637
step s1-add-node-2:
SELECT 1 FROM master_add_node('localhost', 57638);
?column?
1
step s2-begin:
BEGIN;
step s2-create-append-table:
SET citus.shard_replication_factor TO 1;
CREATE TABLE dist_table (x int, y int);
SELECT create_distributed_table('dist_table', 'x', 'append');
SELECT 1 FROM master_create_empty_shard('dist_table');
create_distributed_table
?column?
1
step s1-remove-node-2:
SELECT * FROM master_remove_node('localhost', 57638);
<waiting ...>
step s2-commit:
COMMIT;
step s1-remove-node-2: <... completed>
master_remove_node
step s2-select:
SELECT * FROM dist_table;
x y
master_remove_node

View File

@ -98,7 +98,7 @@ step s1-get-current-transaction-id:
row
(0,189)
(0,290)
step s2-get-first-worker-active-transactions:
SELECT * FROM run_command_on_workers('SELECT row(initiator_node_identifier, transaction_number)
FROM
@ -109,4 +109,4 @@ step s2-get-first-worker-active-transactions:
nodename nodeport success result
localhost 57637 t (0,189)
localhost 57637 t (0,290)

View File

@ -29,11 +29,11 @@ step detector-dump-wait-edges:
waiting_transaction_numblocking_transaction_numblocking_transaction_waiting
192 191 f
293 292 f
transactionnumberwaitingtransactionnumbers
191
192 191
292
293 292
step s1-abort:
ABORT;
@ -77,14 +77,14 @@ step detector-dump-wait-edges:
waiting_transaction_numblocking_transaction_numblocking_transaction_waiting
196 195 f
197 195 f
197 196 t
297 296 f
298 296 f
298 297 t
transactionnumberwaitingtransactionnumbers
195
196 195
197 195,196
296
297 296
298 296,297
step s1-abort:
ABORT;

View File

@ -16,7 +16,7 @@ step s1-finish:
COMMIT;
step s2-insert: <... completed>
error in steps s1-finish s2-insert: ERROR: duplicate key value violates unique constraint "test_locking_a_key_102751"
error in steps s1-finish s2-insert: ERROR: duplicate key value violates unique constraint "test_locking_a_key_102781"
step s2-finish:
COMMIT;

View File

@ -1,4 +1,6 @@
test: isolation_add_remove_node
test: isolation_add_node_vs_reference_table_operations
test: isolation_create_table_vs_add_remove_node
# tests that change node metadata should precede
# isolation_cluster_management such that tests

View File

@ -0,0 +1,149 @@
setup
{
SELECT 1;
}
teardown
{
SELECT master_remove_node(nodename, nodeport) FROM pg_dist_node;
}
session "s1"
step "s1-begin"
{
BEGIN;
}
step "s1-add-node-1"
{
SELECT 1 FROM master_add_node('localhost', 57637);
}
step "s1-add-node-2"
{
SELECT 1 FROM master_add_node('localhost', 57638);
}
step "s1-add-inactive-1"
{
SELECT 1 FROM master_add_inactive_node('localhost', 57637);
}
step "s1-activate-node-1"
{
SELECT 1 FROM master_activate_node('localhost', 57637);
}
step "s1-disable-node-1"
{
SELECT 1 FROM master_disable_node('localhost', 57637);
}
step "s1-remove-node-1"
{
SELECT * FROM master_remove_node('localhost', 57637);
}
step "s1-remove-node-2"
{
SELECT * FROM master_remove_node('localhost', 57638);
}
step "s1-abort"
{
ABORT;
}
step "s1-commit"
{
COMMIT;
}
step "s1-show-nodes"
{
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
}
session "s2"
step "s2-begin"
{
BEGIN;
}
step "s2-add-node-1"
{
SELECT 1 FROM master_add_node('localhost', 57637);
}
step "s2-add-node-2"
{
SELECT 1 FROM master_add_node('localhost', 57638);
}
step "s2-activate-node-1"
{
SELECT 1 FROM master_activate_node('localhost', 57637);
}
step "s2-disable-node-1"
{
SELECT 1 FROM master_disable_node('localhost', 57637);
}
step "s2-remove-node-1"
{
SELECT * FROM master_remove_node('localhost', 57637);
}
step "s2-remove-node-2"
{
SELECT * FROM master_remove_node('localhost', 57638);
}
step "s2-commit"
{
COMMIT;
}
# session 1 adds a node, session 2 removes it, should be ok
permutation "s1-begin" "s1-add-node-1" "s2-remove-node-1" "s1-commit" "s1-show-nodes"
# add a different node from 2 sessions, should be ok
permutation "s1-begin" "s1-add-node-1" "s2-add-node-2" "s1-commit" "s1-show-nodes"
# add the same node from 2 sessions, should be ok (idempotent)
permutation "s1-begin" "s1-add-node-1" "s2-add-node-1" "s1-commit" "s1-show-nodes"
# add a different node from 2 sessions, one aborts
permutation "s1-begin" "s1-add-node-1" "s2-add-node-2" "s1-abort" "s1-show-nodes"
# add the same node from 2 sessions, one aborts
permutation "s1-begin" "s1-add-node-1" "s2-add-node-1" "s1-abort" "s1-show-nodes"
# remove a different node from 2 transactions, should be ok
permutation "s1-add-node-1" "s1-add-node-2" "s1-begin" "s1-remove-node-1" "s2-remove-node-2" "s1-commit" "s1-show-nodes"
# remove the same node from 2 transactions, should be ok (idempotent)
permutation "s1-add-node-1" "s1-begin" "s1-remove-node-1" "s2-remove-node-1" "s1-commit" "s1-show-nodes"
# activate an active node from 2 transactions, should be ok
permutation "s1-add-node-1" "s1-begin" "s1-activate-node-1" "s2-activate-node-1" "s1-commit" "s1-show-nodes"
# disable an active node from 2 transactions, should be ok
permutation "s1-add-node-1" "s1-begin" "s1-disable-node-1" "s2-disable-node-1" "s1-commit" "s1-show-nodes"
# activate an inactive node from 2 transactions, should be ok
permutation "s1-add-inactive-1" "s1-begin" "s1-activate-node-1" "s2-activate-node-1" "s1-commit" "s1-show-nodes"
# disable an inactive node from 2 transactions, should be ok
permutation "s1-add-inactive-1" "s1-begin" "s1-disable-node-1" "s2-disable-node-1" "s1-commit" "s1-show-nodes"
# disable and activate an active node from 2 transactions, should be ok
permutation "s1-add-node-1" "s1-begin" "s1-disable-node-1" "s2-activate-node-1" "s1-commit" "s1-show-nodes"
# activate and disable an active node node from 2 transactions, should be ok
permutation "s1-add-node-1" "s1-begin" "s1-activate-node-1" "s2-disable-node-1" "s1-commit" "s1-show-nodes"
# disable and activate an inactive node from 2 transactions, should be ok
permutation "s1-add-inactive-1" "s1-begin" "s1-disable-node-1" "s2-activate-node-1" "s1-commit" "s1-show-nodes"
# activate and disable an inactive node node from 2 transactions, should be ok
permutation "s1-add-inactive-1" "s1-begin" "s1-activate-node-1" "s2-disable-node-1" "s1-commit" "s1-show-nodes"
# activate and disable an inactive node from 2 transactions, one aborts
permutation "s1-add-inactive-1" "s1-begin" "s1-activate-node-1" "s2-disable-node-1" "s1-abort" "s1-show-nodes"
# disable an active node from 2 transactions, one aborts
permutation "s1-add-node-1" "s1-begin" "s1-disable-node-1" "s2-disable-node-1" "s1-abort" "s1-show-nodes"

View File

@ -0,0 +1,120 @@
setup
{
SELECT 1 FROM master_add_node('localhost', 57637);
SELECT * FROM master_get_active_worker_nodes() ORDER BY node_name, node_port;
}
teardown
{
DROP TABLE IF EXISTS dist_table;
SELECT master_remove_node(nodename, nodeport) FROM pg_dist_node;
}
session "s1"
step "s1-begin"
{
BEGIN;
}
step "s1-add-node-2"
{
SELECT 1 FROM master_add_node('localhost', 57638);
}
step "s1-remove-node-2"
{
SELECT * FROM master_remove_node('localhost', 57638);
}
step "s1-abort"
{
ABORT;
}
step "s1-commit"
{
COMMIT;
}
step "s1-query-table"
{
SELECT * FROM dist_table;
}
step "s1-show-nodes"
{
SELECT nodename, nodeport, isactive FROM pg_dist_node ORDER BY nodename, nodeport;
}
step "s1-show-placements"
{
SELECT
nodename, nodeport
FROM
pg_dist_shard_placement JOIN pg_dist_shard USING (shardid)
WHERE
logicalrelid = 'dist_table'::regclass
ORDER BY
nodename, nodeport;
}
session "s2"
step "s2-begin"
{
BEGIN;
}
step "s2-create-table-1"
{
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
CREATE TABLE dist_table (x int, y int);
SELECT create_distributed_table('dist_table', 'x');
}
step "s2-create-table-2"
{
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 2;
CREATE TABLE dist_table (x int, y int);
SELECT create_distributed_table('dist_table', 'x');
}
step "s2-create-append-table"
{
SET citus.shard_replication_factor TO 1;
CREATE TABLE dist_table (x int, y int);
SELECT create_distributed_table('dist_table', 'x', 'append');
SELECT 1 FROM master_create_empty_shard('dist_table');
}
step "s2-select"
{
SELECT * FROM dist_table;
}
step "s2-commit"
{
COMMIT;
}
# session 1 adds a node, session 2 creates a distributed table
permutation "s1-begin" "s1-add-node-2" "s2-create-table-1" "s1-commit" "s1-show-placements" "s2-select"
permutation "s1-begin" "s1-add-node-2" "s2-create-table-1" "s1-abort" "s1-show-placements" "s2-select"
permutation "s2-begin" "s2-create-table-1" "s1-add-node-2" "s2-commit" "s1-show-placements" "s2-select"
# session 1 removes a node, session 2 creates a distributed table
permutation "s1-add-node-2" "s1-begin" "s1-remove-node-2" "s2-create-table-1" "s1-commit" "s1-show-placements" "s2-select"
permutation "s1-add-node-2" "s1-begin" "s1-remove-node-2" "s2-create-table-1" "s1-abort" "s1-show-placements" "s2-select"
permutation "s1-add-node-2" "s2-begin" "s2-create-table-1" "s1-remove-node-2" "s2-commit" "s1-show-placements" "s2-select"
# session 1 removes a node, session 2 creates a distributed table with replication factor 2, should throw a sane error
permutation "s1-add-node-2" "s1-begin" "s1-remove-node-2" "s2-create-table-2" "s1-commit" "s2-select"
permutation "s1-add-node-2" "s2-begin" "s2-create-table-2" "s1-remove-node-2" "s2-commit" "s2-select"
# session 1 removes a node, session 2 creates a shard in an append-distributed table
permutation "s1-add-node-2" "s1-begin" "s1-remove-node-2" "s2-create-append-table" "s1-commit" "s2-select"
permutation "s1-add-node-2" "s2-begin" "s2-create-append-table" "s1-remove-node-2" "s2-commit" "s2-select"