diff --git a/src/backend/distributed/metadata/metadata_utility.c b/src/backend/distributed/metadata/metadata_utility.c index 36152f69e..c19917ebc 100644 --- a/src/backend/distributed/metadata/metadata_utility.c +++ b/src/backend/distributed/metadata/metadata_utility.c @@ -1255,26 +1255,6 @@ ShardLength(uint64 shardId) } -/* - * NodeGroupHasLivePlacements returns true if there is any placement - * on the given node group which is not a SHARD_STATE_TO_DELETE placement. - */ -bool -NodeGroupHasLivePlacements(int32 groupId) -{ - List *shardPlacements = AllShardPlacementsOnNodeGroup(groupId); - GroupShardPlacement *placement = NULL; - foreach_ptr(placement, shardPlacements) - { - if (placement->shardState != SHARD_STATE_TO_DELETE) - { - return true; - } - } - return false; -} - - /* * NodeGroupHasShardPlacements returns whether any active shards are placed on the group */ diff --git a/src/backend/distributed/metadata/node_metadata.c b/src/backend/distributed/metadata/node_metadata.c index 2b83a319a..6d67219d0 100644 --- a/src/backend/distributed/metadata/node_metadata.c +++ b/src/backend/distributed/metadata/node_metadata.c @@ -91,8 +91,10 @@ typedef struct NodeMetadata /* local function forward declarations */ static int ActivateNode(char *nodeName, int nodePort); -static bool CanRemoveReferenceTablePlacements(void); static void RemoveNodeFromCluster(char *nodeName, int32 nodePort); +static void ErrorIfNodeContainsNonRemovablePlacements(WorkerNode *workerNode); +static bool PlacementHasActivePlacementOnAnotherGroup(GroupShardPlacement + *sourcePlacement); static int AddNodeMetadata(char *nodeName, int32 nodePort, NodeMetadata *nodeMetadata, bool *nodeAlreadyExists); static WorkerNode * SetNodeState(char *nodeName, int32 nodePort, bool isActive); @@ -1295,35 +1297,18 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort) WorkerNode *workerNode = ModifiableWorkerNode(nodeName, nodePort); if (NodeIsPrimary(workerNode)) { - if (CanRemoveReferenceTablePlacements()) - { - /* - * Delete reference table placements so they are not taken into account - * for the check if there are placements after this. - */ - DeleteAllReferenceTablePlacementsFromNodeGroup(workerNode->groupId); - } - if (NodeGroupHasLivePlacements(workerNode->groupId)) - { - if (ActivePrimaryNodeCount() == 1 && ClusterHasReferenceTable()) - { - ereport(ERROR, (errmsg( - "cannot remove the last worker node because there are reference " - "tables and it would cause data loss on reference tables"), - errhint( - "To proceed, either drop the reference tables or use " - "undistribute_table() function to convert them to local tables"))); - } - ereport(ERROR, (errmsg("cannot remove the primary node of a node group " - "which has shard placements"), - errhint( - "To proceed, either drop the distributed tables or use " - "undistribute_table() function to convert them to local tables"))); - } + ErrorIfNodeContainsNonRemovablePlacements(workerNode); + + /* + * Delete reference table placements so they are not taken into account + * for the check if there are placements after this. + */ + DeleteAllReferenceTablePlacementsFromNodeGroup(workerNode->groupId); /* * Secondary nodes are read-only, never 2PC is used. - * Hence, no items can be inserted to pg_dist_transaction for secondary nodes. + * Hence, no items can be inserted to pg_dist_transaction + * for secondary nodes. 
*/ DeleteWorkerTransactions(workerNode); } @@ -1341,6 +1326,65 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort) } +/* + * ErrorIfNodeContainsNonRemovablePlacements throws an error if the given node + * contains at least one shard placement that is the only active placement + * of its shard. + */ +static void +ErrorIfNodeContainsNonRemovablePlacements(WorkerNode *workerNode) +{ + int32 groupId = workerNode->groupId; + List *shardPlacements = AllShardPlacementsOnNodeGroup(groupId); + GroupShardPlacement *placement = NULL; + foreach_ptr(placement, shardPlacements) + { + if (!PlacementHasActivePlacementOnAnotherGroup(placement)) + { + Oid relationId = RelationIdForShard(placement->shardId); + char *qualifiedRelationName = generate_qualified_relation_name(relationId); + + ereport(ERROR, (errmsg("cannot remove or disable the node " + "%s:%d because it contains " + "the only shard placement for " + "shard " UINT64_FORMAT, workerNode->workerName, + workerNode->workerPort, placement->shardId), + errdetail("One of the table(s) that prevents the operation " + "from completing successfully is %s", + qualifiedRelationName), + errhint("To proceed, either drop the tables or use " + "undistribute_table() function to convert " + "them to local tables"))); + } + } +} + + +/* + * PlacementHasActivePlacementOnAnotherGroup returns true if the shard of the + * given sourcePlacement has at least one healthy placement on another group. + */ +static bool +PlacementHasActivePlacementOnAnotherGroup(GroupShardPlacement *sourcePlacement) +{ + uint64 shardId = sourcePlacement->shardId; + List *activePlacementList = ActiveShardPlacementList(shardId); + + bool foundHealthyPlacementOnAnotherGroup = false; + ShardPlacement *activePlacement = NULL; + foreach_ptr(activePlacement, activePlacementList) + { + if (activePlacement->groupId != sourcePlacement->groupId) + { + foundHealthyPlacementOnAnotherGroup = true; + break; + } + } + + return foundHealthyPlacementOnAnotherGroup; +} + + /* * RemoveOldShardPlacementForNodeGroup removes all old shard placements * for the given node group from pg_dist_placement. */ @@ -1364,18 +1408,6 @@ RemoveOldShardPlacementForNodeGroup(int groupId) } -/* - * CanRemoveReferenceTablePlacements returns true if active primary - * node count is more than 1, which means that even if we remove a node - * we will still have some other node that has reference table placement. - */ -static bool -CanRemoveReferenceTablePlacements(void) -{ - return ActivePrimaryNodeCount() > 1; -} - - /* CountPrimariesWithMetadata returns the number of primary nodes which have metadata.
*/ uint32 CountPrimariesWithMetadata(void) diff --git a/src/include/distributed/metadata_utility.h b/src/include/distributed/metadata_utility.h index 51978a53a..4303901f1 100644 --- a/src/include/distributed/metadata_utility.h +++ b/src/include/distributed/metadata_utility.h @@ -207,7 +207,6 @@ extern int ShardIntervalCount(Oid relationId); extern List * LoadShardList(Oid relationId); extern ShardInterval * CopyShardInterval(ShardInterval *srcInterval); extern uint64 ShardLength(uint64 shardId); -extern bool NodeGroupHasLivePlacements(int32 groupId); extern bool NodeGroupHasShardPlacements(int32 groupId, bool onlyConsiderActivePlacements); extern List * ActiveShardPlacementListOnGroup(uint64 shardId, int32 groupId); diff --git a/src/test/regress/expected/citus_local_tables.out b/src/test/regress/expected/citus_local_tables.out index 35cdc56fd..b21d461ec 100644 --- a/src/test/regress/expected/citus_local_tables.out +++ b/src/test/regress/expected/citus_local_tables.out @@ -27,7 +27,7 @@ SELECT citus_add_local_table_to_metadata('citus_local_table_1'); -- try to remove coordinator and observe failure as there exist a citus local table SELECT 1 FROM master_remove_node('localhost', :master_port); -ERROR: cannot remove the primary node of a node group which has shard placements +ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx DROP TABLE citus_local_table_1; NOTICE: executing the command locally: DROP TABLE IF EXISTS citus_local_tables_test_schema.citus_local_table_1_xxxxx CASCADE -- this should work now as the citus local table is dropped diff --git a/src/test/regress/expected/failure_add_disable_node.out b/src/test/regress/expected/failure_add_disable_node.out index 8780575e7..8611617a3 100644 --- a/src/test/regress/expected/failure_add_disable_node.out +++ b/src/test/regress/expected/failure_add_disable_node.out @@ -122,10 +122,18 @@ ORDER BY placementid; 200000 | 1 (1 row) --- master_remove_node fails when there are shards on that worker -SELECT master_remove_node('localhost', :worker_2_proxy_port); -ERROR: cannot remove the last worker node because there are reference tables and it would cause data loss on reference tables -HINT: To proceed, either drop the reference tables or use undistribute_table() function to convert them to local tables +BEGIN; + -- master_remove_node succeeds because each shard that + -- has a placement on worker_2_proxy_port also has a + -- healthy placement on the other worker + -- (worker_1_port) + SELECT master_remove_node('localhost', :worker_2_proxy_port); + master_remove_node +--------------------------------------------------------------------- + +(1 row) + +ROLLBACK; -- drop event table and re-run remove DROP TABLE event_table; SELECT master_remove_node('localhost', :worker_2_proxy_port); diff --git a/src/test/regress/expected/isolation_create_citus_local_table.out b/src/test/regress/expected/isolation_create_citus_local_table.out index 375bc6ffd..a7c2c8445 100644 --- a/src/test/regress/expected/isolation_create_citus_local_table.out +++ b/src/test/regress/expected/isolation_create_citus_local_table.out @@ -162,7 +162,7 @@ citus_add_local_table_to_metadata step s2-remove-coordinator: SELECT master_remove_node('localhost', 57636); step s1-commit: COMMIT; step s2-remove-coordinator: <...
completed> -ERROR: cannot remove the primary node of a node group which has shard placements +ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx step s2-commit: COMMIT; master_remove_node --------------------------------------------------------------------- diff --git a/src/test/regress/expected/isolation_create_table_vs_add_remove_node.out b/src/test/regress/expected/isolation_create_table_vs_add_remove_node.out index 8ea0325e9..df2243478 100644 --- a/src/test/regress/expected/isolation_create_table_vs_add_remove_node.out +++ b/src/test/regress/expected/isolation_create_table_vs_add_remove_node.out @@ -7,10 +7,10 @@ localhost| 57637 (1 row) step s1-begin: - BEGIN; + BEGIN; step s1-add-node-2: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? --------------------------------------------------------------------- @@ -18,13 +18,13 @@ step s1-add-node-2: (1 row) step s2-create-table-1: - SET citus.shard_count TO 4; - SET citus.shard_replication_factor TO 1; - CREATE TABLE dist_table (x int, y int); - SELECT create_distributed_table('dist_table', 'x'); + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); step s1-commit: - COMMIT; + COMMIT; step s2-create-table-1: <... completed> create_distributed_table @@ -33,14 +33,14 @@ create_distributed_table (1 row) step s1-show-placements: - SELECT - nodename, nodeport - FROM - pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) - WHERE - logicalrelid = 'dist_table'::regclass - ORDER BY - nodename, nodeport; + SELECT + nodename, nodeport + FROM + pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) + WHERE + logicalrelid = 'dist_table'::regclass + ORDER BY + nodename, nodeport; nodename |nodeport --------------------------------------------------------------------- @@ -51,7 +51,7 @@ localhost| 57638 (4 rows) step s2-select: - SELECT * FROM dist_table; + SELECT * FROM dist_table; x|y --------------------------------------------------------------------- @@ -71,10 +71,10 @@ localhost| 57637 (1 row) step s1-begin: - BEGIN; + BEGIN; step s1-add-node-2: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? --------------------------------------------------------------------- @@ -82,13 +82,13 @@ step s1-add-node-2: (1 row) step s2-create-table-1: - SET citus.shard_count TO 4; - SET citus.shard_replication_factor TO 1; - CREATE TABLE dist_table (x int, y int); - SELECT create_distributed_table('dist_table', 'x'); + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); step s1-abort: - ABORT; + ABORT; step s2-create-table-1: <...
completed> create_distributed_table @@ -97,14 +97,14 @@ create_distributed_table (1 row) step s1-show-placements: - SELECT - nodename, nodeport - FROM - pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) - WHERE - logicalrelid = 'dist_table'::regclass - ORDER BY - nodename, nodeport; + SELECT + nodename, nodeport + FROM + pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) + WHERE + logicalrelid = 'dist_table'::regclass + ORDER BY + nodename, nodeport; nodename |nodeport --------------------------------------------------------------------- @@ -115,7 +115,7 @@ localhost| 57637 (4 rows) step s2-select: - SELECT * FROM dist_table; + SELECT * FROM dist_table; x|y --------------------------------------------------------------------- @@ -134,13 +134,13 @@ localhost| 57637 (1 row) step s2-begin: - BEGIN; + BEGIN; step s2-create-table-1: - SET citus.shard_count TO 4; - SET citus.shard_replication_factor TO 1; - CREATE TABLE dist_table (x int, y int); - SELECT create_distributed_table('dist_table', 'x'); + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); create_distributed_table --------------------------------------------------------------------- @@ -148,10 +148,10 @@ create_distributed_table (1 row) step s1-add-node-2: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); step s2-commit: - COMMIT; + COMMIT; step s1-add-node-2: <... completed> ?column? @@ -160,14 +160,14 @@ step s1-add-node-2: <... completed> (1 row) step s1-show-placements: - SELECT - nodename, nodeport - FROM - pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) - WHERE - logicalrelid = 'dist_table'::regclass - ORDER BY - nodename, nodeport; + SELECT + nodename, nodeport + FROM + pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) + WHERE + logicalrelid = 'dist_table'::regclass + ORDER BY + nodename, nodeport; nodename |nodeport --------------------------------------------------------------------- @@ -178,7 +178,7 @@ localhost| 57637 (4 rows) step s2-select: - SELECT * FROM dist_table; + SELECT * FROM dist_table; x|y --------------------------------------------------------------------- @@ -198,7 +198,7 @@ localhost| 57637 (1 row) step s1-add-node-2: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? --------------------------------------------------------------------- @@ -206,10 +206,10 @@ step s1-add-node-2: (1 row) step s1-begin: - BEGIN; + BEGIN; step s1-remove-node-2: - SELECT * FROM master_remove_node('localhost', 57638); + SELECT * FROM master_remove_node('localhost', 57638); master_remove_node --------------------------------------------------------------------- @@ -217,13 +217,13 @@ master_remove_node (1 row) step s2-create-table-1: - SET citus.shard_count TO 4; - SET citus.shard_replication_factor TO 1; - CREATE TABLE dist_table (x int, y int); - SELECT create_distributed_table('dist_table', 'x'); + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); step s1-commit: - COMMIT; + COMMIT; step s2-create-table-1: <... 
completed> create_distributed_table @@ -232,14 +232,14 @@ create_distributed_table (1 row) step s1-show-placements: - SELECT - nodename, nodeport - FROM - pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) - WHERE - logicalrelid = 'dist_table'::regclass - ORDER BY - nodename, nodeport; + SELECT + nodename, nodeport + FROM + pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) + WHERE + logicalrelid = 'dist_table'::regclass + ORDER BY + nodename, nodeport; nodename |nodeport --------------------------------------------------------------------- @@ -250,7 +250,7 @@ localhost| 57637 (4 rows) step s2-select: - SELECT * FROM dist_table; + SELECT * FROM dist_table; x|y --------------------------------------------------------------------- @@ -269,7 +269,7 @@ localhost| 57637 (1 row) step s1-add-node-2: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? --------------------------------------------------------------------- @@ -277,10 +277,10 @@ step s1-add-node-2: (1 row) step s1-begin: - BEGIN; + BEGIN; step s1-remove-node-2: - SELECT * FROM master_remove_node('localhost', 57638); + SELECT * FROM master_remove_node('localhost', 57638); master_remove_node --------------------------------------------------------------------- @@ -288,13 +288,13 @@ master_remove_node (1 row) step s2-create-table-1: - SET citus.shard_count TO 4; - SET citus.shard_replication_factor TO 1; - CREATE TABLE dist_table (x int, y int); - SELECT create_distributed_table('dist_table', 'x'); + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); step s1-abort: - ABORT; + ABORT; step s2-create-table-1: <... completed> create_distributed_table @@ -303,14 +303,14 @@ create_distributed_table (1 row) step s1-show-placements: - SELECT - nodename, nodeport - FROM - pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) - WHERE - logicalrelid = 'dist_table'::regclass - ORDER BY - nodename, nodeport; + SELECT + nodename, nodeport + FROM + pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) + WHERE + logicalrelid = 'dist_table'::regclass + ORDER BY + nodename, nodeport; nodename |nodeport --------------------------------------------------------------------- @@ -321,7 +321,7 @@ localhost| 57638 (4 rows) step s2-select: - SELECT * FROM dist_table; + SELECT * FROM dist_table; x|y --------------------------------------------------------------------- @@ -341,7 +341,7 @@ localhost| 57637 (1 row) step s1-add-node-2: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? 
--------------------------------------------------------------------- @@ -349,13 +349,13 @@ step s1-add-node-2: (1 row) step s2-begin: - BEGIN; + BEGIN; step s2-create-table-1: - SET citus.shard_count TO 4; - SET citus.shard_replication_factor TO 1; - CREATE TABLE dist_table (x int, y int); - SELECT create_distributed_table('dist_table', 'x'); + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); create_distributed_table --------------------------------------------------------------------- @@ -363,22 +363,22 @@ create_distributed_table (1 row) step s1-remove-node-2: - SELECT * FROM master_remove_node('localhost', 57638); + SELECT * FROM master_remove_node('localhost', 57638); step s2-commit: - COMMIT; + COMMIT; step s1-remove-node-2: <... completed> -ERROR: cannot remove the primary node of a node group which has shard placements +ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx step s1-show-placements: - SELECT - nodename, nodeport - FROM - pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) - WHERE - logicalrelid = 'dist_table'::regclass - ORDER BY - nodename, nodeport; + SELECT + nodename, nodeport + FROM + pg_dist_shard_placement JOIN pg_dist_shard USING (shardid) + WHERE + logicalrelid = 'dist_table'::regclass + ORDER BY + nodename, nodeport; nodename |nodeport --------------------------------------------------------------------- @@ -389,7 +389,7 @@ localhost| 57638 (4 rows) step s2-select: - SELECT * FROM dist_table; + SELECT * FROM dist_table; x|y --------------------------------------------------------------------- @@ -409,7 +409,7 @@ localhost| 57637 (1 row) step s1-add-node-2: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? --------------------------------------------------------------------- @@ -417,10 +417,10 @@ step s1-add-node-2: (1 row) step s1-begin: - BEGIN; + BEGIN; step s1-remove-node-2: - SELECT * FROM master_remove_node('localhost', 57638); + SELECT * FROM master_remove_node('localhost', 57638); master_remove_node --------------------------------------------------------------------- @@ -428,20 +428,27 @@ master_remove_node (1 row) step s2-create-table-2: - SET citus.shard_count TO 4; - SET citus.shard_replication_factor TO 2; - CREATE TABLE dist_table (x int, y int); - SELECT create_distributed_table('dist_table', 'x'); + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); step s1-commit: - COMMIT; + COMMIT; step s2-create-table-2: <... completed> -ERROR: replication_factor (2) exceeds number of worker nodes (1) -step s2-select: - SELECT * FROM dist_table; +create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +step s2-select: + SELECT * FROM dist_table; + +x|y +--------------------------------------------------------------------- +(0 rows) -ERROR: relation "dist_table" does not exist master_remove_node --------------------------------------------------------------------- @@ -455,7 +462,7 @@ localhost| 57637 (1 row) step s1-add-node-2: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column?
--------------------------------------------------------------------- @@ -463,13 +470,13 @@ step s1-add-node-2: (1 row) step s2-begin: - BEGIN; + BEGIN; step s2-create-table-2: - SET citus.shard_count TO 4; - SET citus.shard_replication_factor TO 2; - CREATE TABLE dist_table (x int, y int); - SELECT create_distributed_table('dist_table', 'x'); + SET citus.shard_count TO 4; + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x'); create_distributed_table --------------------------------------------------------------------- @@ -477,15 +484,15 @@ create_distributed_table (1 row) step s1-remove-node-2: - SELECT * FROM master_remove_node('localhost', 57638); + SELECT * FROM master_remove_node('localhost', 57638); step s2-commit: - COMMIT; + COMMIT; step s1-remove-node-2: <... completed> -ERROR: cannot remove the primary node of a node group which has shard placements +ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx step s2-select: - SELECT * FROM dist_table; + SELECT * FROM dist_table; x|y --------------------------------------------------------------------- @@ -505,7 +512,7 @@ localhost| 57637 (1 row) step s1-add-node-2: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column? --------------------------------------------------------------------- @@ -513,10 +520,10 @@ step s1-add-node-2: (1 row) step s1-begin: - BEGIN; + BEGIN; step s1-remove-node-2: - SELECT * FROM master_remove_node('localhost', 57638); + SELECT * FROM master_remove_node('localhost', 57638); master_remove_node --------------------------------------------------------------------- @@ -524,13 +531,13 @@ master_remove_node (1 row) step s2-create-append-table: - SET citus.shard_replication_factor TO 1; - CREATE TABLE dist_table (x int, y int); - SELECT create_distributed_table('dist_table', 'x', 'append'); - SELECT 1 FROM master_create_empty_shard('dist_table'); + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x', 'append'); + SELECT 1 FROM master_create_empty_shard('dist_table'); step s1-commit: - COMMIT; + COMMIT; step s2-create-append-table: <... completed> create_distributed_table @@ -544,7 +551,7 @@ create_distributed_table (1 row) step s2-select: - SELECT * FROM dist_table; + SELECT * FROM dist_table; x|y --------------------------------------------------------------------- @@ -563,7 +570,7 @@ localhost| 57637 (1 row) step s1-add-node-2: - SELECT 1 FROM master_add_node('localhost', 57638); + SELECT 1 FROM master_add_node('localhost', 57638); ?column?
--------------------------------------------------------------------- @@ -571,13 +578,13 @@ step s1-add-node-2: (1 row) step s2-begin: - BEGIN; + BEGIN; step s2-create-append-table: - SET citus.shard_replication_factor TO 1; - CREATE TABLE dist_table (x int, y int); - SELECT create_distributed_table('dist_table', 'x', 'append'); - SELECT 1 FROM master_create_empty_shard('dist_table'); + SET citus.shard_replication_factor TO 1; + CREATE TABLE dist_table (x int, y int); + SELECT create_distributed_table('dist_table', 'x', 'append'); + SELECT 1 FROM master_create_empty_shard('dist_table'); create_distributed_table --------------------------------------------------------------------- @@ -590,15 +597,15 @@ create_distributed_table (1 row) step s1-remove-node-2: - SELECT * FROM master_remove_node('localhost', 57638); + SELECT * FROM master_remove_node('localhost', 57638); step s2-commit: - COMMIT; + COMMIT; step s1-remove-node-2: <... completed> -ERROR: cannot remove the primary node of a node group which has shard placements +ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx step s2-select: - SELECT * FROM dist_table; + SELECT * FROM dist_table; x|y --------------------------------------------------------------------- diff --git a/src/test/regress/expected/isolation_dump_global_wait_edges.out b/src/test/regress/expected/isolation_dump_global_wait_edges.out index e71a3085d..bb0e22a7f 100644 --- a/src/test/regress/expected/isolation_dump_global_wait_edges.out +++ b/src/test/regress/expected/isolation_dump_global_wait_edges.out @@ -28,13 +28,13 @@ step detector-dump-wait-edges: waiting_transaction_num|blocking_transaction_num|blocking_transaction_waiting --------------------------------------------------------------------- - 405| 404|f + 406| 405|f (1 row) transactionnumber|waitingtransactionnumbers --------------------------------------------------------------------- - 404| - 405| 404 + 405| + 406| 405 (2 rows) step s1-abort: @@ -84,16 +84,16 @@ step detector-dump-wait-edges: waiting_transaction_num|blocking_transaction_num|blocking_transaction_waiting --------------------------------------------------------------------- - 409| 408|f - 410| 408|f - 410| 409|t + 410| 409|f + 411| 409|f + 411| 410|t (3 rows) transactionnumber|waitingtransactionnumbers --------------------------------------------------------------------- - 408| - 409|408 - 410|408,409 + 409| + 410|409 + 411|409,410 (3 rows) step s1-abort: diff --git a/src/test/regress/expected/multi_cluster_management.out b/src/test/regress/expected/multi_cluster_management.out index d65699f06..e219d293f 100644 --- a/src/test/regress/expected/multi_cluster_management.out +++ b/src/test/regress/expected/multi_cluster_management.out @@ -134,8 +134,9 @@ SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHER -- try to remove a node with active placements and see that node removal is failed SELECT master_remove_node('localhost', :worker_2_port); -ERROR: cannot remove the primary node of a node group which has shard placements -HINT: To proceed, either drop the distributed tables or use undistribute_table() function to convert them to local tables +ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx +DETAIL: One of the table(s) that prevents the operation from completing successfully is public.cluster_management_test +HINT: To proceed, either drop the tables or use undistribute_table()
function to convert them to local tables SELECT master_get_active_worker_nodes(); master_get_active_worker_nodes --------------------------------------------------------------------- @@ -154,8 +155,9 @@ SELECT create_reference_table('test_reference_table'); INSERT INTO test_reference_table VALUES (1, '1'); -- try to remove a node with active placements and reference tables SELECT citus_remove_node('localhost', :worker_2_port); -ERROR: cannot remove the primary node of a node group which has shard placements -HINT: To proceed, either drop the distributed tables or use undistribute_table() function to convert them to local tables +ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx +DETAIL: One of the table(s) that prevents the operation from completing successfully is public.cluster_management_test +HINT: To proceed, either drop the tables or use undistribute_table() function to convert them to local tables -- try to disable a node with active placements see that node is removed -- observe that a notification is displayed SELECT master_disable_node('localhost', :worker_2_port); @@ -335,8 +337,9 @@ SELECT create_distributed_table('cluster_management_test', 'col_1', 'hash'); -- try to remove a node with active placements and see that node removal is failed SELECT master_remove_node('localhost', :worker_2_port); -ERROR: cannot remove the primary node of a node group which has shard placements -HINT: To proceed, either drop the distributed tables or use undistribute_table() function to convert them to local tables +ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx +DETAIL: One of the table(s) that prevents the operation from completing successfully is public.cluster_management_test +HINT: To proceed, either drop the tables or use undistribute_table() function to convert them to local tables -- mark all placements in the candidate node as inactive SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport=:worker_2_port \gset UPDATE pg_dist_placement SET shardstate=3 WHERE groupid=:worker_2_group; @@ -363,8 +366,9 @@ SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHER -- try to remove a node with only inactive placements and see that removal still fails SELECT master_remove_node('localhost', :worker_2_port); -ERROR: cannot remove the primary node of a node group which has shard placements -HINT: To proceed, either drop the distributed tables or use undistribute_table() function to convert them to local tables +ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx +DETAIL: One of the table(s) that prevents the operation from completing successfully is public.cluster_management_test +HINT: To proceed, either drop the tables or use undistribute_table() function to convert them to local tables SELECT master_get_active_worker_nodes(); master_get_active_worker_nodes --------------------------------------------------------------------- (localhost,57637) (1 row) @@ -440,68 +444,6 @@ SELECT logicalrelid, shardid, shardstate, nodename, nodeport FROM pg_dist_shard_ cluster_management_test | 1220015 | 4 | localhost | 57638 (24 rows) -SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4; -SELECT run_command_on_workers('SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4'); - run_command_on_workers
---------------------------------------------------------------------- - (localhost,57637,t,"SELECT 8") - (localhost,57638,t,"SELECT 8") -(2 rows) - --- try to remove a node with only to be deleted placements and see that removal succeeds -SELECT master_remove_node('localhost', :worker_2_port); - master_remove_node ---------------------------------------------------------------------- - -(1 row) - -SELECT master_get_active_worker_nodes(); - master_get_active_worker_nodes ---------------------------------------------------------------------- - (localhost,57637) -(1 row) - -SELECT master_add_node('localhost', :worker_2_port, groupId := :worker_2_group); -WARNING: citus.enable_object_propagation is off, not creating distributed objects on worker -DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. Newly activated nodes will not get these objects created -WARNING: could not find any shard placements for shardId 1220001 -WARNING: could not find any shard placements for shardId 1220003 -WARNING: could not find any shard placements for shardId 1220005 -WARNING: could not find any shard placements for shardId 1220007 -WARNING: could not find any shard placements for shardId 1220009 -WARNING: could not find any shard placements for shardId 1220011 -WARNING: could not find any shard placements for shardId 1220013 -WARNING: could not find any shard placements for shardId 1220015 -WARNING: could not find any shard placements for shardId 1220017 -WARNING: could not find any shard placements for shardId 1220019 -WARNING: could not find any shard placements for shardId 1220021 -WARNING: could not find any shard placements for shardId 1220023 -WARNING: could not find any shard placements for shardId 1220025 -WARNING: could not find any shard placements for shardId 1220027 -WARNING: could not find any shard placements for shardId 1220029 -WARNING: could not find any shard placements for shardId 1220031 - master_add_node ---------------------------------------------------------------------- - 7 -(1 row) - --- put removed placements back for testing purposes(in practice we wouldn't have only old placements for a shard) -INSERT INTO pg_dist_placement SELECT * FROM removed_placements; -SELECT run_command_on_workers('INSERT INTO pg_dist_placement SELECT * FROM removed_placements'); - run_command_on_workers ---------------------------------------------------------------------- - (localhost,57637,f,"ERROR: duplicate key value violates unique constraint ""pg_dist_placement_placementid_index""") - (localhost,57638,t,"INSERT 0 8") -(2 rows) - -DROP TABLE removed_placements; -SELECT run_command_on_workers('DROP TABLE removed_placements'); - run_command_on_workers ---------------------------------------------------------------------- - (localhost,57637,t,"DROP TABLE") - (localhost,57638,t,"DROP TABLE") -(2 rows) - -- clean-up SELECT 1 FROM master_add_node('localhost', :worker_2_port); ?column? 
@@ -572,8 +514,9 @@ SELECT 1 FROM master_add_node('localhost', 9990, groupid => :new_group, noderole (1 row) SELECT master_remove_node('localhost', :worker_2_port); -ERROR: cannot remove the primary node of a node group which has shard placements -HINT: To proceed, either drop the distributed tables or use undistribute_table() function to convert them to local tables +ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx +DETAIL: One of the table(s) that prevents the operation from completing successfully is public.cluster_management_test +HINT: To proceed, either drop the tables or use undistribute_table() function to convert them to local tables SELECT master_remove_node('localhost', 9990); master_remove_node --------------------------------------------------------------------- @@ -674,14 +617,14 @@ WARNING: citus.enable_object_propagation is off, not creating distributed objec DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. Newly activated nodes will not get these objects created master_add_node | master_add_node --------------------------------------------------------------------- - 12 | 13 + 11 | 12 (1 row) SELECT * FROM pg_dist_node ORDER BY nodeid; nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards --------------------------------------------------------------------- - 12 | 9 | localhost | 57637 | default | t | t | primary | default | t | t - 13 | 10 | localhost | 57638 | default | t | t | primary | default | t | t + 11 | 9 | localhost | 57637 | default | t | t | primary | default | t | t + 12 | 10 | localhost | 57638 | default | t | t | primary | default | t | t (2 rows) -- check that mixed add/remove node commands work fine inside transaction @@ -881,13 +824,13 @@ SELECT 1 FROM master_add_inactive_node('localhost', 9996, groupid => :worker_2_g SELECT master_add_inactive_node('localhost', 9999, groupid => :worker_2_group, nodecluster => 'olap', noderole => 'secondary'); master_add_inactive_node --------------------------------------------------------------------- - 23 + 22 (1 row) SELECT master_activate_node('localhost', 9999); master_activate_node --------------------------------------------------------------------- - 23 + 22 (1 row) SELECT master_disable_node('localhost', 9999); @@ -915,17 +858,17 @@ CONTEXT: PL/pgSQL function citus_internal.pg_dist_node_trigger_func() line XX a INSERT INTO pg_dist_node (nodename, nodeport, groupid, noderole, nodecluster) VALUES ('localhost', 5000, 1000, 'primary', 'olap'); ERROR: new row for relation "pg_dist_node" violates check constraint "primaries_are_only_allowed_in_the_default_cluster" -DETAIL: Failing row contains (25, 1000, localhost, 5000, default, f, t, primary, olap, f, t). +DETAIL: Failing row contains (24, 1000, localhost, 5000, default, f, t, primary, olap, f, t). UPDATE pg_dist_node SET nodecluster = 'olap' WHERE nodeport = :worker_1_port; ERROR: new row for relation "pg_dist_node" violates check constraint "primaries_are_only_allowed_in_the_default_cluster" -DETAIL: Failing row contains (17, 14, localhost, 57637, default, f, t, primary, olap, f, t). +DETAIL: Failing row contains (16, 14, localhost, 57637, default, f, t, primary, olap, f, t).
-- check that you /can/ add a secondary node to a non-default cluster SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport = :worker_2_port \gset SELECT master_add_node('localhost', 8888, groupid => :worker_1_group, noderole => 'secondary', nodecluster=> 'olap'); master_add_node --------------------------------------------------------------------- - 26 + 25 (1 row) -- check that super-long cluster names are truncated @@ -938,13 +881,13 @@ SELECT master_add_node('localhost', 8887, groupid => :worker_1_group, noderole = ); master_add_node --------------------------------------------------------------------- - 27 + 26 (1 row) SELECT * FROM pg_dist_node WHERE nodeport=8887; nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards --------------------------------------------------------------------- - 27 | 14 | localhost | 8887 | default | f | t | secondary | thisisasixtyfourcharacterstringrepeatedfourtimestomake256chars. | f | t + 26 | 14 | localhost | 8887 | default | f | t | secondary | thisisasixtyfourcharacterstringrepeatedfourtimestomake256chars. | f | t (1 row) -- don't remove the secondary and unavailable nodes, check that no commands are sent to @@ -953,13 +896,13 @@ SELECT * FROM pg_dist_node WHERE nodeport=8887; SELECT master_add_secondary_node('localhost', 9995, 'localhost', :worker_1_port); master_add_secondary_node --------------------------------------------------------------------- - 28 + 27 (1 row) SELECT master_add_secondary_node('localhost', 9994, primaryname => 'localhost', primaryport => :worker_2_port); master_add_secondary_node --------------------------------------------------------------------- - 29 + 28 (1 row) SELECT master_add_secondary_node('localhost', 9993, 'localhost', 2000); @@ -967,7 +910,7 @@ ERROR: node at "localhost:xxxxx" does not exist SELECT master_add_secondary_node('localhost', 9992, 'localhost', :worker_1_port, nodecluster => 'second-cluster'); master_add_secondary_node --------------------------------------------------------------------- - 30 + 29 (1 row) SELECT nodeid AS worker_1_node FROM pg_dist_node WHERE nodeport=:worker_1_port \gset @@ -987,7 +930,7 @@ SELECT master_update_node(:worker_1_node, 'somehost', 9000); SELECT * FROM pg_dist_node WHERE nodeid = :worker_1_node; nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards --------------------------------------------------------------------- - 17 | 14 | somehost | 9000 | default | f | t | primary | default | f | t + 16 | 14 | somehost | 9000 | default | f | t | primary | default | f | t (1 row) -- cleanup @@ -1000,7 +943,7 @@ SELECT master_update_node(:worker_1_node, 'localhost', :worker_1_port); SELECT * FROM pg_dist_node WHERE nodeid = :worker_1_node; nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards --------------------------------------------------------------------- - 17 | 14 | localhost | 57637 | default | f | t | primary | default | f | t + 16 | 14 | localhost | 57637 | default | f | t | primary | default | f | t (1 row) SET client_min_messages TO ERROR; diff --git a/src/test/regress/expected/multi_metadata_sync.out b/src/test/regress/expected/multi_metadata_sync.out index 1fa8a4816..79e3c1f5c 100644 --- a/src/test/regress/expected/multi_metadata_sync.out +++ b/src/test/regress/expected/multi_metadata_sync.out @@ -273,7 +273,7 @@ 
SELECT * FROM pg_dist_node ORDER BY nodeid; (4 rows) SELECT * FROM pg_dist_partition WHERE logicalrelid::text LIKE 'mx_testing_schema%' ORDER BY logicalrelid; - logicalrelid | partmethod | partkey | colocationid | repmodel | autoconverted + logicalrelid | partmethod | partkey | colocationid | repmodel | autoconverted --------------------------------------------------------------------- mx_testing_schema.mx_test_table | h | {VAR :varno 1 :varattno 1 :vartype 23 :vartypmod -1 :varcollid 0 :varlevelsup 0 :varnoold 1 :varoattno 1 :location -1} | 0 | s | f (1 row) @@ -410,7 +410,7 @@ SELECT * FROM pg_dist_node ORDER BY nodeid; (4 rows) SELECT * FROM pg_dist_partition WHERE logicalrelid::text LIKE 'mx_testing_schema%' ORDER BY logicalrelid; - logicalrelid | partmethod | partkey | colocationid | repmodel | autoconverted + logicalrelid | partmethod | partkey | colocationid | repmodel | autoconverted --------------------------------------------------------------------- mx_testing_schema.mx_test_table | h | {VAR :varno 1 :varattno 1 :vartype 23 :vartypmod -1 :varcollid 0 :varlevelsup 0 :varnoold 1 :varoattno 1 :location -1} | 0 | s | f (1 row) @@ -1128,9 +1128,9 @@ INSERT INTO mx_table_with_sequence VALUES (2), (4); \c - - - :master_port -- check our small sequence values SELECT a, b, c FROM mx_table_with_small_sequence ORDER BY a,b,c; - a | b | c + a | b | c --------------------------------------------------------------------- - 0 | 1 | 1 + 0 | 1 | 1 (1 row) --check our bigint sequence values @@ -1594,11 +1594,13 @@ ERROR: Disabling localhost:xxxxx failed DETAIL: localhost:xxxxx is a metadata node, but is out of sync HINT: If you are using MX, try stop_metadata_sync_to_node(hostname, port) for nodes that are down before disabling them. SELECT master_remove_node('localhost', :worker_1_port); -ERROR: localhost:xxxxx is a metadata node, but is out of sync -HINT: If the node is up, wait until metadata gets synced to it and try again. +ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx +DETAIL: One of the table(s) that prevents the operation from completing successfully is mx_testing_schema.mx_test_table +HINT: To proceed, either drop the tables or use undistribute_table() function to convert them to local tables SELECT master_remove_node('localhost', :worker_2_port); -ERROR: localhost:xxxxx is a metadata node, but is out of sync -HINT: If the node is up, wait until metadata gets synced to it and try again.
+ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx +DETAIL: One of the table(s) that prevents the operation from completing successfully is mx_testing_schema.mx_test_table +HINT: To proceed, either drop the tables or use undistribute_table() function to convert them to local tables -- master_update_node should succeed SELECT nodeid AS worker_2_nodeid FROM pg_dist_node WHERE nodeport=:worker_2_port \gset SELECT master_update_node(:worker_2_nodeid, 'localhost', 4444); diff --git a/src/test/regress/expected/multi_remove_node_reference_table.out b/src/test/regress/expected/multi_remove_node_reference_table.out index 6b5c1b35d..c67693aab 100644 --- a/src/test/regress/expected/multi_remove_node_reference_table.out +++ b/src/test/regress/expected/multi_remove_node_reference_table.out @@ -214,8 +214,9 @@ WHERE colocationid IN (1 row) SELECT master_remove_node('localhost', :worker_1_port); -ERROR: cannot remove the last worker node because there are reference tables and it would cause data loss on reference tables -HINT: To proceed, either drop the reference tables or use undistribute_table() function to convert them to local tables +ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx +DETAIL: One of the table(s) that prevents the operation from completing successfully is public.remove_node_reference_table +HINT: To proceed, either drop the tables or use undistribute_table() function to convert them to local tables \c - - - :worker_1_port SELECT COUNT(*) FROM pg_dist_node WHERE nodeport = :worker_2_port; count diff --git a/src/test/regress/expected/single_node.out b/src/test/regress/expected/single_node.out index d23eb5600..fbc2e079b 100644 --- a/src/test/regress/expected/single_node.out +++ b/src/test/regress/expected/single_node.out @@ -2118,8 +2118,9 @@ SELECT pg_reload_conf(); SET client_min_messages TO error; -- cannot remove coordinator since a reference table exists on coordinator and no other worker nodes are added SELECT 1 FROM master_remove_node('localhost', :master_port); -ERROR: cannot remove the last worker node because there are reference tables and it would cause data loss on reference tables -HINT: To proceed, either drop the reference tables or use undistribute_table() function to convert them to local tables +ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx +DETAIL: One of the table(s) that prevents the operation from completing successfully is single_node.ref +HINT: To proceed, either drop the tables or use undistribute_table() function to convert them to local tables -- Cleanup DROP SCHEMA single_node CASCADE; -- Remove the coordinator again diff --git a/src/test/regress/expected/start_stop_metadata_sync.out b/src/test/regress/expected/start_stop_metadata_sync.out index c9f7def08..40582d2db 100644 --- a/src/test/regress/expected/start_stop_metadata_sync.out +++ b/src/test/regress/expected/start_stop_metadata_sync.out @@ -156,7 +156,7 @@ SELECT * FROM test_matview; (1 row) SELECT * FROM pg_dist_partition WHERE logicalrelid::text LIKE 'events%' ORDER BY logicalrelid::text; - logicalrelid | partmethod | partkey | colocationid | repmodel | autoconverted + logicalrelid | partmethod | partkey | colocationid | repmodel | autoconverted --------------------------------------------------------------------- events | h | {VAR :varno 1 :varattno 1 :vartype 1184 :vartypmod -1 :varcollid 0
:varlevelsup 0 :varnoold 1 :varoattno 1 :location -1} | 1390012 | s | f events_2021_feb | h | {VAR :varno 1 :varattno 1 :vartype 1184 :vartypmod -1 :varcollid 0 :varlevelsup 0 :varnoold 1 :varoattno 1 :location -1} | 1390012 | s | f @@ -433,7 +433,7 @@ SET client_min_messages TO ERROR; SELECT citus_activate_node('localhost', :worker_1_port); citus_activate_node --------------------------------------------------------------------- - 17 + 16 (1 row) \c - - - :worker_2_port diff --git a/src/test/regress/spec/isolation_create_table_vs_add_remove_node.spec b/src/test/regress/spec/isolation_create_table_vs_add_remove_node.spec index 2f91ae1a7..dbafd666b 100644 --- a/src/test/regress/spec/isolation_create_table_vs_add_remove_node.spec +++ b/src/test/regress/spec/isolation_create_table_vs_add_remove_node.spec @@ -68,7 +68,7 @@ step "s2-create-table-1" step "s2-create-table-2" { SET citus.shard_count TO 4; - SET citus.shard_replication_factor TO 2; + SET citus.shard_replication_factor TO 1; CREATE TABLE dist_table (x int, y int); SELECT create_distributed_table('dist_table', 'x'); } @@ -101,7 +101,7 @@ permutation "s1-add-node-2" "s1-begin" "s1-remove-node-2" "s2-create-table-1" "s permutation "s1-add-node-2" "s1-begin" "s1-remove-node-2" "s2-create-table-1" "s1-abort" "s1-show-placements" "s2-select" permutation "s1-add-node-2" "s2-begin" "s2-create-table-1" "s1-remove-node-2" "s2-commit" "s1-show-placements" "s2-select" -// session 1 removes a node, session 2 creates a distributed table with replication factor 2, should throw a sane error +// session 1 removes a node, session 2 creates a distributed table with replication factor 1; removing a node that still holds the only placement of a shard should throw a sane error permutation "s1-add-node-2" "s1-begin" "s1-remove-node-2" "s2-create-table-2" "s1-commit" "s2-select" permutation "s1-add-node-2" "s2-begin" "s2-create-table-2" "s1-remove-node-2" "s2-commit" "s2-select" diff --git a/src/test/regress/sql/failure_add_disable_node.sql b/src/test/regress/sql/failure_add_disable_node.sql index eff908ae2..2d66bf42f 100644 --- a/src/test/regress/sql/failure_add_disable_node.sql +++ b/src/test/regress/sql/failure_add_disable_node.sql @@ -63,8 +63,13 @@ FROM pg_dist_placement p JOIN pg_dist_shard s USING (shardid) WHERE s.logicalrelid = 'user_table'::regclass ORDER BY placementid; --- master_remove_node fails when there are shards on that worker -SELECT master_remove_node('localhost', :worker_2_proxy_port); +BEGIN; + -- master_remove_node succeeds because each shard that + -- has a placement on worker_2_proxy_port also has a + -- healthy placement on the other worker + -- (worker_1_port) + SELECT master_remove_node('localhost', :worker_2_proxy_port); +ROLLBACK; -- drop event table and re-run remove DROP TABLE event_table; diff --git a/src/test/regress/sql/multi_cluster_management.sql b/src/test/regress/sql/multi_cluster_management.sql index 0f7d10640..4c02a985d 100644 --- a/src/test/regress/sql/multi_cluster_management.sql +++ b/src/test/regress/sql/multi_cluster_management.sql @@ -168,20 +168,6 @@ SELECT create_distributed_table('cluster_management_test_colocated', 'col_1', 'h -- Check that colocated shards don't get created for shards that are to be deleted SELECT logicalrelid, shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement NATURAL JOIN pg_dist_shard ORDER BY shardstate, shardid; -SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4; -SELECT run_command_on_workers('SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4'); --- try to remove a
node with only to be deleted placements and see that removal succeeds -SELECT master_remove_node('localhost', :worker_2_port); -SELECT master_get_active_worker_nodes(); - -SELECT master_add_node('localhost', :worker_2_port, groupId := :worker_2_group); --- put removed placements back for testing purposes(in practice we wouldn't have only old placements for a shard) -INSERT INTO pg_dist_placement SELECT * FROM removed_placements; -SELECT run_command_on_workers('INSERT INTO pg_dist_placement SELECT * FROM removed_placements'); - -DROP TABLE removed_placements; -SELECT run_command_on_workers('DROP TABLE removed_placements'); - -- clean-up SELECT 1 FROM master_add_node('localhost', :worker_2_port); UPDATE pg_dist_placement SET shardstate=1 WHERE groupid=:worker_2_group;
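Note (not part of the patch): the new ErrorIfNodeContainsNonRemovablePlacements / PlacementHasActivePlacementOnAnotherGroup pair boils down to "a node can only be removed or disabled if every shard placement it holds still has an active placement on some other node group". A rough SQL sketch of that check, assuming only the standard pg_dist_placement and pg_dist_shard catalogs and an arbitrarily chosen group id of 2, might look like this:

-- Hypothetical illustration only: list shards whose sole active placement
-- (shardstate = 1) lives on group 2; any row returned here is what makes
-- the new check reject removing or disabling that node.
SELECT p.shardid, s.logicalrelid::regclass AS blocking_table
FROM pg_dist_placement p
JOIN pg_dist_shard s USING (shardid)
WHERE p.groupid = 2
  AND NOT EXISTS (
      SELECT 1
      FROM pg_dist_placement other
      WHERE other.shardid = p.shardid
        AND other.groupid <> p.groupid
        AND other.shardstate = 1
  );

When this query returns no rows for the group of the node being removed, reference table placements on that group are dropped and the removal proceeds, which is the behavior exercised by the updated failure_add_disable_node test.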