mirror of https://github.com/citusdata/citus.git
Merge pull request #5469 from citusdata/make_errors_generic
Generalize the error checks while removing node
pull/5493/head
commit 7b6588fec0
@@ -1255,26 +1255,6 @@ ShardLength(uint64 shardId)
}


/*
 * NodeGroupHasLivePlacements returns true if there is any placement
 * on the given node group which is not a SHARD_STATE_TO_DELETE placement.
 */
bool
NodeGroupHasLivePlacements(int32 groupId)
{
    List *shardPlacements = AllShardPlacementsOnNodeGroup(groupId);
    GroupShardPlacement *placement = NULL;
    foreach_ptr(placement, shardPlacements)
    {
        if (placement->shardState != SHARD_STATE_TO_DELETE)
        {
            return true;
        }
    }
    return false;
}


/*
 * NodeGroupHasShardPlacements returns whether any active shards are placed on the group
 */
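
For context, NodeGroupHasLivePlacements simply walks AllShardPlacementsOnNodeGroup and reports whether any placement on the group is not in the to-delete state. A roughly equivalent catalog-level check can be sketched in SQL; this is an illustration only, assuming shardstate 4 corresponds to SHARD_STATE_TO_DELETE (as the cluster_management test below suggests) and that :group_id is an illustrative psql variable for the group being inspected:

-- hypothetical check: does the node group still hold any placement that is not to-be-deleted?
SELECT EXISTS (
    SELECT 1
    FROM pg_dist_placement
    WHERE groupid = :group_id
      AND shardstate <> 4  -- assumed: 4 = SHARD_STATE_TO_DELETE
) AS has_live_placements;
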
@@ -91,8 +91,10 @@ typedef struct NodeMetadata

/* local function forward declarations */
static int ActivateNode(char *nodeName, int nodePort);
static bool CanRemoveReferenceTablePlacements(void);
static void RemoveNodeFromCluster(char *nodeName, int32 nodePort);
static void ErrorIfNodeContainsNonRemovablePlacements(WorkerNode *workerNode);
static bool PlacementHasActivePlacementOnAnotherGroup(GroupShardPlacement
                                                      *sourcePlacement);
static int AddNodeMetadata(char *nodeName, int32 nodePort, NodeMetadata
                           *nodeMetadata, bool *nodeAlreadyExists);
static WorkerNode * SetNodeState(char *nodeName, int32 nodePort, bool isActive);

@@ -1295,35 +1297,18 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort)
    WorkerNode *workerNode = ModifiableWorkerNode(nodeName, nodePort);
    if (NodeIsPrimary(workerNode))
    {
        if (CanRemoveReferenceTablePlacements())
        {
            ErrorIfNodeContainsNonRemovablePlacements(workerNode);

            /*
             * Delete reference table placements so they are not taken into account
             * for the check if there are placements after this.
             */
            DeleteAllReferenceTablePlacementsFromNodeGroup(workerNode->groupId);
        }
        if (NodeGroupHasLivePlacements(workerNode->groupId))
        {
            if (ActivePrimaryNodeCount() == 1 && ClusterHasReferenceTable())
            {
                ereport(ERROR, (errmsg(
                                    "cannot remove the last worker node because there are reference "
                                    "tables and it would cause data loss on reference tables"),
                                errhint(
                                    "To proceed, either drop the reference tables or use "
                                    "undistribute_table() function to convert them to local tables")));
            }
            ereport(ERROR, (errmsg("cannot remove the primary node of a node group "
                                   "which has shard placements"),
                            errhint(
                                "To proceed, either drop the distributed tables or use "
                                "undistribute_table() function to convert them to local tables")));
        }

        /*
         * Secondary nodes are read-only, never 2PC is used.
         * Hence, no items can be inserted to pg_dist_transaction for secondary nodes.
         * Hence, no items can be inserted to pg_dist_transaction
         * for secondary nodes.
         */
        DeleteWorkerTransactions(workerNode);
    }

@@ -1341,6 +1326,65 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort)
}


/*
 * ErrorIfNodeContainsNonRemovablePlacements throws an error if the input node
 * contains at least one placement that is the last active placement of its
 * shard.
 */
static void
ErrorIfNodeContainsNonRemovablePlacements(WorkerNode *workerNode)
{
    int32 groupId = workerNode->groupId;
    List *shardPlacements = AllShardPlacementsOnNodeGroup(groupId);
    GroupShardPlacement *placement = NULL;
    foreach_ptr(placement, shardPlacements)
    {
        if (!PlacementHasActivePlacementOnAnotherGroup(placement))
        {
            Oid relationId = RelationIdForShard(placement->shardId);
            char *qualifiedRelationName = generate_qualified_relation_name(relationId);

            ereport(ERROR, (errmsg("cannot remove or disable the node "
                                   "%s:%d because it contains "
                                   "the only shard placement for "
                                   "shard " UINT64_FORMAT, workerNode->workerName,
                                   workerNode->workerPort, placement->shardId),
                            errdetail("One of the table(s) that prevents the operation "
                                      "from completing successfully is %s",
                                      qualifiedRelationName),
                            errhint("To proceed, either drop the tables or use "
                                    "undistribute_table() function to convert "
                                    "them to local tables")));
        }
    }
}


/*
 * PlacementHasActivePlacementOnAnotherGroup returns true if there is at least
 * one more healthy placement of the input sourcePlacement on another group.
 */
static bool
PlacementHasActivePlacementOnAnotherGroup(GroupShardPlacement *sourcePlacement)
{
    uint64 shardId = sourcePlacement->shardId;
    List *activePlacementList = ActiveShardPlacementList(shardId);

    bool foundHealthyPlacementOnAnotherGroup = false;
    ShardPlacement *activePlacement = NULL;
    foreach_ptr(activePlacement, activePlacementList)
    {
        if (activePlacement->groupId != sourcePlacement->groupId)
        {
            foundHealthyPlacementOnAnotherGroup = true;
            break;
        }
    }

    return foundHealthyPlacementOnAnotherGroup;
}


/*
 * RemoveOldShardPlacementForNodeGroup removes all old shard placements
 * for the given node group from pg_dist_placement.
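
The last-placement test that PlacementHasActivePlacementOnAnotherGroup performs over ActiveShardPlacementList can be approximated against the catalog when troubleshooting. A minimal sketch, not the function's actual implementation; it assumes shardstate 1 marks an active placement and uses an illustrative :removed_group variable for the group of the node being removed:

-- shards whose only active placement sits on the group being removed
SELECT p.shardid
FROM pg_dist_placement p
WHERE p.groupid = :removed_group
  AND NOT EXISTS (
      SELECT 1
      FROM pg_dist_placement other
      WHERE other.shardid = p.shardid
        AND other.groupid <> p.groupid
        AND other.shardstate = 1  -- assumed: 1 = active/healthy placement
  );

Any shard returned by a query like this is one that would trip the new "cannot remove or disable the node" error.
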
@@ -1364,18 +1408,6 @@ RemoveOldShardPlacementForNodeGroup(int groupId)
}


/*
 * CanRemoveReferenceTablePlacements returns true if active primary
 * node count is more than 1, which means that even if we remove a node
 * we will still have some other node that has reference table placement.
 */
static bool
CanRemoveReferenceTablePlacements(void)
{
    return ActivePrimaryNodeCount() > 1;
}


/* CountPrimariesWithMetadata returns the number of primary nodes which have metadata. */
uint32
CountPrimariesWithMetadata(void)
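
CanRemoveReferenceTablePlacements boils down to a node count. The same question can be asked at the SQL level against pg_dist_node; the sketch below is only an approximation, since ActivePrimaryNodeCount() may apply additional filters internally:

-- removing one node still leaves a home for reference table placements
-- when more than one active primary exists
SELECT count(*) > 1 AS can_remove_reference_table_placements
FROM pg_dist_node
WHERE isactive AND noderole = 'primary';
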
@@ -207,7 +207,6 @@ extern int ShardIntervalCount(Oid relationId);
extern List * LoadShardList(Oid relationId);
extern ShardInterval * CopyShardInterval(ShardInterval *srcInterval);
extern uint64 ShardLength(uint64 shardId);
extern bool NodeGroupHasLivePlacements(int32 groupId);
extern bool NodeGroupHasShardPlacements(int32 groupId,
                                        bool onlyConsiderActivePlacements);
extern List * ActiveShardPlacementListOnGroup(uint64 shardId, int32 groupId);

@@ -27,7 +27,7 @@ SELECT citus_add_local_table_to_metadata('citus_local_table_1');

-- try to remove coordinator and observe failure as there exists a citus local table
SELECT 1 FROM master_remove_node('localhost', :master_port);
ERROR: cannot remove the primary node of a node group which has shard placements
ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx
DROP TABLE citus_local_table_1;
NOTICE: executing the command locally: DROP TABLE IF EXISTS citus_local_tables_test_schema.citus_local_table_1_xxxxx CASCADE
-- this should work now as the citus local table is dropped

@@ -122,10 +122,18 @@ ORDER BY placementid;
200000 | 1
(1 row)

-- master_remove_node fails when there are shards on that worker
SELECT master_remove_node('localhost', :worker_2_proxy_port);
ERROR: cannot remove the last worker node because there are reference tables and it would cause data loss on reference tables
HINT: To proceed, either drop the reference tables or use undistribute_table() function to convert them to local tables
BEGIN;
-- master_remove_node succeeds because the shards that exist on
-- worker_2_proxy_port also have healthy placements on the other
-- worker (worker_1_port)
SELECT master_remove_node('localhost', :worker_2_proxy_port);
master_remove_node
---------------------------------------------------------------------

(1 row)

ROLLBACK;
-- drop event table and re-run remove
DROP TABLE event_table;
SELECT master_remove_node('localhost', :worker_2_proxy_port);
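
Before leaning on that behavior, one way to confirm that every shard on the node about to be removed also has a placement elsewhere is to list placements per shard. A hedged sketch, with 'user_table' standing in for whichever distributed table is of interest:

-- show every placement of every shard of user_table, grouped by shard
SELECT s.shardid, p.nodename, p.nodeport, p.shardstate
FROM pg_dist_shard s
JOIN pg_dist_shard_placement p USING (shardid)
WHERE s.logicalrelid = 'user_table'::regclass
ORDER BY s.shardid, p.nodeport;
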
@@ -162,7 +162,7 @@ citus_add_local_table_to_metadata
step s2-remove-coordinator: SELECT master_remove_node('localhost', 57636); <waiting ...>
step s1-commit: COMMIT;
step s2-remove-coordinator: <... completed>
ERROR: cannot remove the primary node of a node group which has shard placements
ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx
step s2-commit: COMMIT;
master_remove_node
---------------------------------------------------------------------

@@ -369,7 +369,7 @@ step s2-commit:
    COMMIT;

step s1-remove-node-2: <... completed>
ERROR: cannot remove the primary node of a node group which has shard placements
ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx
step s1-show-placements:
    SELECT
        nodename, nodeport

@@ -429,7 +429,7 @@ master_remove_node

step s2-create-table-2:
    SET citus.shard_count TO 4;
    SET citus.shard_replication_factor TO 2;
    SET citus.shard_replication_factor TO 1;
    CREATE TABLE dist_table (x int, y int);
    SELECT create_distributed_table('dist_table', 'x');
<waiting ...>

@@ -437,11 +437,18 @@ step s1-commit:
    COMMIT;

step s2-create-table-2: <... completed>
ERROR: replication_factor (2) exceeds number of worker nodes (1)
create_distributed_table
---------------------------------------------------------------------

(1 row)

step s2-select:
    SELECT * FROM dist_table;

ERROR: relation "dist_table" does not exist
x|y
---------------------------------------------------------------------
(0 rows)

master_remove_node
---------------------------------------------------------------------

@@ -467,7 +474,7 @@ step s2-begin:

step s2-create-table-2:
    SET citus.shard_count TO 4;
    SET citus.shard_replication_factor TO 2;
    SET citus.shard_replication_factor TO 1;
    CREATE TABLE dist_table (x int, y int);
    SELECT create_distributed_table('dist_table', 'x');

@@ -483,7 +490,7 @@ step s2-commit:
    COMMIT;

step s1-remove-node-2: <... completed>
ERROR: cannot remove the primary node of a node group which has shard placements
ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx
step s2-select:
    SELECT * FROM dist_table;

@@ -596,7 +603,7 @@ step s2-commit:
    COMMIT;

step s1-remove-node-2: <... completed>
ERROR: cannot remove the primary node of a node group which has shard placements
ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx
step s2-select:
    SELECT * FROM dist_table;

@@ -28,13 +28,13 @@ step detector-dump-wait-edges:

waiting_transaction_num|blocking_transaction_num|blocking_transaction_waiting
---------------------------------------------------------------------
405| 404|f
406| 405|f
(1 row)

transactionnumber|waitingtransactionnumbers
---------------------------------------------------------------------
404|
405| 404
405|
406| 405
(2 rows)

step s1-abort:

@@ -84,16 +84,16 @@ step detector-dump-wait-edges:

waiting_transaction_num|blocking_transaction_num|blocking_transaction_waiting
---------------------------------------------------------------------
409| 408|f
410| 408|f
410| 409|t
410| 409|f
411| 409|f
411| 410|t
(3 rows)

transactionnumber|waitingtransactionnumbers
---------------------------------------------------------------------
408|
409|408
410|408,409
409|
410|409
411|409,410
(3 rows)

step s1-abort:

@@ -134,8 +134,9 @@ SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHER

-- try to remove a node with active placements and see that node removal fails
SELECT master_remove_node('localhost', :worker_2_port);
ERROR: cannot remove the primary node of a node group which has shard placements
HINT: To proceed, either drop the distributed tables or use undistribute_table() function to convert them to local tables
ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx
DETAIL: One of the table(s) that prevents the operation from completing successfully is public.cluster_management_test
HINT: To proceed, either drop the tables or use undistribute_table() function to convert them to local tables
SELECT master_get_active_worker_nodes();
master_get_active_worker_nodes
---------------------------------------------------------------------
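
As the HINT suggests, there are two ways to unblock the removal. A short, purely illustrative sketch of the undistribute_table() route for this test table follows; dropping the table works just as well when its data is disposable:

-- convert the distributed table back to a plain local table, then retry the removal
SELECT undistribute_table('public.cluster_management_test');
SELECT master_remove_node('localhost', :worker_2_port);
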
@@ -154,8 +155,9 @@ SELECT create_reference_table('test_reference_table');
INSERT INTO test_reference_table VALUES (1, '1');
-- try to remove a node with active placements and reference tables
SELECT citus_remove_node('localhost', :worker_2_port);
ERROR: cannot remove the primary node of a node group which has shard placements
HINT: To proceed, either drop the distributed tables or use undistribute_table() function to convert them to local tables
ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx
DETAIL: One of the table(s) that prevents the operation from completing successfully is public.cluster_management_test
HINT: To proceed, either drop the tables or use undistribute_table() function to convert them to local tables
-- try to disable a node with active placements and see that the node is removed
-- observe that a notification is displayed
SELECT master_disable_node('localhost', :worker_2_port);

@@ -335,8 +337,9 @@ SELECT create_distributed_table('cluster_management_test', 'col_1', 'hash');

-- try to remove a node with active placements and see that node removal fails
SELECT master_remove_node('localhost', :worker_2_port);
ERROR: cannot remove the primary node of a node group which has shard placements
HINT: To proceed, either drop the distributed tables or use undistribute_table() function to convert them to local tables
ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx
DETAIL: One of the table(s) that prevents the operation from completing successfully is public.cluster_management_test
HINT: To proceed, either drop the tables or use undistribute_table() function to convert them to local tables
-- mark all placements in the candidate node as inactive
SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport=:worker_2_port \gset
UPDATE pg_dist_placement SET shardstate=3 WHERE groupid=:worker_2_group;

@@ -363,8 +366,9 @@ SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHER

-- try to remove a node with only inactive placements and see that removal still fails
SELECT master_remove_node('localhost', :worker_2_port);
ERROR: cannot remove the primary node of a node group which has shard placements
HINT: To proceed, either drop the distributed tables or use undistribute_table() function to convert them to local tables
ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx
DETAIL: One of the table(s) that prevents the operation from completing successfully is public.cluster_management_test
HINT: To proceed, either drop the tables or use undistribute_table() function to convert them to local tables
SELECT master_get_active_worker_nodes();
master_get_active_worker_nodes
---------------------------------------------------------------------

@@ -440,68 +444,6 @@ SELECT logicalrelid, shardid, shardstate, nodename, nodeport FROM pg_dist_shard_
cluster_management_test | 1220015 | 4 | localhost | 57638
(24 rows)

SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4;
SELECT run_command_on_workers('SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4');
run_command_on_workers
---------------------------------------------------------------------
(localhost,57637,t,"SELECT 8")
(localhost,57638,t,"SELECT 8")
(2 rows)

-- try to remove a node with only to be deleted placements and see that removal succeeds
SELECT master_remove_node('localhost', :worker_2_port);
master_remove_node
---------------------------------------------------------------------

(1 row)

SELECT master_get_active_worker_nodes();
master_get_active_worker_nodes
---------------------------------------------------------------------
(localhost,57637)
(1 row)

SELECT master_add_node('localhost', :worker_2_port, groupId := :worker_2_group);
WARNING: citus.enable_object_propagation is off, not creating distributed objects on worker
DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. Newly activated nodes will not get these objects created
WARNING: could not find any shard placements for shardId 1220001
WARNING: could not find any shard placements for shardId 1220003
WARNING: could not find any shard placements for shardId 1220005
WARNING: could not find any shard placements for shardId 1220007
WARNING: could not find any shard placements for shardId 1220009
WARNING: could not find any shard placements for shardId 1220011
WARNING: could not find any shard placements for shardId 1220013
WARNING: could not find any shard placements for shardId 1220015
WARNING: could not find any shard placements for shardId 1220017
WARNING: could not find any shard placements for shardId 1220019
WARNING: could not find any shard placements for shardId 1220021
WARNING: could not find any shard placements for shardId 1220023
WARNING: could not find any shard placements for shardId 1220025
WARNING: could not find any shard placements for shardId 1220027
WARNING: could not find any shard placements for shardId 1220029
WARNING: could not find any shard placements for shardId 1220031
master_add_node
---------------------------------------------------------------------
7
(1 row)

-- put removed placements back for testing purposes (in practice we wouldn't have only old placements for a shard)
INSERT INTO pg_dist_placement SELECT * FROM removed_placements;
SELECT run_command_on_workers('INSERT INTO pg_dist_placement SELECT * FROM removed_placements');
run_command_on_workers
---------------------------------------------------------------------
(localhost,57637,f,"ERROR: duplicate key value violates unique constraint ""pg_dist_placement_placementid_index""")
(localhost,57638,t,"INSERT 0 8")
(2 rows)

DROP TABLE removed_placements;
SELECT run_command_on_workers('DROP TABLE removed_placements');
run_command_on_workers
---------------------------------------------------------------------
(localhost,57637,t,"DROP TABLE")
(localhost,57638,t,"DROP TABLE")
(2 rows)

-- clean-up
SELECT 1 FROM master_add_node('localhost', :worker_2_port);
?column?

@@ -572,8 +514,9 @@ SELECT 1 FROM master_add_node('localhost', 9990, groupid => :new_group, noderole
(1 row)

SELECT master_remove_node('localhost', :worker_2_port);
ERROR: cannot remove the primary node of a node group which has shard placements
HINT: To proceed, either drop the distributed tables or use undistribute_table() function to convert them to local tables
ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx
DETAIL: One of the table(s) that prevents the operation from completing successfully is public.cluster_management_test
HINT: To proceed, either drop the tables or use undistribute_table() function to convert them to local tables
SELECT master_remove_node('localhost', 9990);
master_remove_node
---------------------------------------------------------------------

@@ -674,14 +617,14 @@ WARNING: citus.enable_object_propagation is off, not creating distributed objec
DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. Newly activated nodes will not get these objects created
master_add_node | master_add_node
---------------------------------------------------------------------
12 | 13
11 | 12
(1 row)

SELECT * FROM pg_dist_node ORDER BY nodeid;
nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards
---------------------------------------------------------------------
12 | 9 | localhost | 57637 | default | t | t | primary | default | t | t
13 | 10 | localhost | 57638 | default | t | t | primary | default | t | t
11 | 9 | localhost | 57637 | default | t | t | primary | default | t | t
12 | 10 | localhost | 57638 | default | t | t | primary | default | t | t
(2 rows)

-- check that mixed add/remove node commands work fine inside transaction

@@ -881,13 +824,13 @@ SELECT 1 FROM master_add_inactive_node('localhost', 9996, groupid => :worker_2_g
SELECT master_add_inactive_node('localhost', 9999, groupid => :worker_2_group, nodecluster => 'olap', noderole => 'secondary');
master_add_inactive_node
---------------------------------------------------------------------
23
22
(1 row)

SELECT master_activate_node('localhost', 9999);
master_activate_node
---------------------------------------------------------------------
23
22
(1 row)

SELECT master_disable_node('localhost', 9999);

@@ -915,17 +858,17 @@ CONTEXT: PL/pgSQL function citus_internal.pg_dist_node_trigger_func() line XX a
INSERT INTO pg_dist_node (nodename, nodeport, groupid, noderole, nodecluster)
VALUES ('localhost', 5000, 1000, 'primary', 'olap');
ERROR: new row for relation "pg_dist_node" violates check constraint "primaries_are_only_allowed_in_the_default_cluster"
DETAIL: Failing row contains (25, 1000, localhost, 5000, default, f, t, primary, olap, f, t).
DETAIL: Failing row contains (24, 1000, localhost, 5000, default, f, t, primary, olap, f, t).
UPDATE pg_dist_node SET nodecluster = 'olap'
WHERE nodeport = :worker_1_port;
ERROR: new row for relation "pg_dist_node" violates check constraint "primaries_are_only_allowed_in_the_default_cluster"
DETAIL: Failing row contains (17, 14, localhost, 57637, default, f, t, primary, olap, f, t).
DETAIL: Failing row contains (16, 14, localhost, 57637, default, f, t, primary, olap, f, t).
-- check that you /can/ add a secondary node to a non-default cluster
SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport = :worker_2_port \gset
SELECT master_add_node('localhost', 8888, groupid => :worker_1_group, noderole => 'secondary', nodecluster=> 'olap');
master_add_node
---------------------------------------------------------------------
26
25
(1 row)

-- check that super-long cluster names are truncated

@@ -938,13 +881,13 @@ SELECT master_add_node('localhost', 8887, groupid => :worker_1_group, noderole =
);
master_add_node
---------------------------------------------------------------------
27
26
(1 row)

SELECT * FROM pg_dist_node WHERE nodeport=8887;
nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards
---------------------------------------------------------------------
27 | 14 | localhost | 8887 | default | f | t | secondary | thisisasixtyfourcharacterstringrepeatedfourtimestomake256chars. | f | t
26 | 14 | localhost | 8887 | default | f | t | secondary | thisisasixtyfourcharacterstringrepeatedfourtimestomake256chars. | f | t
(1 row)

-- don't remove the secondary and unavailable nodes, check that no commands are sent to

@@ -953,13 +896,13 @@ SELECT * FROM pg_dist_node WHERE nodeport=8887;
SELECT master_add_secondary_node('localhost', 9995, 'localhost', :worker_1_port);
master_add_secondary_node
---------------------------------------------------------------------
28
27
(1 row)

SELECT master_add_secondary_node('localhost', 9994, primaryname => 'localhost', primaryport => :worker_2_port);
master_add_secondary_node
---------------------------------------------------------------------
29
28
(1 row)

SELECT master_add_secondary_node('localhost', 9993, 'localhost', 2000);

@@ -967,7 +910,7 @@ ERROR: node at "localhost:xxxxx" does not exist
SELECT master_add_secondary_node('localhost', 9992, 'localhost', :worker_1_port, nodecluster => 'second-cluster');
master_add_secondary_node
---------------------------------------------------------------------
30
29
(1 row)

SELECT nodeid AS worker_1_node FROM pg_dist_node WHERE nodeport=:worker_1_port \gset

@@ -987,7 +930,7 @@ SELECT master_update_node(:worker_1_node, 'somehost', 9000);
SELECT * FROM pg_dist_node WHERE nodeid = :worker_1_node;
nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards
---------------------------------------------------------------------
17 | 14 | somehost | 9000 | default | f | t | primary | default | f | t
16 | 14 | somehost | 9000 | default | f | t | primary | default | f | t
(1 row)

-- cleanup

@@ -1000,7 +943,7 @@ SELECT master_update_node(:worker_1_node, 'localhost', :worker_1_port);
SELECT * FROM pg_dist_node WHERE nodeid = :worker_1_node;
nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards
---------------------------------------------------------------------
17 | 14 | localhost | 57637 | default | f | t | primary | default | f | t
16 | 14 | localhost | 57637 | default | f | t | primary | default | f | t
(1 row)

SET client_min_messages TO ERROR;

@@ -1594,11 +1594,13 @@ ERROR: Disabling localhost:xxxxx failed
DETAIL: localhost:xxxxx is a metadata node, but is out of sync
HINT: If you are using MX, try stop_metadata_sync_to_node(hostname, port) for nodes that are down before disabling them.
SELECT master_remove_node('localhost', :worker_1_port);
ERROR: localhost:xxxxx is a metadata node, but is out of sync
HINT: If the node is up, wait until metadata gets synced to it and try again.
ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx
DETAIL: One of the table(s) that prevents the operation from completing successfully is mx_testing_schema.mx_test_table
HINT: To proceed, either drop the tables or use undistribute_table() function to convert them to local tables
SELECT master_remove_node('localhost', :worker_2_port);
ERROR: localhost:xxxxx is a metadata node, but is out of sync
HINT: If the node is up, wait until metadata gets synced to it and try again.
ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx
DETAIL: One of the table(s) that prevents the operation from completing successfully is mx_testing_schema.mx_test_table
HINT: To proceed, either drop the tables or use undistribute_table() function to convert them to local tables
-- master_update_node should succeed
SELECT nodeid AS worker_2_nodeid FROM pg_dist_node WHERE nodeport=:worker_2_port \gset
SELECT master_update_node(:worker_2_nodeid, 'localhost', 4444);

@@ -214,8 +214,9 @@ WHERE colocationid IN
(1 row)

SELECT master_remove_node('localhost', :worker_1_port);
ERROR: cannot remove the last worker node because there are reference tables and it would cause data loss on reference tables
HINT: To proceed, either drop the reference tables or use undistribute_table() function to convert them to local tables
ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx
DETAIL: One of the table(s) that prevents the operation from completing successfully is public.remove_node_reference_table
HINT: To proceed, either drop the tables or use undistribute_table() function to convert them to local tables
\c - - - :worker_1_port
SELECT COUNT(*) FROM pg_dist_node WHERE nodeport = :worker_2_port;
count

@@ -2118,8 +2118,9 @@ SELECT pg_reload_conf();
SET client_min_messages TO error;
-- cannot remove coordinator since a reference table exists on coordinator and no other worker nodes are added
SELECT 1 FROM master_remove_node('localhost', :master_port);
ERROR: cannot remove the last worker node because there are reference tables and it would cause data loss on reference tables
HINT: To proceed, either drop the reference tables or use undistribute_table() function to convert them to local tables
ERROR: cannot remove or disable the node localhost:xxxxx because it contains the only shard placement for shard xxxxx
DETAIL: One of the table(s) that prevents the operation from completing successfully is single_node.ref
HINT: To proceed, either drop the tables or use undistribute_table() function to convert them to local tables
-- Cleanup
DROP SCHEMA single_node CASCADE;
-- Remove the coordinator again

@@ -433,7 +433,7 @@ SET client_min_messages TO ERROR;
SELECT citus_activate_node('localhost', :worker_1_port);
citus_activate_node
---------------------------------------------------------------------
17
16
(1 row)

\c - - - :worker_2_port

@@ -68,7 +68,7 @@ step "s2-create-table-1"
step "s2-create-table-2"
{
    SET citus.shard_count TO 4;
    SET citus.shard_replication_factor TO 2;
    SET citus.shard_replication_factor TO 1;
    CREATE TABLE dist_table (x int, y int);
    SELECT create_distributed_table('dist_table', 'x');
}

@@ -101,7 +101,7 @@ permutation "s1-add-node-2" "s1-begin" "s1-remove-node-2" "s2-create-table-1" "s
permutation "s1-add-node-2" "s1-begin" "s1-remove-node-2" "s2-create-table-1" "s1-abort" "s1-show-placements" "s2-select"
permutation "s1-add-node-2" "s2-begin" "s2-create-table-1" "s1-remove-node-2" "s2-commit" "s1-show-placements" "s2-select"

// session 1 removes a node, session 2 creates a distributed table with replication factor 2, should throw a sane error
// session 1 removes a node, session 2 creates a distributed table with replication factor 1, should throw a sane error
permutation "s1-add-node-2" "s1-begin" "s1-remove-node-2" "s2-create-table-2" "s1-commit" "s2-select"
permutation "s1-add-node-2" "s2-begin" "s2-create-table-2" "s1-remove-node-2" "s2-commit" "s2-select"

@@ -63,8 +63,13 @@ FROM pg_dist_placement p JOIN pg_dist_shard s USING (shardid)
WHERE s.logicalrelid = 'user_table'::regclass
ORDER BY placementid;

-- master_remove_node fails when there are shards on that worker
SELECT master_remove_node('localhost', :worker_2_proxy_port);
BEGIN;
-- master_remove_node succeeds because the shards that exist on
-- worker_2_proxy_port also have healthy placements on the other
-- worker (worker_1_port)
SELECT master_remove_node('localhost', :worker_2_proxy_port);
ROLLBACK;

-- drop event table and re-run remove
DROP TABLE event_table;

@@ -168,20 +168,6 @@ SELECT create_distributed_table('cluster_management_test_colocated', 'col_1', 'h
-- Check that colocated shards don't get created for shards that are to be deleted
SELECT logicalrelid, shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement NATURAL JOIN pg_dist_shard ORDER BY shardstate, shardid;

SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4;
SELECT run_command_on_workers('SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4');
-- try to remove a node with only to be deleted placements and see that removal succeeds
SELECT master_remove_node('localhost', :worker_2_port);
SELECT master_get_active_worker_nodes();

SELECT master_add_node('localhost', :worker_2_port, groupId := :worker_2_group);
-- put removed placements back for testing purposes (in practice we wouldn't have only old placements for a shard)
INSERT INTO pg_dist_placement SELECT * FROM removed_placements;
SELECT run_command_on_workers('INSERT INTO pg_dist_placement SELECT * FROM removed_placements');

DROP TABLE removed_placements;
SELECT run_command_on_workers('DROP TABLE removed_placements');

-- clean-up
SELECT 1 FROM master_add_node('localhost', :worker_2_port);
UPDATE pg_dist_placement SET shardstate=1 WHERE groupid=:worker_2_group;