diff --git a/src/backend/distributed/Makefile b/src/backend/distributed/Makefile index 7df3ad9cc..1648ec5fb 100644 --- a/src/backend/distributed/Makefile +++ b/src/backend/distributed/Makefile @@ -9,7 +9,7 @@ EXTVERSIONS = 5.0 5.0-1 5.0-2 \ 5.1-1 5.1-2 5.1-3 5.1-4 5.1-5 5.1-6 5.1-7 5.1-8 \ 5.2-1 5.2-2 5.2-3 5.2-4 \ 6.0-1 6.0-2 6.0-3 6.0-4 6.0-5 6.0-6 6.0-7 6.0-8 6.0-9 6.0-10 6.0-11 6.0-12 6.0-13 6.0-14 6.0-15 6.0-16 6.0-17 6.0-18 \ - 6.1-1 6.1-2 6.1-3 6.1-4 6.1-5 6.1-6 6.1-7 6.1-8 6.1-9 6.1-10 6.1-11 6.1-12 + 6.1-1 6.1-2 6.1-3 6.1-4 6.1-5 6.1-6 6.1-7 6.1-8 6.1-9 6.1-10 6.1-11 6.1-12 6.1-13 # All citus--*.sql files in the source directory DATA = $(patsubst $(citus_abs_srcdir)/%.sql,%.sql,$(wildcard $(citus_abs_srcdir)/$(EXTENSION)--*--*.sql)) @@ -119,6 +119,8 @@ $(EXTENSION)--6.1-11.sql: $(EXTENSION)--6.1-10.sql $(EXTENSION)--6.1-10--6.1-11. cat $^ > $@ $(EXTENSION)--6.1-12.sql: $(EXTENSION)--6.1-11.sql $(EXTENSION)--6.1-11--6.1-12.sql cat $^ > $@ +$(EXTENSION)--6.1-13.sql: $(EXTENSION)--6.1-12.sql $(EXTENSION)--6.1-12--6.1-13.sql + cat $^ > $@ NO_PGXS = 1 diff --git a/src/backend/distributed/citus--6.1-12--6.1-13.sql b/src/backend/distributed/citus--6.1-12--6.1-13.sql new file mode 100644 index 000000000..ee87fc732 --- /dev/null +++ b/src/backend/distributed/citus--6.1-12--6.1-13.sql @@ -0,0 +1,12 @@ +/* citus--6.1-12--6.1-13.sql */ + +SET search_path = 'pg_catalog'; + +CREATE FUNCTION master_disable_node(nodename text, nodeport integer) + RETURNS void + LANGUAGE C STRICT + AS 'MODULE_PATHNAME', $$master_disable_node$$; +COMMENT ON FUNCTION master_disable_node(nodename text, nodeport integer) + IS 'removes node from the cluster temporarily'; + +RESET search_path; diff --git a/src/backend/distributed/citus.control b/src/backend/distributed/citus.control index d40b2142d..a3c856e4f 100644 --- a/src/backend/distributed/citus.control +++ b/src/backend/distributed/citus.control @@ -1,6 +1,6 @@ # Citus extension comment = 'Citus distributed database' -default_version = 
'6.1-12' +default_version = '6.1-13' module_pathname = '$libdir/citus' relocatable = false schema = pg_catalog diff --git a/src/backend/distributed/utils/node_metadata.c b/src/backend/distributed/utils/node_metadata.c index ac8254955..263b11d74 100644 --- a/src/backend/distributed/utils/node_metadata.c +++ b/src/backend/distributed/utils/node_metadata.c @@ -48,6 +48,7 @@ int GroupSize = 1; /* local function forward declarations */ +static void RemoveNodeFromCluster(char *nodeName, int32 nodePort, bool forceRemove); static Datum AddNodeMetadata(char *nodeName, int32 nodePort, int32 groupId, char *nodeRack, bool hasMetadata, bool *nodeAlreadyExists); static Datum GenerateNodeTuple(WorkerNode *workerNode); @@ -63,6 +64,7 @@ static WorkerNode * TupleToWorkerNode(TupleDesc tupleDescriptor, HeapTuple heapT /* declarations for dynamic loading */ PG_FUNCTION_INFO_V1(master_add_node); PG_FUNCTION_INFO_V1(master_remove_node); +PG_FUNCTION_INFO_V1(master_disable_node); PG_FUNCTION_INFO_V1(master_initialize_node_metadata); PG_FUNCTION_INFO_V1(get_shard_id_for_distribution_column); @@ -112,30 +114,27 @@ master_remove_node(PG_FUNCTION_ARGS) text *nodeName = PG_GETARG_TEXT_P(0); int32 nodePort = PG_GETARG_INT32(1); char *nodeNameString = text_to_cstring(nodeName); - char *nodeDeleteCommand = NULL; - bool hasShardPlacements = false; - WorkerNode *workerNode = NULL; + bool forceRemove = false; + RemoveNodeFromCluster(nodeNameString, nodePort, forceRemove); - EnsureSchemaNode(); - EnsureSuperUser(); + PG_RETURN_VOID(); +} - hasShardPlacements = NodeHasActiveShardPlacements(nodeNameString, nodePort); - if (hasShardPlacements) - { - ereport(ERROR, (errmsg("you cannot remove a node which has active " - "shard placements"))); - } - workerNode = FindWorkerNode(nodeNameString, nodePort); - - DeleteNodeRow(nodeNameString, nodePort); - - nodeDeleteCommand = NodeDeleteCommand(workerNode->nodeId); - - /* make sure we don't have any open connections */ - CloseNodeConnections(nodeNameString, 
nodePort); - - SendCommandToWorkers(WORKERS_WITH_METADATA, nodeDeleteCommand); +/* + * master_disable_node function removes the provided node from the pg_dist_node table of + * the master node and all nodes with metadata regardless of the node having an active + * shard placement. + * The call to master_disable_node should be done by the super user. + */ +Datum +master_disable_node(PG_FUNCTION_ARGS) +{ + text *nodeName = PG_GETARG_TEXT_P(0); + int32 nodePort = PG_GETARG_INT32(1); + char *nodeNameString = text_to_cstring(nodeName); + bool forceRemove = true; + RemoveNodeFromCluster(nodeNameString, nodePort, forceRemove); PG_RETURN_VOID(); } @@ -340,6 +339,55 @@ ReadWorkerNodes() } +/* + * RemoveNodeFromCluster removes the provided node from the pg_dist_node table of + * the master node and all nodes with metadata. + * The function first calls EnsureSchemaNode() and EnsureSuperUser(). If there are + * active shard placements on the node, the function emits a NOTICE and removes the + * node when the forceRemove flag is set; it errors out otherwise. + */ +static void +RemoveNodeFromCluster(char *nodeName, int32 nodePort, bool forceRemove) +{ + char *nodeDeleteCommand = NULL; + bool hasShardPlacements = false; + WorkerNode *workerNode = NULL; + + EnsureSchemaNode(); + EnsureSuperUser(); + + hasShardPlacements = NodeHasActiveShardPlacements(nodeName, nodePort); + if (hasShardPlacements) + { + if (forceRemove) + { + ereport(NOTICE, (errmsg("Node %s:%d has active shard placements. Some " + "queries may fail after this operation. 
Use " + "select master_add_node('%s', %d) to add this " + "node back.", + nodeName, nodePort, nodeName, nodePort))); + } + else + { + ereport(ERROR, (errmsg("you cannot remove a node which has active " + "shard placements"), + errhint("Consider using master_disable_node."))); + } + } + + workerNode = FindWorkerNode(nodeName, nodePort); + + DeleteNodeRow(nodeName, nodePort); + + nodeDeleteCommand = NodeDeleteCommand(workerNode->nodeId); + + /* make sure we don't have any open connections */ + CloseNodeConnections(nodeName, nodePort); + + SendCommandToWorkers(WORKERS_WITH_METADATA, nodeDeleteCommand); +} + + /* * AddNodeMetadata checks the given node information and adds the specified node to the * pg_dist_node table of the master and workers with metadata. diff --git a/src/test/regress/expected/multi_cluster_management.out b/src/test/regress/expected/multi_cluster_management.out index b91119739..7d191d13b 100644 --- a/src/test/regress/expected/multi_cluster_management.out +++ b/src/test/regress/expected/multi_cluster_management.out @@ -57,13 +57,32 @@ SELECT master_get_active_worker_nodes(); (localhost,57637) (1 row) --- add some shard placements to the cluster +-- try to disable a node with no placements see that node is removed SELECT master_add_node('localhost', :worker_2_port); master_add_node --------------------------------- (3,3,localhost,57638,default,f) (1 row) +SELECT master_disable_node('localhost', :worker_2_port); + master_disable_node +--------------------- + +(1 row) + +SELECT master_get_active_worker_nodes(); + master_get_active_worker_nodes +-------------------------------- + (localhost,57637) +(1 row) + +-- add some shard placements to the cluster +SELECT master_add_node('localhost', :worker_2_port); + master_add_node +--------------------------------- + (4,4,localhost,57638,default,f) +(1 row) + CREATE TABLE cluster_management_test (col_1 text, col_2 int); SELECT master_create_distributed_table('cluster_management_test', 'col_1', 'hash'); 
master_create_distributed_table @@ -94,6 +113,7 @@ SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHER -- try to remove a node with active placements and see that node removal is failed SELECT master_remove_node('localhost', :worker_2_port); ERROR: you cannot remove a node which has active shard placements +HINT: Consider using master_disable_node. SELECT master_get_active_worker_nodes(); master_get_active_worker_nodes -------------------------------- @@ -101,6 +121,32 @@ SELECT master_get_active_worker_nodes(); (localhost,57637) (2 rows) +-- try to disable a node with active placements see that node is removed +-- observe that a notification is displayed +SELECT master_disable_node('localhost', :worker_2_port); +NOTICE: Node localhost:57638 has active shard placements. Some queries may fail after this operation. Use select master_add_node('localhost', 57638) to add this node back. + master_disable_node +--------------------- + +(1 row) + +SELECT master_get_active_worker_nodes(); + master_get_active_worker_nodes +-------------------------------- + (localhost,57637) +(1 row) + +-- restore the node for next tests +SELECT master_add_node('localhost', :worker_2_port); + master_add_node +--------------------------------- + (5,5,localhost,57638,default,f) +(1 row) + +-- try to remove a node with active placements and see that node removal is failed +SELECT master_remove_node('localhost', :worker_2_port); +ERROR: you cannot remove a node which has active shard placements +HINT: Consider using master_disable_node. 
-- mark all placements in the candidate node as inactive UPDATE pg_dist_shard_placement SET shardstate=3 WHERE nodeport=:worker_2_port; SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE nodeport=:worker_2_port; @@ -133,7 +179,7 @@ SELECT master_get_active_worker_nodes(); SELECT master_add_node('localhost', :worker_2_port); master_add_node --------------------------------- - (4,4,localhost,57638,default,f) + (6,6,localhost,57638,default,f) (1 row) UPDATE pg_dist_shard_placement SET shardstate=1 WHERE nodeport=:worker_2_port; @@ -149,7 +195,7 @@ UPDATE pg_dist_node SET hasmetadata=true WHERE nodeport=:worker_1_port; SELECT master_add_node('localhost', :worker_2_port); master_add_node --------------------------------- - (5,5,localhost,57638,default,f) + (7,7,localhost,57638,default,f) (1 row) \c - - - :worker_1_port @@ -178,7 +224,7 @@ UPDATE pg_dist_node SET hasmetadata=false WHERE nodeport=:worker_1_port; SELECT master_add_node('localhost', :worker_2_port); master_add_node --------------------------------- - (6,6,localhost,57638,default,f) + (8,8,localhost,57638,default,f) (1 row) \c - - - :worker_1_port @@ -206,16 +252,16 @@ SELECT * FROM pg_dist_node ORDER BY nodeid; SELECT master_add_node('localhost', :worker_1_port), master_add_node('localhost', :worker_2_port); - master_add_node | master_add_node ----------------------------------+--------------------------------- - (7,7,localhost,57637,default,f) | (8,8,localhost,57638,default,f) + master_add_node | master_add_node +---------------------------------+----------------------------------- + (9,9,localhost,57637,default,f) | (10,10,localhost,57638,default,f) (1 row) SELECT * FROM pg_dist_node ORDER BY nodeid; nodeid | groupid | nodename | nodeport | noderack | hasmetadata --------+---------+-----------+----------+----------+------------- - 7 | 7 | localhost | 57637 | default | f - 8 | 8 | localhost | 57638 | default | f + 9 | 9 | localhost | 57637 | default | f + 10 | 10 | localhost 
| 57638 | default | f (2 rows) -- check that mixed add/remove node commands work fine inside transaction @@ -227,9 +273,9 @@ SELECT master_remove_node('localhost', :worker_2_port); (1 row) SELECT master_add_node('localhost', :worker_2_port); - master_add_node ---------------------------------- - (9,9,localhost,57638,default,f) + master_add_node +----------------------------------- + (11,11,localhost,57638,default,f) (1 row) SELECT master_remove_node('localhost', :worker_2_port); @@ -249,7 +295,7 @@ BEGIN; SELECT master_add_node('localhost', :worker_2_port); master_add_node ----------------------------------- - (10,10,localhost,57638,default,f) + (12,12,localhost,57638,default,f) (1 row) SELECT master_remove_node('localhost', :worker_2_port); @@ -261,7 +307,7 @@ SELECT master_remove_node('localhost', :worker_2_port); SELECT master_add_node('localhost', :worker_2_port); master_add_node ----------------------------------- - (11,11,localhost,57638,default,f) + (13,13,localhost,57638,default,f) (1 row) COMMIT; @@ -289,13 +335,13 @@ SELECT master_remove_node(nodename, nodeport) FROM pg_dist_node; SELECT master_add_node('localhost', :worker_1_port); master_add_node ----------------------------------- - (12,12,localhost,57637,default,f) + (14,14,localhost,57637,default,f) (1 row) SELECT master_add_node('localhost', :worker_2_port); master_add_node ----------------------------------- - (13,13,localhost,57638,default,f) + (15,15,localhost,57638,default,f) (1 row) -- check that a distributed table can be created after adding a node in a transaction @@ -309,7 +355,7 @@ BEGIN; SELECT master_add_node('localhost', :worker_2_port); master_add_node ----------------------------------- - (14,14,localhost,57638,default,f) + (16,16,localhost,57638,default,f) (1 row) CREATE TABLE temp(col1 text, col2 int); diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index be7433906..e32b44512 100644 --- 
a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -70,6 +70,7 @@ ALTER EXTENSION citus UPDATE TO '6.1-9'; ALTER EXTENSION citus UPDATE TO '6.1-10'; ALTER EXTENSION citus UPDATE TO '6.1-11'; ALTER EXTENSION citus UPDATE TO '6.1-12'; +ALTER EXTENSION citus UPDATE TO '6.1-13'; -- ensure no objects were created outside pg_catalog SELECT COUNT(*) FROM pg_depend AS pgd, diff --git a/src/test/regress/sql/multi_cluster_management.sql b/src/test/regress/sql/multi_cluster_management.sql index dc765d02d..4e5e5ed2e 100644 --- a/src/test/regress/sql/multi_cluster_management.sql +++ b/src/test/regress/sql/multi_cluster_management.sql @@ -27,6 +27,11 @@ SELECT master_remove_node('localhost', :worker_2_port); -- verify that the node has been deleted SELECT master_get_active_worker_nodes(); +-- try to disable a node with no placements see that node is removed +SELECT master_add_node('localhost', :worker_2_port); +SELECT master_disable_node('localhost', :worker_2_port); +SELECT master_get_active_worker_nodes(); + -- add some shard placements to the cluster SELECT master_add_node('localhost', :worker_2_port); CREATE TABLE cluster_management_test (col_1 text, col_2 int); @@ -40,6 +45,17 @@ SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHER SELECT master_remove_node('localhost', :worker_2_port); SELECT master_get_active_worker_nodes(); +-- try to disable a node with active placements see that node is removed +-- observe that a notification is displayed +SELECT master_disable_node('localhost', :worker_2_port); +SELECT master_get_active_worker_nodes(); + +-- restore the node for next tests +SELECT master_add_node('localhost', :worker_2_port); + +-- try to remove a node with active placements and see that node removal is failed +SELECT master_remove_node('localhost', :worker_2_port); + -- mark all placements in the candidate node as inactive UPDATE pg_dist_shard_placement SET shardstate=3 WHERE 
nodeport=:worker_2_port; SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE nodeport=:worker_2_port; diff --git a/src/test/regress/sql/multi_extension.sql b/src/test/regress/sql/multi_extension.sql index 72e695767..42e864018 100644 --- a/src/test/regress/sql/multi_extension.sql +++ b/src/test/regress/sql/multi_extension.sql @@ -70,6 +70,7 @@ ALTER EXTENSION citus UPDATE TO '6.1-9'; ALTER EXTENSION citus UPDATE TO '6.1-10'; ALTER EXTENSION citus UPDATE TO '6.1-11'; ALTER EXTENSION citus UPDATE TO '6.1-12'; +ALTER EXTENSION citus UPDATE TO '6.1-13'; -- ensure no objects were created outside pg_catalog SELECT COUNT(*)