mirror of https://github.com/citusdata/citus.git
Do not warn unncessarily when a node is removed
In the past (pre-11), we allowed removing worker nodes that had active placements for replicated distributed table, without even checking if there are any other replicas of the same placement. However, with #5469, we prevent disabling nodes via a hard error when there is the last active placement of shard, as we do for reference tables. Note that otherwise, we'd allow users to lose data. As of today, the NOTICE is completely irrelevant.pull/5912/head
parent
b4dbd84743
commit
127450466e
|
@ -453,8 +453,7 @@ citus_disable_node(PG_FUNCTION_ARGS)
|
|||
{
|
||||
text *nodeNameText = PG_GETARG_TEXT_P(0);
|
||||
int32 nodePort = PG_GETARG_INT32(1);
|
||||
bool forceDisableNode = PG_GETARG_BOOL(2);
|
||||
bool synchronousDisableNode = PG_GETARG_BOOL(3);
|
||||
bool synchronousDisableNode = PG_GETARG_BOOL(2);
|
||||
|
||||
char *nodeName = text_to_cstring(nodeNameText);
|
||||
WorkerNode *workerNode = ModifiableWorkerNode(nodeName, nodePort);
|
||||
|
@ -465,8 +464,10 @@ citus_disable_node(PG_FUNCTION_ARGS)
|
|||
"isactive");
|
||||
|
||||
WorkerNode *firstWorkerNode = GetFirstPrimaryWorkerNode();
|
||||
if (!forceDisableNode && firstWorkerNode &&
|
||||
firstWorkerNode->nodeId == workerNode->nodeId)
|
||||
bool disablingFirstNode =
|
||||
(firstWorkerNode && firstWorkerNode->nodeId == workerNode->nodeId);
|
||||
|
||||
if (disablingFirstNode && !synchronousDisableNode)
|
||||
{
|
||||
/*
|
||||
* We sync metadata async and optionally in the background worker,
|
||||
|
@ -480,21 +481,21 @@ citus_disable_node(PG_FUNCTION_ARGS)
|
|||
* possibility of diverged shard placements for the same shard.
|
||||
*
|
||||
* To prevent that, we currently do not allow disabling the first
|
||||
* worker node.
|
||||
* worker node unless it is explicitly opted synchronous.
|
||||
*/
|
||||
ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("disabling the first worker node in the "
|
||||
"metadata is not allowed"),
|
||||
errhint("You can force disabling node, SELECT "
|
||||
"citus_disable_node('%s', %d, force:=true, "
|
||||
"synchronous:=true); "
|
||||
"Passing synchronous:=false might cause replicated shards "
|
||||
"to diverge.", workerNode->workerName, nodePort),
|
||||
"citus_disable_node('%s', %d, "
|
||||
"synchronous:=true);", workerNode->workerName,
|
||||
nodePort),
|
||||
errdetail("Citus uses the first worker node in the "
|
||||
"metadata for certain internal operations when "
|
||||
"replicated tables are modified. Synchronous mode "
|
||||
"ensures the first worker node is accurately "
|
||||
"visible to all nodes.")));
|
||||
"ensures that all nodes have the same view of the "
|
||||
"first worker node, which is used for certain "
|
||||
"locking operations.")));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -513,20 +514,6 @@ citus_disable_node(PG_FUNCTION_ARGS)
|
|||
* for any given shard.
|
||||
*/
|
||||
ErrorIfNodeContainsNonRemovablePlacements(workerNode);
|
||||
|
||||
bool onlyConsiderActivePlacements = false;
|
||||
if (NodeGroupHasShardPlacements(workerNode->groupId,
|
||||
onlyConsiderActivePlacements))
|
||||
{
|
||||
ereport(NOTICE, (errmsg(
|
||||
"Node %s:%d has active shard placements. Some queries "
|
||||
"may fail after this operation. Use "
|
||||
"SELECT citus_activate_node('%s', %d) to activate this "
|
||||
"node back.",
|
||||
workerNode->workerName, nodePort,
|
||||
workerNode->workerName,
|
||||
nodePort)));
|
||||
}
|
||||
}
|
||||
|
||||
TransactionModifiedNodeMetadata = true;
|
||||
|
@ -584,7 +571,8 @@ citus_disable_node(PG_FUNCTION_ARGS)
|
|||
|
||||
|
||||
/*
|
||||
* BlockDistributedQueriesOnMetadataNodes blocks all the
|
||||
* BlockDistributedQueriesOnMetadataNodes blocks all the modification queries on
|
||||
* all nodes. Hence, should be used with caution.
|
||||
*/
|
||||
static void
|
||||
BlockDistributedQueriesOnMetadataNodes(void)
|
||||
|
@ -592,36 +580,20 @@ BlockDistributedQueriesOnMetadataNodes(void)
|
|||
/* first, block on the coordinator */
|
||||
LockRelationOid(DistNodeRelationId(), ExclusiveLock);
|
||||
|
||||
List *workerList = ActivePrimaryNonCoordinatorNodeList(NoLock);
|
||||
WorkerNode *workerNodeToLock = NULL;
|
||||
foreach_ptr(workerNodeToLock, workerList)
|
||||
{
|
||||
if (!workerNodeToLock->hasMetadata)
|
||||
{
|
||||
/* non-metadata workers cannot run distributed queries, so skip */
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* Note that we might re-design this lock to be more granular than
|
||||
* pg_dist_node, scoping only for modifications on the replicated
|
||||
* tables. However, we currently do not have any such mechanism and
|
||||
* given that citus_disable_node() runs instantly, it seems acceptable
|
||||
* to block reads (or modifications on non-replicated tables) for
|
||||
* a while.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Note that we might re-design this lock to be more granular than
|
||||
* pg_dist_node, scoping only for modifications on the replicated
|
||||
* tables. However, we currently do not have any such mechanism and
|
||||
* given that citus_disable_node() runs instantly, it seems acceptable
|
||||
* to block reads (or modifications on non-replicated tables) for
|
||||
* a while.
|
||||
*/
|
||||
/* only superuser can disable node */
|
||||
Assert(superuser());
|
||||
|
||||
/* only superuser can disable node */
|
||||
Assert(superuser());
|
||||
|
||||
List *commandList =
|
||||
list_make1("LOCK TABLE pg_dist_node IN EXCLUSIVE MODE;");
|
||||
SendMetadataCommandListToWorkerInCoordinatedTransaction(
|
||||
workerNodeToLock->workerName,
|
||||
workerNodeToLock->workerPort,
|
||||
CurrentUserName(),
|
||||
commandList);
|
||||
}
|
||||
SendCommandToWorkersWithMetadata(
|
||||
"LOCK TABLE pg_catalog.pg_dist_node IN EXCLUSIVE MODE;");
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
DROP FUNCTION pg_catalog.citus_disable_node(nodename text, nodeport integer, force bool);
|
||||
CREATE FUNCTION pg_catalog.citus_disable_node(nodename text, nodeport integer, force bool default false, synchronous bool default false)
|
||||
CREATE FUNCTION pg_catalog.citus_disable_node(nodename text, nodeport integer, synchronous bool default false)
|
||||
RETURNS void
|
||||
LANGUAGE C STRICT
|
||||
AS 'MODULE_PATHNAME', $$citus_disable_node$$;
|
||||
COMMENT ON FUNCTION pg_catalog.citus_disable_node(nodename text, nodeport integer, force bool, synchronous bool)
|
||||
COMMENT ON FUNCTION pg_catalog.citus_disable_node(nodename text, nodeport integer, synchronous bool)
|
||||
IS 'removes node from the cluster temporarily';
|
||||
|
||||
REVOKE ALL ON FUNCTION pg_catalog.citus_disable_node(text,int, bool, bool) FROM PUBLIC;
|
||||
REVOKE ALL ON FUNCTION pg_catalog.citus_disable_node(text,int, bool) FROM PUBLIC;
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
DROP FUNCTION pg_catalog.citus_disable_node(nodename text, nodeport integer, force bool);
|
||||
CREATE FUNCTION pg_catalog.citus_disable_node(nodename text, nodeport integer, force bool default false, synchronous bool default false)
|
||||
CREATE FUNCTION pg_catalog.citus_disable_node(nodename text, nodeport integer, synchronous bool default false)
|
||||
RETURNS void
|
||||
LANGUAGE C STRICT
|
||||
AS 'MODULE_PATHNAME', $$citus_disable_node$$;
|
||||
COMMENT ON FUNCTION pg_catalog.citus_disable_node(nodename text, nodeport integer, force bool, synchronous bool)
|
||||
COMMENT ON FUNCTION pg_catalog.citus_disable_node(nodename text, nodeport integer, synchronous bool)
|
||||
IS 'removes node from the cluster temporarily';
|
||||
|
||||
REVOKE ALL ON FUNCTION pg_catalog.citus_disable_node(text,int, bool, bool) FROM PUBLIC;
|
||||
REVOKE ALL ON FUNCTION pg_catalog.citus_disable_node(text,int, bool) FROM PUBLIC;
|
||||
|
|
|
@ -54,7 +54,6 @@ ORDER BY placementid;
|
|||
(2 rows)
|
||||
|
||||
SELECT citus_disable_node('localhost', :worker_2_proxy_port, true);
|
||||
NOTICE: Node localhost:xxxxx has active shard placements. Some queries may fail after this operation. Use SELECT citus_activate_node('localhost', 9060) to activate this node back.
|
||||
citus_disable_node
|
||||
---------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -132,9 +132,9 @@ SELECT 1 FROM citus_activate_node('localhost', :worker_2_port);
|
|||
-- disable node with sync/force options
|
||||
SELECT citus_disable_node('localhost', :worker_1_port);
|
||||
ERROR: disabling the first worker node in the metadata is not allowed
|
||||
DETAIL: Citus uses the first worker node in the metadata for certain internal operations when replicated tables are modified. Synchronous mode ensures the first worker node is accurately visible to all nodes.
|
||||
HINT: You can force disabling node, SELECT citus_disable_node('localhost', 57637, force:=true, synchronous:=true); Passing synchronous:=false might cause replicated shards to diverge.
|
||||
SELECT citus_disable_node('localhost', :worker_1_port, force:=true, synchronous:=true);
|
||||
DETAIL: Citus uses the first worker node in the metadata for certain internal operations when replicated tables are modified. Synchronous mode ensures that all nodes have the same view of the first worker node, which is used for certain locking operations.
|
||||
HINT: You can force disabling node, SELECT citus_disable_node('localhost', 57637, synchronous:=true);
|
||||
SELECT citus_disable_node('localhost', :worker_1_port, synchronous:=true);
|
||||
citus_disable_node
|
||||
---------------------------------------------------------------------
|
||||
|
||||
|
@ -265,7 +265,7 @@ GRANT EXECUTE ON FUNCTION master_activate_node(text,int) TO node_metadata_user;
|
|||
GRANT EXECUTE ON FUNCTION master_add_inactive_node(text,int,int,noderole,name) TO node_metadata_user;
|
||||
GRANT EXECUTE ON FUNCTION master_add_node(text,int,int,noderole,name) TO node_metadata_user;
|
||||
GRANT EXECUTE ON FUNCTION master_add_secondary_node(text,int,text,int,name) TO node_metadata_user;
|
||||
GRANT EXECUTE ON FUNCTION citus_disable_node(text,int,bool,bool) TO node_metadata_user;
|
||||
GRANT EXECUTE ON FUNCTION citus_disable_node(text,int,bool) TO node_metadata_user;
|
||||
GRANT EXECUTE ON FUNCTION citus_disable_node_and_wait(text,int,bool) TO node_metadata_user;
|
||||
GRANT EXECUTE ON FUNCTION master_remove_node(text,int) TO node_metadata_user;
|
||||
GRANT EXECUTE ON FUNCTION master_update_node(int,text,int,bool,int) TO node_metadata_user;
|
||||
|
|
|
@ -1034,12 +1034,10 @@ SELECT * FROM multi_extension.print_extension_changes();
|
|||
-- Snapshot of state at 11.0-2
|
||||
ALTER EXTENSION citus UPDATE TO '11.0-2';
|
||||
SELECT * FROM multi_extension.print_extension_changes();
|
||||
previous_object | current_object
|
||||
previous_object | current_object
|
||||
---------------------------------------------------------------------
|
||||
function citus_disable_node(text,integer,boolean) void |
|
||||
| function citus_disable_node(text,integer,boolean,boolean) void
|
||||
| function citus_is_coordinator() boolean
|
||||
(3 rows)
|
||||
| function citus_is_coordinator() boolean
|
||||
(1 row)
|
||||
|
||||
-- Snapshot of state at 11.1-1
|
||||
ALTER EXTENSION citus UPDATE TO '11.1-1';
|
||||
|
|
|
@ -1780,8 +1780,8 @@ ERROR: localhost:xxxxx is a metadata node, but is out of sync
|
|||
HINT: If the node is up, wait until metadata gets synced to it and try again.
|
||||
SELECT citus_disable_node_and_wait('localhost', :worker_1_port);
|
||||
ERROR: disabling the first worker node in the metadata is not allowed
|
||||
DETAIL: Citus uses the first worker node in the metadata for certain internal operations when replicated tables are modified. Synchronous mode ensures the first worker node is accurately visible to all nodes.
|
||||
HINT: You can force disabling node, SELECT citus_disable_node('localhost', 57637, force:=true, synchronous:=true); Passing synchronous:=false might cause replicated shards to diverge.
|
||||
DETAIL: Citus uses the first worker node in the metadata for certain internal operations when replicated tables are modified. Synchronous mode ensures that all nodes have the same view of the first worker node, which is used for certain locking operations.
|
||||
HINT: You can force disabling node, SELECT citus_disable_node('localhost', 57637, synchronous:=true);
|
||||
CONTEXT: SQL statement "SELECT pg_catalog.citus_disable_node(nodename, nodeport, force)"
|
||||
PL/pgSQL function citus_disable_node_and_wait(text,integer,boolean) line XX at PERFORM
|
||||
SELECT citus_disable_node_and_wait('localhost', :worker_2_port);
|
||||
|
|
|
@ -642,7 +642,6 @@ SELECT verify_metadata('localhost', :worker_1_port),
|
|||
-- Don't drop the reference table so it has shards on the nodes being disabled
|
||||
DROP TABLE dist_table_1, dist_table_2;
|
||||
SELECT pg_catalog.citus_disable_node('localhost', :worker_2_port);
|
||||
NOTICE: Node localhost:xxxxx has active shard placements. Some queries may fail after this operation. Use SELECT citus_activate_node('localhost', 57638) to activate this node back.
|
||||
citus_disable_node
|
||||
---------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -20,7 +20,6 @@ INSERT INTO replicated SELECT i,i FROM generate_series(0,10)i;
|
|||
INSERT INTO ref SELECT i,i FROM generate_series(0,10)i;
|
||||
-- should be successfully disable node
|
||||
SELECT citus_disable_node('localhost', :worker_2_port, true);
|
||||
NOTICE: Node localhost:xxxxx has active shard placements. Some queries may fail after this operation. Use SELECT citus_activate_node('localhost', 57638) to activate this node back.
|
||||
citus_disable_node
|
||||
---------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -47,7 +47,7 @@ ORDER BY 1;
|
|||
function citus_coordinator_nodeid()
|
||||
function citus_copy_shard_placement(bigint,text,integer,text,integer,boolean,citus.shard_transfer_mode)
|
||||
function citus_create_restore_point(text)
|
||||
function citus_disable_node(text,integer,boolean,boolean)
|
||||
function citus_disable_node(text,integer,boolean)
|
||||
function citus_dist_local_group_cache_invalidate()
|
||||
function citus_dist_node_cache_invalidate()
|
||||
function citus_dist_object_cache_invalidate()
|
||||
|
|
|
@ -58,7 +58,7 @@ SELECT 1 FROM citus_activate_node('localhost', :worker_2_port);
|
|||
|
||||
-- disable node with sync/force options
|
||||
SELECT citus_disable_node('localhost', :worker_1_port);
|
||||
SELECT citus_disable_node('localhost', :worker_1_port, force:=true, synchronous:=true);
|
||||
SELECT citus_disable_node('localhost', :worker_1_port, synchronous:=true);
|
||||
SELECT run_command_on_workers($$SELECT array_agg(isactive ORDER BY nodeport) FROM pg_dist_node WHERE hasmetadata and noderole='primary'::noderole AND nodecluster='default'$$);
|
||||
SELECT 1 FROM citus_activate_node('localhost', :worker_1_port);
|
||||
|
||||
|
@ -112,7 +112,7 @@ GRANT EXECUTE ON FUNCTION master_activate_node(text,int) TO node_metadata_user;
|
|||
GRANT EXECUTE ON FUNCTION master_add_inactive_node(text,int,int,noderole,name) TO node_metadata_user;
|
||||
GRANT EXECUTE ON FUNCTION master_add_node(text,int,int,noderole,name) TO node_metadata_user;
|
||||
GRANT EXECUTE ON FUNCTION master_add_secondary_node(text,int,text,int,name) TO node_metadata_user;
|
||||
GRANT EXECUTE ON FUNCTION citus_disable_node(text,int,bool,bool) TO node_metadata_user;
|
||||
GRANT EXECUTE ON FUNCTION citus_disable_node(text,int,bool) TO node_metadata_user;
|
||||
GRANT EXECUTE ON FUNCTION citus_disable_node_and_wait(text,int,bool) TO node_metadata_user;
|
||||
GRANT EXECUTE ON FUNCTION master_remove_node(text,int) TO node_metadata_user;
|
||||
GRANT EXECUTE ON FUNCTION master_update_node(int,text,int,bool,int) TO node_metadata_user;
|
||||
|
|
Loading…
Reference in New Issue