mirror of https://github.com/citusdata/citus.git
Propagate isactive to metadata nodes.
parent
b9b7fd7660
commit
cb011bb30f
|
@ -273,29 +273,51 @@ master_disable_node(PG_FUNCTION_ARGS)
|
|||
WorkerNode *workerNode = ModifiableWorkerNode(nodeName, nodePort);
|
||||
bool isActive = false;
|
||||
bool onlyConsiderActivePlacements = false;
|
||||
MemoryContext savedContext = CurrentMemoryContext;
|
||||
|
||||
if (WorkerNodeIsPrimary(workerNode))
|
||||
PG_TRY();
|
||||
{
|
||||
/*
|
||||
* Delete reference table placements so they are not taken into account
|
||||
* for the check if there are placements after this
|
||||
*/
|
||||
DeleteAllReferenceTablePlacementsFromNodeGroup(workerNode->groupId);
|
||||
|
||||
if (NodeGroupHasShardPlacements(workerNode->groupId,
|
||||
onlyConsiderActivePlacements))
|
||||
if (WorkerNodeIsPrimary(workerNode))
|
||||
{
|
||||
ereport(NOTICE, (errmsg(
|
||||
"Node %s:%d has active shard placements. Some queries "
|
||||
"may fail after this operation. Use "
|
||||
"SELECT master_activate_node('%s', %d) to activate this "
|
||||
"node back.",
|
||||
workerNode->workerName, nodePort, workerNode->workerName,
|
||||
nodePort)));
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Delete reference table placements so they are not taken into account
|
||||
* for the check if there are placements after this.
|
||||
*/
|
||||
DeleteAllReferenceTablePlacementsFromNodeGroup(workerNode->groupId);
|
||||
|
||||
SetNodeState(nodeName, nodePort, isActive);
|
||||
if (NodeGroupHasShardPlacements(workerNode->groupId,
|
||||
onlyConsiderActivePlacements))
|
||||
{
|
||||
ereport(NOTICE, (errmsg(
|
||||
"Node %s:%d has active shard placements. Some queries "
|
||||
"may fail after this operation. Use "
|
||||
"SELECT master_activate_node('%s', %d) to activate this "
|
||||
"node back.",
|
||||
workerNode->workerName, nodePort,
|
||||
workerNode->workerName,
|
||||
nodePort)));
|
||||
}
|
||||
}
|
||||
|
||||
SetNodeState(nodeName, nodePort, isActive);
|
||||
}
|
||||
PG_CATCH();
|
||||
{
|
||||
ErrorData *edata = NULL;
|
||||
|
||||
/* CopyErrorData() requires (CurrentMemoryContext != ErrorContext) */
|
||||
MemoryContextSwitchTo(savedContext);
|
||||
edata = CopyErrorData();
|
||||
|
||||
ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("Disabling %s:%d failed", workerNode->workerName,
|
||||
nodePort),
|
||||
errdetail("%s", edata->message),
|
||||
errhint(
|
||||
"If you are using MX, try stop_metadata_sync_to_node(hostname, port) "
|
||||
"for nodes that are down before disabling them.")));
|
||||
}
|
||||
PG_END_TRY();
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
@ -350,7 +372,7 @@ SetUpDistributedTableDependencies(WorkerNode *newWorkerNode)
|
|||
newWorkerNode->workerPort);
|
||||
|
||||
/*
|
||||
* Let the maintanince deamon do the hard work of syncing the metadata.
|
||||
* Let the maintenance daemon do the hard work of syncing the metadata.
|
||||
* We prefer this because otherwise node activation might fail within
|
||||
* transaction blocks.
|
||||
*/
|
||||
|
@ -1129,8 +1151,8 @@ SetWorkerColumn(WorkerNode *workerNode, int columnIndex, Datum value)
|
|||
{
|
||||
case Anum_pg_dist_node_isactive:
|
||||
{
|
||||
metadataSyncCommand = ShouldHaveShardsUpdateCommand(workerNode->nodeId,
|
||||
DatumGetBool(value));
|
||||
metadataSyncCommand = NodeStateUpdateCommand(workerNode->nodeId,
|
||||
DatumGetBool(value));
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -406,8 +406,7 @@ CreateReferenceTableColocationId()
|
|||
/*
|
||||
* DeleteAllReferenceTablePlacementsFromNodeGroup function iterates over list of reference
|
||||
* tables and deletes all reference table placements from pg_dist_placement table
|
||||
* for given group. However, it does not modify replication factor of the colocation
|
||||
* group of reference tables. It is caller's responsibility to do that if it is necessary.
|
||||
* for given group.
|
||||
*/
|
||||
void
|
||||
DeleteAllReferenceTablePlacementsFromNodeGroup(int32 groupId)
|
||||
|
|
|
@ -210,7 +210,7 @@ SELECT nodeid, hasmetadata, metadatasynced FROM pg_dist_node;
|
|||
--------------------------------------------------------------------------
|
||||
-- Test updating a node when another node is in readonly-mode
|
||||
--------------------------------------------------------------------------
|
||||
SELECT FROM master_add_node('localhost', :worker_2_port) AS nodeid_2 \gset
|
||||
SELECT master_add_node('localhost', :worker_2_port) AS nodeid_2 \gset
|
||||
NOTICE: Replicating reference table "ref_table" to the node localhost:57638
|
||||
SELECT 1 FROM start_metadata_sync_to_node('localhost', :worker_2_port);
|
||||
?column?
|
||||
|
@ -403,8 +403,166 @@ SELECT verify_metadata('localhost', :worker_1_port),
|
|||
t | t
|
||||
(1 row)
|
||||
|
||||
--------------------------------------------------------------------------
|
||||
-- Test that changes in isactive are propagated to the metadata nodes
|
||||
--------------------------------------------------------------------------
|
||||
-- Don't drop the reference table so it has shards on the nodes being disabled
|
||||
DROP TABLE dist_table_1, dist_table_2;
|
||||
SELECT 1 FROM master_disable_node('localhost', :worker_2_port);
|
||||
?column?
|
||||
----------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT verify_metadata('localhost', :worker_1_port);
|
||||
verify_metadata
|
||||
-----------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT 1 FROM master_activate_node('localhost', :worker_2_port);
|
||||
NOTICE: Replicating reference table "ref_table" to the node localhost:57638
|
||||
?column?
|
||||
----------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT verify_metadata('localhost', :worker_1_port);
|
||||
verify_metadata
|
||||
-----------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
------------------------------------------------------------------------------------
|
||||
-- Test master_disable_node() when the node that is being disabled is actually down
|
||||
------------------------------------------------------------------------------------
|
||||
SELECT master_update_node(:nodeid_2, 'localhost', 1);
|
||||
master_update_node
|
||||
--------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT wait_until_metadata_sync();
|
||||
wait_until_metadata_sync
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- set metadatasynced so we try propagating metadata changes
|
||||
UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid IN (:nodeid_1, :nodeid_2);
|
||||
-- should error out
|
||||
SELECT 1 FROM master_disable_node('localhost', 1);
|
||||
ERROR: Disabling localhost:1 failed
|
||||
DETAIL: connection error: localhost:1
|
||||
HINT: If you are using MX, try stop_metadata_sync_to_node(hostname, port) for nodes that are down before disabling them.
|
||||
-- try again after stopping metadata sync
|
||||
SELECT stop_metadata_sync_to_node('localhost', 1);
|
||||
stop_metadata_sync_to_node
|
||||
----------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT 1 FROM master_disable_node('localhost', 1);
|
||||
?column?
|
||||
----------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT verify_metadata('localhost', :worker_1_port);
|
||||
verify_metadata
|
||||
-----------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT master_update_node(:nodeid_2, 'localhost', :worker_2_port);
|
||||
master_update_node
|
||||
--------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT wait_until_metadata_sync();
|
||||
wait_until_metadata_sync
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT 1 FROM master_activate_node('localhost', :worker_2_port);
|
||||
NOTICE: Replicating reference table "ref_table" to the node localhost:57638
|
||||
?column?
|
||||
----------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT verify_metadata('localhost', :worker_1_port);
|
||||
verify_metadata
|
||||
-----------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
------------------------------------------------------------------------------------
|
||||
-- Test master_disable_node() when the other node is down
|
||||
------------------------------------------------------------------------------------
|
||||
-- node 1 is down.
|
||||
SELECT master_update_node(:nodeid_1, 'localhost', 1);
|
||||
master_update_node
|
||||
--------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT wait_until_metadata_sync();
|
||||
wait_until_metadata_sync
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- set metadatasynced so we try propagating metadata changes
|
||||
UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid IN (:nodeid_1, :nodeid_2);
|
||||
-- should error out
|
||||
SELECT 1 FROM master_disable_node('localhost', :worker_2_port);
|
||||
ERROR: Disabling localhost:57638 failed
|
||||
DETAIL: connection error: localhost:1
|
||||
HINT: If you are using MX, try stop_metadata_sync_to_node(hostname, port) for nodes that are down before disabling them.
|
||||
-- try again after stopping metadata sync
|
||||
SELECT stop_metadata_sync_to_node('localhost', 1);
|
||||
stop_metadata_sync_to_node
|
||||
----------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT 1 FROM master_disable_node('localhost', :worker_2_port);
|
||||
?column?
|
||||
----------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
-- bring up node 1
|
||||
SELECT master_update_node(:nodeid_1, 'localhost', :worker_1_port);
|
||||
master_update_node
|
||||
--------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT wait_until_metadata_sync();
|
||||
wait_until_metadata_sync
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT 1 FROM master_activate_node('localhost', :worker_2_port);
|
||||
NOTICE: Replicating reference table "ref_table" to the node localhost:57638
|
||||
?column?
|
||||
----------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT verify_metadata('localhost', :worker_1_port);
|
||||
verify_metadata
|
||||
-----------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
-- cleanup
|
||||
DROP TABLE dist_table_1, ref_table, dist_table_2;
|
||||
DROP TABLE ref_table;
|
||||
TRUNCATE pg_dist_colocation;
|
||||
SELECT count(*) FROM (SELECT master_remove_node(nodename, nodeport) FROM pg_dist_node) t;
|
||||
count
|
|
@ -14,7 +14,7 @@
|
|||
# Tests around schema changes, these are run first, so there's no preexisting objects.
|
||||
# ---
|
||||
test: multi_extension
|
||||
test: multi_mx_master_update_node
|
||||
test: multi_mx_node_metadata
|
||||
test: multi_cluster_management
|
||||
test: multi_test_helpers
|
||||
|
||||
|
|
|
@ -121,7 +121,7 @@ SELECT nodeid, hasmetadata, metadatasynced FROM pg_dist_node;
|
|||
-- Test updating a node when another node is in readonly-mode
|
||||
--------------------------------------------------------------------------
|
||||
|
||||
SELECT FROM master_add_node('localhost', :worker_2_port) AS nodeid_2 \gset
|
||||
SELECT master_add_node('localhost', :worker_2_port) AS nodeid_2 \gset
|
||||
SELECT 1 FROM start_metadata_sync_to_node('localhost', :worker_2_port);
|
||||
|
||||
-- Create a table with shards on both nodes
|
||||
|
@ -197,8 +197,70 @@ SELECT nodeid, hasmetadata, metadatasynced FROM pg_dist_node ORDER BY nodeid;
|
|||
SELECT verify_metadata('localhost', :worker_1_port),
|
||||
verify_metadata('localhost', :worker_2_port);
|
||||
|
||||
--------------------------------------------------------------------------
|
||||
-- Test that changes in isactive are propagated to the metadata nodes
|
||||
--------------------------------------------------------------------------
|
||||
-- Don't drop the reference table so it has shards on the nodes being disabled
|
||||
DROP TABLE dist_table_1, dist_table_2;
|
||||
|
||||
SELECT 1 FROM master_disable_node('localhost', :worker_2_port);
|
||||
SELECT verify_metadata('localhost', :worker_1_port);
|
||||
|
||||
SELECT 1 FROM master_activate_node('localhost', :worker_2_port);
|
||||
SELECT verify_metadata('localhost', :worker_1_port);
|
||||
|
||||
------------------------------------------------------------------------------------
|
||||
-- Test master_disable_node() when the node that is being disabled is actually down
|
||||
------------------------------------------------------------------------------------
|
||||
SELECT master_update_node(:nodeid_2, 'localhost', 1);
|
||||
SELECT wait_until_metadata_sync();
|
||||
|
||||
-- set metadatasynced so we try propagating metadata changes
|
||||
UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid IN (:nodeid_1, :nodeid_2);
|
||||
|
||||
-- should error out
|
||||
SELECT 1 FROM master_disable_node('localhost', 1);
|
||||
|
||||
-- try again after stopping metadata sync
|
||||
SELECT stop_metadata_sync_to_node('localhost', 1);
|
||||
SELECT 1 FROM master_disable_node('localhost', 1);
|
||||
|
||||
SELECT verify_metadata('localhost', :worker_1_port);
|
||||
|
||||
SELECT master_update_node(:nodeid_2, 'localhost', :worker_2_port);
|
||||
SELECT wait_until_metadata_sync();
|
||||
|
||||
SELECT 1 FROM master_activate_node('localhost', :worker_2_port);
|
||||
SELECT verify_metadata('localhost', :worker_1_port);
|
||||
|
||||
|
||||
------------------------------------------------------------------------------------
|
||||
-- Test master_disable_node() when the other node is down
|
||||
------------------------------------------------------------------------------------
|
||||
-- node 1 is down.
|
||||
SELECT master_update_node(:nodeid_1, 'localhost', 1);
|
||||
SELECT wait_until_metadata_sync();
|
||||
|
||||
-- set metadatasynced so we try propagating metadata changes
|
||||
UPDATE pg_dist_node SET metadatasynced = TRUE WHERE nodeid IN (:nodeid_1, :nodeid_2);
|
||||
|
||||
-- should error out
|
||||
SELECT 1 FROM master_disable_node('localhost', :worker_2_port);
|
||||
|
||||
-- try again after stopping metadata sync
|
||||
SELECT stop_metadata_sync_to_node('localhost', 1);
|
||||
SELECT 1 FROM master_disable_node('localhost', :worker_2_port);
|
||||
|
||||
-- bring up node 1
|
||||
SELECT master_update_node(:nodeid_1, 'localhost', :worker_1_port);
|
||||
SELECT wait_until_metadata_sync();
|
||||
|
||||
SELECT 1 FROM master_activate_node('localhost', :worker_2_port);
|
||||
|
||||
SELECT verify_metadata('localhost', :worker_1_port);
|
||||
|
||||
-- cleanup
|
||||
DROP TABLE dist_table_1, ref_table, dist_table_2;
|
||||
DROP TABLE ref_table;
|
||||
TRUNCATE pg_dist_colocation;
|
||||
SELECT count(*) FROM (SELECT master_remove_node(nodename, nodeport) FROM pg_dist_node) t;
|
||||
ALTER SEQUENCE pg_catalog.pg_dist_groupid_seq RESTART :last_group_id;
|
Loading…
Reference in New Issue