mirror of https://github.com/citusdata/citus.git
Not allow removing a single node with ref tables (#4127)
* Not allow removing a single node with ref tables We should not allow removing a node if it is the only node in the cluster and there is data on it. We have this check for distributed tables, but we didn't have it for reference tables. * Update src/test/regress/expected/single_node.out Co-authored-by: Onur Tirtir <onurcantirtir@gmail.com> * Update src/test/regress/sql/single_node.sql Co-authored-by: Onur Tirtir <onurcantirtir@gmail.com>pull/4176/head
parent
6e316d46a2
commit
dae2c69fd7
|
@ -3867,6 +3867,17 @@ ReferenceTableOidList()
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* ClusterHasReferenceTable returns true if the cluster has
|
||||
* any reference table.
|
||||
*/
|
||||
bool
|
||||
ClusterHasReferenceTable(void)
|
||||
{
|
||||
return list_length(ReferenceTableOidList()) > 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* InvalidateNodeRelationCacheCallback destroys the WorkerNodeHash when
|
||||
* any change happens on pg_dist_node table. It also set WorkerNodeHash to
|
||||
|
|
|
@ -86,6 +86,7 @@ typedef struct NodeMetadata
|
|||
|
||||
/* local function forward declarations */
|
||||
static int ActivateNode(char *nodeName, int nodePort);
|
||||
static bool CanRemoveReferenceTablePlacements(void);
|
||||
static void RemoveNodeFromCluster(char *nodeName, int32 nodePort);
|
||||
static int AddNodeMetadata(char *nodeName, int32 nodePort, NodeMetadata
|
||||
*nodeMetadata, bool *nodeAlreadyExists);
|
||||
|
@ -1053,19 +1054,32 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort)
|
|||
WorkerNode *workerNode = ModifiableWorkerNode(nodeName, nodePort);
|
||||
if (NodeIsPrimary(workerNode))
|
||||
{
|
||||
if (CanRemoveReferenceTablePlacements())
|
||||
{
|
||||
/*
|
||||
* Delete reference table placements so they are not taken into account
|
||||
* for the check if there are placements after this.
|
||||
*/
|
||||
DeleteAllReferenceTablePlacementsFromNodeGroup(workerNode->groupId);
|
||||
}
|
||||
bool onlyConsiderActivePlacements = false;
|
||||
|
||||
/*
|
||||
* Delete reference table placements so they are not taken into account
|
||||
* for the check if there are placements after this
|
||||
*/
|
||||
DeleteAllReferenceTablePlacementsFromNodeGroup(workerNode->groupId);
|
||||
|
||||
if (NodeGroupHasShardPlacements(workerNode->groupId,
|
||||
onlyConsiderActivePlacements))
|
||||
{
|
||||
ereport(ERROR, (errmsg("you cannot remove the primary node of a node group "
|
||||
"which has shard placements")));
|
||||
if (ClusterHasReferenceTable())
|
||||
{
|
||||
ereport(ERROR, (errmsg(
|
||||
"cannot remove the last worker node because there are reference "
|
||||
"tables and it would cause data loss on reference tables"),
|
||||
errhint(
|
||||
"To proceed, either drop the reference tables or use "
|
||||
"undistribute_table() function to convert them to local tables")));
|
||||
}
|
||||
ereport(ERROR, (errmsg("cannot remove the primary node of a node group "
|
||||
"which has shard placements"),
|
||||
errhint(
|
||||
"To proceed, either drop the distributed tables or use "
|
||||
"undistribute_table() function to convert them to local tables")));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1080,6 +1094,18 @@ RemoveNodeFromCluster(char *nodeName, int32 nodePort)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* CanRemoveReferenceTablePlacements returns true if active primary
|
||||
* node count is more than 1, which means that even if we remove a node
|
||||
* we will still have some other node that has reference table placement.
|
||||
*/
|
||||
static bool
|
||||
CanRemoveReferenceTablePlacements(void)
|
||||
{
|
||||
return ActivePrimaryNodeCount() > 1;
|
||||
}
|
||||
|
||||
|
||||
/* CountPrimariesWithMetadata returns the number of primary nodes which have metadata. */
|
||||
uint32
|
||||
CountPrimariesWithMetadata(void)
|
||||
|
|
|
@ -307,6 +307,17 @@ ActivePrimaryNonCoordinatorNodeCount(void)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* ActivePrimaryNodeCount returns the number of groups with a primary in the cluster.
|
||||
*/
|
||||
uint32
|
||||
ActivePrimaryNodeCount(void)
|
||||
{
|
||||
List *nodeList = ActivePrimaryNodeList(NoLock);
|
||||
return list_length(nodeList);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* ActiveReadableNonCoordinatorNodeCount returns the number of groups with a node we can read from.
|
||||
* This method excludes coordinator even if it is added as a worker.
|
||||
|
|
|
@ -162,6 +162,7 @@ extern void InvalidateForeignKeyGraph(void);
|
|||
extern void FlushDistTableCache(void);
|
||||
extern void InvalidateMetadataSystemCache(void);
|
||||
extern Datum DistNodeMetadata(void);
|
||||
extern bool ClusterHasReferenceTable(void);
|
||||
extern bool HasUniformHashDistribution(ShardInterval **shardIntervalArray,
|
||||
int shardIntervalArrayLength);
|
||||
extern bool HasUninitializedShardInterval(ShardInterval **sortedShardIntervalArray,
|
||||
|
|
|
@ -71,6 +71,7 @@ extern WorkerNode * WorkerGetRoundRobinCandidateNode(List *workerNodeList,
|
|||
uint32 placementIndex);
|
||||
extern WorkerNode * WorkerGetLocalFirstCandidateNode(List *currentNodeList);
|
||||
extern uint32 ActivePrimaryNonCoordinatorNodeCount(void);
|
||||
extern uint32 ActivePrimaryNodeCount(void);
|
||||
extern List * ActivePrimaryNonCoordinatorNodeList(LOCKMODE lockMode);
|
||||
extern List * ActivePrimaryNodeList(LOCKMODE lockMode);
|
||||
extern bool CoordinatorAddedAsWorkerNode(void);
|
||||
|
|
|
@ -27,7 +27,7 @@ SELECT create_citus_local_table('citus_local_table_1');
|
|||
|
||||
-- try to remove coordinator and observe failure as there exist a citus local table
|
||||
SELECT 1 FROM master_remove_node('localhost', :master_port);
|
||||
ERROR: you cannot remove the primary node of a node group which has shard placements
|
||||
ERROR: cannot remove the primary node of a node group which has shard placements
|
||||
DROP TABLE citus_local_table_1;
|
||||
NOTICE: executing the command locally: DROP TABLE IF EXISTS citus_local_tables_test_schema.citus_local_table_1_xxxxx CASCADE
|
||||
-- this should work now as the citus local table is dropped
|
||||
|
@ -559,7 +559,7 @@ FROM pg_dist_partition WHERE logicalrelid = 'citus_local_table_4'::regclass;
|
|||
(1 row)
|
||||
|
||||
SELECT column_name_to_column('citus_local_table_4', 'a');
|
||||
column_name_to_column
|
||||
column_name_to_column
|
||||
---------------------------------------------------------------------
|
||||
{VAR :varno 1 :varattno 1 :vartype 23 :vartypmod -1 :varcollid 0 :varlevelsup 0 :varnoold 1 :varoattno 1 :location -1}
|
||||
(1 row)
|
||||
|
|
|
@ -124,7 +124,8 @@ ORDER BY placementid;
|
|||
|
||||
-- master_remove_node fails when there are shards on that worker
|
||||
SELECT master_remove_node('localhost', :worker_2_proxy_port);
|
||||
ERROR: you cannot remove the primary node of a node group which has shard placements
|
||||
ERROR: cannot remove the last worker node because there are reference tables and it would cause data loss on reference tables
|
||||
HINT: To proceed, either drop the reference tables or use undistribute_table() function to convert them to local tables
|
||||
-- drop event table and re-run remove
|
||||
DROP TABLE event_table;
|
||||
SELECT master_remove_node('localhost', :worker_2_proxy_port);
|
||||
|
|
|
@ -126,7 +126,7 @@ create_citus_local_table
|
|||
step s2-remove-coordinator: SELECT master_remove_node('localhost', 57636); <waiting ...>
|
||||
step s1-commit: COMMIT;
|
||||
step s2-remove-coordinator: <... completed>
|
||||
error in steps s1-commit s2-remove-coordinator: ERROR: you cannot remove the primary node of a node group which has shard placements
|
||||
error in steps s1-commit s2-remove-coordinator: ERROR: cannot remove the primary node of a node group which has shard placements
|
||||
step s2-commit: COMMIT;
|
||||
master_remove_node
|
||||
|
||||
|
|
|
@ -299,7 +299,7 @@ step s2-commit:
|
|||
COMMIT;
|
||||
|
||||
step s1-remove-node-2: <... completed>
|
||||
error in steps s2-commit s1-remove-node-2: ERROR: you cannot remove the primary node of a node group which has shard placements
|
||||
error in steps s2-commit s1-remove-node-2: ERROR: cannot remove the primary node of a node group which has shard placements
|
||||
step s1-show-placements:
|
||||
SELECT
|
||||
nodename, nodeport
|
||||
|
@ -393,7 +393,7 @@ step s2-commit:
|
|||
COMMIT;
|
||||
|
||||
step s1-remove-node-2: <... completed>
|
||||
error in steps s2-commit s1-remove-node-2: ERROR: you cannot remove the primary node of a node group which has shard placements
|
||||
error in steps s2-commit s1-remove-node-2: ERROR: cannot remove the primary node of a node group which has shard placements
|
||||
step s2-select:
|
||||
SELECT * FROM dist_table;
|
||||
|
||||
|
@ -480,7 +480,7 @@ step s2-commit:
|
|||
COMMIT;
|
||||
|
||||
step s1-remove-node-2: <... completed>
|
||||
error in steps s2-commit s1-remove-node-2: ERROR: you cannot remove the primary node of a node group which has shard placements
|
||||
error in steps s2-commit s1-remove-node-2: ERROR: cannot remove the primary node of a node group which has shard placements
|
||||
step s2-select:
|
||||
SELECT * FROM dist_table;
|
||||
|
||||
|
|
|
@ -117,7 +117,8 @@ SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHER
|
|||
|
||||
-- try to remove a node with active placements and see that node removal is failed
|
||||
SELECT master_remove_node('localhost', :worker_2_port);
|
||||
ERROR: you cannot remove the primary node of a node group which has shard placements
|
||||
ERROR: cannot remove the primary node of a node group which has shard placements
|
||||
HINT: To proceed, either drop the distributed tables or use undistribute_table() function to convert them to local tables
|
||||
SELECT master_get_active_worker_nodes();
|
||||
master_get_active_worker_nodes
|
||||
---------------------------------------------------------------------
|
||||
|
@ -256,7 +257,8 @@ DETAIL: distributed objects are only kept in sync when citus.enable_object_prop
|
|||
|
||||
-- try to remove a node with active placements and see that node removal is failed
|
||||
SELECT master_remove_node('localhost', :worker_2_port);
|
||||
ERROR: you cannot remove the primary node of a node group which has shard placements
|
||||
ERROR: cannot remove the primary node of a node group which has shard placements
|
||||
HINT: To proceed, either drop the distributed tables or use undistribute_table() function to convert them to local tables
|
||||
-- mark all placements in the candidate node as inactive
|
||||
SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport=:worker_2_port \gset
|
||||
UPDATE pg_dist_placement SET shardstate=3 WHERE groupid=:worker_2_group;
|
||||
|
@ -275,7 +277,8 @@ SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHER
|
|||
|
||||
-- try to remove a node with only inactive placements and see that removal still fails
|
||||
SELECT master_remove_node('localhost', :worker_2_port);
|
||||
ERROR: you cannot remove the primary node of a node group which has shard placements
|
||||
ERROR: cannot remove the primary node of a node group which has shard placements
|
||||
HINT: To proceed, either drop the distributed tables or use undistribute_table() function to convert them to local tables
|
||||
SELECT master_get_active_worker_nodes();
|
||||
master_get_active_worker_nodes
|
||||
---------------------------------------------------------------------
|
||||
|
@ -337,7 +340,8 @@ SELECT logicalrelid, shardid, shardstate, nodename, nodeport FROM pg_dist_shard_
|
|||
|
||||
-- try to remove a node with only to be deleted placements and see that removal still fails
|
||||
SELECT master_remove_node('localhost', :worker_2_port);
|
||||
ERROR: you cannot remove the primary node of a node group which has shard placements
|
||||
ERROR: cannot remove the primary node of a node group which has shard placements
|
||||
HINT: To proceed, either drop the distributed tables or use undistribute_table() function to convert them to local tables
|
||||
SELECT master_get_active_worker_nodes();
|
||||
master_get_active_worker_nodes
|
||||
---------------------------------------------------------------------
|
||||
|
@ -378,7 +382,8 @@ SELECT 1 FROM master_add_node('localhost', 9990, groupid => :new_group, noderole
|
|||
(1 row)
|
||||
|
||||
SELECT master_remove_node('localhost', :worker_2_port);
|
||||
ERROR: you cannot remove the primary node of a node group which has shard placements
|
||||
ERROR: cannot remove the primary node of a node group which has shard placements
|
||||
HINT: To proceed, either drop the distributed tables or use undistribute_table() function to convert them to local tables
|
||||
SELECT master_remove_node('localhost', 9990);
|
||||
master_remove_node
|
||||
---------------------------------------------------------------------
|
||||
|
|
|
@ -199,6 +199,9 @@ WHERE colocationid IN
|
|||
1 | -1 | 0
|
||||
(1 row)
|
||||
|
||||
SELECT master_remove_node('localhost', :worker_1_port);
|
||||
ERROR: cannot remove the last worker node because there are reference tables and it would cause data loss on reference tables
|
||||
HINT: To proceed, either drop the reference tables or use undistribute_table() function to convert them to local tables
|
||||
\c - - - :worker_1_port
|
||||
SELECT COUNT(*) FROM pg_dist_node WHERE nodeport = :worker_2_port;
|
||||
count
|
||||
|
|
|
@ -739,6 +739,12 @@ SELECT create_distributed_function('call_delegation(int)', '$1', 'test');
|
|||
(1 row)
|
||||
|
||||
CALL call_delegation(1);
|
||||
DROP TABLE test CASCADE;
|
||||
NOTICE: drop cascades to view single_node_view
|
||||
-- cannot remove coordinator since a reference table exists on coordinator and no other worker nodes are added
|
||||
SELECT 1 FROM master_remove_node('localhost', :master_port);
|
||||
ERROR: cannot remove the last worker node because there are reference tables and it would cause data loss on reference tables
|
||||
HINT: To proceed, either drop the reference tables or use undistribute_table() function to convert them to local tables
|
||||
-- Cleanup
|
||||
SET client_min_messages TO WARNING;
|
||||
DROP SCHEMA single_node CASCADE;
|
||||
|
|
|
@ -104,6 +104,8 @@ WHERE colocationid IN
|
|||
FROM pg_dist_partition
|
||||
WHERE logicalrelid = 'remove_node_reference_table'::regclass);
|
||||
|
||||
SELECT master_remove_node('localhost', :worker_1_port);
|
||||
|
||||
\c - - - :worker_1_port
|
||||
|
||||
SELECT COUNT(*) FROM pg_dist_node WHERE nodeport = :worker_2_port;
|
||||
|
|
|
@ -361,6 +361,9 @@ END;$$;
|
|||
SELECT * FROM pg_dist_node;
|
||||
SELECT create_distributed_function('call_delegation(int)', '$1', 'test');
|
||||
CALL call_delegation(1);
|
||||
DROP TABLE test CASCADE;
|
||||
-- cannot remove coordinator since a reference table exists on coordinator and no other worker nodes are added
|
||||
SELECT 1 FROM master_remove_node('localhost', :master_port);
|
||||
|
||||
-- Cleanup
|
||||
SET client_min_messages TO WARNING;
|
||||
|
|
Loading…
Reference in New Issue