Merge pull request #5391 from citusdata/fix_multi_cluster_management_with_mx

Fix multi cluster management with metadata syncing enabled
Halil Ozan Akgül 2021-11-04 12:01:24 +03:00 committed by GitHub
commit 9bfff4ba8d
13 changed files with 258 additions and 52 deletions

@@ -838,7 +838,7 @@ ActivateNode(char *nodeName, int nodePort)
SetUpDistributedTableDependencies(newWorkerNode);
if (EnableMetadataSyncByDefault)
if (EnableMetadataSyncByDefault && NodeIsPrimary(newWorkerNode))
{
StartMetadataSyncToNode(nodeName, nodePort);
}
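
In short, the fix adds a NodeIsPrimary() check so that, when metadata syncing is enabled by default, ActivateNode() starts metadata sync only for primary nodes and no longer targets secondaries. A minimal sketch of the resulting control flow, using only the names visible in the hunk above (the rest of ActivateNode() is elided):

	/* Inside ActivateNode(): sync metadata only to primary nodes. */
	SetUpDistributedTableDependencies(newWorkerNode);
	if (EnableMetadataSyncByDefault && NodeIsPrimary(newWorkerNode))
	{
		/* After this change, secondary nodes never reach this call. */
		StartMetadataSyncToNode(nodeName, nodePort);
	}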

@@ -1,6 +1,4 @@
test: turn_mx_off
test: multi_cluster_management
test: turn_mx_on
test: multi_test_helpers multi_test_helpers_superuser columnar_test_helpers
test: multi_test_catalog_views

@@ -4,6 +4,7 @@ SET client_min_messages TO DEBUG;
SET citus.next_shard_id TO 1570000;
SET citus.replicate_reference_tables_on_activate TO off;
SELECT 1 FROM master_add_node('localhost', :master_port, groupid := 0);
NOTICE: localhost:xxxxx is the coordinator and already contains metadata, skipping syncing the metadata
?column?
---------------------------------------------------------------------
1

@@ -184,20 +184,21 @@ NOTICE: Replicating reference table "test_reference_table" to the node localhos
3
(1 row)
DROP TABLE test_reference_table;
SELECT master_disable_node('localhost', :worker_2_port);
NOTICE: Node localhost:xxxxx has active shard placements. Some queries may fail after this operation. Use SELECT citus_activate_node('localhost', 57638) to activate this node back.
master_disable_node
---------------------------------------------------------------------
(1 row)
DROP TABLE test_reference_table, cluster_management_test;
-- create users like this so results of community and enterprise are same
SET citus.enable_object_propagation TO ON;
SET client_min_messages TO ERROR;
CREATE USER non_super_user;
NOTICE: not propagating CREATE ROLE/USER commands to worker nodes
HINT: Connect to worker nodes directly to manually create all necessary users and roles.
CREATE USER node_metadata_user;
NOTICE: not propagating CREATE ROLE/USER commands to worker nodes
HINT: Connect to worker nodes directly to manually create all necessary users and roles.
SELECT 1 FROM run_command_on_workers('CREATE USER node_metadata_user');
?column?
---------------------------------------------------------------------
1
1
(2 rows)
RESET client_min_messages;
SET citus.enable_object_propagation TO OFF;
GRANT EXECUTE ON FUNCTION master_activate_node(text,int) TO node_metadata_user;
GRANT EXECUTE ON FUNCTION master_add_inactive_node(text,int,int,noderole,name) TO node_metadata_user;
GRANT EXECUTE ON FUNCTION master_add_node(text,int,int,noderole,name) TO node_metadata_user;
@@ -205,6 +206,27 @@ GRANT EXECUTE ON FUNCTION master_add_secondary_node(text,int,text,int,name) TO n
GRANT EXECUTE ON FUNCTION master_disable_node(text,int) TO node_metadata_user;
GRANT EXECUTE ON FUNCTION master_remove_node(text,int) TO node_metadata_user;
GRANT EXECUTE ON FUNCTION master_update_node(int,text,int,bool,int) TO node_metadata_user;
-- user needs permission for the pg_dist_node and pg_dist_local_group for metadata syncing
SELECT run_command_on_workers('GRANT ALL ON pg_dist_node TO node_metadata_user');
run_command_on_workers
---------------------------------------------------------------------
(localhost,57637,t,GRANT)
(localhost,57638,t,GRANT)
(2 rows)
SELECT run_command_on_workers('GRANT ALL ON pg_dist_local_group TO node_metadata_user');
run_command_on_workers
---------------------------------------------------------------------
(localhost,57637,t,GRANT)
(localhost,57638,t,GRANT)
(2 rows)
SELECT master_remove_node('localhost', :worker_2_port);
master_remove_node
---------------------------------------------------------------------
(1 row)
-- Removing public schema from pg_dist_object because it breaks the next tests
DELETE FROM citus.pg_dist_object WHERE objid = 'public'::regnamespace::oid;
-- try to manipulate node metadata via non-super user
@@ -227,13 +249,13 @@ ERROR: permission denied for function master_update_node
SET ROLE node_metadata_user;
SET citus.enable_object_propagation TO off; -- prevent master activate node to actually connect for this test
BEGIN;
SELECT 1 FROM master_add_inactive_node('localhost', :worker_2_port + 1);
SELECT 1 FROM master_add_inactive_node('localhost', :worker_2_port);
?column?
---------------------------------------------------------------------
1
(1 row)
SELECT 1 FROM master_activate_node('localhost', :worker_2_port + 1);
SELECT 1 FROM master_activate_node('localhost', :worker_2_port);
WARNING: citus.enable_object_propagation is off, not creating distributed objects on worker
DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. Newly activated nodes will not get these objects created
?column?
@@ -241,19 +263,19 @@ DETAIL: distributed objects are only kept in sync when citus.enable_object_prop
1
(1 row)
SELECT 1 FROM master_disable_node('localhost', :worker_2_port + 1);
SELECT 1 FROM master_disable_node('localhost', :worker_2_port);
?column?
---------------------------------------------------------------------
1
(1 row)
SELECT 1 FROM master_remove_node('localhost', :worker_2_port + 1);
SELECT 1 FROM master_remove_node('localhost', :worker_2_port);
?column?
---------------------------------------------------------------------
1
(1 row)
SELECT 1 FROM master_add_node('localhost', :worker_2_port + 1);
SELECT 1 FROM master_add_node('localhost', :worker_2_port);
WARNING: citus.enable_object_propagation is off, not creating distributed objects on worker
DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. Newly activated nodes will not get these objects created
?column?
@@ -277,15 +299,15 @@ SELECT nodename, nodeport, noderole FROM pg_dist_node ORDER BY nodeport;
nodename | nodeport | noderole
---------------------------------------------------------------------
localhost | 57637 | primary
localhost | 57639 | primary
localhost | 57640 | secondary
localhost | 57641 | primary
(4 rows)
(3 rows)
ABORT;
\c - postgres - :master_port
SET citus.next_shard_id TO 1220016;
SET citus.next_shard_id TO 1220000;
SET citus.enable_object_propagation TO off; -- prevent object propagation on add node during setup
SET citus.shard_count TO 16;
SET citus.shard_replication_factor TO 1;
SELECT master_get_active_worker_nodes();
master_get_active_worker_nodes
@@ -294,12 +316,21 @@ SELECT master_get_active_worker_nodes();
(1 row)
-- restore the node for next tests
SELECT * FROM master_activate_node('localhost', :worker_2_port);
SELECT * FROM master_add_node('localhost', :worker_2_port);
WARNING: citus.enable_object_propagation is off, not creating distributed objects on worker
DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. Newly activated nodes will not get these objects created
master_activate_node
master_add_node
---------------------------------------------------------------------
3
7
(1 row)
ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART WITH 7;
ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 6;
CREATE TABLE cluster_management_test (col_1 text, col_2 int);
SELECT create_distributed_table('cluster_management_test', 'col_1', 'hash');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- try to remove a node with active placements and see that node removal is failed
@@ -309,6 +340,14 @@ HINT: To proceed, either drop the distributed tables or use undistribute_table(
-- mark all placements in the candidate node as inactive
SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport=:worker_2_port \gset
UPDATE pg_dist_placement SET shardstate=3 WHERE groupid=:worker_2_group;
-- manual updates to pg_dist* tables are not automatically reflected to the workers, so we manually do that too
SELECT run_command_on_workers('UPDATE pg_dist_placement SET shardstate=3 WHERE groupid=' || :'worker_2_group');
run_command_on_workers
---------------------------------------------------------------------
(localhost,57637,t,"UPDATE 8")
(localhost,57638,t,"UPDATE 8")
(2 rows)
SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE nodeport=:worker_2_port;
shardid | shardstate | nodename | nodeport
---------------------------------------------------------------------
@@ -335,6 +374,13 @@ SELECT master_get_active_worker_nodes();
-- mark all placements in the candidate node as to be deleted
UPDATE pg_dist_placement SET shardstate=4 WHERE groupid=:worker_2_group;
SELECT run_command_on_workers('UPDATE pg_dist_placement SET shardstate=4 WHERE groupid=' || :'worker_2_group');
run_command_on_workers
---------------------------------------------------------------------
(localhost,57637,t,"UPDATE 8")
(localhost,57638,t,"UPDATE 8")
(2 rows)
SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE nodeport=:worker_2_port;
shardid | shardstate | nodename | nodeport
---------------------------------------------------------------------
@@ -395,6 +441,13 @@ SELECT logicalrelid, shardid, shardstate, nodename, nodeport FROM pg_dist_shard_
(24 rows)
SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4;
SELECT run_command_on_workers('SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4');
run_command_on_workers
---------------------------------------------------------------------
(localhost,57637,t,"SELECT 8")
(localhost,57638,t,"SELECT 8")
(2 rows)
-- try to remove a node with only to be deleted placements and see that removal succeeds
SELECT master_remove_node('localhost', :worker_2_port);
master_remove_node
@@ -411,6 +464,22 @@ SELECT master_get_active_worker_nodes();
SELECT master_add_node('localhost', :worker_2_port, groupId := :worker_2_group);
WARNING: citus.enable_object_propagation is off, not creating distributed objects on worker
DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. Newly activated nodes will not get these objects created
WARNING: could not find any shard placements for shardId 1220001
WARNING: could not find any shard placements for shardId 1220003
WARNING: could not find any shard placements for shardId 1220005
WARNING: could not find any shard placements for shardId 1220007
WARNING: could not find any shard placements for shardId 1220009
WARNING: could not find any shard placements for shardId 1220011
WARNING: could not find any shard placements for shardId 1220013
WARNING: could not find any shard placements for shardId 1220015
WARNING: could not find any shard placements for shardId 1220017
WARNING: could not find any shard placements for shardId 1220019
WARNING: could not find any shard placements for shardId 1220021
WARNING: could not find any shard placements for shardId 1220023
WARNING: could not find any shard placements for shardId 1220025
WARNING: could not find any shard placements for shardId 1220027
WARNING: could not find any shard placements for shardId 1220029
WARNING: could not find any shard placements for shardId 1220031
master_add_node
---------------------------------------------------------------------
7
@@ -418,6 +487,21 @@ DETAIL: distributed objects are only kept in sync when citus.enable_object_prop
-- put removed placements back for testing purposes(in practice we wouldn't have only old placements for a shard)
INSERT INTO pg_dist_placement SELECT * FROM removed_placements;
SELECT run_command_on_workers('INSERT INTO pg_dist_placement SELECT * FROM removed_placements');
run_command_on_workers
---------------------------------------------------------------------
(localhost,57637,f,"ERROR: duplicate key value violates unique constraint ""pg_dist_placement_placementid_index""")
(localhost,57638,t,"INSERT 0 8")
(2 rows)
DROP TABLE removed_placements;
SELECT run_command_on_workers('DROP TABLE removed_placements');
run_command_on_workers
---------------------------------------------------------------------
(localhost,57637,t,"DROP TABLE")
(localhost,57638,t,"DROP TABLE")
(2 rows)
-- clean-up
SELECT 1 FROM master_add_node('localhost', :worker_2_port);
?column?
@@ -426,23 +510,60 @@ SELECT 1 FROM master_add_node('localhost', :worker_2_port);
(1 row)
UPDATE pg_dist_placement SET shardstate=1 WHERE groupid=:worker_2_group;
SELECT run_command_on_workers('UPDATE pg_dist_placement SET shardstate=1 WHERE groupid=' || :'worker_2_group');
run_command_on_workers
---------------------------------------------------------------------
(localhost,57637,t,"UPDATE 8")
(localhost,57638,t,"UPDATE 8")
(2 rows)
SET client_min_messages TO ERROR;
DROP TABLE cluster_management_test_colocated;
RESET client_min_messages;
-- when there is no primary we should get a pretty error
UPDATE pg_dist_node SET noderole = 'secondary' WHERE nodeport=:worker_2_port;
SELECT * FROM cluster_management_test;
ERROR: node group 3 does not have a primary node
ERROR: node group 6 does not have a primary node
-- when there is no node at all in the group we should get a different error
DELETE FROM pg_dist_node WHERE nodeport=:worker_2_port;
SELECT run_command_on_workers('DELETE FROM pg_dist_node WHERE nodeport=' || :'worker_2_port');
run_command_on_workers
---------------------------------------------------------------------
(localhost,57637,t,"DELETE 1")
(1 row)
SELECT * FROM cluster_management_test;
ERROR: there is a shard placement in node group 3 but there are no nodes in that group
ERROR: there is a shard placement in node group 6 but there are no nodes in that group
-- clean-up
SELECT * INTO old_placements FROM pg_dist_placement WHERE groupid = :worker_2_group;
DELETE FROM pg_dist_placement WHERE groupid = :worker_2_group;
SELECT master_add_node('localhost', :worker_2_port) AS new_node \gset
WARNING: citus.enable_object_propagation is off, not creating distributed objects on worker
DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. Newly activated nodes will not get these objects created
WARNING: could not find any shard placements for shardId 1220001
WARNING: could not find any shard placements for shardId 1220003
WARNING: could not find any shard placements for shardId 1220005
WARNING: could not find any shard placements for shardId 1220007
WARNING: could not find any shard placements for shardId 1220009
WARNING: could not find any shard placements for shardId 1220011
WARNING: could not find any shard placements for shardId 1220013
WARNING: could not find any shard placements for shardId 1220015
INSERT INTO pg_dist_placement SELECT * FROM old_placements;
SELECT groupid AS new_group FROM pg_dist_node WHERE nodeid = :new_node \gset
UPDATE pg_dist_placement SET groupid = :new_group WHERE groupid = :worker_2_group;
SELECT run_command_on_workers('UPDATE pg_dist_placement SET groupid = ' || :'new_group' || ' WHERE groupid = ' || :'worker_2_group');
run_command_on_workers
---------------------------------------------------------------------
(localhost,57637,t,"UPDATE 8")
(localhost,57638,t,"UPDATE 0")
(2 rows)
SELECT start_metadata_sync_to_node('localhost', :worker_2_port);
start_metadata_sync_to_node
---------------------------------------------------------------------
(1 row)
-- test that you are allowed to remove secondary nodes even if there are placements
SELECT 1 FROM master_add_node('localhost', 9990, groupid => :new_group, noderole => 'secondary');
?column?
@@ -559,8 +680,8 @@ DETAIL: distributed objects are only kept in sync when citus.enable_object_prop
SELECT * FROM pg_dist_node ORDER BY nodeid;
nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards
---------------------------------------------------------------------
12 | 9 | localhost | 57637 | default | f | t | primary | default | f | t
13 | 10 | localhost | 57638 | default | f | t | primary | default | f | t
12 | 9 | localhost | 57637 | default | t | t | primary | default | t | t
13 | 10 | localhost | 57638 | default | t | t | primary | default | t | t
(2 rows)
-- check that mixed add/remove node commands work fine inside transaction
@@ -882,6 +1003,15 @@ SELECT * FROM pg_dist_node WHERE nodeid = :worker_1_node;
17 | 14 | localhost | 57637 | default | f | t | primary | default | f | t
(1 row)
SET client_min_messages TO ERROR;
SELECT start_metadata_sync_to_node(nodename, nodeport) FROM pg_dist_node WHERE isactive = 't' and noderole = 'primary';
start_metadata_sync_to_node
---------------------------------------------------------------------
(2 rows)
RESET client_min_messages;
SET citus.shard_replication_factor TO 1;
CREATE TABLE test_dist (x int, y int);
SELECT create_distributed_table('test_dist', 'x');
@@ -1070,3 +1200,10 @@ WHERE logicalrelid = 'test_dist_non_colocated'::regclass GROUP BY nodeport ORDER
SELECT * from master_set_node_property('localhost', :worker_2_port, 'bogusproperty', false);
ERROR: only the 'shouldhaveshards' property can be set using this function
DROP TABLE test_dist, test_ref, test_dist_colocated, test_dist_non_colocated;
-- verify that at the end of this file, all primary nodes have metadata synced
SELECT bool_and(hasmetadata) AND bool_and(metadatasynced) FROM pg_dist_node WHERE isactive = 't' and noderole = 'primary';
?column?
---------------------------------------------------------------------
t
(1 row)

@@ -1108,17 +1108,17 @@ SELECT * FROM public.table_placements_per_node;
(4 rows)
CALL citus_cleanup_orphaned_shards();
select * from pg_dist_placement;
select * from pg_dist_placement ORDER BY placementid;
placementid | shardid | shardstate | shardlength | groupid
---------------------------------------------------------------------
135 | 123023 | 1 | 0 | 14
138 | 123024 | 1 | 0 | 14
141 | 123027 | 1 | 0 | 14
142 | 123028 | 1 | 0 | 14
143 | 123021 | 1 | 0 | 16
144 | 123025 | 1 | 0 | 16
145 | 123022 | 1 | 0 | 16
146 | 123026 | 1 | 0 | 16
151 | 123023 | 1 | 0 | 14
154 | 123024 | 1 | 0 | 14
157 | 123027 | 1 | 0 | 14
158 | 123028 | 1 | 0 | 14
159 | 123021 | 1 | 0 | 16
160 | 123025 | 1 | 0 | 16
161 | 123022 | 1 | 0 | 16
162 | 123026 | 1 | 0 | 16
(8 rows)
-- Move all shards to worker1 again

@@ -0,0 +1,16 @@
ALTER SYSTEM SET citus.enable_metadata_sync_by_default TO ON;
SELECT pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
SET client_min_messages TO ERROR;
SELECT start_metadata_sync_to_node(nodename, nodeport) FROM pg_dist_node WHERE isactive = 't' and noderole = 'primary';
start_metadata_sync_to_node
---------------------------------------------------------------------
(3 rows)

@@ -0,0 +1,13 @@
ALTER SYSTEM SET citus.enable_metadata_sync_by_default TO ON;
SELECT pg_reload_conf();
pg_reload_conf
---------------------------------------------------------------------
t
(1 row)
SET client_min_messages TO ERROR;
SELECT start_metadata_sync_to_node(nodename, nodeport) FROM pg_dist_node WHERE isactive = 't' and noderole = 'primary';
start_metadata_sync_to_node
---------------------------------------------------------------------
(0 rows)

@@ -19,7 +19,9 @@ test: turn_mx_off
test: multi_extension
test: single_node
test: single_node_truncate
test: turn_mx_on
test: multi_cluster_management
test: turn_mx_off
# below tests are placed right after multi_cluster_management as we do
# remove/add node operations and we do not want any preexisting objects

@@ -17,6 +17,7 @@ test: turn_mx_off
test: multi_extension
test: multi_test_helpers multi_test_helpers_superuser
test: multi_mx_node_metadata
test: turn_mx_on
test: multi_cluster_management
test: multi_mx_function_table_reference
test: multi_test_catalog_views

@@ -1,6 +1,4 @@
test: turn_mx_off
test: multi_cluster_management
test: turn_mx_on
test: multi_test_helpers multi_test_helpers_superuser multi_create_fdw
test: multi_test_catalog_views

@@ -1,5 +1,5 @@
test: turn_mx_off
test: multi_cluster_management
test: turn_mx_off
test: multi_test_helpers multi_test_helpers_superuser
test: multi_test_catalog_views
test: shard_rebalancer_unit

@@ -76,11 +76,16 @@ SELECT master_disable_node('localhost.noexist', 2345);
-- drop the table without leaving a shard placement behind (messes up other tests)
SELECT master_activate_node('localhost', :worker_2_port);
DROP TABLE test_reference_table;
SELECT master_disable_node('localhost', :worker_2_port);
DROP TABLE test_reference_table, cluster_management_test;
-- create users like this so results of community and enterprise are same
SET citus.enable_object_propagation TO ON;
SET client_min_messages TO ERROR;
CREATE USER non_super_user;
CREATE USER node_metadata_user;
SELECT 1 FROM run_command_on_workers('CREATE USER node_metadata_user');
RESET client_min_messages;
SET citus.enable_object_propagation TO OFF;
GRANT EXECUTE ON FUNCTION master_activate_node(text,int) TO node_metadata_user;
GRANT EXECUTE ON FUNCTION master_add_inactive_node(text,int,int,noderole,name) TO node_metadata_user;
GRANT EXECUTE ON FUNCTION master_add_node(text,int,int,noderole,name) TO node_metadata_user;
@@ -89,6 +94,12 @@ GRANT EXECUTE ON FUNCTION master_disable_node(text,int) TO node_metadata_user;
GRANT EXECUTE ON FUNCTION master_remove_node(text,int) TO node_metadata_user;
GRANT EXECUTE ON FUNCTION master_update_node(int,text,int,bool,int) TO node_metadata_user;
-- user needs permission for the pg_dist_node and pg_dist_local_group for metadata syncing
SELECT run_command_on_workers('GRANT ALL ON pg_dist_node TO node_metadata_user');
SELECT run_command_on_workers('GRANT ALL ON pg_dist_local_group TO node_metadata_user');
SELECT master_remove_node('localhost', :worker_2_port);
-- Removing public schema from pg_dist_object because it breaks the next tests
DELETE FROM citus.pg_dist_object WHERE objid = 'public'::regnamespace::oid;
@@ -106,24 +117,31 @@ SELECT master_update_node(nodeid, 'localhost', :worker_2_port + 3) FROM pg_dist_
SET ROLE node_metadata_user;
SET citus.enable_object_propagation TO off; -- prevent master activate node to actually connect for this test
BEGIN;
SELECT 1 FROM master_add_inactive_node('localhost', :worker_2_port + 1);
SELECT 1 FROM master_activate_node('localhost', :worker_2_port + 1);
SELECT 1 FROM master_disable_node('localhost', :worker_2_port + 1);
SELECT 1 FROM master_remove_node('localhost', :worker_2_port + 1);
SELECT 1 FROM master_add_node('localhost', :worker_2_port + 1);
SELECT 1 FROM master_add_inactive_node('localhost', :worker_2_port);
SELECT 1 FROM master_activate_node('localhost', :worker_2_port);
SELECT 1 FROM master_disable_node('localhost', :worker_2_port);
SELECT 1 FROM master_remove_node('localhost', :worker_2_port);
SELECT 1 FROM master_add_node('localhost', :worker_2_port);
SELECT 1 FROM master_add_secondary_node('localhost', :worker_2_port + 2, 'localhost', :worker_2_port);
SELECT master_update_node(nodeid, 'localhost', :worker_2_port + 3) FROM pg_dist_node WHERE nodeport = :worker_2_port;
SELECT nodename, nodeport, noderole FROM pg_dist_node ORDER BY nodeport;
ABORT;
\c - postgres - :master_port
SET citus.next_shard_id TO 1220016;
SET citus.next_shard_id TO 1220000;
SET citus.enable_object_propagation TO off; -- prevent object propagation on add node during setup
SET citus.shard_count TO 16;
SET citus.shard_replication_factor TO 1;
SELECT master_get_active_worker_nodes();
-- restore the node for next tests
SELECT * FROM master_activate_node('localhost', :worker_2_port);
SELECT * FROM master_add_node('localhost', :worker_2_port);
ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART WITH 7;
ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 6;
CREATE TABLE cluster_management_test (col_1 text, col_2 int);
SELECT create_distributed_table('cluster_management_test', 'col_1', 'hash');
-- try to remove a node with active placements and see that node removal is failed
SELECT master_remove_node('localhost', :worker_2_port);
@@ -131,6 +149,8 @@ SELECT master_remove_node('localhost', :worker_2_port);
-- mark all placements in the candidate node as inactive
SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport=:worker_2_port \gset
UPDATE pg_dist_placement SET shardstate=3 WHERE groupid=:worker_2_group;
-- manual updates to pg_dist* tables are not automatically reflected to the workers, so we manually do that too
SELECT run_command_on_workers('UPDATE pg_dist_placement SET shardstate=3 WHERE groupid=' || :'worker_2_group');
SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE nodeport=:worker_2_port;
-- try to remove a node with only inactive placements and see that removal still fails
@@ -139,6 +159,7 @@ SELECT master_get_active_worker_nodes();
-- mark all placements in the candidate node as to be deleted
UPDATE pg_dist_placement SET shardstate=4 WHERE groupid=:worker_2_group;
SELECT run_command_on_workers('UPDATE pg_dist_placement SET shardstate=4 WHERE groupid=' || :'worker_2_group');
SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE nodeport=:worker_2_port;
CREATE TABLE cluster_management_test_colocated (col_1 text, col_2 int);
-- Check that we warn the user about colocated shards that will not get created for shards that do not have active placements
@@ -148,6 +169,7 @@ SELECT create_distributed_table('cluster_management_test_colocated', 'col_1', 'h
SELECT logicalrelid, shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement NATURAL JOIN pg_dist_shard ORDER BY shardstate, shardid;
SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4;
SELECT run_command_on_workers('SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4');
-- try to remove a node with only to be deleted placements and see that removal succeeds
SELECT master_remove_node('localhost', :worker_2_port);
SELECT master_get_active_worker_nodes();
@@ -155,10 +177,15 @@ SELECT master_get_active_worker_nodes();
SELECT master_add_node('localhost', :worker_2_port, groupId := :worker_2_group);
-- put removed placements back for testing purposes(in practice we wouldn't have only old placements for a shard)
INSERT INTO pg_dist_placement SELECT * FROM removed_placements;
SELECT run_command_on_workers('INSERT INTO pg_dist_placement SELECT * FROM removed_placements');
DROP TABLE removed_placements;
SELECT run_command_on_workers('DROP TABLE removed_placements');
-- clean-up
SELECT 1 FROM master_add_node('localhost', :worker_2_port);
UPDATE pg_dist_placement SET shardstate=1 WHERE groupid=:worker_2_group;
SELECT run_command_on_workers('UPDATE pg_dist_placement SET shardstate=1 WHERE groupid=' || :'worker_2_group');
SET client_min_messages TO ERROR;
DROP TABLE cluster_management_test_colocated;
RESET client_min_messages;
@@ -169,12 +196,18 @@ SELECT * FROM cluster_management_test;
-- when there is no node at all in the group we should get a different error
DELETE FROM pg_dist_node WHERE nodeport=:worker_2_port;
SELECT run_command_on_workers('DELETE FROM pg_dist_node WHERE nodeport=' || :'worker_2_port');
SELECT * FROM cluster_management_test;
-- clean-up
SELECT * INTO old_placements FROM pg_dist_placement WHERE groupid = :worker_2_group;
DELETE FROM pg_dist_placement WHERE groupid = :worker_2_group;
SELECT master_add_node('localhost', :worker_2_port) AS new_node \gset
INSERT INTO pg_dist_placement SELECT * FROM old_placements;
SELECT groupid AS new_group FROM pg_dist_node WHERE nodeid = :new_node \gset
UPDATE pg_dist_placement SET groupid = :new_group WHERE groupid = :worker_2_group;
SELECT run_command_on_workers('UPDATE pg_dist_placement SET groupid = ' || :'new_group' || ' WHERE groupid = ' || :'worker_2_group');
SELECT start_metadata_sync_to_node('localhost', :worker_2_port);
-- test that you are allowed to remove secondary nodes even if there are placements
SELECT 1 FROM master_add_node('localhost', 9990, groupid => :new_group, noderole => 'secondary');
@@ -238,6 +271,7 @@ SELECT nodename, nodeport FROM pg_dist_node WHERE nodename='localhost' AND nodep
\c - - - :worker_1_port
SELECT nodename, nodeport FROM pg_dist_node WHERE nodename='localhost' AND nodeport=:worker_2_port;
\c - - - :master_port
SET citus.enable_object_propagation TO off; -- prevent object propagation on add node during setup
SELECT master_remove_node(nodename, nodeport) FROM pg_dist_node;
@@ -350,6 +384,9 @@ SELECT * FROM pg_dist_node WHERE nodeid = :worker_1_node;
SELECT master_update_node(:worker_1_node, 'localhost', :worker_1_port);
SELECT * FROM pg_dist_node WHERE nodeid = :worker_1_node;
SET client_min_messages TO ERROR;
SELECT start_metadata_sync_to_node(nodename, nodeport) FROM pg_dist_node WHERE isactive = 't' and noderole = 'primary';
RESET client_min_messages;
SET citus.shard_replication_factor TO 1;
@@ -442,3 +479,6 @@ WHERE logicalrelid = 'test_dist_non_colocated'::regclass GROUP BY nodeport ORDER
SELECT * from master_set_node_property('localhost', :worker_2_port, 'bogusproperty', false);
DROP TABLE test_dist, test_ref, test_dist_colocated, test_dist_non_colocated;
-- verify that at the end of this file, all primary nodes have metadata synced
SELECT bool_and(hasmetadata) AND bool_and(metadatasynced) FROM pg_dist_node WHERE isactive = 't' and noderole = 'primary';

@@ -709,7 +709,7 @@ SELECT * FROM get_rebalance_progress();
SELECT * FROM public.table_placements_per_node;
CALL citus_cleanup_orphaned_shards();
select * from pg_dist_placement;
select * from pg_dist_placement ORDER BY placementid;
-- Move all shards to worker1 again