Fix multi_cluster_management failures for metadata syncing

pull/5391/head
Halil Ozan Akgul 2021-10-19 09:25:30 +03:00
parent 19f28eabae
commit 91b377490b
13 changed files with 258 additions and 52 deletions

View File

@@ -838,7 +838,7 @@ ActivateNode(char *nodeName, int nodePort)
 	SetUpDistributedTableDependencies(newWorkerNode);
-	if (EnableMetadataSyncByDefault)
+	if (EnableMetadataSyncByDefault && NodeIsPrimary(newWorkerNode))
 	{
 		StartMetadataSyncToNode(nodeName, nodePort);
 	}
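An illustrative aside, not part of the diff: with the NodeIsPrimary() guard, default metadata sync is attempted only for primary workers. Assuming a running coordinator, the effect can be observed with a query over pg_dist_node (the same columns appear in the expected output further down):

-- hypothetical check: hasmetadata/metadatasynced should end up true only for primaries
SELECT nodename, nodeport, noderole, hasmetadata, metadatasynced
FROM pg_dist_node
ORDER BY nodeport;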

View File

@@ -1,6 +1,4 @@
-test: turn_mx_off
 test: multi_cluster_management
-test: turn_mx_on
 test: multi_test_helpers multi_test_helpers_superuser columnar_test_helpers
 test: multi_test_catalog_views

View File

@@ -4,6 +4,7 @@ SET client_min_messages TO DEBUG;
 SET citus.next_shard_id TO 1570000;
 SET citus.replicate_reference_tables_on_activate TO off;
 SELECT 1 FROM master_add_node('localhost', :master_port, groupid := 0);
+NOTICE: localhost:xxxxx is the coordinator and already contains metadata, skipping syncing the metadata
  ?column?
 ---------------------------------------------------------------------
  1

View File

@@ -184,20 +184,21 @@ NOTICE: Replicating reference table "test_reference_table" to the node localhos
  3
 (1 row)
-DROP TABLE test_reference_table;
-SELECT master_disable_node('localhost', :worker_2_port);
-NOTICE: Node localhost:xxxxx has active shard placements. Some queries may fail after this operation. Use SELECT citus_activate_node('localhost', 57638) to activate this node back.
- master_disable_node
----------------------------------------------------------------------
-(1 row)
+DROP TABLE test_reference_table, cluster_management_test;
+-- create users like this so results of community and enterprise are same
+SET citus.enable_object_propagation TO ON;
+SET client_min_messages TO ERROR;
 CREATE USER non_super_user;
-NOTICE: not propagating CREATE ROLE/USER commands to worker nodes
-HINT: Connect to worker nodes directly to manually create all necessary users and roles.
 CREATE USER node_metadata_user;
-NOTICE: not propagating CREATE ROLE/USER commands to worker nodes
-HINT: Connect to worker nodes directly to manually create all necessary users and roles.
+SELECT 1 FROM run_command_on_workers('CREATE USER node_metadata_user');
+ ?column?
+---------------------------------------------------------------------
+ 1
+ 1
+(2 rows)
+RESET client_min_messages;
+SET citus.enable_object_propagation TO OFF;
 GRANT EXECUTE ON FUNCTION master_activate_node(text,int) TO node_metadata_user;
 GRANT EXECUTE ON FUNCTION master_add_inactive_node(text,int,int,noderole,name) TO node_metadata_user;
 GRANT EXECUTE ON FUNCTION master_add_node(text,int,int,noderole,name) TO node_metadata_user;
@@ -205,6 +206,27 @@ GRANT EXECUTE ON FUNCTION master_add_secondary_node(text,int,text,int,name) TO n
 GRANT EXECUTE ON FUNCTION master_disable_node(text,int) TO node_metadata_user;
 GRANT EXECUTE ON FUNCTION master_remove_node(text,int) TO node_metadata_user;
 GRANT EXECUTE ON FUNCTION master_update_node(int,text,int,bool,int) TO node_metadata_user;
+-- user needs permission for the pg_dist_node and pg_dist_local_group for metadata syncing
+SELECT run_command_on_workers('GRANT ALL ON pg_dist_node TO node_metadata_user');
+ run_command_on_workers
+---------------------------------------------------------------------
+ (localhost,57637,t,GRANT)
+ (localhost,57638,t,GRANT)
+(2 rows)
+SELECT run_command_on_workers('GRANT ALL ON pg_dist_local_group TO node_metadata_user');
+ run_command_on_workers
+---------------------------------------------------------------------
+ (localhost,57637,t,GRANT)
+ (localhost,57638,t,GRANT)
+(2 rows)
+SELECT master_remove_node('localhost', :worker_2_port);
+ master_remove_node
+---------------------------------------------------------------------
+(1 row)
 -- Removing public schema from pg_dist_object because it breaks the next tests
 DELETE FROM citus.pg_dist_object WHERE objid = 'public'::regnamespace::oid;
 -- try to manipulate node metadata via non-super user
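A short aside, not part of the diff: the GRANTs above matter because node_metadata_user now triggers metadata sync, which writes to worker-side catalogs. A hypothetical way to confirm the grants landed on every worker is PostgreSQL's has_table_privilege() run through the same run_command_on_workers() helper:

-- hypothetical verification; assumes the worker-side GRANTs above already ran
SELECT run_command_on_workers(
    $$SELECT has_table_privilege('node_metadata_user', 'pg_dist_node', 'UPDATE')$$
);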
@@ -227,13 +249,13 @@ ERROR: permission denied for function master_update_node
 SET ROLE node_metadata_user;
 SET citus.enable_object_propagation TO off; -- prevent master activate node to actually connect for this test
 BEGIN;
-SELECT 1 FROM master_add_inactive_node('localhost', :worker_2_port + 1);
+SELECT 1 FROM master_add_inactive_node('localhost', :worker_2_port);
  ?column?
 ---------------------------------------------------------------------
  1
 (1 row)
-SELECT 1 FROM master_activate_node('localhost', :worker_2_port + 1);
+SELECT 1 FROM master_activate_node('localhost', :worker_2_port);
 WARNING: citus.enable_object_propagation is off, not creating distributed objects on worker
 DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. Newly activated nodes will not get these objects created
  ?column?
@@ -241,19 +263,19 @@ DETAIL: distributed objects are only kept in sync when citus.enable_object_prop
  1
 (1 row)
-SELECT 1 FROM master_disable_node('localhost', :worker_2_port + 1);
+SELECT 1 FROM master_disable_node('localhost', :worker_2_port);
  ?column?
 ---------------------------------------------------------------------
  1
 (1 row)
-SELECT 1 FROM master_remove_node('localhost', :worker_2_port + 1);
+SELECT 1 FROM master_remove_node('localhost', :worker_2_port);
  ?column?
 ---------------------------------------------------------------------
  1
 (1 row)
-SELECT 1 FROM master_add_node('localhost', :worker_2_port + 1);
+SELECT 1 FROM master_add_node('localhost', :worker_2_port);
 WARNING: citus.enable_object_propagation is off, not creating distributed objects on worker
 DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. Newly activated nodes will not get these objects created
  ?column?
@@ -277,15 +299,15 @@ SELECT nodename, nodeport, noderole FROM pg_dist_node ORDER BY nodeport;
  nodename | nodeport | noderole
 ---------------------------------------------------------------------
  localhost | 57637 | primary
- localhost | 57639 | primary
  localhost | 57640 | secondary
  localhost | 57641 | primary
-(4 rows)
+(3 rows)
 ABORT;
 \c - postgres - :master_port
-SET citus.next_shard_id TO 1220016;
+SET citus.next_shard_id TO 1220000;
 SET citus.enable_object_propagation TO off; -- prevent object propagation on add node during setup
+SET citus.shard_count TO 16;
 SET citus.shard_replication_factor TO 1;
 SELECT master_get_active_worker_nodes();
  master_get_active_worker_nodes
@@ -294,12 +316,21 @@ SELECT master_get_active_worker_nodes();
 (1 row)
 -- restore the node for next tests
-SELECT * FROM master_activate_node('localhost', :worker_2_port);
+SELECT * FROM master_add_node('localhost', :worker_2_port);
 WARNING: citus.enable_object_propagation is off, not creating distributed objects on worker
 DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. Newly activated nodes will not get these objects created
- master_activate_node
+ master_add_node
 ---------------------------------------------------------------------
- 3
+ 7
+(1 row)
+ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART WITH 7;
+ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 6;
+CREATE TABLE cluster_management_test (col_1 text, col_2 int);
+SELECT create_distributed_table('cluster_management_test', 'col_1', 'hash');
+ create_distributed_table
+---------------------------------------------------------------------
 (1 row)
 -- try to remove a node with active placements and see that node removal is failed
@@ -309,6 +340,14 @@ HINT: To proceed, either drop the distributed tables or use undistribute_table(
 -- mark all placements in the candidate node as inactive
 SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport=:worker_2_port \gset
 UPDATE pg_dist_placement SET shardstate=3 WHERE groupid=:worker_2_group;
+-- manual updates to pg_dist* tables are not automatically reflected to the workers, so we manually do that too
+SELECT run_command_on_workers('UPDATE pg_dist_placement SET shardstate=3 WHERE groupid=' || :'worker_2_group');
+ run_command_on_workers
+---------------------------------------------------------------------
+ (localhost,57637,t,"UPDATE 8")
+ (localhost,57638,t,"UPDATE 8")
+(2 rows)
 SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE nodeport=:worker_2_port;
  shardid | shardstate | nodename | nodeport
 ---------------------------------------------------------------------
@@ -335,6 +374,13 @@ SELECT master_get_active_worker_nodes();
 -- mark all placements in the candidate node as to be deleted
 UPDATE pg_dist_placement SET shardstate=4 WHERE groupid=:worker_2_group;
+SELECT run_command_on_workers('UPDATE pg_dist_placement SET shardstate=4 WHERE groupid=' || :'worker_2_group');
+ run_command_on_workers
+---------------------------------------------------------------------
+ (localhost,57637,t,"UPDATE 8")
+ (localhost,57638,t,"UPDATE 8")
+(2 rows)
 SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE nodeport=:worker_2_port;
  shardid | shardstate | nodename | nodeport
 ---------------------------------------------------------------------
@@ -395,6 +441,13 @@ SELECT logicalrelid, shardid, shardstate, nodename, nodeport FROM pg_dist_shard_
 (24 rows)
 SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4;
+SELECT run_command_on_workers('SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4');
+ run_command_on_workers
+---------------------------------------------------------------------
+ (localhost,57637,t,"SELECT 8")
+ (localhost,57638,t,"SELECT 8")
+(2 rows)
 -- try to remove a node with only to be deleted placements and see that removal succeeds
 SELECT master_remove_node('localhost', :worker_2_port);
  master_remove_node
@@ -411,6 +464,22 @@ SELECT master_get_active_worker_nodes();
 SELECT master_add_node('localhost', :worker_2_port, groupId := :worker_2_group);
 WARNING: citus.enable_object_propagation is off, not creating distributed objects on worker
 DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. Newly activated nodes will not get these objects created
+WARNING: could not find any shard placements for shardId 1220001
+WARNING: could not find any shard placements for shardId 1220003
+WARNING: could not find any shard placements for shardId 1220005
+WARNING: could not find any shard placements for shardId 1220007
+WARNING: could not find any shard placements for shardId 1220009
+WARNING: could not find any shard placements for shardId 1220011
+WARNING: could not find any shard placements for shardId 1220013
+WARNING: could not find any shard placements for shardId 1220015
+WARNING: could not find any shard placements for shardId 1220017
+WARNING: could not find any shard placements for shardId 1220019
+WARNING: could not find any shard placements for shardId 1220021
+WARNING: could not find any shard placements for shardId 1220023
+WARNING: could not find any shard placements for shardId 1220025
+WARNING: could not find any shard placements for shardId 1220027
+WARNING: could not find any shard placements for shardId 1220029
+WARNING: could not find any shard placements for shardId 1220031
  master_add_node
 ---------------------------------------------------------------------
  7
@@ -418,6 +487,21 @@ DETAIL: distributed objects are only kept in sync when citus.enable_object_prop
 -- put removed placements back for testing purposes(in practice we wouldn't have only old placements for a shard)
 INSERT INTO pg_dist_placement SELECT * FROM removed_placements;
+SELECT run_command_on_workers('INSERT INTO pg_dist_placement SELECT * FROM removed_placements');
+ run_command_on_workers
+---------------------------------------------------------------------
+ (localhost,57637,f,"ERROR: duplicate key value violates unique constraint ""pg_dist_placement_placementid_index""")
+ (localhost,57638,t,"INSERT 0 8")
+(2 rows)
+DROP TABLE removed_placements;
+SELECT run_command_on_workers('DROP TABLE removed_placements');
+ run_command_on_workers
+---------------------------------------------------------------------
+ (localhost,57637,t,"DROP TABLE")
+ (localhost,57638,t,"DROP TABLE")
+(2 rows)
 -- clean-up
 SELECT 1 FROM master_add_node('localhost', :worker_2_port);
  ?column?
@@ -426,23 +510,60 @@ SELECT 1 FROM master_add_node('localhost', :worker_2_port);
 (1 row)
 UPDATE pg_dist_placement SET shardstate=1 WHERE groupid=:worker_2_group;
+SELECT run_command_on_workers('UPDATE pg_dist_placement SET shardstate=1 WHERE groupid=' || :'worker_2_group');
+ run_command_on_workers
+---------------------------------------------------------------------
+ (localhost,57637,t,"UPDATE 8")
+ (localhost,57638,t,"UPDATE 8")
+(2 rows)
 SET client_min_messages TO ERROR;
 DROP TABLE cluster_management_test_colocated;
 RESET client_min_messages;
 -- when there is no primary we should get a pretty error
 UPDATE pg_dist_node SET noderole = 'secondary' WHERE nodeport=:worker_2_port;
 SELECT * FROM cluster_management_test;
-ERROR: node group 3 does not have a primary node
+ERROR: node group 6 does not have a primary node
 -- when there is no node at all in the group we should get a different error
 DELETE FROM pg_dist_node WHERE nodeport=:worker_2_port;
+SELECT run_command_on_workers('DELETE FROM pg_dist_node WHERE nodeport=' || :'worker_2_port');
+ run_command_on_workers
+---------------------------------------------------------------------
+ (localhost,57637,t,"DELETE 1")
+(1 row)
 SELECT * FROM cluster_management_test;
-ERROR: there is a shard placement in node group 3 but there are no nodes in that group
+ERROR: there is a shard placement in node group 6 but there are no nodes in that group
 -- clean-up
+SELECT * INTO old_placements FROM pg_dist_placement WHERE groupid = :worker_2_group;
+DELETE FROM pg_dist_placement WHERE groupid = :worker_2_group;
 SELECT master_add_node('localhost', :worker_2_port) AS new_node \gset
 WARNING: citus.enable_object_propagation is off, not creating distributed objects on worker
 DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. Newly activated nodes will not get these objects created
+WARNING: could not find any shard placements for shardId 1220001
+WARNING: could not find any shard placements for shardId 1220003
+WARNING: could not find any shard placements for shardId 1220005
+WARNING: could not find any shard placements for shardId 1220007
+WARNING: could not find any shard placements for shardId 1220009
+WARNING: could not find any shard placements for shardId 1220011
+WARNING: could not find any shard placements for shardId 1220013
+WARNING: could not find any shard placements for shardId 1220015
+INSERT INTO pg_dist_placement SELECT * FROM old_placements;
 SELECT groupid AS new_group FROM pg_dist_node WHERE nodeid = :new_node \gset
 UPDATE pg_dist_placement SET groupid = :new_group WHERE groupid = :worker_2_group;
+SELECT run_command_on_workers('UPDATE pg_dist_placement SET groupid = ' || :'new_group' || ' WHERE groupid = ' || :'worker_2_group');
+ run_command_on_workers
+---------------------------------------------------------------------
+ (localhost,57637,t,"UPDATE 8")
+ (localhost,57638,t,"UPDATE 0")
+(2 rows)
+SELECT start_metadata_sync_to_node('localhost', :worker_2_port);
+ start_metadata_sync_to_node
+---------------------------------------------------------------------
+(1 row)
 -- test that you are allowed to remove secondary nodes even if there are placements
 SELECT 1 FROM master_add_node('localhost', 9990, groupid => :new_group, noderole => 'secondary');
  ?column?
@@ -559,8 +680,8 @@ DETAIL: distributed objects are only kept in sync when citus.enable_object_prop
 SELECT * FROM pg_dist_node ORDER BY nodeid;
  nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards
 ---------------------------------------------------------------------
- 12 | 9 | localhost | 57637 | default | f | t | primary | default | f | t
- 13 | 10 | localhost | 57638 | default | f | t | primary | default | f | t
+ 12 | 9 | localhost | 57637 | default | t | t | primary | default | t | t
+ 13 | 10 | localhost | 57638 | default | t | t | primary | default | t | t
 (2 rows)
 -- check that mixed add/remove node commands work fine inside transaction
@@ -882,6 +1003,15 @@ SELECT * FROM pg_dist_node WHERE nodeid = :worker_1_node;
  17 | 14 | localhost | 57637 | default | f | t | primary | default | f | t
 (1 row)
+SET client_min_messages TO ERROR;
+SELECT start_metadata_sync_to_node(nodename, nodeport) FROM pg_dist_node WHERE isactive = 't' and noderole = 'primary';
+ start_metadata_sync_to_node
+---------------------------------------------------------------------
+(2 rows)
+RESET client_min_messages;
 SET citus.shard_replication_factor TO 1;
 CREATE TABLE test_dist (x int, y int);
 SELECT create_distributed_table('test_dist', 'x');
@@ -1070,3 +1200,10 @@ WHERE logicalrelid = 'test_dist_non_colocated'::regclass GROUP BY nodeport ORDER
 SELECT * from master_set_node_property('localhost', :worker_2_port, 'bogusproperty', false);
 ERROR: only the 'shouldhaveshards' property can be set using this function
 DROP TABLE test_dist, test_ref, test_dist_colocated, test_dist_non_colocated;
+-- verify that at the end of this file, all primary nodes have metadata synced
+SELECT bool_and(hasmetadata) AND bool_and(metadatasynced) FROM pg_dist_node WHERE isactive = 't' and noderole = 'primary';
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
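The closing check above folds the sync state of every primary into one boolean. A hypothetical diagnostic variant (not part of the test) uses the same columns to list any primary that is still out of sync, which should return no rows here:

-- hypothetical diagnostic: primaries that are not fully synced
SELECT nodename, nodeport, hasmetadata, metadatasynced
FROM pg_dist_node
WHERE isactive = 't' AND noderole = 'primary'
  AND NOT (hasmetadata AND metadatasynced);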

View File

@@ -1108,17 +1108,17 @@ SELECT * FROM public.table_placements_per_node;
 (4 rows)
 CALL citus_cleanup_orphaned_shards();
-select * from pg_dist_placement;
+select * from pg_dist_placement ORDER BY placementid;
  placementid | shardid | shardstate | shardlength | groupid
 ---------------------------------------------------------------------
- 135 | 123023 | 1 | 0 | 14
- 138 | 123024 | 1 | 0 | 14
- 141 | 123027 | 1 | 0 | 14
- 142 | 123028 | 1 | 0 | 14
- 143 | 123021 | 1 | 0 | 16
- 144 | 123025 | 1 | 0 | 16
- 145 | 123022 | 1 | 0 | 16
- 146 | 123026 | 1 | 0 | 16
+ 151 | 123023 | 1 | 0 | 14
+ 154 | 123024 | 1 | 0 | 14
+ 157 | 123027 | 1 | 0 | 14
+ 158 | 123028 | 1 | 0 | 14
+ 159 | 123021 | 1 | 0 | 16
+ 160 | 123025 | 1 | 0 | 16
+ 161 | 123022 | 1 | 0 | 16
+ 162 | 123026 | 1 | 0 | 16
 (8 rows)
 -- Move all shards to worker1 again

View File

@@ -0,0 +1,16 @@
+ALTER SYSTEM SET citus.enable_metadata_sync_by_default TO ON;
+SELECT pg_reload_conf();
+ pg_reload_conf
+---------------------------------------------------------------------
+ t
+(1 row)
+SET client_min_messages TO ERROR;
+SELECT start_metadata_sync_to_node(nodename, nodeport) FROM pg_dist_node WHERE isactive = 't' and noderole = 'primary';
+ start_metadata_sync_to_node
+---------------------------------------------------------------------
+(3 rows)
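ALTER SYSTEM only writes the setting to postgresql.auto.conf, and pg_reload_conf() signals the server to reread its configuration. As an illustrative sanity check (not part of the expected output), the GUC can be read back, ideally from a fresh session:

-- hypothetical sanity check after the reload
SHOW citus.enable_metadata_sync_by_default;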

View File

@@ -0,0 +1,13 @@
+ALTER SYSTEM SET citus.enable_metadata_sync_by_default TO ON;
+SELECT pg_reload_conf();
+ pg_reload_conf
+---------------------------------------------------------------------
+ t
+(1 row)
+SET client_min_messages TO ERROR;
+SELECT start_metadata_sync_to_node(nodename, nodeport) FROM pg_dist_node WHERE isactive = 't' and noderole = 'primary';
+ start_metadata_sync_to_node
+---------------------------------------------------------------------
+(0 rows)

View File

@@ -19,7 +19,9 @@ test: turn_mx_off
 test: multi_extension
 test: single_node
 test: single_node_truncate
+test: turn_mx_on
 test: multi_cluster_management
+test: turn_mx_off
 # below tests are placed right after multi_cluster_management as we do
 # remove/add node operations and we do not want any preexisting objects

View File

@@ -17,6 +17,7 @@ test: turn_mx_off
 test: multi_extension
 test: multi_test_helpers multi_test_helpers_superuser
 test: multi_mx_node_metadata
+test: turn_mx_on
 test: multi_cluster_management
 test: multi_mx_function_table_reference
 test: multi_test_catalog_views

View File

@@ -1,6 +1,4 @@
-test: turn_mx_off
 test: multi_cluster_management
-test: turn_mx_on
 test: multi_test_helpers multi_test_helpers_superuser multi_create_fdw
 test: multi_test_catalog_views

View File

@@ -1,5 +1,5 @@
-test: turn_mx_off
 test: multi_cluster_management
+test: turn_mx_off
 test: multi_test_helpers multi_test_helpers_superuser
 test: multi_test_catalog_views
 test: shard_rebalancer_unit

View File

@@ -76,11 +76,16 @@ SELECT master_disable_node('localhost.noexist', 2345);
 -- drop the table without leaving a shard placement behind (messes up other tests)
 SELECT master_activate_node('localhost', :worker_2_port);
-DROP TABLE test_reference_table;
-SELECT master_disable_node('localhost', :worker_2_port);
+DROP TABLE test_reference_table, cluster_management_test;
+-- create users like this so results of community and enterprise are same
+SET citus.enable_object_propagation TO ON;
+SET client_min_messages TO ERROR;
 CREATE USER non_super_user;
 CREATE USER node_metadata_user;
+SELECT 1 FROM run_command_on_workers('CREATE USER node_metadata_user');
+RESET client_min_messages;
+SET citus.enable_object_propagation TO OFF;
 GRANT EXECUTE ON FUNCTION master_activate_node(text,int) TO node_metadata_user;
 GRANT EXECUTE ON FUNCTION master_add_inactive_node(text,int,int,noderole,name) TO node_metadata_user;
 GRANT EXECUTE ON FUNCTION master_add_node(text,int,int,noderole,name) TO node_metadata_user;
@@ -89,6 +94,12 @@ GRANT EXECUTE ON FUNCTION master_disable_node(text,int) TO node_metadata_user;
 GRANT EXECUTE ON FUNCTION master_remove_node(text,int) TO node_metadata_user;
 GRANT EXECUTE ON FUNCTION master_update_node(int,text,int,bool,int) TO node_metadata_user;
+-- user needs permission for the pg_dist_node and pg_dist_local_group for metadata syncing
+SELECT run_command_on_workers('GRANT ALL ON pg_dist_node TO node_metadata_user');
+SELECT run_command_on_workers('GRANT ALL ON pg_dist_local_group TO node_metadata_user');
+SELECT master_remove_node('localhost', :worker_2_port);
 -- Removing public schema from pg_dist_object because it breaks the next tests
 DELETE FROM citus.pg_dist_object WHERE objid = 'public'::regnamespace::oid;
@@ -106,24 +117,31 @@ SELECT master_update_node(nodeid, 'localhost', :worker_2_port + 3) FROM pg_dist_
 SET ROLE node_metadata_user;
 SET citus.enable_object_propagation TO off; -- prevent master activate node to actually connect for this test
 BEGIN;
-SELECT 1 FROM master_add_inactive_node('localhost', :worker_2_port + 1);
-SELECT 1 FROM master_activate_node('localhost', :worker_2_port + 1);
-SELECT 1 FROM master_disable_node('localhost', :worker_2_port + 1);
-SELECT 1 FROM master_remove_node('localhost', :worker_2_port + 1);
-SELECT 1 FROM master_add_node('localhost', :worker_2_port + 1);
+SELECT 1 FROM master_add_inactive_node('localhost', :worker_2_port);
+SELECT 1 FROM master_activate_node('localhost', :worker_2_port);
+SELECT 1 FROM master_disable_node('localhost', :worker_2_port);
+SELECT 1 FROM master_remove_node('localhost', :worker_2_port);
+SELECT 1 FROM master_add_node('localhost', :worker_2_port);
 SELECT 1 FROM master_add_secondary_node('localhost', :worker_2_port + 2, 'localhost', :worker_2_port);
 SELECT master_update_node(nodeid, 'localhost', :worker_2_port + 3) FROM pg_dist_node WHERE nodeport = :worker_2_port;
 SELECT nodename, nodeport, noderole FROM pg_dist_node ORDER BY nodeport;
 ABORT;
 \c - postgres - :master_port
-SET citus.next_shard_id TO 1220016;
+SET citus.next_shard_id TO 1220000;
 SET citus.enable_object_propagation TO off; -- prevent object propagation on add node during setup
+SET citus.shard_count TO 16;
 SET citus.shard_replication_factor TO 1;
 SELECT master_get_active_worker_nodes();
 -- restore the node for next tests
-SELECT * FROM master_activate_node('localhost', :worker_2_port);
+SELECT * FROM master_add_node('localhost', :worker_2_port);
+ALTER SEQUENCE pg_dist_node_nodeid_seq RESTART WITH 7;
+ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 6;
+CREATE TABLE cluster_management_test (col_1 text, col_2 int);
+SELECT create_distributed_table('cluster_management_test', 'col_1', 'hash');
 -- try to remove a node with active placements and see that node removal is failed
 SELECT master_remove_node('localhost', :worker_2_port);
@@ -131,6 +149,8 @@ SELECT master_remove_node('localhost', :worker_2_port);
 -- mark all placements in the candidate node as inactive
 SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport=:worker_2_port \gset
 UPDATE pg_dist_placement SET shardstate=3 WHERE groupid=:worker_2_group;
+-- manual updates to pg_dist* tables are not automatically reflected to the workers, so we manually do that too
+SELECT run_command_on_workers('UPDATE pg_dist_placement SET shardstate=3 WHERE groupid=' || :'worker_2_group');
 SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE nodeport=:worker_2_port;
 -- try to remove a node with only inactive placements and see that removal still fails
@@ -139,6 +159,7 @@ SELECT master_get_active_worker_nodes();
 -- mark all placements in the candidate node as to be deleted
 UPDATE pg_dist_placement SET shardstate=4 WHERE groupid=:worker_2_group;
+SELECT run_command_on_workers('UPDATE pg_dist_placement SET shardstate=4 WHERE groupid=' || :'worker_2_group');
 SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE nodeport=:worker_2_port;
 CREATE TABLE cluster_management_test_colocated (col_1 text, col_2 int);
 -- Check that we warn the user about colocated shards that will not get created for shards that do not have active placements
@@ -148,6 +169,7 @@ SELECT create_distributed_table('cluster_management_test_colocated', 'col_1', 'h
 SELECT logicalrelid, shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement NATURAL JOIN pg_dist_shard ORDER BY shardstate, shardid;
 SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4;
+SELECT run_command_on_workers('SELECT * INTO removed_placements FROM pg_dist_placement WHERE shardstate = 4');
 -- try to remove a node with only to be deleted placements and see that removal succeeds
 SELECT master_remove_node('localhost', :worker_2_port);
 SELECT master_get_active_worker_nodes();
@@ -155,10 +177,15 @@ SELECT master_get_active_worker_nodes();
 SELECT master_add_node('localhost', :worker_2_port, groupId := :worker_2_group);
 -- put removed placements back for testing purposes(in practice we wouldn't have only old placements for a shard)
 INSERT INTO pg_dist_placement SELECT * FROM removed_placements;
+SELECT run_command_on_workers('INSERT INTO pg_dist_placement SELECT * FROM removed_placements');
+DROP TABLE removed_placements;
+SELECT run_command_on_workers('DROP TABLE removed_placements');
 -- clean-up
 SELECT 1 FROM master_add_node('localhost', :worker_2_port);
 UPDATE pg_dist_placement SET shardstate=1 WHERE groupid=:worker_2_group;
+SELECT run_command_on_workers('UPDATE pg_dist_placement SET shardstate=1 WHERE groupid=' || :'worker_2_group');
 SET client_min_messages TO ERROR;
 DROP TABLE cluster_management_test_colocated;
 RESET client_min_messages;
@@ -169,12 +196,18 @@ SELECT * FROM cluster_management_test;
 -- when there is no node at all in the group we should get a different error
 DELETE FROM pg_dist_node WHERE nodeport=:worker_2_port;
+SELECT run_command_on_workers('DELETE FROM pg_dist_node WHERE nodeport=' || :'worker_2_port');
 SELECT * FROM cluster_management_test;
 -- clean-up
+SELECT * INTO old_placements FROM pg_dist_placement WHERE groupid = :worker_2_group;
+DELETE FROM pg_dist_placement WHERE groupid = :worker_2_group;
 SELECT master_add_node('localhost', :worker_2_port) AS new_node \gset
+INSERT INTO pg_dist_placement SELECT * FROM old_placements;
 SELECT groupid AS new_group FROM pg_dist_node WHERE nodeid = :new_node \gset
 UPDATE pg_dist_placement SET groupid = :new_group WHERE groupid = :worker_2_group;
+SELECT run_command_on_workers('UPDATE pg_dist_placement SET groupid = ' || :'new_group' || ' WHERE groupid = ' || :'worker_2_group');
+SELECT start_metadata_sync_to_node('localhost', :worker_2_port);
 -- test that you are allowed to remove secondary nodes even if there are placements
 SELECT 1 FROM master_add_node('localhost', 9990, groupid => :new_group, noderole => 'secondary');
@@ -238,6 +271,7 @@ SELECT nodename, nodeport FROM pg_dist_node WHERE nodename='localhost' AND nodep
 \c - - - :worker_1_port
 SELECT nodename, nodeport FROM pg_dist_node WHERE nodename='localhost' AND nodeport=:worker_2_port;
 \c - - - :master_port
+SET citus.enable_object_propagation TO off; -- prevent object propagation on add node during setup
 SELECT master_remove_node(nodename, nodeport) FROM pg_dist_node;
@@ -350,6 +384,9 @@ SELECT * FROM pg_dist_node WHERE nodeid = :worker_1_node;
 SELECT master_update_node(:worker_1_node, 'localhost', :worker_1_port);
 SELECT * FROM pg_dist_node WHERE nodeid = :worker_1_node;
+SET client_min_messages TO ERROR;
+SELECT start_metadata_sync_to_node(nodename, nodeport) FROM pg_dist_node WHERE isactive = 't' and noderole = 'primary';
+RESET client_min_messages;
 SET citus.shard_replication_factor TO 1;
@@ -442,3 +479,6 @@ WHERE logicalrelid = 'test_dist_non_colocated'::regclass GROUP BY nodeport ORDER
 SELECT * from master_set_node_property('localhost', :worker_2_port, 'bogusproperty', false);
 DROP TABLE test_dist, test_ref, test_dist_colocated, test_dist_non_colocated;
+-- verify that at the end of this file, all primary nodes have metadata synced
+SELECT bool_and(hasmetadata) AND bool_and(metadatasynced) FROM pg_dist_node WHERE isactive = 't' and noderole = 'primary';

View File

@@ -709,7 +709,7 @@ SELECT * FROM get_rebalance_progress();
 SELECT * FROM public.table_placements_per_node;
 CALL citus_cleanup_orphaned_shards();
-select * from pg_dist_placement;
+select * from pg_dist_placement ORDER BY placementid;
 -- Move all shards to worker1 again