SET citus.next_shard_id TO 1220000; -- Tests functions related to cluster membership -- before starting the test, lets try to create reference table and see a -- meaningful error CREATE TABLE test_reference_table (y int primary key, name text); SELECT create_reference_table('test_reference_table'); ERROR: cannot create reference table "test_reference_table" DETAIL: There are no active worker nodes. -- add the nodes to the cluster SELECT 1 FROM master_add_node('localhost', :worker_1_port); ?column? ---------- 1 (1 row) SELECT 1 FROM master_add_node('localhost', :worker_2_port); ?column? ---------- 1 (1 row) -- get the active nodes SELECT master_get_active_worker_nodes(); master_get_active_worker_nodes -------------------------------- (localhost,57638) (localhost,57637) (2 rows) -- try to add a node that is already in the cluster SELECT nodeid, groupid FROM master_add_node('localhost', :worker_1_port); nodeid | groupid --------+--------- 1 | 1 (1 row) -- get the active nodes SELECT master_get_active_worker_nodes(); master_get_active_worker_nodes -------------------------------- (localhost,57638) (localhost,57637) (2 rows) -- try to remove a node (with no placements) SELECT master_remove_node('localhost', :worker_2_port); master_remove_node -------------------- (1 row) -- verify that the node has been deleted SELECT master_get_active_worker_nodes(); master_get_active_worker_nodes -------------------------------- (localhost,57637) (1 row) -- try to disable a node with no placements see that node is removed SELECT 1 FROM master_add_node('localhost', :worker_2_port); ?column? ---------- 1 (1 row) SELECT master_disable_node('localhost', :worker_2_port); master_disable_node --------------------- (1 row) SELECT master_get_active_worker_nodes(); master_get_active_worker_nodes -------------------------------- (localhost,57637) (1 row) -- add some shard placements to the cluster SELECT isactive FROM master_activate_node('localhost', :worker_2_port); isactive ---------- t (1 row) CREATE TABLE cluster_management_test (col_1 text, col_2 int); SELECT master_create_distributed_table('cluster_management_test', 'col_1', 'hash'); master_create_distributed_table --------------------------------- (1 row) SELECT master_create_worker_shards('cluster_management_test', 16, 1); master_create_worker_shards ----------------------------- (1 row) -- see that there are some active placements in the candidate node SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE nodeport=:worker_2_port; shardid | shardstate | nodename | nodeport ---------+------------+-----------+---------- 1220001 | 1 | localhost | 57638 1220003 | 1 | localhost | 57638 1220005 | 1 | localhost | 57638 1220007 | 1 | localhost | 57638 1220009 | 1 | localhost | 57638 1220011 | 1 | localhost | 57638 1220013 | 1 | localhost | 57638 1220015 | 1 | localhost | 57638 (8 rows) -- try to remove a node with active placements and see that node removal is failed SELECT master_remove_node('localhost', :worker_2_port); ERROR: you cannot remove the primary node of a node group which has shard placements SELECT master_get_active_worker_nodes(); master_get_active_worker_nodes -------------------------------- (localhost,57638) (localhost,57637) (2 rows) -- insert a row so that master_disable_node() exercises closing connections INSERT INTO test_reference_table VALUES (1, '1'); -- try to disable a node with active placements see that node is removed -- observe that a notification is displayed SELECT master_disable_node('localhost', :worker_2_port); NOTICE: Node localhost:57638 has active shard placements. Some queries may fail after this operation. Use SELECT master_activate_node('localhost', 57638) to activate this node back. master_disable_node --------------------- (1 row) SELECT master_get_active_worker_nodes(); master_get_active_worker_nodes -------------------------------- (localhost,57637) (1 row) -- try to disable a node which does not exist and see that an error is thrown SELECT master_disable_node('localhost.noexist', 2345); ERROR: node at "localhost.noexist:2345" does not exist -- restore the node for next tests SELECT isactive FROM master_activate_node('localhost', :worker_2_port); isactive ---------- t (1 row) -- try to remove a node with active placements and see that node removal is failed SELECT master_remove_node('localhost', :worker_2_port); ERROR: you cannot remove the primary node of a node group which has shard placements -- mark all placements in the candidate node as inactive SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport=:worker_2_port \gset UPDATE pg_dist_placement SET shardstate=3 WHERE groupid=:worker_2_group; SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE nodeport=:worker_2_port; shardid | shardstate | nodename | nodeport ---------+------------+-----------+---------- 1220001 | 3 | localhost | 57638 1220003 | 3 | localhost | 57638 1220005 | 3 | localhost | 57638 1220007 | 3 | localhost | 57638 1220009 | 3 | localhost | 57638 1220011 | 3 | localhost | 57638 1220013 | 3 | localhost | 57638 1220015 | 3 | localhost | 57638 (8 rows) -- try to remove a node with only inactive placements and see that removal still fails SELECT master_remove_node('localhost', :worker_2_port); ERROR: you cannot remove the primary node of a node group which has shard placements SELECT master_get_active_worker_nodes(); master_get_active_worker_nodes -------------------------------- (localhost,57638) (localhost,57637) (2 rows) -- clean-up SELECT 1 FROM master_add_node('localhost', :worker_2_port); ?column? ---------- 1 (1 row) UPDATE pg_dist_placement SET shardstate=1 WHERE groupid=:worker_2_group; -- when there is no primary we should get a pretty error UPDATE pg_dist_node SET noderole = 'secondary' WHERE nodeport=:worker_2_port; SELECT * FROM cluster_management_test; ERROR: node group 3 does not have a primary node -- when there is no node at all in the group we should get a different error DELETE FROM pg_dist_node WHERE nodeport=:worker_2_port; SELECT * FROM cluster_management_test; ERROR: there is a shard placement in node group 3 but there are no nodes in that group -- clean-up SELECT groupid as new_group FROM master_add_node('localhost', :worker_2_port) \gset UPDATE pg_dist_placement SET groupid = :new_group WHERE groupid = :worker_2_group; -- test that you are allowed to remove secondary nodes even if there are placements SELECT 1 FROM master_add_node('localhost', 9990, groupid => :new_group, noderole => 'secondary'); ?column? ---------- 1 (1 row) SELECT master_remove_node('localhost', :worker_2_port); ERROR: you cannot remove the primary node of a node group which has shard placements SELECT master_remove_node('localhost', 9990); master_remove_node -------------------- (1 row) -- clean-up DROP TABLE cluster_management_test; -- check that adding/removing nodes are propagated to nodes with hasmetadata=true SELECT master_remove_node('localhost', :worker_2_port); master_remove_node -------------------- (1 row) UPDATE pg_dist_node SET hasmetadata=true WHERE nodeport=:worker_1_port; SELECT 1 FROM master_add_node('localhost', :worker_2_port); ?column? ---------- 1 (1 row) \c - - - :worker_1_port SELECT nodename, nodeport FROM pg_dist_node WHERE nodename='localhost' AND nodeport=:worker_2_port; nodename | nodeport -----------+---------- localhost | 57638 (1 row) \c - - - :master_port SELECT master_remove_node('localhost', :worker_2_port); master_remove_node -------------------- (1 row) \c - - - :worker_1_port SELECT nodename, nodeport FROM pg_dist_node WHERE nodename='localhost' AND nodeport=:worker_2_port; nodename | nodeport ----------+---------- (0 rows) \c - - - :master_port -- check that added nodes are not propagated to nodes with hasmetadata=false UPDATE pg_dist_node SET hasmetadata=false WHERE nodeport=:worker_1_port; SELECT 1 FROM master_add_node('localhost', :worker_2_port); ?column? ---------- 1 (1 row) \c - - - :worker_1_port SELECT nodename, nodeport FROM pg_dist_node WHERE nodename='localhost' AND nodeport=:worker_2_port; nodename | nodeport ----------+---------- (0 rows) \c - - - :master_port -- check that removing two nodes in the same transaction works SELECT master_remove_node('localhost', :worker_1_port), master_remove_node('localhost', :worker_2_port); master_remove_node | master_remove_node --------------------+-------------------- | (1 row) SELECT count(1) FROM pg_dist_node; count ------- 0 (1 row) -- check that adding two nodes in the same transaction works SELECT master_add_node('localhost', :worker_1_port), master_add_node('localhost', :worker_2_port); master_add_node | master_add_node ---------------------------------------------------+--------------------------------------------------- (8,7,localhost,57637,default,f,t,primary,default) | (9,8,localhost,57638,default,f,t,primary,default) (1 row) SELECT * FROM pg_dist_node ORDER BY nodeid; nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster --------+---------+-----------+----------+----------+-------------+----------+----------+------------- 8 | 7 | localhost | 57637 | default | f | t | primary | default 9 | 8 | localhost | 57638 | default | f | t | primary | default (2 rows) -- check that mixed add/remove node commands work fine inside transaction BEGIN; SELECT master_remove_node('localhost', :worker_2_port); master_remove_node -------------------- (1 row) SELECT 1 FROM master_add_node('localhost', :worker_2_port); ?column? ---------- 1 (1 row) SELECT master_remove_node('localhost', :worker_2_port); master_remove_node -------------------- (1 row) COMMIT; SELECT nodename, nodeport FROM pg_dist_node WHERE nodename='localhost' AND nodeport=:worker_2_port; nodename | nodeport ----------+---------- (0 rows) UPDATE pg_dist_node SET hasmetadata=true WHERE nodeport=:worker_1_port; BEGIN; SELECT 1 FROM master_add_node('localhost', :worker_2_port); ?column? ---------- 1 (1 row) SELECT master_remove_node('localhost', :worker_2_port); master_remove_node -------------------- (1 row) SELECT 1 FROM master_add_node('localhost', :worker_2_port); ?column? ---------- 1 (1 row) COMMIT; SELECT nodename, nodeport FROM pg_dist_node WHERE nodename='localhost' AND nodeport=:worker_2_port; nodename | nodeport -----------+---------- localhost | 57638 (1 row) \c - - - :worker_1_port SELECT nodename, nodeport FROM pg_dist_node WHERE nodename='localhost' AND nodeport=:worker_2_port; nodename | nodeport -----------+---------- localhost | 57638 (1 row) \c - - - :master_port SELECT master_remove_node(nodename, nodeport) FROM pg_dist_node; master_remove_node -------------------- (2 rows) SELECT 1 FROM master_add_node('localhost', :worker_1_port); ?column? ---------- 1 (1 row) SELECT 1 FROM master_add_node('localhost', :worker_2_port); ?column? ---------- 1 (1 row) -- check that a distributed table can be created after adding a node in a transaction SELECT master_remove_node('localhost', :worker_2_port); master_remove_node -------------------- (1 row) BEGIN; SELECT 1 FROM master_add_node('localhost', :worker_2_port); ?column? ---------- 1 (1 row) CREATE TABLE temp(col1 text, col2 int); SELECT create_distributed_table('temp', 'col1'); create_distributed_table -------------------------- (1 row) INSERT INTO temp VALUES ('row1', 1); INSERT INTO temp VALUES ('row2', 2); COMMIT; SELECT col1, col2 FROM temp ORDER BY col1; col1 | col2 ------+------ row1 | 1 row2 | 2 (2 rows) SELECT count(*) FROM pg_dist_shard_placement, pg_dist_shard WHERE pg_dist_shard_placement.shardid = pg_dist_shard.shardid AND pg_dist_shard.logicalrelid = 'temp'::regclass AND pg_dist_shard_placement.nodeport = :worker_2_port; count ------- 4 (1 row) DROP TABLE temp; \c - - - :worker_1_port DELETE FROM pg_dist_partition; DELETE FROM pg_dist_shard; DELETE FROM pg_dist_placement; DELETE FROM pg_dist_node; \c - - - :master_port SELECT stop_metadata_sync_to_node('localhost', :worker_1_port); stop_metadata_sync_to_node ---------------------------- (1 row) SELECT stop_metadata_sync_to_node('localhost', :worker_2_port); stop_metadata_sync_to_node ---------------------------- (1 row) -- check that you can't add a primary to a non-default cluster SELECT master_add_node('localhost', 9999, nodecluster => 'olap'); ERROR: primaries must be added to the default cluster -- check that you can't add more than one primary to a group SELECT groupid AS worker_1_group FROM pg_dist_node WHERE nodeport = :worker_1_port \gset SELECT master_add_node('localhost', 9999, groupid => :worker_1_group, noderole => 'primary'); ERROR: group 12 already has a primary node -- check that you can add secondaries and unavailable nodes to a group SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport = :worker_2_port \gset SELECT 1 FROM master_add_node('localhost', 9998, groupid => :worker_1_group, noderole => 'secondary'); ?column? ---------- 1 (1 row) SELECT 1 FROM master_add_node('localhost', 9997, groupid => :worker_1_group, noderole => 'unavailable'); ?column? ---------- 1 (1 row) -- add_inactive_node also works with secondaries SELECT 1 FROM master_add_inactive_node('localhost', 9996, groupid => :worker_2_group, noderole => 'secondary'); ?column? ---------- 1 (1 row) -- check that you can add a seconary to a non-default cluster, and activate it, and remove it SELECT master_add_inactive_node('localhost', 9999, groupid => :worker_2_group, nodecluster => 'olap', noderole => 'secondary'); master_add_inactive_node --------------------------------------------------- (19,14,localhost,9999,default,f,f,secondary,olap) (1 row) SELECT master_activate_node('localhost', 9999); master_activate_node --------------------------------------------------- (19,14,localhost,9999,default,f,t,secondary,olap) (1 row) SELECT master_disable_node('localhost', 9999); master_disable_node --------------------- (1 row) SELECT master_remove_node('localhost', 9999); master_remove_node -------------------- (1 row) -- check that you can't manually add two primaries to a group INSERT INTO pg_dist_node (nodename, nodeport, groupid, noderole) VALUES ('localhost', 5000, :worker_1_group, 'primary'); ERROR: there cannot be two primary nodes in a group CONTEXT: PL/pgSQL function citus.pg_dist_node_trigger_func() line 10 at RAISE UPDATE pg_dist_node SET noderole = 'primary' WHERE groupid = :worker_1_group AND nodeport = 9998; ERROR: there cannot be two primary nodes in a group CONTEXT: PL/pgSQL function citus.pg_dist_node_trigger_func() line 18 at RAISE -- check that you can't manually add a primary to a non-default cluster INSERT INTO pg_dist_node (nodename, nodeport, groupid, noderole, nodecluster) VALUES ('localhost', 5000, 1000, 'primary', 'olap'); ERROR: new row for relation "pg_dist_node" violates check constraint "primaries_are_only_allowed_in_the_default_cluster" DETAIL: Failing row contains (17, 1000, localhost, 5000, default, f, t, primary, olap). UPDATE pg_dist_node SET nodecluster = 'olap' WHERE nodeport = :worker_1_port; ERROR: new row for relation "pg_dist_node" violates check constraint "primaries_are_only_allowed_in_the_default_cluster" DETAIL: Failing row contains (13, 12, localhost, 57637, default, f, t, primary, olap). -- check that you /can/ add a secondary node to a non-default cluster SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport = :worker_2_port \gset SELECT master_add_node('localhost', 8888, groupid => :worker_1_group, noderole => 'secondary', nodecluster=> 'olap'); master_add_node --------------------------------------------------- (20,12,localhost,8888,default,f,t,secondary,olap) (1 row) -- check that super-long cluster names are truncated SELECT master_add_node('localhost', 8887, groupid => :worker_1_group, noderole => 'secondary', nodecluster=> 'thisisasixtyfourcharacterstringrepeatedfourtimestomake256chars.' 'thisisasixtyfourcharacterstringrepeatedfourtimestomake256chars.' 'thisisasixtyfourcharacterstringrepeatedfourtimestomake256chars.' 'thisisasixtyfourcharacterstringrepeatedfourtimestomake256chars.' 'overflow' ); master_add_node -------------------------------------------------------------------------------------------------------------- (21,12,localhost,8887,default,f,t,secondary,thisisasixtyfourcharacterstringrepeatedfourtimestomake256chars.) (1 row) SELECT * FROM pg_dist_node WHERE nodeport=8887; nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster --------+---------+-----------+----------+----------+-------------+----------+-----------+----------------------------------------------------------------- 21 | 12 | localhost | 8887 | default | f | t | secondary | thisisasixtyfourcharacterstringrepeatedfourtimestomake256chars. (1 row) -- don't remove the secondary and unavailable nodes, check that no commands are sent to -- them in any of the remaining tests -- master_add_secondary_node lets you skip looking up the groupid SELECT master_add_secondary_node('localhost', 9995, 'localhost', :worker_1_port); master_add_secondary_node ------------------------------------------------------ (22,12,localhost,9995,default,f,t,secondary,default) (1 row) SELECT master_add_secondary_node('localhost', 9994, primaryname => 'localhost', primaryport => :worker_2_port); master_add_secondary_node ------------------------------------------------------ (23,14,localhost,9994,default,f,t,secondary,default) (1 row) SELECT master_add_secondary_node('localhost', 9993, 'localhost', 2000); ERROR: node at "localhost:2000" does not exist SELECT master_add_secondary_node('localhost', 9992, 'localhost', :worker_1_port, nodecluster => 'second-cluster'); master_add_secondary_node ------------------------------------------------------------- (24,12,localhost,9992,default,f,t,secondary,second-cluster) (1 row) SELECT nodeid AS worker_1_node FROM pg_dist_node WHERE nodeport=:worker_1_port \gset -- master_update_node checks node exists SELECT master_update_node(100, 'localhost', 8000); ERROR: could not find valid entry for node "localhost:8000" -- master_update_node disallows aliasing existing node SELECT master_update_node(:worker_1_node, 'localhost', :worker_2_port); ERROR: node at "localhost:57638" already exists -- master_update_node moves a node SELECT master_update_node(:worker_1_node, 'somehost', 9000); master_update_node -------------------- (1 row) SELECT * FROM pg_dist_node WHERE nodeid = :worker_1_node; nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster --------+---------+----------+----------+----------+-------------+----------+----------+------------- 13 | 12 | somehost | 9000 | default | f | t | primary | default (1 row) -- cleanup SELECT master_update_node(:worker_1_node, 'localhost', :worker_1_port); master_update_node -------------------- (1 row) SELECT * FROM pg_dist_node WHERE nodeid = :worker_1_node; nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster --------+---------+-----------+----------+----------+-------------+----------+----------+------------- 13 | 12 | localhost | 57637 | default | f | t | primary | default (1 row)