citus/src/test/regress/expected/failure_add_disable_node.out

386 lines
12 KiB
Plaintext

--
-- failure_add_disable_node tests master_add_node, master_remove_node
-- master_activate_node for failures.
-- master_disable_node and master_add_inactive_node can not be
-- tested as they don't create network activity
--
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SET citus.next_shard_id TO 200000;
-- verify we have all worker nodes present
SELECT * FROM master_get_active_worker_nodes()
ORDER BY 1, 2;
node_name | node_port
---------------------------------------------------------------------
localhost | 9060
localhost | 57637
(2 rows)
-- verify there are no tables that could prevent add/remove node operations
SELECT * FROM pg_dist_partition;
logicalrelid | partmethod | partkey | colocationid | repmodel
---------------------------------------------------------------------
(0 rows)
CREATE SCHEMA add_remove_node;
SET SEARCH_PATH=add_remove_node;
CREATE TABLE user_table(user_id int, user_name text);
SELECT create_reference_table('user_table');
create_reference_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE event_table(user_id int, event_id int, event_name text);
SELECT create_distributed_table('event_table', 'user_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT shardid, shardstate
FROM pg_dist_placement p JOIN pg_dist_shard s USING (shardid)
WHERE s.logicalrelid = 'user_table'::regclass
ORDER BY placementid;
shardid | shardstate
---------------------------------------------------------------------
200000 | 1
200000 | 1
(2 rows)
SELECT master_disable_node('localhost', :worker_2_proxy_port);
NOTICE: Node localhost:xxxxx has active shard placements. Some queries may fail after this operation. Use SELECT master_activate_node('localhost', 9060) to activate this node back.
master_disable_node
---------------------------------------------------------------------
(1 row)
SELECT * FROM master_get_active_worker_nodes()
ORDER BY 1, 2;
node_name | node_port
---------------------------------------------------------------------
localhost | 57637
(1 row)
SELECT shardid, shardstate
FROM pg_dist_placement p JOIN pg_dist_shard s USING (shardid)
WHERE s.logicalrelid = 'user_table'::regclass
ORDER BY placementid;
shardid | shardstate
---------------------------------------------------------------------
200000 | 1
(1 row)
-- fail activate node by failing reference table creation
SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE").kill()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SELECT master_activate_node('localhost', :worker_2_proxy_port);
NOTICE: Replicating reference table "user_table" to the node localhost:xxxxx
ERROR: server closed the connection unexpectedly
This probably means the server terminated abnormally
before or while processing the request.
CONTEXT: while executing command on localhost:xxxxx
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
(1 row)
-- verify node is not activated
SELECT * FROM master_get_active_worker_nodes()
ORDER BY 1, 2;
node_name | node_port
---------------------------------------------------------------------
localhost | 57637
(1 row)
SELECT shardid, shardstate
FROM pg_dist_placement p JOIN pg_dist_shard s USING (shardid)
WHERE s.logicalrelid = 'user_table'::regclass
ORDER BY placementid;
shardid | shardstate
---------------------------------------------------------------------
200000 | 1
(1 row)
-- fail create schema command
SELECT citus.mitmproxy('conn.onQuery(query="CREATE SCHEMA").kill()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SELECT master_activate_node('localhost', :worker_2_proxy_port);
ERROR: server closed the connection unexpectedly
This probably means the server terminated abnormally
before or while processing the request.
CONTEXT: while executing command on localhost:xxxxx
-- verify node is not activated
SELECT * FROM master_get_active_worker_nodes()
ORDER BY 1, 2;
node_name | node_port
---------------------------------------------------------------------
localhost | 57637
(1 row)
SELECT shardid, shardstate
FROM pg_dist_placement p JOIN pg_dist_shard s USING (shardid)
WHERE s.logicalrelid = 'user_table'::regclass
ORDER BY placementid;
shardid | shardstate
---------------------------------------------------------------------
200000 | 1
(1 row)
-- fail activate node by failing reference table creation
SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE").cancel(' || pg_backend_pid() || ')');
mitmproxy
---------------------------------------------------------------------
(1 row)
SELECT master_activate_node('localhost', :worker_2_proxy_port);
NOTICE: Replicating reference table "user_table" to the node localhost:xxxxx
ERROR: canceling statement due to user request
-- verify node is not activated
SELECT * FROM master_get_active_worker_nodes()
ORDER BY 1, 2;
node_name | node_port
---------------------------------------------------------------------
localhost | 57637
(1 row)
SELECT shardid, shardstate
FROM pg_dist_placement p JOIN pg_dist_shard s USING (shardid)
WHERE s.logicalrelid = 'user_table'::regclass
ORDER BY placementid;
shardid | shardstate
---------------------------------------------------------------------
200000 | 1
(1 row)
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
(1 row)
-- master_remove_node fails when there are shards on that worker
SELECT master_remove_node('localhost', :worker_2_proxy_port);
ERROR: you cannot remove the primary node of a node group which has shard placements
-- drop event table and re-run remove
DROP TABLE event_table;
SELECT master_remove_node('localhost', :worker_2_proxy_port);
master_remove_node
---------------------------------------------------------------------
(1 row)
-- verify node is removed
SELECT * FROM master_get_active_worker_nodes()
ORDER BY 1, 2;
node_name | node_port
---------------------------------------------------------------------
localhost | 57637
(1 row)
SELECT shardid, shardstate
FROM pg_dist_placement p JOIN pg_dist_shard s USING (shardid)
WHERE s.logicalrelid = 'user_table'::regclass
ORDER BY placementid;
shardid | shardstate
---------------------------------------------------------------------
200000 | 1
(1 row)
-- test master_add_inactive_node
-- it does not create any network activity therefore can not
-- be injected failure through network
SELECT master_add_inactive_node('localhost', :worker_2_proxy_port);
master_add_inactive_node
---------------------------------------------------------------------
3
(1 row)
SELECT master_remove_node('localhost', :worker_2_proxy_port);
master_remove_node
---------------------------------------------------------------------
(1 row)
SELECT shardid, shardstate
FROM pg_dist_placement p JOIN pg_dist_shard s USING (shardid)
WHERE s.logicalrelid = 'user_table'::regclass
ORDER BY placementid;
shardid | shardstate
---------------------------------------------------------------------
200000 | 1
(1 row)
-- test master_add_node replicated a reference table
-- to newly added node.
SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE").kill()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SELECT master_add_node('localhost', :worker_2_proxy_port);
NOTICE: Replicating reference table "user_table" to the node localhost:xxxxx
ERROR: server closed the connection unexpectedly
This probably means the server terminated abnormally
before or while processing the request.
CONTEXT: while executing command on localhost:xxxxx
-- verify node is not added
SELECT * FROM master_get_active_worker_nodes()
ORDER BY 1, 2;
node_name | node_port
---------------------------------------------------------------------
localhost | 57637
(1 row)
SELECT shardid, shardstate
FROM pg_dist_placement p JOIN pg_dist_shard s USING (shardid)
WHERE s.logicalrelid = 'user_table'::regclass
ORDER BY placementid;
shardid | shardstate
---------------------------------------------------------------------
200000 | 1
(1 row)
SELECT citus.mitmproxy('conn.onQuery(query="CREATE TABLE").cancel(' || pg_backend_pid() || ')');
mitmproxy
---------------------------------------------------------------------
(1 row)
SELECT master_add_node('localhost', :worker_2_proxy_port);
NOTICE: Replicating reference table "user_table" to the node localhost:xxxxx
ERROR: canceling statement due to user request
-- verify node is not added
SELECT * FROM master_get_active_worker_nodes()
ORDER BY 1, 2;
node_name | node_port
---------------------------------------------------------------------
localhost | 57637
(1 row)
SELECT shardid, shardstate
FROM pg_dist_placement p JOIN pg_dist_shard s USING (shardid)
WHERE s.logicalrelid = 'user_table'::regclass
ORDER BY placementid;
shardid | shardstate
---------------------------------------------------------------------
200000 | 1
(1 row)
-- reset cluster to original state
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SELECT master_add_node('localhost', :worker_2_proxy_port);
NOTICE: Replicating reference table "user_table" to the node localhost:xxxxx
master_add_node
---------------------------------------------------------------------
6
(1 row)
-- verify node is added
SELECT * FROM master_get_active_worker_nodes()
ORDER BY 1, 2;
node_name | node_port
---------------------------------------------------------------------
localhost | 9060
localhost | 57637
(2 rows)
SELECT shardid, shardstate
FROM pg_dist_placement p JOIN pg_dist_shard s USING (shardid)
WHERE s.logicalrelid = 'user_table'::regclass
ORDER BY placementid;
shardid | shardstate
---------------------------------------------------------------------
200000 | 1
200000 | 1
(2 rows)
-- fail master_add_node by failing copy out operation
SELECT master_remove_node('localhost', :worker_1_port);
master_remove_node
---------------------------------------------------------------------
(1 row)
SELECT citus.mitmproxy('conn.onQuery(query="COPY").kill()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SELECT master_add_node('localhost', :worker_1_port);
NOTICE: Replicating reference table "user_table" to the node localhost:xxxxx
ERROR: could not copy table "user_table_200000" from "localhost:xxxxx"
CONTEXT: while executing command on localhost:xxxxx
-- verify node is not added
SELECT * FROM master_get_active_worker_nodes()
ORDER BY 1, 2;
node_name | node_port
---------------------------------------------------------------------
localhost | 9060
(1 row)
SELECT citus.mitmproxy('conn.allow()');
mitmproxy
---------------------------------------------------------------------
(1 row)
SELECT master_add_node('localhost', :worker_1_port);
NOTICE: Replicating reference table "user_table" to the node localhost:xxxxx
master_add_node
---------------------------------------------------------------------
8
(1 row)
-- verify node is added
SELECT * FROM master_get_active_worker_nodes()
ORDER BY 1, 2;
node_name | node_port
---------------------------------------------------------------------
localhost | 9060
localhost | 57637
(2 rows)
SELECT shardid, shardstate
FROM pg_dist_placement p JOIN pg_dist_shard s USING (shardid)
WHERE s.logicalrelid = 'user_table'::regclass
ORDER BY placementid;
shardid | shardstate
---------------------------------------------------------------------
200000 | 1
200000 | 1
(2 rows)
RESET SEARCH_PATH;
DROP SCHEMA add_remove_node CASCADE;
NOTICE: drop cascades to table add_remove_node.user_table
SELECT * FROM run_command_on_workers('DROP SCHEMA IF EXISTS add_remove_node CASCADE')
ORDER BY nodeport;
nodename | nodeport | success | result
---------------------------------------------------------------------
localhost | 9060 | t | DROP SCHEMA
localhost | 57637 | t | DROP SCHEMA
(2 rows)