citus/src/test/regress/sql/failure_split_cleanup.sql

318 lines
14 KiB
SQL

-- The test excercises below failure scenarios
--1. Failure while creating publications
--2. Failure while creating shared memory segment
--3. Failure while creating replication slots
--4. Failure while enabling subscription
--5. Failure on polling last write-ahead log location reported to origin WAL sender
--6. Failure on dropping subscription
CREATE SCHEMA "citus_failure_split_cleanup_schema";
SET search_path TO "citus_failure_split_cleanup_schema";
SET citus.next_shard_id TO 8981000;
SET citus.next_placement_id TO 8610000;
SET citus.next_operation_id TO 777;
SET citus.next_cleanup_record_id TO 11;
SET citus.shard_count TO 2;
SET citus.shard_replication_factor TO 1;
SELECT pg_backend_pid() as pid \gset
-- cleanup any leftovers from previous tests so we get consistent output
SELECT public.wait_for_resource_cleanup();
-- Disable defer shard delete to stop auto cleanup.
ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1;
SELECT pg_reload_conf();
-- Connections on the proxy port(worker_2) are monitored
SELECT nodeid AS worker_1_node FROM pg_dist_node WHERE nodeport=:worker_1_port \gset
SELECT nodeid AS worker_2_node FROM pg_dist_node WHERE nodeport=:worker_2_proxy_port \gset
CREATE TABLE table_to_split(id int PRIMARY KEY, int_data int, data text);
SELECT create_distributed_table('table_to_split', 'id');
--1. Failure while creating publications
SELECT citus.mitmproxy('conn.onQuery(query="CREATE PUBLICATION .* FOR TABLE").killall()');
SELECT pg_catalog.citus_split_shard_by_split_points(
8981000,
ARRAY['-100000'],
ARRAY[:worker_1_node, :worker_2_node],
'force_logical');
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
-- we need to allow connection so that we can connect to proxy
SELECT citus.mitmproxy('conn.allow()');
\c - - - :worker_2_proxy_port
SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;
SET citus.show_shards_for_app_name_prefixes = '*';
-- Left over child shards
SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname;
-- Left over publications
SELECT pubname FROM pg_publication;
-- Left over replication slots
SELECT slot_name FROM pg_replication_slots;
-- Left over subscriptions
SELECT subname FROM pg_subscription;
\c - postgres - :master_port
SELECT public.wait_for_resource_cleanup();
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
\c - - - :worker_2_proxy_port
SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;
SET citus.show_shards_for_app_name_prefixes = '*';
-- Empty child shards after cleanup
SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname;
-- Empty publications
SELECT pubname FROM pg_publication;
-- Empty replication slot table
SELECT slot_name FROM pg_replication_slots;
-- Empty subscriptions
SELECT subname FROM pg_subscription;
--2. Failure while creating shared memory segment
\c - postgres - :master_port
SET citus.next_shard_id TO 8981002;
SET citus.next_operation_id TO 777;
SET citus.next_cleanup_record_id TO 11;
SELECT citus.mitmproxy('conn.onQuery(query="SELECT \* FROM pg_catalog.worker_split_shard_replication_setup\(.*").killall()');
-- set log level to prevent flakiness
SET client_min_messages TO ERROR;
SELECT pg_catalog.citus_split_shard_by_split_points(
8981000,
ARRAY['-100000'],
ARRAY[:worker_1_node, :worker_2_node],
'force_logical');
RESET client_min_messages;
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
-- we need to allow connection so that we can connect to proxy
SELECT citus.mitmproxy('conn.allow()');
\c - - - :worker_2_proxy_port
SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;
SET citus.show_shards_for_app_name_prefixes = '*';
-- Left over child shards
SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname;
-- Left over publications
SELECT pubname FROM pg_publication;
-- Left over replication slots
SELECT slot_name FROM pg_replication_slots;
-- Left over subscriptions
SELECT subname FROM pg_subscription;
\c - postgres - :master_port
SELECT public.wait_for_resource_cleanup();
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
\c - - - :worker_2_proxy_port
SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;
SET citus.show_shards_for_app_name_prefixes = '*';
-- Empty child shards after cleanup
SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname;
-- Empty publications
SELECT pubname FROM pg_publication;
-- Empty replication slot table
SELECT slot_name FROM pg_replication_slots;
-- Empty subscriptions
SELECT subname FROM pg_subscription;
--3. Failure while executing 'CREATE_REPLICATION_SLOT' for Snapshot.
\c - postgres - :master_port
SET citus.next_shard_id TO 8981002;
SET citus.next_operation_id TO 777;
SET citus.next_cleanup_record_id TO 11;
SELECT citus.mitmproxy('conn.onQuery(query="CREATE_REPLICATION_SLOT .* LOGICAL .* EXPORT_SNAPSHOT.*").killall()');
SELECT pg_catalog.citus_split_shard_by_split_points(
8981000,
ARRAY['-100000'],
ARRAY[:worker_1_node, :worker_2_node],
'force_logical');
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
-- we need to allow connection so that we can connect to proxy
SELECT citus.mitmproxy('conn.allow()');
\c - - - :worker_2_proxy_port
SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;
SET citus.show_shards_for_app_name_prefixes = '*';
-- Left over child shards
SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname;
-- Left over publications
SELECT pubname FROM pg_publication;
-- Left over replication slots
SELECT slot_name FROM pg_replication_slots;
-- Left over subscriptions
SELECT subname FROM pg_subscription;
\c - postgres - :master_port
SELECT public.wait_for_resource_cleanup();
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
\c - - - :worker_2_proxy_port
SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;
SET citus.show_shards_for_app_name_prefixes = '*';
-- Empty child shards after cleanup
SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname;
-- Empty publications
SELECT pubname FROM pg_publication;
-- Empty replication slot table
SELECT slot_name FROM pg_replication_slots;
-- Empty subscriptions
SELECT subname FROM pg_subscription;
--4. Failure while enabling subscription
\c - postgres - :master_port
SET citus.next_shard_id TO 8981002;
SET citus.next_operation_id TO 777;
SET citus.next_cleanup_record_id TO 11;
SELECT citus.mitmproxy('conn.onQuery(query="ALTER SUBSCRIPTION .* ENABLE").killall()');
SELECT pg_catalog.citus_split_shard_by_split_points(
8981000,
ARRAY['-100000'],
ARRAY[:worker_1_node, :worker_2_node],
'force_logical');
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
-- we need to allow connection so that we can connect to proxy
SELECT citus.mitmproxy('conn.allow()');
\c - - - :worker_2_proxy_port
SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;
SET citus.show_shards_for_app_name_prefixes = '*';
-- Left over child shards
SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname;
-- Left over publications
SELECT pubname FROM pg_publication;
-- Left over replication slots
SELECT slot_name FROM pg_replication_slots;
-- Left over subscriptions
SELECT subname FROM pg_subscription;
\c - postgres - :master_port
SELECT public.wait_for_resource_cleanup();
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
\c - - - :worker_2_proxy_port
SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;
SET citus.show_shards_for_app_name_prefixes = '*';
-- Empty child shards after cleanup
SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname;
-- Empty publications
SELECT pubname FROM pg_publication;
-- Empty replication slot table
SELECT slot_name FROM pg_replication_slots;
-- Empty subscriptions
SELECT subname FROM pg_subscription;
--5. Failure on polling last write-ahead log location reported to origin WAL sender
\c - postgres - :master_port
SET citus.next_shard_id TO 8981002;
SET citus.next_operation_id TO 777;
SET citus.next_cleanup_record_id TO 11;
SELECT citus.mitmproxy('conn.onQuery(query="^SELECT min\(latest_end_lsn").killall()');
SELECT pg_catalog.citus_split_shard_by_split_points(
8981000,
ARRAY['-100000'],
ARRAY[:worker_1_node, :worker_2_node],
'force_logical');
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
-- we need to allow connection so that we can connect to proxy
SELECT citus.mitmproxy('conn.allow()');
\c - - - :worker_2_proxy_port
SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;
SET citus.show_shards_for_app_name_prefixes = '*';
-- Left over child shards
SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname;
-- Left over publications
SELECT pubname FROM pg_publication;
-- Left over replication slots
SELECT slot_name FROM pg_replication_slots;
-- Left over subscriptions
SELECT subname FROM pg_subscription;
\c - postgres - :master_port
SELECT public.wait_for_resource_cleanup();
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
\c - - - :worker_2_proxy_port
SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;
SET citus.show_shards_for_app_name_prefixes = '*';
-- Empty child shards after cleanup
SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname;
-- Empty publications
SELECT pubname FROM pg_publication;
-- Empty replication slot table
SELECT slot_name FROM pg_replication_slots;
-- Empty subscriptions
SELECT subname FROM pg_subscription;
--6. Failure on dropping subscription
\c - postgres - :master_port
SET citus.next_shard_id TO 8981002;
SET citus.next_operation_id TO 777;
SET citus.next_cleanup_record_id TO 11;
SELECT citus.mitmproxy('conn.onQuery(query="^DROP SUBSCRIPTION").killall()');
SELECT pg_catalog.citus_split_shard_by_split_points(
8981000,
ARRAY['-100000'],
ARRAY[:worker_1_node, :worker_2_node],
'force_logical');
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname;
-- we need to allow connection so that we can connect to proxy
SELECT citus.mitmproxy('conn.allow()');
\c - - - :worker_2_proxy_port
SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;
SET citus.show_shards_for_app_name_prefixes = '*';
-- Left over child shards
SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname;
-- Left over publications
SELECT pubname FROM pg_publication;
-- Left over replication slots
SELECT slot_name FROM pg_replication_slots;
-- Left over subscriptions
SELECT subname FROM pg_subscription;
\c - postgres - :master_port
SELECT public.wait_for_resource_cleanup();
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
\c - - - :worker_2_proxy_port
SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;
SET citus.show_shards_for_app_name_prefixes = '*';
-- Empty child shards after cleanup
SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname;
-- Empty publications
SELECT pubname FROM pg_publication;
-- Empty replication slot table
SELECT slot_name FROM pg_replication_slots;
-- Empty subscriptions
SELECT subname FROM pg_subscription;
-- Cleanup
\c - postgres - :master_port
SET client_min_messages TO WARNING;
DROP SCHEMA "citus_failure_split_cleanup_schema" CASCADE;
-- Cleanup