--
-- failure_on_create_subscription
--
-- Since the result of these tests depends on the success of a background
-- process that creates the replication slot on the publisher, these
-- tests are separated.
CREATE SCHEMA IF NOT EXISTS move_shard;
SET SEARCH_PATH = move_shard;
SET citus.shard_count TO 4;
SET citus.next_shard_id TO 100;
SET citus.shard_replication_factor TO 1;
SELECT pg_backend_pid() as pid \gset
SELECT citus.mitmproxy('conn.allow()');
 mitmproxy
---------------------------------------------------------------------

(1 row)

CREATE TABLE t(id int PRIMARY KEY, int_data int, data text);
SELECT create_distributed_table('t', 'id');
 create_distributed_table
---------------------------------------------------------------------

(1 row)

CREATE VIEW shards_in_workers AS
SELECT shardid,
       (CASE WHEN nodeport = :worker_1_port THEN 'worker1' ELSE 'worker2' END) AS worker
FROM pg_dist_placement NATURAL JOIN pg_dist_node
WHERE shardstate != 4
ORDER BY 1,2 ASC;
-- Insert some data
INSERT INTO t SELECT x, x+1, MD5(random()::text) FROM generate_series(1,100000) AS f(x);
-- Initial shard placements
SELECT * FROM shards_in_workers;
 shardid | worker
---------------------------------------------------------------------
     100 | worker2
     101 | worker1
     102 | worker2
     103 | worker1
(4 rows)

-- Failure on creating the subscription
-- Failing exactly on CREATE SUBSCRIPTION causes a flaky test where we fail with either:
-- 1) ERROR:  connection to the remote node localhost:xxxxx failed with the following error: ERROR:  subscription "citus_shard_move_subscription_xxxxxxx" does not exist
--    another command is already in progress
-- 2) ERROR:  connection to the remote node localhost:xxxxx failed with the following error: another command is already in progress
-- Instead, fail on the next step (ALTER SUBSCRIPTION), which is also logically
-- required as part of the overall CREATE SUBSCRIPTION operation.
SELECT citus.mitmproxy('conn.onQuery(query="ALTER SUBSCRIPTION").kill()');
 mitmproxy
---------------------------------------------------------------------

(1 row)

SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port);
ERROR:  connection not open
CONTEXT:  while executing command on localhost:xxxxx
-- Verify that the shard is not moved and the row count is still 100k
SELECT * FROM shards_in_workers;
 shardid | worker
---------------------------------------------------------------------
     100 | worker2
     101 | worker1
     102 | worker2
     103 | worker1
(4 rows)

SELECT count(*) FROM t;
 count
---------------------------------------------------------------------
 100000
(1 row)

-- Verify that the shard can be moved after a temporary failure
SELECT citus.mitmproxy('conn.allow()');
 mitmproxy
---------------------------------------------------------------------

(1 row)

SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port);
 master_move_shard_placement
---------------------------------------------------------------------

(1 row)

SELECT * FROM shards_in_workers;
 shardid | worker
---------------------------------------------------------------------
     100 | worker2
     101 | worker2
     102 | worker2
     103 | worker1
(4 rows)

SELECT count(*) FROM t;
 count
---------------------------------------------------------------------
 100000
(1 row)

DROP SCHEMA move_shard CASCADE;
NOTICE:  drop cascades to 2 other objects
DETAIL:  drop cascades to table t
drop cascades to view shards_in_workers
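-- A minimal sketch, not part of the original test: the same ALTER SUBSCRIPTION
-- step could be exercised with a query cancellation instead of a killed
-- connection, assuming the .cancel() mitmproxy helper used by other Citus
-- failure tests, which takes the backend :pid captured at the top of this file:
--
--   SELECT citus.mitmproxy('conn.onQuery(query="ALTER SUBSCRIPTION").cancel(' || :pid || ')');
--   SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port);
--
-- A cancellation would be expected to surface as "canceling statement due to
-- user request" rather than "connection not open", while the shard move should
-- still roll back and leave the placements unchanged.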