SET citus.next_shard_id TO 820000; SELECT groupid AS worker_2_group FROM pg_dist_node WHERE nodeport=:worker_2_port \gset SELECT groupid AS worker_1_group FROM pg_dist_node WHERE nodeport=:worker_1_port \gset -- =================================================================== -- test shard repair functionality -- =================================================================== -- create a table and create its distribution metadata CREATE TABLE customer_engagements ( id integer, created_at date, event_data text ); -- add some indexes CREATE INDEX ON customer_engagements (id); CREATE INDEX ON customer_engagements (created_at); CREATE INDEX ON customer_engagements (event_data); -- distribute the table -- create a single shard on the first worker SET citus.shard_count TO 1; SET citus.shard_replication_factor TO 2; SELECT create_distributed_table('customer_engagements', 'id', 'hash'); create_distributed_table --------------------------------------------------------------------- (1 row) -- ingest some data for the tests INSERT INTO customer_engagements VALUES (1, '01-01-2015', 'first event'); INSERT INTO customer_engagements VALUES (2, '02-01-2015', 'second event'); INSERT INTO customer_engagements VALUES (1, '03-01-2015', 'third event'); -- the following queries does the following: -- (i) create a new shard -- (ii) mark the second shard placements as unhealthy -- (iii) do basic checks i.e., only allow copy from healthy placement to unhealthy ones -- (iv) do a successful master_copy_shard_placement from the first placement to the second -- (v) mark the first placement as unhealthy and execute a query that is routed to the second placement -- get the newshardid SELECT shardid as newshardid FROM pg_dist_shard WHERE logicalrelid = 'customer_engagements'::regclass \gset -- now, update the second placement as unhealthy UPDATE pg_dist_placement SET shardstate = 3 WHERE shardid = :newshardid AND groupid = :worker_2_group; -- cannot repair a shard after a modification (transaction still open during repair) BEGIN; ALTER TABLE customer_engagements ADD COLUMN value float; SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); ERROR: cannot open new connections after the first modification command within a transaction ROLLBACK; BEGIN; INSERT INTO customer_engagements VALUES (4, '04-01-2015', 'fourth event'); SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); ERROR: cannot open new connections after the first modification command within a transaction ROLLBACK; -- modifications after reparing a shard are fine (will use new metadata) BEGIN; SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); master_copy_shard_placement --------------------------------------------------------------------- (1 row) ALTER TABLE customer_engagements ADD COLUMN value float; ROLLBACK; BEGIN; SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); master_copy_shard_placement --------------------------------------------------------------------- (1 row) INSERT INTO customer_engagements VALUES (4, '04-01-2015', 'fourth event'); ROLLBACK; -- deactivate placement UPDATE pg_dist_placement SET shardstate = 1 WHERE groupid = :worker_2_group and shardid = :newshardid; SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); ERROR: target placement must be in inactive state UPDATE pg_dist_placement SET shardstate = 3 WHERE groupid = :worker_2_group and shardid = :newshardid; -- also try to copy from an inactive placement SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port); ERROR: source placement must be in active state -- "copy" this shard from the first placement to the second one SELECT master_copy_shard_placement(:newshardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port); master_copy_shard_placement --------------------------------------------------------------------- (1 row) -- now, update first placement as unhealthy (and raise a notice) so that queries are not routed to there UPDATE pg_dist_placement SET shardstate = 3 WHERE shardid = :newshardid AND groupid = :worker_1_group; -- get the data from the second placement SELECT * FROM customer_engagements; id | created_at | event_data --------------------------------------------------------------------- 1 | 01-01-2015 | first event 2 | 02-01-2015 | second event 1 | 03-01-2015 | third event (3 rows)