From 6869b3ad10af6987680c839a45b42c23a4eb3e26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Sedl=C3=A1k?= Date: Wed, 7 Feb 2024 13:04:52 +0100 Subject: [PATCH] Fail early when shard can't be safely moved to a new node (#7467) DESCRIPTION: citus_move_shard_placement now fails early when shard cannot be safely moved The implementation is quite simplistic - `citus_move_shard_placement(...)` will fail with an error if there's any new node in the cluster that doesn't have reference tables yet. It could have been finer-grained, i.e. erroring only when trying to move a shard to an unitialized node. Looking at the related functions - `replicate_reference_tables()` or `citus_rebalance_start()`, I think it's acceptable behaviour. These other functions also treat "any" unitialized node as a temporary anomaly. Fixes #7426 --------- Co-authored-by: Jelte Fennema-Nio --- .../distributed/operations/shard_transfer.c | 11 +++ .../regress/expected/shard_rebalancer.out | 68 +++++++++++++++++++ src/test/regress/sql/shard_rebalancer.sql | 37 ++++++++++ 3 files changed, 116 insertions(+) diff --git a/src/backend/distributed/operations/shard_transfer.c b/src/backend/distributed/operations/shard_transfer.c index 805ef39d7..0e7f48f68 100644 --- a/src/backend/distributed/operations/shard_transfer.c +++ b/src/backend/distributed/operations/shard_transfer.c @@ -294,6 +294,17 @@ citus_move_shard_placement(PG_FUNCTION_ARGS) CheckCitusVersion(ERROR); EnsureCoordinator(); + List *referenceTableIdList = NIL; + + if (HasNodesWithMissingReferenceTables(&referenceTableIdList)) + { + ereport(ERROR, (errmsg("there are missing reference tables on some nodes"), + errhint("Copy reference tables first with " + "replicate_reference_tables() or use " + "citus_rebalance_start() that will do it automatically." + ))); + } + int64 shardId = PG_GETARG_INT64(0); char *sourceNodeName = text_to_cstring(PG_GETARG_TEXT_P(1)); int32 sourceNodePort = PG_GETARG_INT32(2); diff --git a/src/test/regress/expected/shard_rebalancer.out b/src/test/regress/expected/shard_rebalancer.out index a7cd6b38c..988fa68be 100644 --- a/src/test/regress/expected/shard_rebalancer.out +++ b/src/test/regress/expected/shard_rebalancer.out @@ -2395,6 +2395,74 @@ SELECT count(*) FROM pg_dist_partition; 0 (1 row) +-- verify a system with a new node won't copy distributed table shards without reference tables +SELECT 1 from master_remove_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT public.wait_until_metadata_sync(30000); + wait_until_metadata_sync +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE r1 (a int PRIMARY KEY, b int); +SELECT create_reference_table('r1'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE d1 (a int PRIMARY KEY, b int); +SELECT create_distributed_table('d1', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 15; +SELECT 1 from master_add_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +-- count the number of placements for the reference table to verify it is not available on +-- all nodes +SELECT count(*) +FROM pg_dist_shard +JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'r1'::regclass; + count +--------------------------------------------------------------------- + 1 +(1 row) + +-- #7426 We can't move shards to the fresh node before we copy reference tables there. +-- rebalance_table_shards() will do the copy, but the low-level +-- citus_move_shard_placement() should raise an error +SELECT citus_move_shard_placement(pg_dist_shard.shardid, nodename, nodeport, 'localhost', :worker_2_port) + FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) + WHERE logicalrelid = 'd1'::regclass AND nodename = 'localhost' AND nodeport = :worker_1_port LIMIT 1; +ERROR: there are missing reference tables on some nodes +SELECT replicate_reference_tables(); + replicate_reference_tables +--------------------------------------------------------------------- + +(1 row) + +-- After replication, the move should succeed. +SELECT citus_move_shard_placement(pg_dist_shard.shardid, nodename, nodeport, 'localhost', :worker_2_port) + FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) + WHERE logicalrelid = 'd1'::regclass AND nodename = 'localhost' AND nodeport = :worker_1_port LIMIT 1; + citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE d1, r1; -- verify a system having only reference tables will copy the reference tables when -- executing the rebalancer SELECT 1 from master_remove_node('localhost', :worker_2_port); diff --git a/src/test/regress/sql/shard_rebalancer.sql b/src/test/regress/sql/shard_rebalancer.sql index 5d8e89b36..9037f8f75 100644 --- a/src/test/regress/sql/shard_rebalancer.sql +++ b/src/test/regress/sql/shard_rebalancer.sql @@ -1340,6 +1340,43 @@ DROP TABLE t1, r1, r2; -- test suites should clean up their distributed tables. SELECT count(*) FROM pg_dist_partition; +-- verify a system with a new node won't copy distributed table shards without reference tables + +SELECT 1 from master_remove_node('localhost', :worker_2_port); +SELECT public.wait_until_metadata_sync(30000); + +CREATE TABLE r1 (a int PRIMARY KEY, b int); +SELECT create_reference_table('r1'); + +CREATE TABLE d1 (a int PRIMARY KEY, b int); +SELECT create_distributed_table('d1', 'a'); + +ALTER SEQUENCE pg_dist_groupid_seq RESTART WITH 15; +SELECT 1 from master_add_node('localhost', :worker_2_port); + +-- count the number of placements for the reference table to verify it is not available on +-- all nodes +SELECT count(*) +FROM pg_dist_shard +JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'r1'::regclass; + +-- #7426 We can't move shards to the fresh node before we copy reference tables there. +-- rebalance_table_shards() will do the copy, but the low-level +-- citus_move_shard_placement() should raise an error +SELECT citus_move_shard_placement(pg_dist_shard.shardid, nodename, nodeport, 'localhost', :worker_2_port) + FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) + WHERE logicalrelid = 'd1'::regclass AND nodename = 'localhost' AND nodeport = :worker_1_port LIMIT 1; + +SELECT replicate_reference_tables(); + +-- After replication, the move should succeed. +SELECT citus_move_shard_placement(pg_dist_shard.shardid, nodename, nodeport, 'localhost', :worker_2_port) + FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) + WHERE logicalrelid = 'd1'::regclass AND nodename = 'localhost' AND nodeport = :worker_1_port LIMIT 1; + +DROP TABLE d1, r1; + -- verify a system having only reference tables will copy the reference tables when -- executing the rebalancer