From c968dc9c274592c0f7efa60744e7eda4df248150 Mon Sep 17 00:00:00 2001 From: ahmet gedemenli Date: Thu, 20 Jul 2023 13:08:46 +0300 Subject: [PATCH] Do not rebalance if replication factor is greater than the node count --- src/backend/distributed/operations/shard_rebalancer.c | 7 +++++++ src/test/regress/expected/shard_rebalancer.out | 6 ++++++ src/test/regress/expected/single_node_enterprise.out | 1 + src/test/regress/sql/shard_rebalancer.sql | 6 ++++++ src/test/regress/sql/single_node_enterprise.sql | 2 ++ 5 files changed, 22 insertions(+) diff --git a/src/backend/distributed/operations/shard_rebalancer.c b/src/backend/distributed/operations/shard_rebalancer.c index a8cb3df5c..61a4ee9b0 100644 --- a/src/backend/distributed/operations/shard_rebalancer.c +++ b/src/backend/distributed/operations/shard_rebalancer.c @@ -526,6 +526,13 @@ GetRebalanceSteps(RebalanceOptions *options) } } + if (shardAllowedNodeCount < ShardReplicationFactor) + { + ereport(ERROR, (errmsg("shard replication factor (%d) cannot be greater than " + "number of nodes with should_have_shards=true (%d)", + ShardReplicationFactor, shardAllowedNodeCount))); + } + List *activeShardPlacementListList = NIL; List *unbalancedShards = NIL; diff --git a/src/test/regress/expected/shard_rebalancer.out b/src/test/regress/expected/shard_rebalancer.out index b8f4010b1..6d608d1f9 100644 --- a/src/test/regress/expected/shard_rebalancer.out +++ b/src/test/regress/expected/shard_rebalancer.out @@ -2553,12 +2553,18 @@ SELECT public.wait_until_metadata_sync(30000); (1 row) +-- errors out because shard replication factor > shard allowed node count +SELECT rebalance_table_shards('test_rebalance_with_disabled_worker'); +ERROR: shard replication factor (2) cannot be greater than number of nodes with should_have_shards=true (1) 
+-- set replication factor to one, and try again +SET citus.shard_replication_factor TO 1; SELECT rebalance_table_shards('test_rebalance_with_disabled_worker'); rebalance_table_shards --------------------------------------------------------------------- (1 row) +SET citus.shard_replication_factor TO 2; SELECT 1 FROM citus_activate_node('localhost', :worker_2_port); ?column? --------------------------------------------------------------------- diff --git a/src/test/regress/expected/single_node_enterprise.out b/src/test/regress/expected/single_node_enterprise.out index 305a02b8e..79f231864 100644 --- a/src/test/regress/expected/single_node_enterprise.out +++ b/src/test/regress/expected/single_node_enterprise.out @@ -411,6 +411,7 @@ NOTICE: executing the command locally: SELECT count(*) AS count FROM single_nod ROLLBACK; NOTICE: issuing ROLLBACK +SET citus.shard_replication_factor TO 1; -- now, lets move all the shards of distributed tables out of the coordinator -- block writes is much faster for the sake of the test timings we prefer it SELECT master_drain_node('localhost', :master_port, shard_transfer_mode:='block_writes'); diff --git a/src/test/regress/sql/shard_rebalancer.sql b/src/test/regress/sql/shard_rebalancer.sql index d64fb6826..a53ec8752 100644 --- a/src/test/regress/sql/shard_rebalancer.sql +++ b/src/test/regress/sql/shard_rebalancer.sql @@ -1427,8 +1427,14 @@ SELECT create_distributed_table('test_rebalance_with_disabled_worker', 'a', colo SELECT citus_disable_node('localhost', :worker_2_port); SELECT public.wait_until_metadata_sync(30000); +-- errors out because shard replication factor > shard allowed node count SELECT rebalance_table_shards('test_rebalance_with_disabled_worker'); +-- set replication factor to one, and try again +SET citus.shard_replication_factor TO 1; +SELECT rebalance_table_shards('test_rebalance_with_disabled_worker'); +SET citus.shard_replication_factor TO 2; + SELECT 1 FROM citus_activate_node('localhost', :worker_2_port); DROP 
TABLE test_rebalance_with_disabled_worker; diff --git a/src/test/regress/sql/single_node_enterprise.sql b/src/test/regress/sql/single_node_enterprise.sql index fb6e47b9a..19393ba24 100644 --- a/src/test/regress/sql/single_node_enterprise.sql +++ b/src/test/regress/sql/single_node_enterprise.sql @@ -272,6 +272,8 @@ BEGIN; SELECT count(*) FROM test; ROLLBACK; +SET citus.shard_replication_factor TO 1; + -- now, lets move all the shards of distributed tables out of the coordinator -- block writes is much faster for the sake of the test timings we prefer it SELECT master_drain_node('localhost', :master_port, shard_transfer_mode:='block_writes');