Change default rebalance strategy to by_disk_size (#7033)

DESCRIPTION: Change default rebalance strategy to by_disk_size. When introducing rebalancing by disk size we didn't make it the default initially. The main reason was that we expected some problems with it. We have indeed had some problems/bugs with it over the years, and have fixed all of them. By now we're quite confident in its stability, and that it pretty much always gives better results than by_shard_count. So this PR makes by_disk_size the new default. We don't change the default when a strategy other than by_shard_count is the current default. This is in case someone defined their own rebalance strategy and marked it as the default themselves. Note: It explicitly does nothing during a downgrade, because there's no way of knowing whether the rebalance strategy before the upgrade was by_disk_size or by_shard_count. Besides, by_disk_size has been considered superior for quite some time, even in previous versions.
2023-07-03 11:08:24 +02:00 · 2023-07-03 11:08:24 +02:00 · ac24e11986
parent fd1427de2c
commit ac24e11986
6 changed files with 32 additions and 2 deletions
--- a/src/backend/distributed/sql/citus--11.3-1--12.0-1.sql
+++ b/src/backend/distributed/sql/citus--11.3-1--12.0-1.sql
@ -42,3 +42,10 @@ DROP FUNCTION citus_shard_sizes;

 #include "udfs/drop_old_time_partitions/12.0-1.sql"
 #include "udfs/get_missing_time_partition_ranges/12.0-1.sql"
+
+-- Make 'by_disk_size' the default rebalance strategy, but only if the current
+-- default is 'by_shard_count', so that any other chosen default is left as is.
+SELECT citus_set_default_rebalance_strategy(name)
+FROM pg_dist_rebalance_strategy
+WHERE name = 'by_disk_size'
+    AND (SELECT default_strategy FROM pg_dist_rebalance_strategy WHERE name = 'by_shard_count');
--- a/src/backend/distributed/sql/downgrades/citus--12.0-1--11.3-1.sql
+++ b/src/backend/distributed/sql/downgrades/citus--12.0-1--11.3-1.sql
@ -76,3 +76,8 @@ DROP FUNCTION pg_catalog.citus_stat_tenants_local_internal(

 #include "../udfs/drop_old_time_partitions/10.2-1.sql"
 #include "../udfs/get_missing_time_partition_ranges/10.2-1.sql"
+
+-- This explicitly does not reset the rebalance strategy to by_shard_count,
+-- because there's no way of knowing whether the rebalance strategy before the
+-- upgrade was by_disk_size or by_shard_count. Besides, by_disk_size has been
+-- considered superior for quite some time, even in previous versions.
--- a/src/test/regress/expected/shard_rebalancer.out
+++ b/src/test/regress/expected/shard_rebalancer.out
@ -3,6 +3,14 @@
 --
 SET citus.next_shard_id TO 433000;
 SET citus.propagate_session_settings_for_loopback_connection TO ON;
+-- Because of historic reasons this test was written in a way that assumes that
+-- by_shard_count is the default strategy.
+SELECT citus_set_default_rebalance_strategy('by_shard_count');
+ citus_set_default_rebalance_strategy
+---------------------------------------------------------------------
+
+(1 row)
+
 -- Lower the minimum disk size that a shard group is considered as. Otherwise
 -- we need to create shards of more than 100MB.
 ALTER SYSTEM SET citus.rebalancer_by_disk_size_base_cost = 0;
@ -2863,6 +2871,12 @@ select 1 from citus_add_node('localhost', :worker_2_port);
 select rebalance_table_shards();
 ERROR:  cannot use logical replication to transfer shards of the relation table_without_primary_key since it doesn't have a REPLICA IDENTITY or PRIMARY KEY
 DROP TABLE table_with_primary_key, table_without_primary_key;
+SELECT citus_set_default_rebalance_strategy('by_disk_size');
+ citus_set_default_rebalance_strategy
+---------------------------------------------------------------------
+
+(1 row)
+
 ALTER SYSTEM RESET citus.rebalancer_by_disk_size_base_cost;
 SELECT pg_reload_conf();
 pg_reload_conf
--- a/src/test/regress/expected/single_shard_table_udfs.out
+++ b/src/test/regress/expected/single_shard_table_udfs.out
@ -334,7 +334,7 @@ ERROR:  Table 'single_shard_table_col2_1' is streaming replicated. Shards of str
 SELECT citus_copy_shard_placement(1820005, :worker_1_node, :worker_2_node);
 ERROR:  Table 'single_shard_table_col2_1' is streaming replicated. Shards of streaming replicated tables cannot be copied
 -- no changes because it's already balanced
-SELECT rebalance_table_shards();
+SELECT rebalance_table_shards(rebalance_strategy := 'by_shard_count');
 rebalance_table_shards
 ---------------------------------------------------------------------

--- a/src/test/regress/sql/shard_rebalancer.sql
+++ b/src/test/regress/sql/shard_rebalancer.sql
@ -5,6 +5,9 @@
 SET citus.next_shard_id TO 433000;
 SET citus.propagate_session_settings_for_loopback_connection TO ON;

+-- Because of historic reasons this test was written in a way that assumes that
+-- by_shard_count is the default strategy.
+SELECT citus_set_default_rebalance_strategy('by_shard_count');
 -- Lower the minimum disk size that a shard group is considered as. Otherwise
 -- we need to create shards of more than 100MB.
 ALTER SYSTEM SET citus.rebalancer_by_disk_size_base_cost = 0;
@ -1574,6 +1577,7 @@ select 1 from citus_add_node('localhost', :worker_2_port);
 select rebalance_table_shards();

 DROP TABLE table_with_primary_key, table_without_primary_key;
+SELECT citus_set_default_rebalance_strategy('by_disk_size');
 ALTER SYSTEM RESET citus.rebalancer_by_disk_size_base_cost;
 SELECT pg_reload_conf();
 \c - - - :worker_1_port
--- a/src/test/regress/sql/single_shard_table_udfs.sql
+++ b/src/test/regress/sql/single_shard_table_udfs.sql
@ -160,7 +160,7 @@ SELECT master_copy_shard_placement(1820005, 'localhost', :worker_1_port, 'localh
 SELECT citus_copy_shard_placement(1820005, :worker_1_node, :worker_2_node);

 -- no changes because it's already balanced
-SELECT rebalance_table_shards();
+SELECT rebalance_table_shards(rebalance_strategy := 'by_shard_count');

 -- same placements
 SELECT shardid, nodeport FROM pg_dist_shard_placement WHERE shardid > 1820000 ORDER BY shardid;