diff --git a/.circleci/config.yml b/.circleci/config.yml index ecf5032a9..7b2e56ffe 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -407,6 +407,12 @@ workflows: image_tag: '11.9' make: check-worker requires: [build-11] + - test-citus: + name: 'test-11_check-operations' + pg_major: 11 + image_tag: '11.9' + make: check-operations + requires: [build-11] - test-citus: name: 'test-11_check-follower-cluster' pg_major: 11 @@ -451,6 +457,12 @@ workflows: image_tag: '12.4' make: check-worker requires: [build-12] + - test-citus: + name: 'test-12_check-operations' + pg_major: 12 + image_tag: '12.4' + make: check-operations + requires: [build-12] - test-citus: name: 'test-12_check-follower-cluster' pg_major: 12 @@ -507,6 +519,12 @@ workflows: image_tag: '13.0' make: check-worker requires: [build-13] + - test-citus: + name: 'test-13_check-operations' + pg_major: 13 + image_tag: '13.0' + make: check-operations + requires: [build-13] - test-citus: name: 'test-13_check-follower-cluster' pg_major: 13 diff --git a/src/backend/distributed/commands/foreign_constraint.c b/src/backend/distributed/commands/foreign_constraint.c index 966b8d795..371cd0514 100644 --- a/src/backend/distributed/commands/foreign_constraint.c +++ b/src/backend/distributed/commands/foreign_constraint.c @@ -15,6 +15,8 @@ #include "distributed/pg_version_constants.h" #include "access/htup_details.h" +#include "access/sysattr.h" +#include "access/xact.h" #include "catalog/namespace.h" #include "catalog/pg_constraint.h" #if (PG_VERSION_NUM >= PG_VERSION_12) @@ -23,13 +25,16 @@ #include "catalog/pg_type.h" #include "distributed/colocation_utils.h" #include "distributed/commands.h" +#include "distributed/coordinator_protocol.h" #include "distributed/listutils.h" #include "distributed/coordinator_protocol.h" #include "distributed/multi_join_order.h" #include "distributed/namespace_utils.h" #include "distributed/reference_table_utils.h" #include "distributed/version_compat.h" +#include "utils/builtins.h" #include "utils/fmgroids.h" +#include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/relcache.h" @@ -69,6 +74,8 @@ static List * GetForeignKeyIdsForColumn(char *columnName, Oid relationId, static List * GetForeignConstraintCommandsInternal(Oid relationId, int flags); static Oid get_relation_constraint_oid_compat(HeapTuple heapTuple); static bool IsTableTypeIncluded(Oid relationId, int flags); +static void UpdateConstraintIsValid(Oid constraintId, bool isValid); + /* * ConstraintIsAForeignKeyToReferenceTable checks if the given constraint is a @@ -1015,3 +1022,199 @@ IsTableTypeIncluded(Oid relationId, int flags) } return false; } + + +/* + * GetForeignConstraintCommandsToReferenceTable takes in a shardInterval, and + * returns the list of commands that are required to create the foreign + * constraints for that shardInterval. + * + * The function does the following hack: + * - Create the foreign constraints as INVALID on the shards + * - Manually update pg_constraint to mark the same foreign + * constraints as VALID + * + * We implement the above hack because we aim to skip the validation phase + * of foreign keys to reference tables. The validation is pretty costly and + * given that the source placements already valid, the validation in the + * target nodes is useless. + * + * The function does not apply the same logic for the already invalid foreign + * constraints. 
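+ *
+ * As a rough sketch (hypothetical shard ids, schema, and constraint names, and
+ * assuming WORKER_APPLY_INTER_SHARD_DDL_COMMAND expands to the
+ * worker_apply_inter_shard_ddl_command() UDF), the commands generated for one
+ * foreign key look like:
+ *
+ *   SELECT worker_apply_inter_shard_ddl_command(102010, 'public', 102008, 'public',
+ *       'ALTER TABLE public.orders ADD CONSTRAINT orders_ref_fkey
+ *        FOREIGN KEY (ref_id) REFERENCES public.reference_table (id) NOT VALID');
+ *   UPDATE pg_constraint SET convalidated = true
+ *       WHERE conrelid = 'public.orders_102010'::regclass
+ *         AND conname = 'orders_ref_fkey_102010';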
+ */ +List * +GetForeignConstraintCommandsToReferenceTable(ShardInterval *shardInterval) +{ + ScanKeyData scanKey[1]; + int scanKeyCount = 1; + uint64 shardId = shardInterval->shardId; + Oid relationId = shardInterval->relationId; + + List *commandList = NIL; + + /* + * Set search_path to NIL so that all objects outside of pg_catalog will be + * schema-prefixed. pg_catalog will be added automatically when we call + * PushOverrideSearchPath(), since we set addCatalog to true; + */ + OverrideSearchPath *overridePath = GetOverrideSearchPath(CurrentMemoryContext); + overridePath->schemas = NIL; + overridePath->addCatalog = true; + PushOverrideSearchPath(overridePath); + + /* open system catalog and scan all constraints that belong to this table */ + Relation pgConstraint = table_open(ConstraintRelationId, AccessShareLock); + ScanKeyInit(&scanKey[0], Anum_pg_constraint_conrelid, BTEqualStrategyNumber, F_OIDEQ, + relationId); + + SysScanDesc scanDescriptor = systable_beginscan(pgConstraint, + ConstraintRelidTypidNameIndexId, + true, NULL, scanKeyCount, scanKey); + + HeapTuple heapTuple = systable_getnext(scanDescriptor); + while (HeapTupleIsValid(heapTuple)) + { + Form_pg_constraint constraintForm = (Form_pg_constraint) GETSTRUCT(heapTuple); + char *constraintDefinition = NULL; + + + if (constraintForm->contype != CONSTRAINT_FOREIGN) + { + heapTuple = systable_getnext(scanDescriptor); + continue; + } + + Oid referencedRelationId = constraintForm->confrelid; + if (PartitionMethod(referencedRelationId) != DISTRIBUTE_BY_NONE) + { + heapTuple = systable_getnext(scanDescriptor); + continue; + } + + Oid constraintId = get_relation_constraint_oid(relationId, + constraintForm->conname.data, + true); + + int64 referencedShardId = GetFirstShardId(referencedRelationId); + Oid referencedSchemaId = get_rel_namespace(referencedRelationId); + char *referencedSchemaName = get_namespace_name(referencedSchemaId); + char *escapedReferencedSchemaName = quote_literal_cstr(referencedSchemaName); + + Oid schemaId = get_rel_namespace(relationId); + char *schemaName = get_namespace_name(schemaId); + char *escapedSchemaName = quote_literal_cstr(schemaName); + + /* + * We're first marking the constraint's valid field as invalid + * and get the constraint definition. Later, we mark the constraint + * as valid back with directly updating to pg_constraint. 
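+		 *
+		 * The toggle matters because, while convalidated is false, the
+		 * definition produced by pg_get_constraintdef_command() carries a
+		 * trailing NOT VALID, e.g. (sketch):
+		 *
+		 *   ... FOREIGN KEY (ref_id) REFERENCES reference_table (id) NOT VALID
+		 *
+		 * so applying that command on the worker skips the validation scan.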
+		 */
+		if (constraintForm->convalidated == true)
+		{
+			UpdateConstraintIsValid(constraintId, false);
+			constraintDefinition = pg_get_constraintdef_command(constraintId);
+			UpdateConstraintIsValid(constraintId, true);
+		}
+		else
+		{
+			/* if the constraint is not valid, simply do nothing special */
+			constraintDefinition = pg_get_constraintdef_command(constraintId);
+		}
+
+		StringInfo applyForeignConstraintCommand = makeStringInfo();
+		appendStringInfo(applyForeignConstraintCommand,
+						 WORKER_APPLY_INTER_SHARD_DDL_COMMAND, shardId,
+						 escapedSchemaName, referencedShardId,
+						 escapedReferencedSchemaName,
+						 quote_literal_cstr(constraintDefinition));
+		commandList = lappend(commandList, applyForeignConstraintCommand->data);
+
+		/* mark the constraint as valid again on the shard */
+		if (constraintForm->convalidated == true)
+		{
+			StringInfo markConstraintValid = makeStringInfo();
+			char *qualifiedReferencingShardName =
+				ConstructQualifiedShardName(shardInterval);
+
+			char *shardConstraintName = pstrdup(constraintForm->conname.data);
+			AppendShardIdToName(&shardConstraintName, shardId);
+
+			appendStringInfo(markConstraintValid,
+							 "UPDATE pg_constraint SET convalidated = true WHERE "
+							 "conrelid = %s::regclass AND conname = '%s'",
+							 quote_literal_cstr(qualifiedReferencingShardName),
+							 shardConstraintName);
+			commandList = lappend(commandList, markConstraintValid->data);
+		}
+
+		heapTuple = systable_getnext(scanDescriptor);
+	}
+
+	/* clean up scan and close system catalog */
+	systable_endscan(scanDescriptor);
+	table_close(pgConstraint, AccessShareLock);
+
+	/* revert back to original search_path */
+	PopOverrideSearchPath();
+
+	return commandList;
+}
+
+
+/*
+ * UpdateConstraintIsValid is a utility function that sets
+ * pg_constraint.convalidated to the given isValid value for the given
+ * constraintId.
+ *
+ * This function should be used with caution because incorrect use could
+ * lead to data inconsistencies.
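+ *
+ * In SQL terms the update amounts to roughly (sketch):
+ *
+ *   UPDATE pg_constraint SET convalidated = <isValid> WHERE oid = <constraintId>;
+ *
+ * except that it goes through the catalog APIs together with a cache
+ * invalidation and a CommandCounterIncrement().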
+ */ +static void +UpdateConstraintIsValid(Oid constraintId, bool isValid) +{ + HeapTuple heapTuple = NULL; + SysScanDesc scanDescriptor; + ScanKeyData scankey[1]; + Relation pgConstraint = table_open(ConstraintRelationId, AccessShareLock); + TupleDesc tupleDescriptor = RelationGetDescr(pgConstraint); + Datum values[Natts_pg_constraint]; + bool isnull[Natts_pg_constraint]; + bool replace[Natts_pg_constraint]; + + ScanKeyInit(&scankey[0], +#if PG_VERSION_NUM >= 120000 + Anum_pg_constraint_oid, +#else + ObjectIdAttributeNumber, +#endif + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(constraintId)); + + scanDescriptor = systable_beginscan(pgConstraint, + ConstraintOidIndexId, + true, + NULL, + 1, + scankey); + heapTuple = systable_getnext(scanDescriptor); + if (!HeapTupleIsValid(heapTuple)) + { + elog(ERROR, "could not find tuple for constraint %u", constraintId); + } + + memset(replace, 0, sizeof(replace)); + + values[Anum_pg_constraint_convalidated - 1] = BoolGetDatum(isValid); + isnull[Anum_pg_constraint_convalidated - 1] = false; + replace[Anum_pg_constraint_convalidated - 1] = true; + + heapTuple = heap_modify_tuple(heapTuple, tupleDescriptor, values, isnull, replace); + + CatalogTupleUpdate(pgConstraint, &heapTuple->t_self, heapTuple); + + CacheInvalidateHeapTuple(pgConstraint, heapTuple, NULL); + CommandCounterIncrement(); + + systable_endscan(scanDescriptor); + table_close(pgConstraint, NoLock); +} diff --git a/src/backend/distributed/metadata/metadata_cache.c b/src/backend/distributed/metadata/metadata_cache.c index 324a5ac38..1699202e8 100644 --- a/src/backend/distributed/metadata/metadata_cache.c +++ b/src/backend/distributed/metadata/metadata_cache.c @@ -132,6 +132,7 @@ typedef struct MetadataCacheData bool extensionLoaded; Oid distShardRelationId; Oid distPlacementRelationId; + Oid distRebalanceStrategyRelationId; Oid distNodeRelationId; Oid distNodeNodeIdIndexId; Oid distLocalGroupRelationId; @@ -2061,6 +2062,17 @@ DistLocalGroupIdRelationId(void) } +/* return oid of pg_dist_rebalance_strategy relation */ +Oid +DistRebalanceStrategyRelationId(void) +{ + CachedRelationLookup("pg_dist_rebalance_strategy", + &MetadataCache.distRebalanceStrategyRelationId); + + return MetadataCache.distRebalanceStrategyRelationId; +} + + /* return the oid of citus namespace */ Oid CitusCatalogNamespaceId(void) @@ -3939,6 +3951,37 @@ LookupShardRelationFromCatalog(int64 shardId, bool missingOk) } +/* + * ShardExists returns whether the given shard ID exists in pg_dist_shard. + */ +bool +ShardExists(int64 shardId) +{ + ScanKeyData scanKey[1]; + int scanKeyCount = 1; + Relation pgDistShard = table_open(DistShardRelationId(), AccessShareLock); + bool shardExists = false; + + ScanKeyInit(&scanKey[0], Anum_pg_dist_shard_shardid, + BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(shardId)); + + SysScanDesc scanDescriptor = systable_beginscan(pgDistShard, + DistShardShardidIndexId(), true, + NULL, scanKeyCount, scanKey); + + HeapTuple heapTuple = systable_getnext(scanDescriptor); + if (HeapTupleIsValid(heapTuple)) + { + shardExists = true; + } + + systable_endscan(scanDescriptor); + table_close(pgDistShard, NoLock); + + return shardExists; +} + + /* * GetPartitionTypeInputInfo populates output parameters with the interval type * identifier and modifier for the specified partition key/method combination. 
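The new ShardExists() lookup is, in SQL terms, roughly equivalent to the following
(a sketch with a hypothetical shard id):

    SELECT EXISTS (SELECT 1 FROM pg_dist_shard WHERE shardid = 102008);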
diff --git a/src/backend/distributed/metadata/metadata_utility.c b/src/backend/distributed/metadata/metadata_utility.c index af1563129..d30b33546 100644 --- a/src/backend/distributed/metadata/metadata_utility.c +++ b/src/backend/distributed/metadata/metadata_utility.c @@ -902,6 +902,46 @@ AllShardPlacementsOnNodeGroup(int32 groupId) } +/* + * AllShardPlacementsWithShardPlacementState finds shard placements with the given + * shardState from system catalogs, converts these placements to their in-memory + * representation, and returns the converted shard placements in a new list. + */ +List * +AllShardPlacementsWithShardPlacementState(ShardState shardState) +{ + List *shardPlacementList = NIL; + ScanKeyData scanKey[1]; + int scanKeyCount = 1; + + Relation pgPlacement = table_open(DistPlacementRelationId(), AccessShareLock); + + ScanKeyInit(&scanKey[0], Anum_pg_dist_placement_shardstate, + BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(shardState)); + + SysScanDesc scanDescriptor = systable_beginscan(pgPlacement, InvalidOid, false, + NULL, scanKeyCount, scanKey); + + HeapTuple heapTuple = systable_getnext(scanDescriptor); + while (HeapTupleIsValid(heapTuple)) + { + TupleDesc tupleDescriptor = RelationGetDescr(pgPlacement); + + GroupShardPlacement *placement = + TupleToGroupShardPlacement(tupleDescriptor, heapTuple); + + shardPlacementList = lappend(shardPlacementList, placement); + + heapTuple = systable_getnext(scanDescriptor); + } + + systable_endscan(scanDescriptor); + table_close(pgPlacement, NoLock); + + return shardPlacementList; +} + + /* * TupleToGroupShardPlacement takes in a heap tuple from pg_dist_placement, * and converts this tuple to in-memory struct. The function assumes the diff --git a/src/backend/distributed/operations/repair_shards.c b/src/backend/distributed/operations/repair_shards.c index 75d12b530..56edc95ff 100644 --- a/src/backend/distributed/operations/repair_shards.c +++ b/src/backend/distributed/operations/repair_shards.c @@ -11,18 +11,21 @@ */ #include "postgres.h" -#include "c.h" #include "fmgr.h" #include "miscadmin.h" #include +#include "access/htup_details.h" #include "catalog/pg_class.h" +#include "catalog/pg_enum.h" +#include "distributed/citus_ruleutils.h" #include "distributed/colocation_utils.h" #include "distributed/commands.h" #include "distributed/connection_management.h" #include "distributed/distributed_planner.h" #include "distributed/listutils.h" +#include "distributed/shard_cleaner.h" #include "distributed/coordinator_protocol.h" #include "distributed/metadata_cache.h" #include "distributed/metadata_sync.h" @@ -35,6 +38,7 @@ #include "distributed/worker_transaction.h" #include "lib/stringinfo.h" #include "nodes/pg_list.h" +#include "storage/lmgr.h" #include "storage/lock.h" #include "storage/lmgr.h" #include "utils/builtins.h" @@ -42,6 +46,8 @@ #include "utils/errcodes.h" #include "utils/lsyscache.h" #include "utils/palloc.h" +#include "utils/rel.h" +#include "utils/syscache.h" /* local function forward declarations */ static void ErrorIfTableCannotBeReplicated(Oid relationId); @@ -65,15 +71,27 @@ static void EnsureShardCanBeCopied(int64 shardId, const char *sourceNodeName, int32 sourceNodePort, const char *targetNodeName, int32 targetNodePort); static List * RecreateTableDDLCommandList(Oid relationId); -static List * WorkerApplyShardDDLCommandList(List *ddlCommandList, int64 shardId); static void EnsureTableListOwner(List *tableIdList); static void EnsureTableListSuitableForReplication(List *tableIdList); +static void 
DropColocatedShardPlacement(ShardInterval *shardInterval, char *nodeName, + int32 nodePort); +static void MarkForDropColocatedShardPlacement(ShardInterval *shardInterval, + char *nodeName, int32 nodePort); +static void UpdateColocatedShardPlacementMetadataOnWorkers(int64 shardId, + char *sourceNodeName, + int32 sourceNodePort, + char *targetNodeName, + int32 targetNodePort); + /* declarations for dynamic loading */ PG_FUNCTION_INFO_V1(master_copy_shard_placement); PG_FUNCTION_INFO_V1(master_move_shard_placement); +bool DeferShardDeleteOnMove = false; + + /* * master_copy_shard_placement implements a user-facing UDF to repair data from * a healthy (source) node to an inactive (target) node. To accomplish this it @@ -98,16 +116,15 @@ master_copy_shard_placement(PG_FUNCTION_ARGS) char *sourceNodeName = text_to_cstring(sourceNodeNameText); char *targetNodeName = text_to_cstring(targetNodeNameText); - CheckCitusVersion(ERROR); EnsureCoordinator(); + CheckCitusVersion(ERROR); char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid); if (shardReplicationMode == TRANSFER_MODE_FORCE_LOGICAL) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("using logical replication in " - "master_copy_shard_placement() requires Citus " - "Enterprise"))); + errmsg("the force_logical transfer mode is currently " + "unsupported"))); } ShardInterval *shardInterval = LoadShardInterval(shardId); @@ -131,19 +148,155 @@ master_copy_shard_placement(PG_FUNCTION_ARGS) /* * master_move_shard_placement moves given shard (and its co-located shards) from one - * node to the other node. + * node to the other node. To accomplish this it entirely recreates the table structure + * before copying all data. + * + * After that, there are two different paths. First one is blocking shard move in the + * sense that during shard move all modifications are paused to the shard. The second + * one relies on logical replication meaning that the writes blocked only for a very + * short duration almost only when the metadata is actually being updated. This option + * is currently only available in Citus Enterprise. + * + * After successful move operation, shards in the source node gets deleted. If the move + * fails at any point, this function throws an error, leaving the cluster without doing + * any changes in source node or target node. 
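+ *
+ * A typical invocation looks like this (hypothetical shard id, hosts, and ports;
+ * 'block_writes' is assumed to be an accepted citus.shard_transfer_mode value):
+ *
+ *   SELECT master_move_shard_placement(102008, 'worker-1', 5432,
+ *                                      'worker-2', 5432, 'block_writes');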
*/ Datum master_move_shard_placement(PG_FUNCTION_ARGS) { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("master_move_shard_placement() is only supported on " - "Citus Enterprise"))); + int64 shardId = PG_GETARG_INT64(0); + char *sourceNodeName = text_to_cstring(PG_GETARG_TEXT_P(1)); + int32 sourceNodePort = PG_GETARG_INT32(2); + char *targetNodeName = text_to_cstring(PG_GETARG_TEXT_P(3)); + int32 targetNodePort = PG_GETARG_INT32(4); + Oid shardReplicationModeOid = PG_GETARG_OID(5); + + + ListCell *colocatedTableCell = NULL; + ListCell *colocatedShardCell = NULL; + + + CheckCitusVersion(ERROR); + EnsureCoordinator(); + + Oid relationId = RelationIdForShard(shardId); + ErrorIfMoveCitusLocalTable(relationId); + + ShardInterval *shardInterval = LoadShardInterval(shardId); + Oid distributedTableId = shardInterval->relationId; + + List *colocatedTableList = ColocatedTableList(distributedTableId); + List *colocatedShardList = ColocatedShardIntervalList(shardInterval); + + foreach(colocatedTableCell, colocatedTableList) + { + Oid colocatedTableId = lfirst_oid(colocatedTableCell); + char relationKind = '\0'; + + /* check that user has owner rights in all co-located tables */ + EnsureTableOwner(colocatedTableId); + + /* + * Block concurrent DDL / TRUNCATE commands on the relation. Similarly, + * block concurrent master_move_shard_placement() on any shard of + * the same relation. This is OK for now since we're executing shard + * moves sequentially anyway. + */ + LockRelationOid(colocatedTableId, ShareUpdateExclusiveLock); + + relationKind = get_rel_relkind(colocatedTableId); + if (relationKind == RELKIND_FOREIGN_TABLE) + { + char *relationName = get_rel_name(colocatedTableId); + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot repair shard"), + errdetail("Table %s is a foreign table. Repairing " + "shards backed by foreign tables is " + "not supported.", relationName))); + } + } + + /* we sort colocatedShardList so that lock operations will not cause any deadlocks */ + colocatedShardList = SortList(colocatedShardList, CompareShardIntervalsById); + foreach(colocatedShardCell, colocatedShardList) + { + ShardInterval *colocatedShard = (ShardInterval *) lfirst(colocatedShardCell); + uint64 colocatedShardId = colocatedShard->shardId; + + EnsureShardCanBeCopied(colocatedShardId, sourceNodeName, sourceNodePort, + targetNodeName, targetNodePort); + } + + char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid); + if (shardReplicationMode == TRANSFER_MODE_FORCE_LOGICAL) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("the force_logical transfer mode is currently " + "unsupported"))); + } + + BlockWritesToShardList(colocatedShardList); + + /* + * CopyColocatedShardPlacement function copies given shard with its co-located + * shards. 
+ */ + CopyShardTables(colocatedShardList, sourceNodeName, sourceNodePort, targetNodeName, + targetNodePort); + + ShardInterval *colocatedShard = NULL; + foreach_ptr(colocatedShard, colocatedShardList) + { + uint64 colocatedShardId = colocatedShard->shardId; + uint32 groupId = GroupForNode(targetNodeName, targetNodePort); + uint64 placementId = GetNextPlacementId(); + + InsertShardPlacementRow(colocatedShardId, placementId, + SHARD_STATE_ACTIVE, ShardLength(colocatedShardId), + groupId); + } + + /* since this is move operation, we remove shards from source node after copy */ + if (DeferShardDeleteOnMove) + { + MarkForDropColocatedShardPlacement(shardInterval, sourceNodeName, sourceNodePort); + } + else + { + DropColocatedShardPlacement(shardInterval, sourceNodeName, sourceNodePort); + } + + UpdateColocatedShardPlacementMetadataOnWorkers(shardId, sourceNodeName, + sourceNodePort, targetNodeName, + targetNodePort); + + PG_RETURN_VOID(); } /* - * BlockWritesToShardList blocks writes to all shards in the given shard + * ErrorIfMoveCitusLocalTable is a helper function for rebalance_table_shards + * and master_move_shard_placement udf's to error out if relation with relationId + * is a citus local table. + */ +void +ErrorIfMoveCitusLocalTable(Oid relationId) +{ + if (!IsCitusTableType(relationId, CITUS_LOCAL_TABLE)) + { + return; + } + + char *qualifiedRelationName = generate_qualified_relation_name(relationId); + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("table %s is a citus local table, moving shard of " + "a citus local table is currently not supported", + qualifiedRelationName))); +} + + +/* + * BlockWritesToColocatedShardList blocks writes to all shards in the given shard * list. The function assumes that all the shards in the list are colocated. */ void @@ -185,8 +338,11 @@ BlockWritesToShardList(List *shardList) /* * ErrorIfTableCannotBeReplicated function errors out if the given table is not suitable - * for its shard being replicated. Shard replications is not allowed only for MX tables, - * since RF=1 is a must MX tables. + * for its shard being replicated. There are 2 cases in which shard replication is not + * allowed: + * + * 1) MX tables, since RF=1 is a must MX tables + * 2) Reference tables, since the shard should already exist in all workers */ static void ErrorIfTableCannotBeReplicated(Oid relationId) @@ -336,6 +492,7 @@ RepairShardPlacement(int64 shardId, const char *sourceNodeName, int32 sourceNode /* we generate necessary commands to recreate the shard in target node */ List *ddlCommandList = CopyShardCommandList(shardInterval, sourceNodeName, sourceNodePort, includeData); + List *foreignConstraintCommandList = CopyShardForeignConstraintCommandList( shardInterval); ddlCommandList = list_concat(ddlCommandList, foreignConstraintCommandList); @@ -502,15 +659,7 @@ EnsureTableListSuitableForReplication(List *tableIdList) IsCitusTableType(tableId, DISTRIBUTED_TABLE)) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot create foreign key constraint"), - errdetail("This shard has foreign constraints on it. " - "Citus currently supports " - "foreign key constraints only for " - "\"citus.shard_replication_factor = 1\"."), - errhint("Please change \"citus.shard_replication_factor to " - "1\". 
To learn more about using foreign keys with " - "other replication factors, please contact us at " - "https://citusdata.com/about/contact_us."))); + errmsg("cannot replicate shards with foreign keys"))); } } } @@ -553,21 +702,12 @@ CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodeP * * Iterate through the colocated shards and create the foreign constraints and * attach child tables to their parents in a partitioning hierarchy. - * - * Note: After implementing foreign constraints from distributed to reference - * tables, we have decided to not create foreign constraints from hash - * distributed to reference tables at this stage for nonblocking rebalancer. - * We just create the co-located ones here. We add the foreign constraints - * from hash distributed to reference tables after being completely done with - * the copy procedure inside LogicallyReplicateShards. The reason is that, - * the reference tables have placements in both source and target workers and - * the copied shard would get updated twice because of a cascading DML coming - * from both of the placements. */ foreach_ptr(shardInterval, shardIntervalList) { List *shardForeignConstraintCommandList = NIL; List *referenceTableForeignConstraintList = NIL; + char *tableOwner = TableOwner(shardInterval->relationId); CopyShardForeignConstraintCommandListGrouped(shardInterval, @@ -988,12 +1128,109 @@ RecreateTableDDLCommandList(Oid relationId) } +/* + * DropColocatedShardPlacement deletes the shard placement metadata for the given shard + * placement from the pg_dist_placement, and then it drops the shard table + * from the given node. The function does this for all colocated placements. + */ +static void +DropColocatedShardPlacement(ShardInterval *shardInterval, char *nodeName, int32 nodePort) +{ + List *colocatedShardList = ColocatedShardIntervalList(shardInterval); + ListCell *colocatedShardCell = NULL; + + foreach(colocatedShardCell, colocatedShardList) + { + ShardInterval *colocatedShard = (ShardInterval *) lfirst(colocatedShardCell); + char *qualifiedTableName = ConstructQualifiedShardName(colocatedShard); + StringInfo dropQuery = makeStringInfo(); + uint64 shardId = colocatedShard->shardId; + List *shardPlacementList = ShardPlacementList(shardId); + ShardPlacement *placement = + SearchShardPlacementInListOrError(shardPlacementList, nodeName, nodePort); + + appendStringInfo(dropQuery, DROP_REGULAR_TABLE_COMMAND, qualifiedTableName); + + DeleteShardPlacementRow(placement->placementId); + SendCommandToWorker(nodeName, nodePort, dropQuery->data); + } +} + + +/* + * MarkForDropColocatedShardPlacement marks the shard placement metadata for the given + * shard placement to be deleted in pg_dist_placement. The function does this for all + * colocated placements. 
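+ *
+ * In catalog terms this amounts to roughly (sketch, per colocated placement):
+ *
+ *   UPDATE pg_dist_placement SET shardstate = 4 WHERE placementid = <placement id>;
+ *
+ * where 4 is SHARD_STATE_TO_DELETE, later picked up by the shard cleaner.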
+ */ +static void +MarkForDropColocatedShardPlacement(ShardInterval *shardInterval, char *nodeName, int32 + nodePort) +{ + List *colocatedShardList = ColocatedShardIntervalList(shardInterval); + ListCell *colocatedShardCell = NULL; + + foreach(colocatedShardCell, colocatedShardList) + { + ShardInterval *colocatedShard = (ShardInterval *) lfirst(colocatedShardCell); + uint64 shardId = colocatedShard->shardId; + List *shardPlacementList = ShardPlacementList(shardId); + ShardPlacement *placement = + SearchShardPlacementInListOrError(shardPlacementList, nodeName, nodePort); + + UpdateShardPlacementState(placement->placementId, SHARD_STATE_TO_DELETE); + } +} + + +/* + * UpdateColocatedShardPlacementMetadataOnWorkers updates the metadata about the + * placements of the given shard and its colocated shards by changing the nodename and + * nodeport of the shards from the source nodename/port to target nodename/port. + * + * Note that the function does nothing if the given shard belongs to a non-mx table. + */ +static void +UpdateColocatedShardPlacementMetadataOnWorkers(int64 shardId, + char *sourceNodeName, int32 sourceNodePort, + char *targetNodeName, int32 targetNodePort) +{ + ShardInterval *shardInterval = LoadShardInterval(shardId); + ListCell *colocatedShardCell = NULL; + bool shouldSyncMetadata = ShouldSyncTableMetadata(shardInterval->relationId); + + if (!shouldSyncMetadata) + { + return; + } + + List *colocatedShardList = ColocatedShardIntervalList(shardInterval); + + /* iterate through the colocated shards and copy each */ + foreach(colocatedShardCell, colocatedShardList) + { + ShardInterval *colocatedShard = (ShardInterval *) lfirst(colocatedShardCell); + StringInfo updateCommand = makeStringInfo(); + + appendStringInfo(updateCommand, "UPDATE pg_dist_shard_placement " + "SET nodename=%s, nodeport=%d WHERE " + "shardid=%lu AND nodename=%s AND nodeport=%d", + quote_literal_cstr(targetNodeName), + targetNodePort, + colocatedShard->shardId, + quote_literal_cstr(sourceNodeName), + sourceNodePort); + + SendCommandToWorkersWithMetadata(updateCommand->data); + } +} + + /* * WorkerApplyShardDDLCommandList wraps all DDL commands in ddlCommandList * in a call to worker_apply_shard_ddl_command to apply the DDL command to * the shard specified by shardId. */ -static List * +List * WorkerApplyShardDDLCommandList(List *ddlCommandList, int64 shardId) { List *applyDDLCommandList = NIL; diff --git a/src/backend/distributed/operations/shard_cleaner.c b/src/backend/distributed/operations/shard_cleaner.c new file mode 100644 index 000000000..f8e8d851b --- /dev/null +++ b/src/backend/distributed/operations/shard_cleaner.c @@ -0,0 +1,144 @@ +/*------------------------------------------------------------------------- + * + * shard_cleaner.c + * This implements the background process that cleans shards that are + * left around. Shards that are left around are marked as state 4 + * (SHARD_STATE_TO_DELETE) in pg_dist_placement. + * + * Copyright (c), Citus Data, Inc. 
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+
+#include "distributed/coordinator_protocol.h"
+#include "distributed/metadata_cache.h"
+#include "distributed/shard_cleaner.h"
+#include "distributed/worker_transaction.h"
+
+
+/* declarations for dynamic loading */
+PG_FUNCTION_INFO_V1(master_defer_delete_shards);
+
+
+static int DropMarkedShards(void);
+
+
+/*
+ * master_defer_delete_shards implements a user-facing UDF to delete orphaned
+ * shards that are still hanging around in the system. These shards are
+ * orphaned by previous actions that were not able to directly delete the
+ * placements, e.g. a shard move or dropping a distributed table while one of
+ * the data nodes was not online.
+ *
+ * This function iterates through placements where shardstate is
+ * SHARD_STATE_TO_DELETE (shardstate = 4), drops the corresponding tables from
+ * the node and removes the placement information from the catalog.
+ *
+ * The function takes no arguments and runs cluster-wide.
+ */
+Datum
+master_defer_delete_shards(PG_FUNCTION_ARGS)
+{
+	CheckCitusVersion(ERROR);
+	EnsureCoordinator();
+
+	int droppedShardCount = DropMarkedShards();
+
+	PG_RETURN_INT32(droppedShardCount);
+}
+
+
+/*
+ * TryDropMarkedShards is a wrapper around DropMarkedShards that catches
+ * any errors to make it safe to use in the maintenance daemon.
+ */
+int
+TryDropMarkedShards(void)
+{
+	int droppedShardCount = 0;
+	MemoryContext savedContext = CurrentMemoryContext;
+
+	PG_TRY();
+	{
+		droppedShardCount = DropMarkedShards();
+	}
+	PG_CATCH();
+	{
+		MemoryContextSwitchTo(savedContext);
+		ErrorData *edata = CopyErrorData();
+		FlushErrorState();
+
+		/* rethrow as WARNING */
+		edata->elevel = WARNING;
+		ThrowErrorData(edata);
+	}
+	PG_END_TRY();
+
+	return droppedShardCount;
+}
+
+
+/*
+ * DropMarkedShards removes shards that were marked SHARD_STATE_TO_DELETE before.
+ *
+ * It does so by taking an exclusive lock on the shard and its colocated
+ * placements before removing them. If the lock cannot be obtained, it skips
+ * that group and continues with the others. A skipped group is removed at a
+ * later time, once no locks are held on those placements anymore.
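+ *
+ * Besides the maintenance daemon, the same cleanup can be triggered manually
+ * through the user-facing UDF, e.g.:
+ *
+ *   SELECT master_defer_delete_shards();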
+ */ +static int +DropMarkedShards(void) +{ + int removedShardCount = 0; + ListCell *shardPlacementCell = NULL; + + if (!IsCoordinator()) + { + return removedShardCount; + } + + List *shardPlacementList = AllShardPlacementsWithShardPlacementState( + SHARD_STATE_TO_DELETE); + foreach(shardPlacementCell, shardPlacementList) + { + GroupShardPlacement *placement = (GroupShardPlacement *) lfirst( + shardPlacementCell); + + if (!PrimaryNodeForGroup(placement->groupId, NULL) || + !ShardExists(placement->shardId)) + { + continue; + } + + ShardPlacement *shardPlacement = LoadShardPlacement(placement->shardId, + placement->placementId); + ShardInterval *shardInterval = LoadShardInterval(shardPlacement->shardId); + + ereport(LOG, (errmsg("dropping shard placement " INT64_FORMAT " of shard " + INT64_FORMAT " on %s:%d after it was moved away", + shardPlacement->placementId, shardPlacement->shardId, + shardPlacement->nodeName, shardPlacement->nodePort))); + + /* prepare sql query to execute to drop the shard */ + StringInfo dropQuery = makeStringInfo(); + char *qualifiedTableName = ConstructQualifiedShardName(shardInterval); + appendStringInfo(dropQuery, DROP_REGULAR_TABLE_COMMAND, qualifiedTableName); + + List *dropCommandList = list_make2("SET LOCAL lock_timeout TO '1s'", + dropQuery->data); + + /* remove the shard from the node and the placement information */ + SendCommandListToWorkerInSingleTransaction(shardPlacement->nodeName, + shardPlacement->nodePort, + NULL, dropCommandList); + + DeleteShardPlacementRow(placement->placementId); + + removedShardCount++; + } + + return removedShardCount; +} diff --git a/src/backend/distributed/operations/shard_rebalancer.c b/src/backend/distributed/operations/shard_rebalancer.c index dda7a849f..58f435218 100644 --- a/src/backend/distributed/operations/shard_rebalancer.c +++ b/src/backend/distributed/operations/shard_rebalancer.c @@ -6,32 +6,2164 @@ * * Copyright (c) Citus Data, Inc. 
* - * $Id$ - * *------------------------------------------------------------------------- */ + #include "postgres.h" +#include "libpq-fe.h" + +#include + +#include "distributed/pg_version_constants.h" #include "access/htup_details.h" +#include "access/genam.h" #include "catalog/pg_type.h" #include "catalog/pg_proc.h" +#include "commands/dbcommands.h" +#include "commands/sequence.h" +#include "distributed/argutils.h" +#include "distributed/citus_safe_lib.h" +#include "distributed/citus_ruleutils.h" +#include "distributed/colocation_utils.h" +#include "distributed/connection_management.h" #include "distributed/enterprise.h" +#include "distributed/hash_helpers.h" +#include "distributed/intermediate_result_pruning.h" +#include "distributed/listutils.h" +#include "distributed/coordinator_protocol.h" +#include "distributed/metadata_cache.h" +#include "distributed/multi_client_executor.h" +#include "distributed/multi_progress.h" +#include "distributed/multi_server_executor.h" +#include "distributed/pg_dist_rebalance_strategy.h" +#include "distributed/reference_table_utils.h" +#include "distributed/remote_commands.h" +#include "distributed/resource_lock.h" +#include "distributed/shard_rebalancer.h" +#include "distributed/tuplestore.h" +#include "distributed/worker_protocol.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "postmaster/postmaster.h" +#include "storage/lmgr.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/int8.h" +#include "utils/json.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" #include "utils/syscache.h" +#if PG_VERSION_NUM >= PG_VERSION_13 +#include "common/hashfn.h" +#endif + +/* RebalanceOptions are the options used to control the rebalance algorithm */ +typedef struct RebalanceOptions +{ + List *relationIdList; + float4 threshold; + int32 maxShardMoves; + ArrayType *excludedShardArray; + bool drainOnly; + Form_pg_dist_rebalance_strategy rebalanceStrategy; +} RebalanceOptions; + + +/* + * RebalanceState is used to keep the internal state of the rebalance + * algorithm in one place. 
+ */ +typedef struct RebalanceState +{ + HTAB *placementsHash; + List *placementUpdateList; + RebalancePlanFunctions *functions; + List *fillStateListDesc; + List *fillStateListAsc; + List *disallowedPlacementList; + float4 totalCost; + float4 totalCapacity; +} RebalanceState; + + +/* RebalanceContext stores the context for the function callbacks */ +typedef struct RebalanceContext +{ + FmgrInfo shardCostUDF; + FmgrInfo nodeCapacityUDF; + FmgrInfo shardAllowedOnNodeUDF; +} RebalanceContext; + + +/* static declarations for main logic */ +static int ShardActivePlacementCount(HTAB *activePlacementsHash, uint64 shardId, + List *activeWorkerNodeList); +static bool UpdateShardPlacement(PlacementUpdateEvent *placementUpdateEvent, + List *responsiveNodeList, Oid shardReplicationModeOid); + +/* static declarations for main logic's utility functions */ +static HTAB * ActivePlacementsHash(List *shardPlacementList); +static bool PlacementsHashFind(HTAB *placementsHash, uint64 shardId, + WorkerNode *workerNode); +static void PlacementsHashEnter(HTAB *placementsHash, uint64 shardId, + WorkerNode *workerNode); +static void PlacementsHashRemove(HTAB *placementsHash, uint64 shardId, + WorkerNode *workerNode); +static int PlacementsHashCompare(const void *lhsKey, const void *rhsKey, Size keySize); +static uint32 PlacementsHashHashCode(const void *key, Size keySize); +static bool WorkerNodeListContains(List *workerNodeList, const char *workerName, + uint32 workerPort); +static void UpdateColocatedShardPlacementProgress(uint64 shardId, char *sourceName, + int sourcePort, uint64 progress); +static bool IsPlacementOnWorkerNode(ShardPlacement *placement, WorkerNode *workerNode); +static NodeFillState * FindFillStateForPlacement(RebalanceState *state, + ShardPlacement *placement); +static RebalanceState * InitRebalanceState(List *workerNodeList, List *shardPlacementList, + RebalancePlanFunctions *functions); +static void MoveShardsAwayFromDisallowedNodes(RebalanceState *state); +static bool FindAndMoveShardCost(float4 utilizationLowerBound, + float4 utilizationUpperBound, + RebalanceState *state); +static NodeFillState * FindAllowedTargetFillState(RebalanceState *state, uint64 shardId); +static void MoveShardCost(NodeFillState *sourceFillState, NodeFillState *targetFillState, + ShardCost *shardCost, RebalanceState *state); +static int CompareNodeFillStateAsc(const void *void1, const void *void2); +static int CompareNodeFillStateDesc(const void *void1, const void *void2); +static int CompareShardCostAsc(const void *void1, const void *void2); +static int CompareShardCostDesc(const void *void1, const void *void2); +static int CompareDisallowedPlacementAsc(const void *void1, const void *void2); +static int CompareDisallowedPlacementDesc(const void *void1, const void *void2); +static bool ShardAllowedOnNode(uint64 shardId, WorkerNode *workerNode, void *context); +static float4 NodeCapacity(WorkerNode *workerNode, void *context); +static ShardCost GetShardCost(uint64 shardId, void *context); +static List * NonColocatedDistRelationIdList(void); +static void RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid); +static void AcquireColocationLock(Oid relationId, const char *operationName); +static void ExecutePlacementUpdates(List *placementUpdateList, Oid + shardReplicationModeOid, char *noticeOperation); +static float4 CalculateUtilization(float4 totalCost, float4 capacity); +static Form_pg_dist_rebalance_strategy GetRebalanceStrategy(Name name); static void EnsureShardCostUDF(Oid functionOid); 
static void EnsureNodeCapacityUDF(Oid functionOid); static void EnsureShardAllowedOnNodeUDF(Oid functionOid); -NOT_SUPPORTED_IN_COMMUNITY(rebalance_table_shards); -NOT_SUPPORTED_IN_COMMUNITY(replicate_table_shards); -NOT_SUPPORTED_IN_COMMUNITY(get_rebalance_table_shards_plan); -NOT_SUPPORTED_IN_COMMUNITY(get_rebalance_progress); -NOT_SUPPORTED_IN_COMMUNITY(master_drain_node); -NOT_SUPPORTED_IN_COMMUNITY(citus_shard_cost_by_disk_size); -PG_FUNCTION_INFO_V1(pg_dist_rebalance_strategy_enterprise_check); + +/* declarations for dynamic loading */ +PG_FUNCTION_INFO_V1(rebalance_table_shards); +PG_FUNCTION_INFO_V1(replicate_table_shards); +PG_FUNCTION_INFO_V1(get_rebalance_table_shards_plan); +PG_FUNCTION_INFO_V1(get_rebalance_progress); +PG_FUNCTION_INFO_V1(master_drain_node); +PG_FUNCTION_INFO_V1(citus_shard_cost_by_disk_size); PG_FUNCTION_INFO_V1(citus_validate_rebalance_strategy_functions); +PG_FUNCTION_INFO_V1(pg_dist_rebalance_strategy_enterprise_check); + + +#ifdef USE_ASSERT_CHECKING + +/* + * Check that all the invariants of the state hold. + */ +static void +CheckRebalanceStateInvariants(const RebalanceState *state) +{ + NodeFillState *fillState = NULL; + NodeFillState *prevFillState = NULL; + int fillStateIndex = 0; + int fillStateLength = list_length(state->fillStateListAsc); + + Assert(state != NULL); + Assert(list_length(state->fillStateListAsc) == list_length(state->fillStateListDesc)); + foreach_ptr(fillState, state->fillStateListAsc) + { + float4 totalCost = 0; + ShardCost *shardCost = NULL; + ShardCost *prevShardCost = NULL; + if (prevFillState != NULL) + { + /* Check that the previous fill state is more empty than this one */ + bool higherUtilization = fillState->utilization > prevFillState->utilization; + bool sameUtilization = fillState->utilization == prevFillState->utilization; + bool lowerOrSameCapacity = fillState->capacity <= prevFillState->capacity; + Assert(higherUtilization || (sameUtilization && lowerOrSameCapacity)); + } + + /* Check that fillStateListDesc is the reversed version of fillStateListAsc */ + Assert(list_nth(state->fillStateListDesc, fillStateLength - fillStateIndex - 1) == + fillState); + + + foreach_ptr(shardCost, fillState->shardCostListDesc) + { + if (prevShardCost != NULL) + { + /* Check that shard costs are sorted in descending order */ + Assert(shardCost->cost <= prevShardCost->cost); + } + totalCost += shardCost->cost; + } + + /* Check that utilization field is up to date. */ + Assert(fillState->utilization == CalculateUtilization(fillState->totalCost, + fillState->capacity)); + + /* + * Check that fillState->totalCost is within 0.1% difference of + * sum(fillState->shardCostListDesc->cost) + * We cannot compare exactly, because these numbers are floats and + * fillState->totalCost is modified by doing + and - on it. So instead + * we check that the numbers are roughly the same. + */ + float4 absoluteDifferenceBetweenTotalCosts = + fabsf(fillState->totalCost - totalCost); + float4 maximumAbsoluteValueOfTotalCosts = + fmaxf(fabsf(fillState->totalCost), fabsf(totalCost)); + Assert(absoluteDifferenceBetweenTotalCosts <= maximumAbsoluteValueOfTotalCosts / + 1000); + + prevFillState = fillState; + fillStateIndex++; + } +} + + +#else +#define CheckRebalanceStateInvariants(l) ((void) 0) +#endif /* USE_ASSERT_CHECKING */ + +/* + * BigIntArrayDatumContains checks if the array contains the given number. 
+ */ +static bool +BigIntArrayDatumContains(Datum *array, int arrayLength, uint64 toFind) +{ + for (int i = 0; i < arrayLength; i++) + { + if (DatumGetInt64(array[i]) == toFind) + { + return true; + } + } + return false; +} + + +/* + * FullShardPlacementList returns a List containing all the shard placements of + * a specific table (excluding the excludedShardArray) + */ +static List * +FullShardPlacementList(Oid relationId, ArrayType *excludedShardArray) +{ + List *shardPlacementList = NIL; + CitusTableCacheEntry *citusTableCacheEntry = GetCitusTableCacheEntry(relationId); + int shardIntervalArrayLength = citusTableCacheEntry->shardIntervalArrayLength; + int excludedShardIdCount = ArrayObjectCount(excludedShardArray); + Datum *excludedShardArrayDatum = DeconstructArrayObject(excludedShardArray); + + for (int shardIndex = 0; shardIndex < shardIntervalArrayLength; shardIndex++) + { + ShardInterval *shardInterval = + citusTableCacheEntry->sortedShardIntervalArray[shardIndex]; + GroupShardPlacement *placementArray = + citusTableCacheEntry->arrayOfPlacementArrays[shardIndex]; + int numberOfPlacements = + citusTableCacheEntry->arrayOfPlacementArrayLengths[shardIndex]; + + if (BigIntArrayDatumContains(excludedShardArrayDatum, excludedShardIdCount, + shardInterval->shardId)) + { + continue; + } + + for (int placementIndex = 0; placementIndex < numberOfPlacements; + placementIndex++) + { + GroupShardPlacement *groupPlacement = &placementArray[placementIndex]; + WorkerNode *worker = LookupNodeForGroup(groupPlacement->groupId); + ShardPlacement *placement = CitusMakeNode(ShardPlacement); + placement->shardId = groupPlacement->shardId; + placement->shardLength = groupPlacement->shardLength; + placement->shardState = groupPlacement->shardState; + placement->nodeName = pstrdup(worker->workerName); + placement->nodePort = worker->workerPort; + placement->placementId = groupPlacement->placementId; + + shardPlacementList = lappend(shardPlacementList, placement); + } + } + return SortList(shardPlacementList, CompareShardPlacements); +} + + +/* + * SortedActiveWorkers returns all the active workers like + * ActiveReadableNodeList, but sorted. + */ +static List * +SortedActiveWorkers() +{ + List *activeWorkerList = ActiveReadableNodeList(); + return SortList(activeWorkerList, CompareWorkerNodes); +} + + +/* + * GetRebalanceSteps returns a List of PlacementUpdateEvents that are needed to + * rebalance a list of tables. 
+ */ +static List * +GetRebalanceSteps(RebalanceOptions *options) +{ + EnsureShardCostUDF(options->rebalanceStrategy->shardCostFunction); + EnsureNodeCapacityUDF(options->rebalanceStrategy->nodeCapacityFunction); + EnsureShardAllowedOnNodeUDF(options->rebalanceStrategy->shardAllowedOnNodeFunction); + + RebalanceContext context; + memset(&context, 0, sizeof(RebalanceContext)); + fmgr_info(options->rebalanceStrategy->shardCostFunction, &context.shardCostUDF); + fmgr_info(options->rebalanceStrategy->nodeCapacityFunction, &context.nodeCapacityUDF); + fmgr_info(options->rebalanceStrategy->shardAllowedOnNodeFunction, + &context.shardAllowedOnNodeUDF); + + RebalancePlanFunctions rebalancePlanFunctions = { + .shardAllowedOnNode = ShardAllowedOnNode, + .nodeCapacity = NodeCapacity, + .shardCost = GetShardCost, + .context = &context, + }; + + /* sort the lists to make the function more deterministic */ + List *activeWorkerList = SortedActiveWorkers(); + List *shardPlacementListList = NIL; + + Oid relationId = InvalidOid; + foreach_oid(relationId, options->relationIdList) + { + List *shardPlacementList = FullShardPlacementList(relationId, + options->excludedShardArray); + shardPlacementListList = lappend(shardPlacementListList, shardPlacementList); + } + + if (options->threshold < options->rebalanceStrategy->minimumThreshold) + { + ereport(WARNING, (errmsg( + "the given threshold is lower than the minimum " + "threshold allowed by the rebalance strategy, " + "using the minimum allowed threshold instead" + ), + errdetail("Using threshold of %.2f", + options->rebalanceStrategy->minimumThreshold + ) + )); + options->threshold = options->rebalanceStrategy->minimumThreshold; + } + + return RebalancePlacementUpdates(activeWorkerList, + shardPlacementListList, + options->threshold, + options->maxShardMoves, + options->drainOnly, + &rebalancePlanFunctions); +} + + +/* + * ShardAllowedOnNode determines if shard is allowed on a specific worker node. + */ +static bool +ShardAllowedOnNode(uint64 shardId, WorkerNode *workerNode, void *voidContext) +{ + if (!workerNode->shouldHaveShards) + { + return false; + } + + RebalanceContext *context = voidContext; + Datum allowed = FunctionCall2(&context->shardAllowedOnNodeUDF, shardId, + workerNode->nodeId); + return DatumGetBool(allowed); +} + + +/* + * NodeCapacity returns the relative capacity of a node. A node with capacity 2 + * can contain twice as many shards as a node with capacity 1. The actual + * capacity can be a number grounded in reality, like the disk size, number of + * cores, but it doesn't have to be. + */ +static float4 +NodeCapacity(WorkerNode *workerNode, void *voidContext) +{ + if (!workerNode->shouldHaveShards) + { + return 0; + } + + RebalanceContext *context = voidContext; + Datum capacity = FunctionCall1(&context->nodeCapacityUDF, workerNode->nodeId); + return DatumGetFloat4(capacity); +} + + +/* + * GetShardCost returns the cost of the given shard. A shard with cost 2 will + * be weighted as heavily as two shards with cost 1. This cost number can be a + * number grounded in reality, like the shard size on disk, but it doesn't have + * to be. 
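+ *
+ * The cost is obtained by calling the rebalance strategy's shard cost UDF,
+ * which has the same SQL shape as the built-in citus_shard_cost_by_disk_size
+ * below. A hypothetical custom cost function could look like:
+ *
+ *   CREATE FUNCTION my_constant_shard_cost(shardid bigint)
+ *       RETURNS float4 AS $$ SELECT 1.0::float4 $$ LANGUAGE sql;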
+ */ +static ShardCost +GetShardCost(uint64 shardId, void *voidContext) +{ + ShardCost shardCost; + memset_struct_0(shardCost); + shardCost.shardId = shardId; + RebalanceContext *context = voidContext; + Datum shardCostDatum = FunctionCall1(&context->shardCostUDF, UInt64GetDatum(shardId)); + shardCost.cost = DatumGetFloat4(shardCostDatum); + return shardCost; +} + + +/* + * citus_shard_cost_by_disk_size gets the cost for a shard based on the disk + * size of the shard on a worker. The worker to check the disk size is + * determined by choosing the first active placement for the shard. The disk + * size is calculated using pg_total_relation_size, so it includes indexes. + * + * SQL signature: + * citus_shard_cost_by_disk_size(shardid bigint) returns float4 + */ +Datum +citus_shard_cost_by_disk_size(PG_FUNCTION_ARGS) +{ + uint64 shardId = PG_GETARG_INT64(0); + bool missingOk = false; + ShardPlacement *shardPlacement = ActiveShardPlacement(shardId, missingOk); + char *workerNodeName = shardPlacement->nodeName; + uint32 workerNodePort = shardPlacement->nodePort; + uint32 connectionFlag = 0; + PGresult *result = NULL; + bool raiseErrors = true; + char *sizeQuery = PG_TOTAL_RELATION_SIZE_FUNCTION; + ShardInterval *shardInterval = LoadShardInterval(shardId); + List *colocatedShardList = ColocatedShardIntervalList(shardInterval); + StringInfo tableSizeQuery = GenerateSizeQueryOnMultiplePlacements(colocatedShardList, + sizeQuery); + + MultiConnection *connection = GetNodeConnection(connectionFlag, workerNodeName, + workerNodePort); + int queryResult = ExecuteOptionalRemoteCommand(connection, tableSizeQuery->data, + &result); + + if (queryResult != RESPONSE_OKAY) + { + ereport(ERROR, (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("cannot get the size because of a connection error"))); + } + + List *sizeList = ReadFirstColumnAsText(result); + if (list_length(sizeList) != 1) + { + ereport(ERROR, (errmsg( + "received wrong number of rows from worker, expected 1 received %d", + list_length(sizeList)))); + } + + StringInfo tableSizeStringInfo = (StringInfo) linitial(sizeList); + char *tableSizeString = tableSizeStringInfo->data; + uint64 tableSize = SafeStringToUint64(tableSizeString); + + PQclear(result); + ClearResults(connection, raiseErrors); + if (tableSize <= 0) + { + PG_RETURN_FLOAT4(1); + } + + PG_RETURN_FLOAT4(tableSize); +} + + +/* + * GetColocatedRebalanceSteps takes a List of PlacementUpdateEvents and creates + * a new List of containing those and all the updates for colocated shards. 
+ */ +static List * +GetColocatedRebalanceSteps(List *placementUpdateList) +{ + ListCell *placementUpdateCell = NULL; + List *colocatedUpdateList = NIL; + + foreach(placementUpdateCell, placementUpdateList) + { + PlacementUpdateEvent *placementUpdate = lfirst(placementUpdateCell); + ShardInterval *shardInterval = LoadShardInterval(placementUpdate->shardId); + List *colocatedShardList = ColocatedShardIntervalList(shardInterval); + ListCell *colocatedShardCell = NULL; + + foreach(colocatedShardCell, colocatedShardList) + { + ShardInterval *colocatedShard = lfirst(colocatedShardCell); + PlacementUpdateEvent *colocatedUpdate = palloc0(sizeof(PlacementUpdateEvent)); + + colocatedUpdate->shardId = colocatedShard->shardId; + colocatedUpdate->sourceNode = placementUpdate->sourceNode; + colocatedUpdate->targetNode = placementUpdate->targetNode; + colocatedUpdate->updateType = placementUpdate->updateType; + + colocatedUpdateList = lappend(colocatedUpdateList, colocatedUpdate); + } + } + + return colocatedUpdateList; +} + + +/* + * AcquireColocationLock tries to acquire a lock for rebalance/replication. If + * this is it not possible it fails instantly because this means another + * rebalance/repliction is currently happening. This would really mess up + * planning. + */ +static void +AcquireColocationLock(Oid relationId, const char *operationName) +{ + uint32 lockId = relationId; + LOCKTAG tag; + + CitusTableCacheEntry *citusTableCacheEntry = GetCitusTableCacheEntry(relationId); + if (citusTableCacheEntry->colocationId != INVALID_COLOCATION_ID) + { + lockId = citusTableCacheEntry->colocationId; + } + + SET_LOCKTAG_REBALANCE_COLOCATION(tag, (int64) lockId); + + LockAcquireResult lockAcquired = LockAcquire(&tag, ExclusiveLock, false, true); + if (!lockAcquired) + { + ereport(ERROR, (errmsg("could not acquire the lock required to %s %s", + operationName, generate_qualified_relation_name( + relationId)))); + } +} + + +/* + * GetResponsiveWorkerList returns a List of workers that respond to new + * connection requests. + */ +static List * +GetResponsiveWorkerList() +{ + List *activeWorkerList = ActiveReadableNodeList(); + ListCell *activeWorkerCell = NULL; + List *responsiveWorkerList = NIL; + + foreach(activeWorkerCell, activeWorkerList) + { + WorkerNode *worker = lfirst(activeWorkerCell); + int connectionFlag = FORCE_NEW_CONNECTION; + + MultiConnection *connection = GetNodeConnection(connectionFlag, + worker->workerName, + worker->workerPort); + + if (connection != NULL && connection->pgConn != NULL) + { + if (PQstatus(connection->pgConn) == CONNECTION_OK) + { + responsiveWorkerList = lappend(responsiveWorkerList, worker); + } + + CloseConnection(connection); + } + } + return responsiveWorkerList; +} + + +/* + * ExecutePlacementUpdates copies or moves a shard placement by calling the + * corresponding functions in Citus in a separate subtransaction for each + * update. 
+ */ +static void +ExecutePlacementUpdates(List *placementUpdateList, Oid shardReplicationModeOid, + char *noticeOperation) +{ + List *responsiveWorkerList = GetResponsiveWorkerList(); + ListCell *placementUpdateCell = NULL; + + char shardReplicationMode = LookupShardTransferMode(shardReplicationModeOid); + if (shardReplicationMode == TRANSFER_MODE_FORCE_LOGICAL) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("the force_logical transfer mode is currently " + "unsupported"))); + } + + foreach(placementUpdateCell, placementUpdateList) + { + PlacementUpdateEvent *placementUpdate = lfirst(placementUpdateCell); + ereport(NOTICE, (errmsg( + "%s shard %lu from %s:%u to %s:%u ...", + noticeOperation, + placementUpdate->shardId, + placementUpdate->sourceNode->workerName, + placementUpdate->sourceNode->workerPort, + placementUpdate->targetNode->workerName, + placementUpdate->targetNode->workerPort + ))); + UpdateShardPlacement(placementUpdate, responsiveWorkerList, + shardReplicationModeOid); + } +} + + +/* + * SetupRebalanceMonitor initializes the dynamic shared memory required for storing the + * progress information of a rebalance process. The function takes a List of + * PlacementUpdateEvents for all shards that will be moved (including colocated + * ones) and the relation id of the target table. The dynamic shared memory + * portion consists of a RebalanceMonitorHeader and multiple + * PlacementUpdateEventProgress, one for each planned shard placement move. The + * dsm_handle of the created segment is savedin the progress of the current backend so + * that it can be read by external agents such as get_rebalance_progress function by + * calling pg_stat_get_progress_info UDF. Since currently only VACUUM commands are + * officially allowed as the command type, we describe ourselves as a VACUUM command and + * in order to distinguish a rebalancer progress from regular VACUUM progresses, we put + * a magic number to the first progress field as an indicator. Finally we return the + * dsm handle so that it can be used for updating the progress and cleaning things up. + */ +static void +SetupRebalanceMonitor(List *placementUpdateList, Oid relationId) +{ + List *colocatedUpdateList = GetColocatedRebalanceSteps(placementUpdateList); + ListCell *colocatedUpdateCell = NULL; + + ProgressMonitorData *monitor = CreateProgressMonitor(REBALANCE_ACTIVITY_MAGIC_NUMBER, + list_length(colocatedUpdateList), + sizeof( + PlacementUpdateEventProgress), + relationId); + PlacementUpdateEventProgress *rebalanceSteps = monitor->steps; + + int32 eventIndex = 0; + foreach(colocatedUpdateCell, colocatedUpdateList) + { + PlacementUpdateEvent *colocatedUpdate = lfirst(colocatedUpdateCell); + PlacementUpdateEventProgress *event = rebalanceSteps + eventIndex; + + strlcpy(event->sourceName, colocatedUpdate->sourceNode->workerName, 255); + strlcpy(event->targetName, colocatedUpdate->targetNode->workerName, 255); + + event->shardId = colocatedUpdate->shardId; + event->sourcePort = colocatedUpdate->sourceNode->workerPort; + event->targetPort = colocatedUpdate->targetNode->workerPort; + event->shardSize = ShardLength(colocatedUpdate->shardId); + + eventIndex++; + } +} + + +/* + * rebalance_table_shards rebalances the shards across the workers. 
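+ *
+ * A typical call (hypothetical table name; the remaining arguments are assumed
+ * to fall back to their SQL-level defaults) is:
+ *
+ *   SELECT rebalance_table_shards('events', shard_transfer_mode := 'block_writes');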
+ * + * SQL signature: + * + * rebalance_table_shards( + * relation regclass, + * threshold float4, + * max_shard_moves int, + * excluded_shard_list bigint[], + * shard_transfer_mode citus.shard_transfer_mode, + * drain_only boolean, + * rebalance_strategy name + * ) RETURNS VOID + */ +Datum +rebalance_table_shards(PG_FUNCTION_ARGS) +{ + List *relationIdList = NIL; + if (!PG_ARGISNULL(0)) + { + Oid relationId = PG_GETARG_OID(0); + ErrorIfMoveCitusLocalTable(relationId); + + relationIdList = list_make1_oid(relationId); + } + else + { + /* + * Note that we don't need to do any checks to error out for + * citus local tables here as NonColocatedDistRelationIdList + * already doesn't return non-distributed tables. + */ + relationIdList = NonColocatedDistRelationIdList(); + } + + PG_ENSURE_ARGNOTNULL(2, "max_shard_moves"); + PG_ENSURE_ARGNOTNULL(3, "excluded_shard_list"); + PG_ENSURE_ARGNOTNULL(4, "shard_transfer_mode"); + PG_ENSURE_ARGNOTNULL(5, "drain_only"); + + Form_pg_dist_rebalance_strategy strategy = GetRebalanceStrategy( + PG_GETARG_NAME_OR_NULL(6)); + RebalanceOptions options = { + .relationIdList = relationIdList, + .threshold = PG_GETARG_FLOAT4_OR_DEFAULT(1, strategy->defaultThreshold), + .maxShardMoves = PG_GETARG_INT32(2), + .excludedShardArray = PG_GETARG_ARRAYTYPE_P(3), + .drainOnly = PG_GETARG_BOOL(5), + .rebalanceStrategy = strategy, + }; + Oid shardTransferModeOid = PG_GETARG_OID(4); + RebalanceTableShards(&options, shardTransferModeOid); + PG_RETURN_VOID(); +} + + +/* + * GetRebalanceStrategy returns the rebalance strategy from + * pg_dist_rebalance_strategy matching the given name. If name is NULL it + * returns the default rebalance strategy from pg_dist_rebalance_strategy. + */ +static Form_pg_dist_rebalance_strategy +GetRebalanceStrategy(Name name) +{ + Relation pgDistRebalanceStrategy = table_open(DistRebalanceStrategyRelationId(), + AccessShareLock); + + const int scanKeyCount = 1; + ScanKeyData scanKey[1]; + if (name == NULL) + { + /* WHERE default_strategy=true */ + ScanKeyInit(&scanKey[0], Anum_pg_dist_rebalance_strategy_default_strategy, + BTEqualStrategyNumber, F_BOOLEQ, BoolGetDatum(true)); + } + else + { + /* WHERE name=$name */ + ScanKeyInit(&scanKey[0], Anum_pg_dist_rebalance_strategy_name, + BTEqualStrategyNumber, F_NAMEEQ, NameGetDatum(name)); + } + SysScanDesc scanDescriptor = systable_beginscan(pgDistRebalanceStrategy, + InvalidOid, false, + NULL, scanKeyCount, scanKey); + + HeapTuple heapTuple = systable_getnext(scanDescriptor); + if (!HeapTupleIsValid(heapTuple)) + { + if (name == NULL) + { + ereport(ERROR, (errmsg( + "no rebalance_strategy was provided, but there is also no default strategy set"))); + } + ereport(ERROR, (errmsg("could not find rebalance strategy with name %s", + (char *) name))); + } + + Form_pg_dist_rebalance_strategy strategy = + (Form_pg_dist_rebalance_strategy) GETSTRUCT(heapTuple); + Form_pg_dist_rebalance_strategy strategy_copy = + palloc0(sizeof(FormData_pg_dist_rebalance_strategy)); + + /* Copy data over by dereferencing */ + *strategy_copy = *strategy; + + + systable_endscan(scanDescriptor); + table_close(pgDistRebalanceStrategy, NoLock); + + return strategy_copy; +} + + +/* + * master_drain_node drains a node by setting shouldhaveshards to false and + * running the rebalancer after in drain_only mode. 
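+ *
+ * For example (hypothetical host and port; passing NULL picks the default
+ * rebalance strategy, as handled by GetRebalanceStrategy):
+ *
+ *   SELECT master_drain_node('worker-1', 5432, 'block_writes', NULL);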
+ */ +Datum +master_drain_node(PG_FUNCTION_ARGS) +{ + PG_ENSURE_ARGNOTNULL(0, "nodename"); + PG_ENSURE_ARGNOTNULL(1, "nodeport"); + PG_ENSURE_ARGNOTNULL(2, "shard_transfer_mode"); + + text *nodeNameText = PG_GETARG_TEXT_P(0); + int32 nodePort = PG_GETARG_INT32(1); + Oid shardTransferModeOid = PG_GETARG_OID(2); + Form_pg_dist_rebalance_strategy strategy = GetRebalanceStrategy( + PG_GETARG_NAME_OR_NULL(3)); + RebalanceOptions options = { + .relationIdList = NonColocatedDistRelationIdList(), + .threshold = strategy->defaultThreshold, + .maxShardMoves = 0, + .excludedShardArray = construct_empty_array(INT4OID), + .drainOnly = true, + .rebalanceStrategy = strategy, + }; + + char *nodeName = text_to_cstring(nodeNameText); + int connectionFlag = FORCE_NEW_CONNECTION; + MultiConnection *connection = GetNodeConnection(connectionFlag, LOCAL_HOST_NAME, + PostPortNumber); + + /* + * This is done in a separate session. This way it's not undone if the + * draining fails midway through. + */ + ExecuteCriticalRemoteCommand(connection, psprintf( + "SELECT master_set_node_property(%s, %i, 'shouldhaveshards', false)", + quote_literal_cstr(nodeName), nodePort)); + + RebalanceTableShards(&options, shardTransferModeOid); + + PG_RETURN_VOID(); +} + + +/* + * replicate_table_shards replicates under-replicated shards of the specified + * table. + */ +Datum +replicate_table_shards(PG_FUNCTION_ARGS) +{ + Oid relationId = PG_GETARG_OID(0); + uint32 shardReplicationFactor = PG_GETARG_INT32(1); + int32 maxShardCopies = PG_GETARG_INT32(2); + ArrayType *excludedShardArray = PG_GETARG_ARRAYTYPE_P(3); + Oid shardReplicationModeOid = PG_GETARG_OID(4); + + char transferMode = LookupShardTransferMode(shardReplicationModeOid); + EnsureReferenceTablesExistOnAllNodesExtended(transferMode); + + AcquireColocationLock(relationId, "replicate"); + + List *activeWorkerList = SortedActiveWorkers(); + List *shardPlacementList = FullShardPlacementList(relationId, excludedShardArray); + + List *placementUpdateList = ReplicationPlacementUpdates(activeWorkerList, + shardPlacementList, + shardReplicationFactor); + placementUpdateList = list_truncate(placementUpdateList, maxShardCopies); + + ExecutePlacementUpdates(placementUpdateList, shardReplicationModeOid, "Copying"); + + PG_RETURN_VOID(); +} + + +/* + * get_rebalance_table_shards_plan function calculates the shard move steps + * required for the rebalance operations including the ones for colocated + * tables. + * + * SQL signature: + * + * get_rebalance_table_shards_plan( + * relation regclass, + * threshold float4, + * max_shard_moves int, + * excluded_shard_list bigint[], + * drain_only boolean, + * rebalance_strategy name + * ) + */ +Datum +get_rebalance_table_shards_plan(PG_FUNCTION_ARGS) +{ + List *relationIdList = NIL; + if (!PG_ARGISNULL(0)) + { + Oid relationId = PG_GETARG_OID(0); + ErrorIfMoveCitusLocalTable(relationId); + + relationIdList = list_make1_oid(relationId); + } + else + { + /* + * Note that we don't need to do any checks to error out for + * citus local tables here as NonColocatedDistRelationIdList + * already doesn't return non-distributed tables. 
+ */ + relationIdList = NonColocatedDistRelationIdList(); + } + + PG_ENSURE_ARGNOTNULL(2, "max_shard_moves"); + PG_ENSURE_ARGNOTNULL(3, "excluded_shard_list"); + PG_ENSURE_ARGNOTNULL(4, "drain_only"); + + Form_pg_dist_rebalance_strategy strategy = GetRebalanceStrategy( + PG_GETARG_NAME_OR_NULL(5)); + RebalanceOptions options = { + .relationIdList = relationIdList, + .threshold = PG_GETARG_FLOAT4_OR_DEFAULT(1, strategy->defaultThreshold), + .maxShardMoves = PG_GETARG_INT32(2), + .excludedShardArray = PG_GETARG_ARRAYTYPE_P(3), + .drainOnly = PG_GETARG_BOOL(4), + .rebalanceStrategy = strategy, + }; + + + List *placementUpdateList = GetRebalanceSteps(&options); + List *colocatedUpdateList = GetColocatedRebalanceSteps(placementUpdateList); + ListCell *colocatedUpdateCell = NULL; + + TupleDesc tupdesc; + Tuplestorestate *tupstore = SetupTuplestore(fcinfo, &tupdesc); + + foreach(colocatedUpdateCell, colocatedUpdateList) + { + PlacementUpdateEvent *colocatedUpdate = lfirst(colocatedUpdateCell); + Datum values[7]; + bool nulls[7]; + + memset(values, 0, sizeof(values)); + memset(nulls, 0, sizeof(nulls)); + + values[0] = ObjectIdGetDatum(RelationIdForShard(colocatedUpdate->shardId)); + values[1] = UInt64GetDatum(colocatedUpdate->shardId); + values[2] = UInt64GetDatum(ShardLength(colocatedUpdate->shardId)); + values[3] = PointerGetDatum(cstring_to_text( + colocatedUpdate->sourceNode->workerName)); + values[4] = UInt32GetDatum(colocatedUpdate->sourceNode->workerPort); + values[5] = PointerGetDatum(cstring_to_text( + colocatedUpdate->targetNode->workerName)); + values[6] = UInt32GetDatum(colocatedUpdate->targetNode->workerPort); + + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + } + + tuplestore_donestoring(tupstore); + + return (Datum) 0; +} + + +/* + * get_rebalance_progress collects information about the ongoing rebalance operations and + * returns the concatenated list of steps involved in the operations, along with their + * progress information. Currently the progress field can take 4 integer values + * (-1: error, 0: waiting, 1: moving, 2: moved). The progress field is of type bigint + * because we may implement a more granular, byte-level progress as a future improvement. 
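+ *
+ * Monitoring is typically a matter of polling this UDF from a separate
+ * session while a rebalance is running, e.g.:
+ *
+ *   SELECT * FROM get_rebalance_progress();
+ *
+ * and interpreting the progress column with the mapping above. The output
+ * column names come from the SQL-level function declaration.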
+ */ +Datum +get_rebalance_progress(PG_FUNCTION_ARGS) +{ + List *segmentList = NIL; + ListCell *rebalanceMonitorCell = NULL; + TupleDesc tupdesc; + Tuplestorestate *tupstore = SetupTuplestore(fcinfo, &tupdesc); + + /* get the addresses of all current rebalance monitors */ + List *rebalanceMonitorList = ProgressMonitorList(REBALANCE_ACTIVITY_MAGIC_NUMBER, + &segmentList); + + foreach(rebalanceMonitorCell, rebalanceMonitorList) + { + ProgressMonitorData *monitor = lfirst(rebalanceMonitorCell); + PlacementUpdateEventProgress *placementUpdateEvents = monitor->steps; + + for (int eventIndex = 0; eventIndex < monitor->stepCount; eventIndex++) + { + PlacementUpdateEventProgress *step = placementUpdateEvents + eventIndex; + uint64 shardId = step->shardId; + ShardInterval *shardInterval = LoadShardInterval(shardId); + + Datum values[9]; + bool nulls[9]; + + memset(values, 0, sizeof(values)); + memset(nulls, 0, sizeof(nulls)); + + values[0] = monitor->processId; + values[1] = ObjectIdGetDatum(shardInterval->relationId); + values[2] = UInt64GetDatum(shardId); + values[3] = UInt64GetDatum(step->shardSize); + values[4] = PointerGetDatum(cstring_to_text(step->sourceName)); + values[5] = UInt32GetDatum(step->sourcePort); + values[6] = PointerGetDatum(cstring_to_text(step->targetName)); + values[7] = UInt32GetDatum(step->targetPort); + values[8] = UInt64GetDatum(step->progress); + + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + } + } + + tuplestore_donestoring(tupstore); + + DetachFromDSMSegments(segmentList); + + return (Datum) 0; +} + + +/* + * NonColocatedDistRelationIdList returns a list of distributed table oids, one + * for each existing colocation group. + */ +static List * +NonColocatedDistRelationIdList(void) +{ + List *relationIdList = NIL; + List *allCitusTablesList = CitusTableTypeIdList(ANY_CITUS_TABLE_TYPE); + Oid tableId = InvalidOid; + + /* allocate sufficient capacity for O(1) expected look-up time */ + int capacity = (int) (list_length(allCitusTablesList) / 0.75) + 1; + int flags = HASH_ELEM | HASH_CONTEXT | HASH_BLOBS; + HASHCTL info = { + .keysize = sizeof(Oid), + .entrysize = sizeof(Oid), + .hcxt = CurrentMemoryContext + }; + + HTAB *alreadySelectedColocationIds = hash_create("RebalanceColocationIdSet", + capacity, &info, flags); + foreach_oid(tableId, allCitusTablesList) + { + bool foundInSet = false; + CitusTableCacheEntry *citusTableCacheEntry = GetCitusTableCacheEntry( + tableId); + + if (!IsCitusTableTypeCacheEntry(citusTableCacheEntry, DISTRIBUTED_TABLE)) + { + /* + * We're only interested in distributed tables, should ignore + * reference tables and citus local tables. + */ + continue; + } + + if (citusTableCacheEntry->colocationId != INVALID_COLOCATION_ID) + { + hash_search(alreadySelectedColocationIds, + &citusTableCacheEntry->colocationId, HASH_ENTER, + &foundInSet); + if (foundInSet) + { + continue; + } + } + relationIdList = lappend_oid(relationIdList, tableId); + } + return relationIdList; +} + + +/* + * RebalanceTableShards rebalances the shards for the relations inside the + * relationIdList across the different workers. 
+ */ +static void +RebalanceTableShards(RebalanceOptions *options, Oid shardReplicationModeOid) +{ + char transferMode = LookupShardTransferMode(shardReplicationModeOid); + EnsureReferenceTablesExistOnAllNodesExtended(transferMode); + + if (list_length(options->relationIdList) == 0) + { + return; + } + + Oid relationId = InvalidOid; + char *operationName = "rebalance"; + if (options->drainOnly) + { + operationName = "move"; + } + + foreach_oid(relationId, options->relationIdList) + { + AcquireColocationLock(relationId, operationName); + } + + List *placementUpdateList = GetRebalanceSteps(options); + + if (list_length(placementUpdateList) == 0) + { + return; + } + + /* + * This uses the first relationId from the list, it's only used for display + * purposes so it does not really matter which to show + */ + SetupRebalanceMonitor(placementUpdateList, linitial_oid(options->relationIdList)); + ExecutePlacementUpdates(placementUpdateList, shardReplicationModeOid, "Moving"); + FinalizeCurrentProgressMonitor(); +} + + +/* + * UpdateShardPlacement copies or moves a shard placement by calling + * the corresponding functions in Citus in a subtransaction. + */ +static bool +UpdateShardPlacement(PlacementUpdateEvent *placementUpdateEvent, + List *responsiveNodeList, Oid shardReplicationModeOid) +{ + PlacementUpdateType updateType = placementUpdateEvent->updateType; + uint64 shardId = placementUpdateEvent->shardId; + WorkerNode *sourceNode = placementUpdateEvent->sourceNode; + WorkerNode *targetNode = placementUpdateEvent->targetNode; + const char *doRepair = "false"; + int connectionFlag = FORCE_NEW_CONNECTION; + + Datum shardTranferModeLabelDatum = + DirectFunctionCall1(enum_out, shardReplicationModeOid); + char *shardTranferModeLabel = DatumGetCString(shardTranferModeLabelDatum); + + StringInfo placementUpdateCommand = makeStringInfo(); + + /* if target node is not responsive, don't continue */ + bool targetResponsive = WorkerNodeListContains(responsiveNodeList, + targetNode->workerName, + targetNode->workerPort); + if (!targetResponsive) + { + ereport(WARNING, (errmsg("%s:%d is not responsive", targetNode->workerName, + targetNode->workerPort))); + UpdateColocatedShardPlacementProgress(shardId, + sourceNode->workerName, + sourceNode->workerPort, + REBALANCE_PROGRESS_ERROR); + return false; + } + + /* if source node is not responsive, don't continue */ + bool sourceResponsive = WorkerNodeListContains(responsiveNodeList, + sourceNode->workerName, + sourceNode->workerPort); + if (!sourceResponsive) + { + ereport(WARNING, (errmsg("%s:%d is not responsive", sourceNode->workerName, + sourceNode->workerPort))); + UpdateColocatedShardPlacementProgress(shardId, + sourceNode->workerName, + sourceNode->workerPort, + REBALANCE_PROGRESS_ERROR); + return false; + } + + if (updateType == PLACEMENT_UPDATE_MOVE) + { + appendStringInfo(placementUpdateCommand, + "SELECT master_move_shard_placement(%ld,%s,%u,%s,%u,%s)", + shardId, + quote_literal_cstr(sourceNode->workerName), + sourceNode->workerPort, + quote_literal_cstr(targetNode->workerName), + targetNode->workerPort, + quote_literal_cstr(shardTranferModeLabel)); + } + else if (updateType == PLACEMENT_UPDATE_COPY) + { + appendStringInfo(placementUpdateCommand, + "SELECT master_copy_shard_placement(%ld,%s,%u,%s,%u,%s,%s)", + shardId, + quote_literal_cstr(sourceNode->workerName), + sourceNode->workerPort, + quote_literal_cstr(targetNode->workerName), + targetNode->workerPort, + doRepair, + quote_literal_cstr(shardTranferModeLabel)); + } + else + { + ereport(ERROR, 
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only moving or copying shards is supported"))); + } + + UpdateColocatedShardPlacementProgress(shardId, + sourceNode->workerName, + sourceNode->workerPort, + REBALANCE_PROGRESS_MOVING); + + MultiConnection *connection = GetNodeConnection(connectionFlag, LOCAL_HOST_NAME, + PostPortNumber); + + /* + * In case of failure, we throw an error such that rebalance_table_shards + * fails early. + */ + ExecuteCriticalRemoteCommand(connection, placementUpdateCommand->data); + + UpdateColocatedShardPlacementProgress(shardId, + sourceNode->workerName, + sourceNode->workerPort, + REBALANCE_PROGRESS_MOVED); + + return true; +} + + +/* + * RebalancePlacementUpdates returns a list of placement updates which makes the + * cluster balanced. We move shards to these nodes until all nodes become utilized. + * We consider a node under-utilized if it has less than floor((1.0 - threshold) * + * placementCountAverage) shard placements. In each iteration we choose the node + * with maximum number of shard placements as the source, and we choose the node + * with minimum number of shard placements as the target. Then we choose a shard + * which is placed in the source node but not in the target node as the shard to + * move. + * + * The shardPlacementListList argument contains a list of lists of shard + * placements. Each of these lists are balanced independently. This is used to + * make sure different colocation groups are balanced separately, so each list + * contains the placements of a colocation group. + */ +List * +RebalancePlacementUpdates(List *workerNodeList, List *shardPlacementListList, + double threshold, + int32 maxShardMoves, + bool drainOnly, + RebalancePlanFunctions *functions) +{ + List *rebalanceStates = NIL; + RebalanceState *state = NULL; + List *shardPlacementList = NIL; + List *placementUpdateList = NIL; + + foreach_ptr(shardPlacementList, shardPlacementListList) + { + state = InitRebalanceState(workerNodeList, shardPlacementList, + functions); + rebalanceStates = lappend(rebalanceStates, state); + } + + foreach_ptr(state, rebalanceStates) + { + state->placementUpdateList = placementUpdateList; + MoveShardsAwayFromDisallowedNodes(state); + placementUpdateList = state->placementUpdateList; + } + + if (!drainOnly) + { + foreach_ptr(state, rebalanceStates) + { + state->placementUpdateList = placementUpdateList; + + /* calculate lower bound for placement count */ + float4 averageUtilization = (state->totalCost / state->totalCapacity); + float4 utilizationLowerBound = ((1.0 - threshold) * averageUtilization); + float4 utilizationUpperBound = ((1.0 + threshold) * averageUtilization); + + bool moreMovesAvailable = true; + while (list_length(state->placementUpdateList) < maxShardMoves && + moreMovesAvailable) + { + moreMovesAvailable = FindAndMoveShardCost(utilizationLowerBound, + utilizationUpperBound, + state); + } + placementUpdateList = state->placementUpdateList; + + if (moreMovesAvailable) + { + ereport(NOTICE, (errmsg( + "Stopped searching before we were out of moves. " + "Please rerun the rebalancer after it's finished " + "for a more optimal placement."))); + break; + } + } + } + + foreach_ptr(state, rebalanceStates) + { + hash_destroy(state->placementsHash); + } + + return placementUpdateList; +} + + +/* + * InitRebalanceState sets up a RebalanceState for it's arguments. The + * RebalanceState contains the information needed to calculate shard moves. 
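+ *
+ * As a small worked example of the bookkeeping done here: with two workers
+ * of capacity 1 and 3 and four shards of cost 1 that all start on the first
+ * worker, totalCost becomes 4, totalCapacity becomes 4, and the initial
+ * utilizations are 4/1 = 4.0 and 0/3 = 0.0 (a capacity of 0 would yield
+ * INFINITY, see CalculateUtilization below).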
+ */ +static RebalanceState * +InitRebalanceState(List *workerNodeList, List *shardPlacementList, + RebalancePlanFunctions *functions) +{ + ShardPlacement *placement = NULL; + HASH_SEQ_STATUS status; + WorkerNode *workerNode = NULL; + + RebalanceState *state = palloc0(sizeof(RebalanceState)); + state->functions = functions; + state->placementsHash = ActivePlacementsHash(shardPlacementList); + + /* create empty fill state for all of the worker nodes */ + foreach_ptr(workerNode, workerNodeList) + { + NodeFillState *fillState = palloc0(sizeof(NodeFillState)); + fillState->node = workerNode; + fillState->capacity = functions->nodeCapacity(workerNode, functions->context); + + /* + * Set the utilization here although the totalCost is not set yet. This is + * important to set the utilization to INFINITY when the capacity is 0. + */ + fillState->utilization = CalculateUtilization(fillState->totalCost, + fillState->capacity); + state->fillStateListAsc = lappend(state->fillStateListAsc, fillState); + state->fillStateListDesc = lappend(state->fillStateListDesc, fillState); + state->totalCapacity += fillState->capacity; + } + + /* Fill the fill states for all of the worker nodes based on the placements */ + foreach_htab(placement, &status, state->placementsHash) + { + ShardCost *shardCost = palloc0(sizeof(ShardCost)); + NodeFillState *fillState = FindFillStateForPlacement(state, placement); + + Assert(fillState != NULL); + + *shardCost = functions->shardCost(placement->shardId, functions->context); + + fillState->totalCost += shardCost->cost; + fillState->utilization = CalculateUtilization(fillState->totalCost, + fillState->capacity); + fillState->shardCostListDesc = lappend(fillState->shardCostListDesc, + shardCost); + fillState->shardCostListDesc = SortList(fillState->shardCostListDesc, + CompareShardCostDesc); + + state->totalCost += shardCost->cost; + + if (!functions->shardAllowedOnNode(placement->shardId, fillState->node, + functions->context)) + { + DisallowedPlacement *disallowed = palloc0(sizeof(DisallowedPlacement)); + disallowed->shardCost = shardCost; + disallowed->fillState = fillState; + state->disallowedPlacementList = lappend(state->disallowedPlacementList, + disallowed); + } + } + foreach_htab_cleanup(placement, &status); + + state->fillStateListAsc = SortList(state->fillStateListAsc, CompareNodeFillStateAsc); + state->fillStateListDesc = SortList(state->fillStateListDesc, + CompareNodeFillStateDesc); + CheckRebalanceStateInvariants(state); + + return state; +} + + +/* + * CalculateUtilization returns INFINITY when capacity is 0 and + * totalCost/capacity otherwise. + */ +static float4 +CalculateUtilization(float4 totalCost, float4 capacity) +{ + if (capacity <= 0) + { + return INFINITY; + } + return totalCost / capacity; +} + + +/* + * FindFillStateForPlacement finds the fillState for the workernode that + * matches the placement. + */ +static NodeFillState * +FindFillStateForPlacement(RebalanceState *state, ShardPlacement *placement) +{ + NodeFillState *fillState = NULL; + + /* Find the correct fill state to add the placement to and do that */ + foreach_ptr(fillState, state->fillStateListAsc) + { + if (IsPlacementOnWorkerNode(placement, fillState->node)) + { + return fillState; + } + } + return NULL; +} + + +/* + * IsPlacementOnWorkerNode checks if the shard placement is for to the given + * workenode. 
+ */ +static bool +IsPlacementOnWorkerNode(ShardPlacement *placement, WorkerNode *workerNode) +{ + if (strncmp(workerNode->workerName, placement->nodeName, WORKER_LENGTH) != 0) + { + return false; + } + return workerNode->workerPort == placement->nodePort; +} + + +/* + * CompareNodeFillStateAsc can be used to sort fill states from empty to full. + */ +static int +CompareNodeFillStateAsc(const void *void1, const void *void2) +{ + const NodeFillState *a = *((const NodeFillState **) void1); + const NodeFillState *b = *((const NodeFillState **) void2); + if (a->utilization < b->utilization) + { + return -1; + } + if (a->utilization > b->utilization) + { + return 1; + } + + /* + * If utilization prefer nodes with more capacity, since utilization will + * grow slower on those + */ + if (a->capacity > b->capacity) + { + return -1; + } + if (a->capacity < b->capacity) + { + return 1; + } + + /* Finally differentiate by node id */ + if (a->node->nodeId < b->node->nodeId) + { + return -1; + } + return a->node->nodeId > b->node->nodeId; +} + + +/* + * CompareNodeFillStateDesc can be used to sort fill states from full to empty. + */ +static int +CompareNodeFillStateDesc(const void *a, const void *b) +{ + return -CompareNodeFillStateAsc(a, b); +} + + +/* + * CompareShardCostAsc can be used to sort shard costs from low cost to high + * cost. + */ +static int +CompareShardCostAsc(const void *void1, const void *void2) +{ + const ShardCost *a = *((const ShardCost **) void1); + const ShardCost *b = *((const ShardCost **) void2); + if (a->cost < b->cost) + { + return -1; + } + if (a->cost > b->cost) + { + return 1; + } + + /* make compare function (more) stable for tests */ + if (a->shardId > b->shardId) + { + return -1; + } + return a->shardId < b->shardId; +} + + +/* + * CompareShardCostAsc can be used to sort shard costs from high cost to low + * cost. + */ +static int +CompareShardCostDesc(const void *a, const void *b) +{ + return -CompareShardCostAsc(a, b); +} + + +/* + * MoveShardsAwayFromDisallowedNodes returns a list of placement updates that + * move any shards that are not allowed on their current node to a node that + * they are allowed on. + */ +static void +MoveShardsAwayFromDisallowedNodes(RebalanceState *state) +{ + DisallowedPlacement *disallowedPlacement = NULL; + + state->disallowedPlacementList = SortList(state->disallowedPlacementList, + CompareDisallowedPlacementDesc); + + /* Move shards off of nodes they are not allowed on */ + foreach_ptr(disallowedPlacement, state->disallowedPlacementList) + { + NodeFillState *targetFillState = FindAllowedTargetFillState( + state, disallowedPlacement->shardCost->shardId); + if (targetFillState == NULL) + { + ereport(WARNING, (errmsg( + "Not allowed to move shard " UINT64_FORMAT + " anywhere from %s:%d", + disallowedPlacement->shardCost->shardId, + disallowedPlacement->fillState->node->workerName, + disallowedPlacement->fillState->node->workerPort + ))); + continue; + } + MoveShardCost(disallowedPlacement->fillState, + targetFillState, + disallowedPlacement->shardCost, + state); + } +} + + +/* + * CompareDisallowedPlacementAsc can be used to sort disallowed placements from + * low cost to high cost. 
+ */ +static int +CompareDisallowedPlacementAsc(const void *void1, const void *void2) +{ + const DisallowedPlacement *a = *((const DisallowedPlacement **) void1); + const DisallowedPlacement *b = *((const DisallowedPlacement **) void2); + return CompareShardCostAsc(&(a->shardCost), &(b->shardCost)); +} + + +/* + * CompareDisallowedPlacementAsc can be used to sort disallowed placements from + * low cost to high cost. + */ +static int +CompareDisallowedPlacementDesc(const void *a, const void *b) +{ + return -CompareDisallowedPlacementAsc(a, b); +} + + +/* + * FindAllowedTargetFillState finds the first fill state in fillStateListAsc + * where the shard can be moved to. + */ +static NodeFillState * +FindAllowedTargetFillState(RebalanceState *state, uint64 shardId) +{ + NodeFillState *targetFillState = NULL; + foreach_ptr(targetFillState, state->fillStateListAsc) + { + bool hasShard = PlacementsHashFind( + state->placementsHash, + shardId, + targetFillState->node); + if (!hasShard && state->functions->shardAllowedOnNode( + shardId, + targetFillState->node, + state->functions->context)) + { + return targetFillState; + } + } + return NULL; +} + + +/* + * MoveShardCost moves a shardcost from the source to the target fill states + * and updates the RebalanceState accordingly. What it does in detail is: + * 1. add a placement update to state->placementUpdateList + * 2. update state->placementsHash + * 3. update totalcost, utilization and shardCostListDesc in source and target + * 4. resort state->fillStateListAsc/Desc + */ +static void +MoveShardCost(NodeFillState *sourceFillState, + NodeFillState *targetFillState, + ShardCost *shardCost, + RebalanceState *state) +{ + uint64 shardIdToMove = shardCost->shardId; + + /* construct the placement update */ + PlacementUpdateEvent *placementUpdateEvent = palloc0(sizeof(PlacementUpdateEvent)); + placementUpdateEvent->updateType = PLACEMENT_UPDATE_MOVE; + placementUpdateEvent->shardId = shardIdToMove; + placementUpdateEvent->sourceNode = sourceFillState->node; + placementUpdateEvent->targetNode = targetFillState->node; + + /* record the placement update */ + state->placementUpdateList = lappend(state->placementUpdateList, + placementUpdateEvent); + + /* update the placements hash and the node shard lists */ + PlacementsHashRemove(state->placementsHash, shardIdToMove, sourceFillState->node); + PlacementsHashEnter(state->placementsHash, shardIdToMove, targetFillState->node); + + sourceFillState->totalCost -= shardCost->cost; + sourceFillState->utilization = CalculateUtilization(sourceFillState->totalCost, + sourceFillState->capacity); + sourceFillState->shardCostListDesc = list_delete_ptr( + sourceFillState->shardCostListDesc, + shardCost); + + targetFillState->totalCost += shardCost->cost; + targetFillState->utilization = CalculateUtilization(targetFillState->totalCost, + targetFillState->capacity); + targetFillState->shardCostListDesc = lappend(targetFillState->shardCostListDesc, + shardCost); + targetFillState->shardCostListDesc = SortList(targetFillState->shardCostListDesc, + CompareShardCostDesc); + + state->fillStateListAsc = SortList(state->fillStateListAsc, CompareNodeFillStateAsc); + state->fillStateListDesc = SortList(state->fillStateListDesc, + CompareNodeFillStateDesc); + CheckRebalanceStateInvariants(state); +} + + +/* + * FindAndMoveShardCost is the main rebalancing algorithm. This takes the + * current state and returns a list with a new move appended that improves the + * balance of shards. 
The algorithm is greedy and will use the first new move
+ * that improves the balance of shards. It finds nodes by trying to move a shard from the
+ * fullest node to the emptiest node. If no moves are possible it will try the
+ * second emptiest node until it has tried all of them. Then it will try the
+ * second fullest node. It returns true if it was able to find a move and
+ * false if it couldn't.
+ */
+static bool
+FindAndMoveShardCost(float4 utilizationLowerBound, float4 utilizationUpperBound,
+                     RebalanceState *state)
+{
+    NodeFillState *sourceFillState = NULL;
+    NodeFillState *targetFillState = NULL;
+
+    /*
+     * find a source node for the move, starting at the node with the highest
+     * utilization
+     */
+    foreach_ptr(sourceFillState, state->fillStateListDesc)
+    {
+        /* Don't move shards away from nodes that are already too empty; we're
+         * done searching */
+        if (sourceFillState->utilization <= utilizationLowerBound)
+        {
+            return false;
+        }
+
+        /* find a target node for the move, starting at the node with the
+         * lowest utilization */
+        foreach_ptr(targetFillState, state->fillStateListAsc)
+        {
+            ShardCost *shardCost = NULL;
+
+            /* Don't add more shards to nodes that are already at the upper
+             * bound. We should try the next source node now because further
+             * target nodes will also be above the upper bound */
+            if (targetFillState->utilization >= utilizationUpperBound)
+            {
+                break;
+            }
+
+            /* Don't move a shard between nodes that both have decent
+             * utilization. We should try the next source node now because
+             * further target nodes will also have decent utilization */
+            if (targetFillState->utilization >= utilizationLowerBound &&
+                sourceFillState->utilization <= utilizationUpperBound)
+            {
+                break;
+            }
+
+            /* find a shardcost that can be moved between the nodes and that
+             * makes the cost distribution more equal */
+            foreach_ptr(shardCost, sourceFillState->shardCostListDesc)
+            {
+                bool targetHasShard = PlacementsHashFind(state->placementsHash,
+                                                         shardCost->shardId,
+                                                         targetFillState->node);
+                float4 newTargetTotalCost = targetFillState->totalCost + shardCost->cost;
+                float4 newTargetUtilization = CalculateUtilization(
+                    newTargetTotalCost,
+                    targetFillState->capacity);
+                float4 newSourceTotalCost = sourceFillState->totalCost - shardCost->cost;
+                float4 newSourceUtilization = CalculateUtilization(
+                    newSourceTotalCost,
+                    sourceFillState->capacity);
+
+                /* Skip shards that are already on the target node */
+                if (targetHasShard)
+                {
+                    continue;
+                }
+
+                /* Skip shards that are not allowed on the target node */
+                if (!state->functions->shardAllowedOnNode(shardCost->shardId,
+                                                          targetFillState->node,
+                                                          state->functions->context))
+                {
+                    continue;
+                }
+
+                /*
+                 * Ensure that the cost distribution is actually better
+                 * after the move, i.e. the new highest utilization of
+                 * source and target is lower than the previous highest, or
+                 * the highest utilization is the same, but the lowest
+                 * increased.
+                 */
+                if (newTargetUtilization > sourceFillState->utilization)
+                {
+                    continue;
+                }
+                if (newTargetUtilization == sourceFillState->utilization &&
+                    newSourceUtilization <= targetFillState->utilization
+                    )
+                {
+                    /*
+                     * this can trigger when capacity of the nodes is not the
+                     * same. 
Example (also a test): + * - node with capacity 3 + * - node with capacity 1 + * - 3 shards with cost 1 + * Best distribution would be 2 shards on node with + * capacity 3 and one on node with capacity 1 + */ + continue; + } + MoveShardCost(sourceFillState, targetFillState, + shardCost, state); + return true; + } + } + } + return false; +} + + +/* + * ReplicationPlacementUpdates returns a list of placement updates which + * replicates shard placements that need re-replication. To do this, the + * function loops over the shard placements, and for each shard placement + * which needs to be re-replicated, it chooses an active worker node with + * smallest number of shards as the target node. + */ +List * +ReplicationPlacementUpdates(List *workerNodeList, List *shardPlacementList, + int shardReplicationFactor) +{ + List *placementUpdateList = NIL; + ListCell *shardPlacementCell = NULL; + uint32 workerNodeIndex = 0; + HTAB *placementsHash = ActivePlacementsHash(shardPlacementList); + uint32 workerNodeCount = list_length(workerNodeList); + + /* get number of shards per node */ + uint32 *shardCountArray = palloc0(workerNodeCount * sizeof(uint32)); + foreach(shardPlacementCell, shardPlacementList) + { + ShardPlacement *placement = lfirst(shardPlacementCell); + if (placement->shardState != SHARD_STATE_ACTIVE) + { + continue; + } + + for (workerNodeIndex = 0; workerNodeIndex < workerNodeCount; workerNodeIndex++) + { + WorkerNode *node = list_nth(workerNodeList, workerNodeIndex); + if (strncmp(node->workerName, placement->nodeName, WORKER_LENGTH) == 0 && + node->workerPort == placement->nodePort) + { + shardCountArray[workerNodeIndex]++; + break; + } + } + } + + foreach(shardPlacementCell, shardPlacementList) + { + WorkerNode *sourceNode = NULL; + WorkerNode *targetNode = NULL; + uint32 targetNodeShardCount = UINT_MAX; + uint32 targetNodeIndex = 0; + + ShardPlacement *placement = (ShardPlacement *) lfirst(shardPlacementCell); + uint64 shardId = placement->shardId; + + /* skip the shard placement if it has enough replications */ + int activePlacementCount = ShardActivePlacementCount(placementsHash, shardId, + workerNodeList); + if (activePlacementCount >= shardReplicationFactor) + { + continue; + } + + /* + * We can copy the shard from any active worker node that contains the + * shard. + */ + for (workerNodeIndex = 0; workerNodeIndex < workerNodeCount; workerNodeIndex++) + { + WorkerNode *workerNode = list_nth(workerNodeList, workerNodeIndex); + + bool placementExists = PlacementsHashFind(placementsHash, shardId, + workerNode); + if (placementExists) + { + sourceNode = workerNode; + break; + } + } + + /* + * If we couldn't find any worker node which contains the shard, then + * all copies of the shard are list and we should error out. + */ + if (sourceNode == NULL) + { + ereport(ERROR, (errmsg("could not find a source for shard " UINT64_FORMAT, + shardId))); + } + + /* + * We can copy the shard to any worker node that doesn't contain the shard. + * Among such worker nodes, we choose the worker node with minimum shard + * count as the target. 
+ */ + for (workerNodeIndex = 0; workerNodeIndex < workerNodeCount; workerNodeIndex++) + { + WorkerNode *workerNode = list_nth(workerNodeList, workerNodeIndex); + + if (!NodeCanHaveDistTablePlacements(workerNode)) + { + /* never replicate placements to nodes that should not have placements */ + continue; + } + + /* skip this node if it already contains the shard */ + bool placementExists = PlacementsHashFind(placementsHash, shardId, + workerNode); + if (placementExists) + { + continue; + } + + /* compare and change the target node */ + if (shardCountArray[workerNodeIndex] < targetNodeShardCount) + { + targetNode = workerNode; + targetNodeShardCount = shardCountArray[workerNodeIndex]; + targetNodeIndex = workerNodeIndex; + } + } + + /* + * If there is no worker node which doesn't contain the shard, then the + * shard replication factor is greater than number of worker nodes, and + * we should error out. + */ + if (targetNode == NULL) + { + ereport(ERROR, (errmsg("could not find a target for shard " UINT64_FORMAT, + shardId))); + } + + /* construct the placement update */ + PlacementUpdateEvent *placementUpdateEvent = palloc0( + sizeof(PlacementUpdateEvent)); + placementUpdateEvent->updateType = PLACEMENT_UPDATE_COPY; + placementUpdateEvent->shardId = shardId; + placementUpdateEvent->sourceNode = sourceNode; + placementUpdateEvent->targetNode = targetNode; + + /* record the placement update */ + placementUpdateList = lappend(placementUpdateList, placementUpdateEvent); + + /* update the placements hash and the shard count array */ + PlacementsHashEnter(placementsHash, shardId, targetNode); + shardCountArray[targetNodeIndex]++; + } + + hash_destroy(placementsHash); + + return placementUpdateList; +} + + +/* + * ShardActivePlacementCount returns the number of active placements for the + * given shard which are placed at the active worker nodes. + */ +static int +ShardActivePlacementCount(HTAB *activePlacementsHash, uint64 shardId, + List *activeWorkerNodeList) +{ + int shardActivePlacementCount = 0; + ListCell *workerNodeCell = NULL; + + foreach(workerNodeCell, activeWorkerNodeList) + { + WorkerNode *workerNode = lfirst(workerNodeCell); + bool placementExists = PlacementsHashFind(activePlacementsHash, shardId, + workerNode); + if (placementExists) + { + shardActivePlacementCount++; + } + } + + return shardActivePlacementCount; +} + + +/* + * ActivePlacementsHash creates and returns a hash set for the placements in + * the given list of shard placements which are in active state. 
+ */ +static HTAB * +ActivePlacementsHash(List *shardPlacementList) +{ + ListCell *shardPlacementCell = NULL; + HASHCTL info; + int shardPlacementCount = list_length(shardPlacementList); + + memset(&info, 0, sizeof(info)); + info.keysize = sizeof(ShardPlacement); + info.entrysize = sizeof(ShardPlacement); + info.hash = PlacementsHashHashCode; + info.match = PlacementsHashCompare; + int hashFlags = (HASH_ELEM | HASH_FUNCTION | HASH_COMPARE); + + HTAB *shardPlacementsHash = hash_create("ActivePlacements Hash", + shardPlacementCount, &info, hashFlags); + + foreach(shardPlacementCell, shardPlacementList) + { + ShardPlacement *shardPlacement = (ShardPlacement *) lfirst(shardPlacementCell); + if (shardPlacement->shardState == SHARD_STATE_ACTIVE) + { + void *hashKey = (void *) shardPlacement; + hash_search(shardPlacementsHash, hashKey, HASH_ENTER, NULL); + } + } + + return shardPlacementsHash; +} + + +/* + * PlacementsHashFinds returns true if there exists a shard placement with the + * given workerNode and shard id in the given placements hash, otherwise it + * returns false. + */ +static bool +PlacementsHashFind(HTAB *placementsHash, uint64 shardId, WorkerNode *workerNode) +{ + bool placementFound = false; + + ShardPlacement shardPlacement; + memset(&shardPlacement, 0, sizeof(shardPlacement)); + + shardPlacement.shardId = shardId; + shardPlacement.nodeName = workerNode->workerName; + shardPlacement.nodePort = workerNode->workerPort; + + void *hashKey = (void *) (&shardPlacement); + hash_search(placementsHash, hashKey, HASH_FIND, &placementFound); + + return placementFound; +} + + +/* + * PlacementsHashEnter enters a shard placement for the given worker node and + * shard id to the given placements hash. + */ +static void +PlacementsHashEnter(HTAB *placementsHash, uint64 shardId, WorkerNode *workerNode) +{ + ShardPlacement shardPlacement; + memset(&shardPlacement, 0, sizeof(shardPlacement)); + + shardPlacement.shardId = shardId; + shardPlacement.nodeName = workerNode->workerName; + shardPlacement.nodePort = workerNode->workerPort; + + void *hashKey = (void *) (&shardPlacement); + hash_search(placementsHash, hashKey, HASH_ENTER, NULL); +} + + +/* + * PlacementsHashRemove removes the shard placement for the given worker node and + * shard id from the given placements hash. + */ +static void +PlacementsHashRemove(HTAB *placementsHash, uint64 shardId, WorkerNode *workerNode) +{ + ShardPlacement shardPlacement; + memset(&shardPlacement, 0, sizeof(shardPlacement)); + + shardPlacement.shardId = shardId; + shardPlacement.nodeName = workerNode->workerName; + shardPlacement.nodePort = workerNode->workerPort; + + void *hashKey = (void *) (&shardPlacement); + hash_search(placementsHash, hashKey, HASH_REMOVE, NULL); +} + + +/* + * ShardPlacementCompare compares two shard placements using shard id, node name, + * and node port number. 
+ */ +static int +PlacementsHashCompare(const void *lhsKey, const void *rhsKey, Size keySize) +{ + const ShardPlacement *placementLhs = (const ShardPlacement *) lhsKey; + const ShardPlacement *placementRhs = (const ShardPlacement *) rhsKey; + + int shardIdCompare = 0; + + /* first, compare by shard id */ + if (placementLhs->shardId < placementRhs->shardId) + { + shardIdCompare = -1; + } + else if (placementLhs->shardId > placementRhs->shardId) + { + shardIdCompare = 1; + } + + if (shardIdCompare != 0) + { + return shardIdCompare; + } + + /* then, compare by node name */ + int nodeNameCompare = strncmp(placementLhs->nodeName, placementRhs->nodeName, + WORKER_LENGTH); + if (nodeNameCompare != 0) + { + return nodeNameCompare; + } + + /* finally, compare by node port */ + int nodePortCompare = placementLhs->nodePort - placementRhs->nodePort; + return nodePortCompare; +} + + +/* + * ShardPlacementHashCode computes the hash code for a shard placement from the + * placement's shard id, node name, and node port number. + */ +static uint32 +PlacementsHashHashCode(const void *key, Size keySize) +{ + const ShardPlacement *placement = (const ShardPlacement *) key; + const uint64 *shardId = &(placement->shardId); + const char *nodeName = placement->nodeName; + const uint32 *nodePort = &(placement->nodePort); + + /* standard hash function outlined in Effective Java, Item 8 */ + uint32 result = 17; + result = 37 * result + tag_hash(shardId, sizeof(uint64)); + result = 37 * result + string_hash(nodeName, WORKER_LENGTH); + result = 37 * result + tag_hash(nodePort, sizeof(uint32)); + + return result; +} + + +/* WorkerNodeListContains checks if the worker node exists in the given list. */ +static bool +WorkerNodeListContains(List *workerNodeList, const char *workerName, uint32 workerPort) +{ + bool workerNodeListContains = false; + ListCell *workerNodeCell = NULL; + + foreach(workerNodeCell, workerNodeList) + { + WorkerNode *workerNode = (WorkerNode *) lfirst(workerNodeCell); + + if ((strncmp(workerNode->workerName, workerName, WORKER_LENGTH) == 0) && + (workerNode->workerPort == workerPort)) + { + workerNodeListContains = true; + break; + } + } + + return workerNodeListContains; +} + + +/* + * UpdateColocatedShardPlacementProgress updates the progress of the given placement, + * along with its colocated placements, to the given state. 
+ */ +static void +UpdateColocatedShardPlacementProgress(uint64 shardId, char *sourceName, int sourcePort, + uint64 progress) +{ + ProgressMonitorData *header = GetCurrentProgressMonitor(); + + if (header != NULL && header->steps != NULL) + { + PlacementUpdateEventProgress *steps = header->steps; + ListCell *colocatedShardIntervalCell = NULL; + + ShardInterval *shardInterval = LoadShardInterval(shardId); + List *colocatedShardIntervalList = ColocatedShardIntervalList(shardInterval); + + for (int moveIndex = 0; moveIndex < header->stepCount; moveIndex++) + { + PlacementUpdateEventProgress *step = steps + moveIndex; + uint64 currentShardId = step->shardId; + bool colocatedShard = false; + + foreach(colocatedShardIntervalCell, colocatedShardIntervalList) + { + ShardInterval *candidateShard = lfirst(colocatedShardIntervalCell); + if (candidateShard->shardId == currentShardId) + { + colocatedShard = true; + break; + } + } + + if (colocatedShard && + strcmp(step->sourceName, sourceName) == 0 && + step->sourcePort == sourcePort) + { + step->progress = progress; + } + } + } +} /* @@ -41,15 +2173,8 @@ PG_FUNCTION_INFO_V1(citus_validate_rebalance_strategy_functions); Datum pg_dist_rebalance_strategy_enterprise_check(PG_FUNCTION_ARGS) { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot write to pg_dist_rebalance_strategy"), - errdetail( - "Citus Community Edition does not support the use of " - "custom rebalance strategies."), - errhint( - "To learn more about using advanced rebalancing schemes " - "with Citus, please contact us at " - "https://citusdata.com/about/contact_us"))); + /* This is Enterprise, so this check is a no-op */ + PG_RETURN_VOID(); } diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index d54548b4c..4cf84fe3d 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -60,6 +60,7 @@ #include "distributed/reference_table_utils.h" #include "distributed/relation_access_tracking.h" #include "distributed/run_from_same_connection.h" +#include "distributed/shard_cleaner.h" #include "distributed/shared_connection_stats.h" #include "distributed/query_pushdown_planning.h" #include "distributed/time_constants.h" @@ -890,6 +891,38 @@ RegisterCitusConfigVariables(void) GUC_UNIT_MS | GUC_NO_SHOW_ALL, NULL, NULL, NULL); + DefineCustomBoolVariable( + "citus.defer_drop_after_shard_move", + gettext_noop("When enabled a shard move will mark old shards for deletion"), + gettext_noop("The deletion of a shard can sometimes run into a conflict with a " + "long running transactions on a the shard during the drop phase of " + "the shard move. This causes some moves to be rolled back after " + "resources have been spend on moving the shard. To prevent " + "conflicts this feature lets you skip the actual deletion till a " + "later point in time. When used one should set " + "citus.defer_shard_delete_interval to make sure defered deletions " + "will be executed"), + &DeferShardDeleteOnMove, + false, + PGC_USERSET, + 0, + NULL, NULL, NULL); + + DefineCustomIntVariable( + "citus.defer_shard_delete_interval", + gettext_noop("Sets the time to wait between background deletion for shards."), + gettext_noop("Shards that are marked for deferred deletion need to be deleted in " + "the background at a later time. This is done at a regular interval " + "configured here. 
The deletion is executed optimistically, it tries " + "to take a lock on a shard to clean, if the lock can't be acquired " + "the background worker moves on. When set to -1 this background " + "process is skipped."), + &DeferShardDeleteInterval, + -1, -1, 7 * 24 * 3600 * 1000, + PGC_SIGHUP, + GUC_UNIT_MS, + NULL, NULL, NULL); + DefineCustomBoolVariable( "citus.select_opens_transaction_block", gettext_noop("Open transaction blocks for SELECT commands"), diff --git a/src/backend/distributed/test/foreign_key_relationship_query.c b/src/backend/distributed/test/foreign_key_relationship_query.c index ea152acc2..a01785943 100644 --- a/src/backend/distributed/test/foreign_key_relationship_query.c +++ b/src/backend/distributed/test/foreign_key_relationship_query.c @@ -5,7 +5,7 @@ * This file contains UDFs for getting foreign constraint relationship between * distributed tables. * - * Copyright (c) Citus Data, Inc. + * Copyright (c), Citus Data, Inc. * *------------------------------------------------------------------------- */ @@ -15,6 +15,7 @@ #include "funcapi.h" #include "distributed/foreign_key_relationship.h" +#include "distributed/coordinator_protocol.h" #include "distributed/listutils.h" #include "distributed/metadata_cache.h" #include "distributed/tuplestore.h" diff --git a/src/backend/distributed/test/foreign_key_to_reference_table_rebalance.c b/src/backend/distributed/test/foreign_key_to_reference_table_rebalance.c new file mode 100644 index 000000000..4c5a21379 --- /dev/null +++ b/src/backend/distributed/test/foreign_key_to_reference_table_rebalance.c @@ -0,0 +1,87 @@ +/*------------------------------------------------------------------------- + * + * foreign_key_relationship_query.c + * + * This file contains UDFs for getting foreign constraint relationship between + * distributed tables. + * + * Copyright (c) 2018, Citus Data, Inc. + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "funcapi.h" + +#include "distributed/coordinator_protocol.h" +#include "distributed/listutils.h" +#include "distributed/metadata_cache.h" +#include "utils/builtins.h" + + +/* these functions are only exported in the regression tests */ +PG_FUNCTION_INFO_V1(get_foreign_key_to_reference_table_commands); + +/* + * get_foreign_key_to_reference_table_commands returns the list of commands + * for creating foreign keys to reference tables. 
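+ *
+ * A call from the regression tests could look like the following (the table
+ * name is only an example):
+ *
+ *   SELECT * FROM get_foreign_key_to_reference_table_commands(
+ *       'referencing_table'::regclass);
+ *
+ * which returns one text row per command generated for the table's first
+ * shard interval.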
+ */ +Datum +get_foreign_key_to_reference_table_commands(PG_FUNCTION_ARGS) +{ + FuncCallContext *functionContext = NULL; + ListCell *commandsCell = NULL; + + CheckCitusVersion(ERROR); + + /* for the first we call this UDF, we need to populate the result to return set */ + if (SRF_IS_FIRSTCALL()) + { + Oid relationId = PG_GETARG_OID(0); + + /* create a function context for cross-call persistence */ + functionContext = SRF_FIRSTCALL_INIT(); + + /* switch to memory context appropriate for multiple function calls */ + MemoryContext oldContext = MemoryContextSwitchTo( + functionContext->multi_call_memory_ctx); + + CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId); + ShardInterval *firstShardInterval = cacheEntry->sortedShardIntervalArray[0]; + ListCellAndListWrapper *wrapper = palloc0(sizeof(ListCellAndListWrapper)); + List *commandsList = + GetForeignConstraintCommandsToReferenceTable(firstShardInterval); + + commandsCell = list_head(commandsList); + wrapper->list = commandsList; + wrapper->listCell = commandsCell; + functionContext->user_fctx = wrapper; + MemoryContextSwitchTo(oldContext); + } + + /* + * On every call to this function, we get the current position in the + * statement list. We then iterate to the next position in the list and + * return the current statement, if we have not yet reached the end of + * list. + */ + functionContext = SRF_PERCALL_SETUP(); + + ListCellAndListWrapper *wrapper = + (ListCellAndListWrapper *) functionContext->user_fctx; + + if (wrapper->listCell != NULL) + { + char *command = (char *) lfirst(wrapper->listCell); + text *commandText = cstring_to_text(command); + + wrapper->listCell = lnext_compat(wrapper->list, wrapper->listCell); + + SRF_RETURN_NEXT(functionContext, PointerGetDatum(commandText)); + } + else + { + SRF_RETURN_DONE(functionContext); + } +} diff --git a/src/backend/distributed/test/shard_rebalancer.c b/src/backend/distributed/test/shard_rebalancer.c new file mode 100644 index 000000000..a402792f0 --- /dev/null +++ b/src/backend/distributed/test/shard_rebalancer.c @@ -0,0 +1,628 @@ +/*------------------------------------------------------------------------- + * + * test/shard_rebalancer.c + * + * This file contains functions used for unit testing the planning part of the + * shard rebalancer. + * + * Copyright (c) 2014-2019, Citus Data, Inc. 
+ * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "libpq-fe.h" + +#include "safe_lib.h" + +#include "catalog/pg_type.h" +#include "distributed/citus_safe_lib.h" +#include "distributed/citus_ruleutils.h" +#include "distributed/connection_management.h" +#include "distributed/listutils.h" +#include "distributed/multi_physical_planner.h" +#include "distributed/shard_rebalancer.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/int8.h" +#include "utils/json.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" + +/* static declarations for json conversion */ +static List * JsonArrayToShardPlacementTestInfoList( + ArrayType *shardPlacementJsonArrayObject); +static List * JsonArrayToWorkerTestInfoList(ArrayType *workerNodeJsonArrayObject); +static bool JsonFieldValueBool(Datum jsonDocument, const char *key); +static uint32 JsonFieldValueUInt32(Datum jsonDocument, const char *key); +static uint64 JsonFieldValueUInt64(Datum jsonDocument, const char *key); +static char * JsonFieldValueString(Datum jsonDocument, const char *key); +static ArrayType * PlacementUpdateListToJsonArray(List *placementUpdateList); +static bool ShardAllowedOnNode(uint64 shardId, WorkerNode *workerNode, void *context); +static float NodeCapacity(WorkerNode *workerNode, void *context); +static ShardCost GetShardCost(uint64 shardId, void *context); + + +PG_FUNCTION_INFO_V1(shard_placement_rebalance_array); +PG_FUNCTION_INFO_V1(shard_placement_replication_array); +PG_FUNCTION_INFO_V1(worker_node_responsive); + +typedef struct ShardPlacementTestInfo +{ + ShardPlacement *placement; + uint64 cost; + bool nextColocationGroup; +} ShardPlacementTestInfo; + +typedef struct WorkerTestInfo +{ + WorkerNode *node; + List *disallowedShardIds; + float capacity; +} WorkerTestInfo; + +typedef struct RebalancePlanContext +{ + List *workerTestInfoList; + List *shardPlacementTestInfoList; +} RebalancePlacementContext; + + +/* + * shard_placement_rebalance_array returns a list of operations which can make a + * cluster consisting of given shard placements and worker nodes balanced with + * respect to the given threshold. Threshold is a value between 0 and 1 which + * determines the evenness in shard distribution. When threshold is 0, then all + * nodes should have equal number of shards. As threshold increases, cluster's + * evenness requirements decrease, and we can rebalance the cluster using less + * operations. 
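+ *
+ * An illustrative invocation (all JSON key strings here are assumptions
+ * standing in for the FIELD_NAME_* constants and the optional keys parsed by
+ * the helpers below; the shapes of the documents are what matters):
+ *
+ *   SELECT unnest(shard_placement_rebalance_array(
+ *       ARRAY['{"worker_name": "a", "worker_port": 5432}',
+ *             '{"worker_name": "b", "worker_port": 5432, "capacity": 2}']::json[],
+ *       ARRAY['{"shardid": 1, "shardlength": 0, "shardstate": 1, "nodename": "a", "nodeport": 5432, "placementid": 1}',
+ *             '{"shardid": 2, "shardlength": 0, "shardstate": 1, "nodename": "a", "nodeport": 5432, "placementid": 2}']::json[],
+ *       0.1,   -- threshold
+ *       10,    -- max_shard_moves
+ *       false  -- drain_only
+ *   ));
+ *
+ * The return value is an array of json documents, one per planned placement
+ * update.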
+ */ +Datum +shard_placement_rebalance_array(PG_FUNCTION_ARGS) +{ + ArrayType *workerNodeJsonArray = PG_GETARG_ARRAYTYPE_P(0); + ArrayType *shardPlacementJsonArray = PG_GETARG_ARRAYTYPE_P(1); + float threshold = PG_GETARG_FLOAT4(2); + int32 maxShardMoves = PG_GETARG_INT32(3); + bool drainOnly = PG_GETARG_BOOL(4); + + List *workerNodeList = NIL; + List *shardPlacementListList = NIL; + List *shardPlacementList = NIL; + WorkerTestInfo *workerTestInfo = NULL; + ShardPlacementTestInfo *shardPlacementTestInfo = NULL; + RebalancePlanFunctions rebalancePlanFunctions = { + .shardAllowedOnNode = ShardAllowedOnNode, + .nodeCapacity = NodeCapacity, + .shardCost = GetShardCost, + }; + RebalancePlacementContext context = { + .workerTestInfoList = NULL, + }; + + context.workerTestInfoList = JsonArrayToWorkerTestInfoList(workerNodeJsonArray); + context.shardPlacementTestInfoList = JsonArrayToShardPlacementTestInfoList( + shardPlacementJsonArray); + + /* we don't need original arrays any more, so we free them to save memory */ + pfree(workerNodeJsonArray); + pfree(shardPlacementJsonArray); + + /* map workerTestInfoList to a list of its WorkerNodes */ + foreach_ptr(workerTestInfo, context.workerTestInfoList) + { + workerNodeList = lappend(workerNodeList, workerTestInfo->node); + } + + /* map shardPlacementTestInfoList to a list of list of its ShardPlacements */ + foreach_ptr(shardPlacementTestInfo, context.shardPlacementTestInfoList) + { + if (shardPlacementTestInfo->nextColocationGroup) + { + shardPlacementList = SortList(shardPlacementList, CompareShardPlacements); + shardPlacementListList = lappend(shardPlacementListList, shardPlacementList); + shardPlacementList = NIL; + } + shardPlacementList = lappend(shardPlacementList, + shardPlacementTestInfo->placement); + } + shardPlacementList = SortList(shardPlacementList, CompareShardPlacements); + shardPlacementListList = lappend(shardPlacementListList, shardPlacementList); + + rebalancePlanFunctions.context = &context; + + /* sort the lists to make the function more deterministic */ + workerNodeList = SortList(workerNodeList, CompareWorkerNodes); + + List *placementUpdateList = RebalancePlacementUpdates(workerNodeList, + shardPlacementListList, + threshold, + maxShardMoves, + drainOnly, + &rebalancePlanFunctions); + ArrayType *placementUpdateJsonArray = PlacementUpdateListToJsonArray( + placementUpdateList); + + PG_RETURN_ARRAYTYPE_P(placementUpdateJsonArray); +} + + +/* + * ShardAllowedOnNode is the function that checks if shard is allowed to be on + * a worker when running the shard rebalancer unit tests. + */ +static bool +ShardAllowedOnNode(uint64 shardId, WorkerNode *workerNode, void *voidContext) +{ + RebalancePlacementContext *context = voidContext; + WorkerTestInfo *workerTestInfo = NULL; + uint64 *disallowedShardIdPtr = NULL; + foreach_ptr(workerTestInfo, context->workerTestInfoList) + { + if (workerTestInfo->node == workerNode) + { + break; + } + } + Assert(workerTestInfo != NULL); + + foreach_ptr(disallowedShardIdPtr, workerTestInfo->disallowedShardIds) + { + if (shardId == *disallowedShardIdPtr) + { + return false; + } + } + return true; +} + + +/* + * NodeCapacity is the function that gets the capacity of a worker when running + * the shard rebalancer unit tests. 
+ */ +static float +NodeCapacity(WorkerNode *workerNode, void *voidContext) +{ + RebalancePlacementContext *context = voidContext; + WorkerTestInfo *workerTestInfo = NULL; + foreach_ptr(workerTestInfo, context->workerTestInfoList) + { + if (workerTestInfo->node == workerNode) + { + break; + } + } + Assert(workerTestInfo != NULL); + return workerTestInfo->capacity; +} + + +/* + * GetShardCost is the function that gets the ShardCost of a shard when running + * the shard rebalancer unit tests. + */ +static ShardCost +GetShardCost(uint64 shardId, void *voidContext) +{ + RebalancePlacementContext *context = voidContext; + ShardCost shardCost; + memset_struct_0(shardCost); + shardCost.shardId = shardId; + + ShardPlacementTestInfo *shardPlacementTestInfo = NULL; + foreach_ptr(shardPlacementTestInfo, context->shardPlacementTestInfoList) + { + if (shardPlacementTestInfo->placement->shardId == shardId) + { + break; + } + } + Assert(shardPlacementTestInfo != NULL); + shardCost.cost = shardPlacementTestInfo->cost; + return shardCost; +} + + +/* + * shard_placement_replication_array returns a list of operations which will + * replicate under-replicated shards in a cluster consisting of given shard + * placements and worker nodes. A shard is under-replicated if it has less + * active placements than the given shard replication factor. + */ +Datum +shard_placement_replication_array(PG_FUNCTION_ARGS) +{ + ArrayType *workerNodeJsonArray = PG_GETARG_ARRAYTYPE_P(0); + ArrayType *shardPlacementJsonArray = PG_GETARG_ARRAYTYPE_P(1); + uint32 shardReplicationFactor = PG_GETARG_INT32(2); + + List *workerNodeList = NIL; + List *shardPlacementList = NIL; + WorkerTestInfo *workerTestInfo = NULL; + ShardPlacementTestInfo *shardPlacementTestInfo = NULL; + + /* validate shard replication factor */ + if (shardReplicationFactor < SHARD_REPLICATION_FACTOR_MINIMUM || + shardReplicationFactor > SHARD_REPLICATION_FACTOR_MAXIMUM) + { + ereport(ERROR, (errmsg("invalid shard replication factor"), + errhint("Shard replication factor must be an integer " + "between %d and %d", SHARD_REPLICATION_FACTOR_MINIMUM, + SHARD_REPLICATION_FACTOR_MAXIMUM))); + } + + List *workerTestInfoList = JsonArrayToWorkerTestInfoList(workerNodeJsonArray); + List *shardPlacementTestInfoList = JsonArrayToShardPlacementTestInfoList( + shardPlacementJsonArray); + + /* we don't need original arrays any more, so we free them to save memory */ + pfree(workerNodeJsonArray); + pfree(shardPlacementJsonArray); + + foreach_ptr(workerTestInfo, workerTestInfoList) + { + workerNodeList = lappend(workerNodeList, workerTestInfo->node); + } + + foreach_ptr(shardPlacementTestInfo, shardPlacementTestInfoList) + { + shardPlacementList = lappend(shardPlacementList, + shardPlacementTestInfo->placement); + } + + /* sort the lists to make the function more deterministic */ + workerNodeList = SortList(workerNodeList, CompareWorkerNodes); + shardPlacementList = SortList(shardPlacementList, CompareShardPlacements); + + List *placementUpdateList = ReplicationPlacementUpdates(workerNodeList, + shardPlacementList, + shardReplicationFactor); + ArrayType *placementUpdateJsonArray = PlacementUpdateListToJsonArray( + placementUpdateList); + + PG_RETURN_ARRAYTYPE_P(placementUpdateJsonArray); +} + + +/* + * JsonArrayToShardPlacementTestInfoList converts the given shard placement json array + * to a list of ShardPlacement structs. 
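+ *
+ * Each array element is a json document carrying the fields read below; with
+ * assumed key spellings for the FIELD_NAME_* constants, a single element
+ * could look like:
+ *
+ *   {"shardid": 1, "shardlength": 0, "shardstate": 1, "nodename": "a",
+ *    "nodeport": 5432, "placementid": 1, "cost": 20}
+ *
+ * The "cost" and "next_colocation" keys are optional and default to 1 and
+ * false respectively, as handled further down.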
*/ +static List * +JsonArrayToShardPlacementTestInfoList(ArrayType *shardPlacementJsonArrayObject) +{ + List *shardPlacementTestInfoList = NIL; + Datum *shardPlacementJsonArray = NULL; + int placementCount = 0; + + /* + * Memory is not automatically freed when we call UDFs using DirectFunctionCall. + * We call these functions in functionCallContext, so we can free the memory + * once they return. + */ + MemoryContext functionCallContext = AllocSetContextCreate(CurrentMemoryContext, + "Function Call Context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + + deconstruct_array(shardPlacementJsonArrayObject, JSONOID, -1, false, 'i', + &shardPlacementJsonArray, NULL, &placementCount); + + for (int placementIndex = 0; placementIndex < placementCount; placementIndex++) + { + Datum placementJson = shardPlacementJsonArray[placementIndex]; + ShardPlacementTestInfo *placementTestInfo = palloc0( + sizeof(ShardPlacementTestInfo)); + + MemoryContext oldContext = MemoryContextSwitchTo(functionCallContext); + + uint64 shardId = JsonFieldValueUInt64(placementJson, FIELD_NAME_SHARD_ID); + uint64 shardLength = JsonFieldValueUInt64(placementJson, FIELD_NAME_SHARD_LENGTH); + int shardState = JsonFieldValueUInt32(placementJson, FIELD_NAME_SHARD_STATE); + char *nodeName = JsonFieldValueString(placementJson, FIELD_NAME_NODE_NAME); + int nodePort = JsonFieldValueUInt32(placementJson, FIELD_NAME_NODE_PORT); + uint64 placementId = JsonFieldValueUInt64(placementJson, FIELD_NAME_PLACEMENT_ID); + + MemoryContextSwitchTo(oldContext); + + placementTestInfo->placement = palloc0(sizeof(ShardPlacement)); + placementTestInfo->placement->shardId = shardId; + placementTestInfo->placement->shardLength = shardLength; + placementTestInfo->placement->shardState = shardState; + placementTestInfo->placement->nodeName = pstrdup(nodeName); + placementTestInfo->placement->nodePort = nodePort; + placementTestInfo->placement->placementId = placementId; + + /* + * We have copied whatever we needed from the UDF calls, so we can free + * the memory allocated by them. + */ + MemoryContextReset(functionCallContext); + + + shardPlacementTestInfoList = lappend(shardPlacementTestInfoList, + placementTestInfo); + + PG_TRY(); + { + placementTestInfo->cost = JsonFieldValueUInt64(placementJson, + "cost"); + } + PG_CATCH(); + { + /* Ignore errors about the key not being found; in that case the cost defaults to 1 */ + FlushErrorState(); + MemoryContextSwitchTo(oldContext); + placementTestInfo->cost = 1; + } + PG_END_TRY(); + + PG_TRY(); + { + placementTestInfo->nextColocationGroup = JsonFieldValueBool( + placementJson, "next_colocation"); + } + PG_CATCH(); + { + /* Ignore errors about the key not being found; in that case next_colocation stays false */ + FlushErrorState(); + MemoryContextSwitchTo(oldContext); + } + PG_END_TRY(); + } + + pfree(shardPlacementJsonArray); + + return shardPlacementTestInfoList; +} + + +/* + * JsonArrayToWorkerTestInfoList converts the given worker node json array to a list + * of WorkerTestInfo structs.
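+ * + * For illustration only (hypothetical test input): an array element such as + * '{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "4,6", "capacity": 2}' + * yields a WorkerTestInfo whose node refuses shards 4 and 6 and has capacity 2; both the + * "capacity" and "disallowed_shards" keys are optional and default to 1 and to allowing + * every shard, respectively.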
+ */ +static List * +JsonArrayToWorkerTestInfoList(ArrayType *workerNodeJsonArrayObject) +{ + List *workerTestInfoList = NIL; + Datum *workerNodeJsonArray = NULL; + int workerNodeCount = 0; + + deconstruct_array(workerNodeJsonArrayObject, JSONOID, -1, false, 'i', + &workerNodeJsonArray, NULL, &workerNodeCount); + + for (int workerNodeIndex = 0; workerNodeIndex < workerNodeCount; workerNodeIndex++) + { + Datum workerNodeJson = workerNodeJsonArray[workerNodeIndex]; + char *workerName = JsonFieldValueString(workerNodeJson, FIELD_NAME_WORKER_NAME); + uint32 workerPort = JsonFieldValueUInt32(workerNodeJson, + FIELD_NAME_WORKER_PORT); + List *disallowedShardIdList = NIL; + char *disallowedShardsString = NULL; + MemoryContext savedContext = CurrentMemoryContext; + + + WorkerTestInfo *workerTestInfo = palloc0(sizeof(WorkerTestInfo)); + WorkerNode *workerNode = palloc0(sizeof(WorkerNode)); + strncpy_s(workerNode->workerName, sizeof(workerNode->workerName), workerName, + WORKER_LENGTH); + workerNode->nodeId = workerNodeIndex; + workerNode->workerPort = workerPort; + workerNode->shouldHaveShards = true; + workerNode->nodeRole = PrimaryNodeRoleId(); + workerTestInfo->node = workerNode; + + PG_TRY(); + { + workerTestInfo->capacity = JsonFieldValueUInt64(workerNodeJson, + "capacity"); + } + PG_CATCH(); + { + /* Ignore errors about not being able to find the key in that case capacity is 1 */ + FlushErrorState(); + MemoryContextSwitchTo(savedContext); + workerTestInfo->capacity = 1; + } + PG_END_TRY(); + + + workerTestInfoList = lappend(workerTestInfoList, workerTestInfo); + PG_TRY(); + { + disallowedShardsString = JsonFieldValueString(workerNodeJson, + "disallowed_shards"); + } + PG_CATCH(); + { + /* Ignore errors about not being able to find the key in that case all shards are allowed */ + FlushErrorState(); + MemoryContextSwitchTo(savedContext); + disallowedShardsString = NULL; + } + PG_END_TRY(); + + if (disallowedShardsString == NULL) + { + continue; + } + + char *strtokPosition = NULL; + char *shardString = strtok_r(disallowedShardsString, ",", &strtokPosition); + while (shardString != NULL) + { + uint64 *shardInt = palloc0(sizeof(uint64)); + *shardInt = SafeStringToUint64(shardString); + disallowedShardIdList = lappend(disallowedShardIdList, shardInt); + shardString = strtok_r(NULL, ",", &strtokPosition); + } + workerTestInfo->disallowedShardIds = disallowedShardIdList; + } + + return workerTestInfoList; +} + + +/* + * JsonFieldValueBool gets the value of the given key in the given json + * document and returns it as a boolean. + */ +static bool +JsonFieldValueBool(Datum jsonDocument, const char *key) +{ + char *valueString = JsonFieldValueString(jsonDocument, key); + Datum valueBoolDatum = DirectFunctionCall1(boolin, CStringGetDatum(valueString)); + + return DatumGetBool(valueBoolDatum); +} + + +/* + * JsonFieldValueUInt32 gets the value of the given key in the given json + * document and returns it as an unsigned 32-bit integer. + */ +static uint32 +JsonFieldValueUInt32(Datum jsonDocument, const char *key) +{ + char *valueString = JsonFieldValueString(jsonDocument, key); + Datum valueInt4Datum = DirectFunctionCall1(int4in, CStringGetDatum(valueString)); + + uint32 valueUInt32 = DatumGetInt32(valueInt4Datum); + return valueUInt32; +} + + +/* + * JsonFieldValueUInt64 gets the value of the given key in the given json + * document and returns it as an unsigned 64-bit integer. 
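+ * + * For illustration only: given the json document '{"shardid": 102008}' and the key + * "shardid", this returns 102008. The value is parsed through int8in, so a non-numeric + * or out-of-range value raises an error rather than being silently ignored.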
*/ +static uint64 +JsonFieldValueUInt64(Datum jsonDocument, const char *key) +{ + char *valueString = JsonFieldValueString(jsonDocument, key); + Datum valueInt8Datum = DirectFunctionCall1(int8in, CStringGetDatum(valueString)); + + uint64 valueUInt64 = DatumGetInt64(valueInt8Datum); + return valueUInt64; +} + + +/* + * JsonFieldValueString gets the value of the given key in the given json + * document and returns it as a string. + */ +static char * +JsonFieldValueString(Datum jsonDocument, const char *key) +{ + Datum valueTextDatum = 0; + bool valueFetched = false; + Datum keyDatum = PointerGetDatum(cstring_to_text(key)); + + /* + * json_object_field_text can return NULL, but DirectFunctionCall2 raises + * cryptic errors when the function returns NULL. We catch this error and + * raise a more meaningful error. + */ + PG_TRY(); + { + valueTextDatum = DirectFunctionCall2(json_object_field_text, + jsonDocument, keyDatum); + valueFetched = true; + } + PG_CATCH(); + { + FlushErrorState(); + valueFetched = false; + } + PG_END_TRY(); + + if (!valueFetched) + { + ereport(ERROR, (errmsg("could not get value for '%s'", key))); + } + + char *valueString = text_to_cstring(DatumGetTextP(valueTextDatum)); + return valueString; +} + + +/* + * PlacementUpdateListToJsonArray converts the given list of placement update + * data to a json array. + */ +static ArrayType * +PlacementUpdateListToJsonArray(List *placementUpdateList) +{ + ListCell *placementUpdateCell = NULL; + int placementUpdateIndex = 0; + + int placementUpdateCount = list_length(placementUpdateList); + Datum *placementUpdateJsonArray = palloc0(placementUpdateCount * sizeof(Datum)); + + foreach(placementUpdateCell, placementUpdateList) + { + PlacementUpdateEvent *placementUpdateEvent = lfirst(placementUpdateCell); + WorkerNode *sourceNode = placementUpdateEvent->sourceNode; + WorkerNode *targetNode = placementUpdateEvent->targetNode; + + StringInfo escapedSourceName = makeStringInfo(); + escape_json(escapedSourceName, sourceNode->workerName); + + StringInfo escapedTargetName = makeStringInfo(); + escape_json(escapedTargetName, targetNode->workerName); + + StringInfo placementUpdateJsonString = makeStringInfo(); + appendStringInfo(placementUpdateJsonString, PLACEMENT_UPDATE_JSON_FORMAT, + placementUpdateEvent->updateType, placementUpdateEvent->shardId, + escapedSourceName->data, sourceNode->workerPort, + escapedTargetName->data, targetNode->workerPort); + + Datum placementUpdateStringDatum = CStringGetDatum( + placementUpdateJsonString->data); + Datum placementUpdateJsonDatum = DirectFunctionCall1(json_in, + placementUpdateStringDatum); + + placementUpdateJsonArray[placementUpdateIndex] = placementUpdateJsonDatum; + placementUpdateIndex++; + } + + ArrayType *placementUpdateObject = construct_array(placementUpdateJsonArray, + placementUpdateCount, JSONOID, + -1, false, 'i'); + + return placementUpdateObject; +} + + +/* + * worker_node_responsive returns true if the given worker node is responsive. + * Otherwise, it returns false.
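+ * + * For illustration only, and assuming the UDF is exposed under the same name at the SQL + * level: SELECT worker_node_responsive('localhost', 57637) returns true only when a new + * connection to that node can be established successfully (hostname and port here are + * placeholders).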
+ */ +Datum +worker_node_responsive(PG_FUNCTION_ARGS) +{ + text *workerNameText = PG_GETARG_TEXT_PP(0); + uint32 workerPort = PG_GETARG_INT32(1); + int connectionFlag = FORCE_NEW_CONNECTION; + + bool workerNodeResponsive = false; + const char *workerName = text_to_cstring(workerNameText); + + MultiConnection *connection = GetNodeConnection(connectionFlag, workerName, + workerPort); + + if (connection != NULL && connection->pgConn != NULL) + { + if (PQstatus(connection->pgConn) == CONNECTION_OK) + { + workerNodeResponsive = true; + } + + CloseConnection(connection); + } + + PG_RETURN_BOOL(workerNodeResponsive); +} diff --git a/src/backend/distributed/utils/hash_helpers.c b/src/backend/distributed/utils/hash_helpers.c index 0ed090dca..6bbf14938 100644 --- a/src/backend/distributed/utils/hash_helpers.c +++ b/src/backend/distributed/utils/hash_helpers.c @@ -32,3 +32,18 @@ hash_delete_all(HTAB *htab) Assert(found); } } + + +/* + * foreach_htab_cleanup cleans up the hash iteration state after the iteration + * is done. This is only needed when break statements are present in the + * foreach block. + */ +void +foreach_htab_cleanup(void *var, HASH_SEQ_STATUS *status) +{ + if ((var) != NULL) + { + hash_seq_term(status); + } +} diff --git a/src/backend/distributed/utils/maintenanced.c b/src/backend/distributed/utils/maintenanced.c index 74ac7fbe5..9b329de12 100644 --- a/src/backend/distributed/utils/maintenanced.c +++ b/src/backend/distributed/utils/maintenanced.c @@ -38,6 +38,7 @@ #include "distributed/coordinator_protocol.h" #include "distributed/metadata_cache.h" #include "distributed/metadata_sync.h" +#include "distributed/shard_cleaner.h" #include "distributed/statistics_collection.h" #include "distributed/transaction_recovery.h" #include "distributed/version_compat.h" @@ -92,6 +93,7 @@ typedef struct MaintenanceDaemonDBData /* config variable for distributed deadlock detection timeout */ double DistributedDeadlockDetectionTimeoutFactor = 2.0; int Recover2PCInterval = 60000; +int DeferShardDeleteInterval = 60000; /* config variables for metadata sync timeout */ int MetadataSyncInterval = 60000; @@ -289,6 +291,7 @@ CitusMaintenanceDaemonMain(Datum main_arg) bool retryStatsCollection USED_WITH_LIBCURL_ONLY = false; ErrorContextCallback errorCallback; TimestampTz lastRecoveryTime = 0; + TimestampTz lastShardCleanTime = 0; TimestampTz nextMetadataSyncTime = 0; /* @@ -586,6 +589,45 @@ CitusMaintenanceDaemonMain(Datum main_arg) timeout = Min(timeout, deadlockTimeout); } + if (!RecoveryInProgress() && DeferShardDeleteInterval > 0 && + TimestampDifferenceExceeds(lastShardCleanTime, GetCurrentTimestamp(), + DeferShardDeleteInterval)) + { + int numberOfDroppedShards = 0; + + InvalidateMetadataSystemCache(); + StartTransactionCommand(); + + if (!LockCitusExtension()) + { + ereport(DEBUG1, (errmsg( + "could not lock the citus extension, skipping shard cleaning"))); + } + else if (CheckCitusVersion(DEBUG1) && CitusHasBeenLoaded()) + { + /* + * Record last shard clean time at start to ensure we run once per + * DeferShardDeleteInterval. + */ + lastShardCleanTime = GetCurrentTimestamp(); + + numberOfDroppedShards = TryDropMarkedShards(); + } + + CommitTransactionCommand(); + + if (numberOfDroppedShards > 0) + { + ereport(LOG, (errmsg("maintenance daemon dropped %d distributed " + "shards previously marked to be removed", + numberOfDroppedShards))); + } + + /* make sure we don't wait too long */ + timeout = Min(timeout, DeferShardDeleteInterval); + } + + /* * Wait until timeout, or until somebody wakes us up. 
Also cast the timeout to * integer where we've calculated it using double for not losing the precision. diff --git a/src/include/distributed/coordinator_protocol.h b/src/include/distributed/coordinator_protocol.h index f871db5b2..2eb955564 100644 --- a/src/include/distributed/coordinator_protocol.h +++ b/src/include/distributed/coordinator_protocol.h @@ -253,6 +253,11 @@ extern ShardPlacement * SearchShardPlacementInList(List *shardPlacementList, extern ShardPlacement * SearchShardPlacementInListOrError(List *shardPlacementList, const char *nodeName, uint32 nodePort); +extern void ErrorIfMoveCitusLocalTable(Oid relationId); extern char LookupShardTransferMode(Oid shardReplicationModeOid); +extern void BlockWritesToShardList(List *shardList); +extern List * WorkerApplyShardDDLCommandList(List *ddlCommandList, int64 shardId); +extern List * GetForeignConstraintCommandsToReferenceTable(ShardInterval *shardInterval); + #endif /* COORDINATOR_PROTOCOL_H */ diff --git a/src/include/distributed/hash_helpers.h b/src/include/distributed/hash_helpers.h index b25cd1cd8..5d329e423 100644 --- a/src/include/distributed/hash_helpers.h +++ b/src/include/distributed/hash_helpers.h @@ -48,4 +48,6 @@ extern void hash_delete_all(HTAB *htab); (var) != NULL; \ (var) = hash_seq_search(status)) +extern void foreach_htab_cleanup(void *var, HASH_SEQ_STATUS *status); + #endif diff --git a/src/include/distributed/metadata_cache.h b/src/include/distributed/metadata_cache.h index da5d45d84..ed2790654 100644 --- a/src/include/distributed/metadata_cache.h +++ b/src/include/distributed/metadata_cache.h @@ -159,6 +159,7 @@ extern int32 GetLocalGroupId(void); extern void CitusTableCacheFlushInvalidatedEntries(void); extern Oid LookupShardRelationFromCatalog(int64 shardId, bool missing_ok); extern List * ShardPlacementList(uint64 shardId); +extern bool ShardExists(int64 shardId); extern void CitusInvalidateRelcacheByRelid(Oid relationId); extern void CitusInvalidateRelcacheByShardId(int64 shardId); extern void InvalidateForeignKeyGraph(void); @@ -210,6 +211,7 @@ extern Oid DistPartitionRelationId(void); extern Oid DistShardRelationId(void); extern Oid DistPlacementRelationId(void); extern Oid DistNodeRelationId(void); +extern Oid DistRebalanceStrategyRelationId(void); extern Oid DistLocalGroupIdRelationId(void); extern Oid DistObjectRelationId(void); extern Oid DistEnabledCustomAggregatesId(void); diff --git a/src/include/distributed/metadata_utility.h b/src/include/distributed/metadata_utility.h index 32535421d..fab9d2f0d 100644 --- a/src/include/distributed/metadata_utility.h +++ b/src/include/distributed/metadata_utility.h @@ -110,6 +110,7 @@ extern List * ActiveShardPlacementList(uint64 shardId); extern ShardPlacement * ActiveShardPlacement(uint64 shardId, bool missingOk); extern List * BuildShardPlacementList(ShardInterval *shardInterval); extern List * AllShardPlacementsOnNodeGroup(int32 groupId); +extern List * AllShardPlacementsWithShardPlacementState(ShardState shardState); extern List * GroupShardPlacementsForTableOnGroup(Oid relationId, int32 groupId); extern StringInfo GenerateSizeQueryOnMultiplePlacements(List *shardIntervalList, char *sizeQuery); diff --git a/src/include/distributed/shard_cleaner.h b/src/include/distributed/shard_cleaner.h new file mode 100644 index 000000000..caa739d7e --- /dev/null +++ b/src/include/distributed/shard_cleaner.h @@ -0,0 +1,20 @@ +/*------------------------------------------------------------------------- + * + * shard_cleaner.h + * Type and function declarations used in 
background shard cleaning + * + * Copyright (c) 2018, Citus Data, Inc. + * + *------------------------------------------------------------------------- + */ + +#ifndef CITUS_SHARD_CLEANER_H +#define CITUS_SHARD_CLEANER_H + +/* GUC to configure deferred shard deletion */ +extern int DeferShardDeleteInterval; +extern bool DeferShardDeleteOnMove; + +extern int TryDropMarkedShards(void); + +#endif /*CITUS_SHARD_CLEANER_H */ diff --git a/src/include/distributed/shard_rebalancer.h b/src/include/distributed/shard_rebalancer.h new file mode 100644 index 000000000..7e0716cb5 --- /dev/null +++ b/src/include/distributed/shard_rebalancer.h @@ -0,0 +1,159 @@ +/*------------------------------------------------------------------------- + * + * shard_rebalancer.h + * + * Type and function declarations for the shard rebalancer tool. + * + * Copyright (c), Citus Data, Inc. + * + *------------------------------------------------------------------------- + */ + +#ifndef SHARD_REBALANCER_H +#define SHARD_REBALANCER_H + +#include "postgres.h" + +#include "fmgr.h" +#include "nodes/pg_list.h" +#include "distributed/coordinator_protocol.h" +#include "distributed/worker_manager.h" + + +/* Limits for function parameters */ +#define SHARD_REPLICATION_FACTOR_MINIMUM 1 +#define SHARD_REPLICATION_FACTOR_MAXIMUM 100 + +/* Definitions for metadata update commands */ +#define INSERT_SHARD_PLACEMENT_COMMAND "INSERT INTO pg_dist_shard_placement VALUES(" \ + UINT64_FORMAT ", %d, " UINT64_FORMAT ", '%s', %d)" +#define DELETE_SHARD_PLACEMENT_COMMAND "DELETE FROM pg_dist_shard_placement WHERE " \ + "shardid=" UINT64_FORMAT \ + " AND nodename='%s' AND nodeport=%d" + +/* + * Definitions for shard placement json field names. These names should match + * the column names in pg_dist_shard_placement. + */ +#define FIELD_NAME_SHARD_ID "shardid" +#define FIELD_NAME_SHARD_LENGTH "shardlength" +#define FIELD_NAME_SHARD_STATE "shardstate" +#define FIELD_NAME_NODE_NAME "nodename" +#define FIELD_NAME_NODE_PORT "nodeport" +#define FIELD_NAME_PLACEMENT_ID "placementid" + +/* + * Definitions for worker node json field names. These names should match the + * column names in master_get_active_worker_nodes(). + */ +#define FIELD_NAME_WORKER_NAME "node_name" +#define FIELD_NAME_WORKER_PORT "node_port" + +/* Definitions for placement update json field names */ +#define FIELD_NAME_UPDATE_TYPE "updatetype" +#define FIELD_NAME_SOURCE_NAME "sourcename" +#define FIELD_NAME_SOURCE_PORT "sourceport" +#define FIELD_NAME_TARGET_NAME "targetname" +#define FIELD_NAME_TARGET_PORT "targetport" + +/* *INDENT-OFF* */ +/* Definition for format of placement update json document */ +#define PLACEMENT_UPDATE_JSON_FORMAT \ +"{"\ + "\"" FIELD_NAME_UPDATE_TYPE "\":%d,"\ + "\"" FIELD_NAME_SHARD_ID "\":" UINT64_FORMAT ","\ + "\"" FIELD_NAME_SOURCE_NAME "\":%s,"\ + "\"" FIELD_NAME_SOURCE_PORT "\":%d,"\ + "\"" FIELD_NAME_TARGET_NAME "\":%s,"\ + "\"" FIELD_NAME_TARGET_PORT "\":%d"\ +"}" + +/* *INDENT-ON* */ + +#define REBALANCE_ACTIVITY_MAGIC_NUMBER 1337 +#define REBALANCE_PROGRESS_ERROR -1 +#define REBALANCE_PROGRESS_WAITING 0 +#define REBALANCE_PROGRESS_MOVING 1 +#define REBALANCE_PROGRESS_MOVED 2 + +/* Enumeration that defines different placement update types */ +typedef enum +{ + PLACEMENT_UPDATE_INVALID_FIRST = 0, + PLACEMENT_UPDATE_MOVE = 1, + PLACEMENT_UPDATE_COPY = 2 +} PlacementUpdateType; + + +/* + * PlacementUpdateEvent represents a logical unit of work that copies or + * moves a shard placement. 
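+ * + * For illustration only (hypothetical values): serialized with the + * PLACEMENT_UPDATE_JSON_FORMAT defined above, a move of shard 4 from hostname1:5432 to + * hostname2:5432 becomes {"updatetype":1,"shardid":4,"sourcename":"hostname1", + * "sourceport":5432,"targetname":"hostname2","targetport":5432}.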
+ */ +typedef struct PlacementUpdateEvent +{ + PlacementUpdateType updateType; + uint64 shardId; + WorkerNode *sourceNode; + WorkerNode *targetNode; +} PlacementUpdateEvent; + + +typedef struct PlacementUpdateEventProgress +{ + uint64 shardId; + char sourceName[255]; + int sourcePort; + char targetName[255]; + int targetPort; + uint64 shardSize; + uint64 progress; +} PlacementUpdateEventProgress; + +typedef struct NodeFillState +{ + WorkerNode *node; + float4 capacity; + float4 totalCost; + float4 utilization; + List *shardCostListDesc; +} NodeFillState; + +typedef struct ShardCost +{ + uint64 shardId; + float4 cost; +} ShardCost; + +typedef struct DisallowedPlacement +{ + ShardCost *shardCost; + NodeFillState *fillState; +} DisallowedPlacement; + +typedef struct RebalancePlanFunctions +{ + bool (*shardAllowedOnNode)(uint64 shardId, WorkerNode *workerNode, void *context); + float4 (*nodeCapacity)(WorkerNode *workerNode, void *context); + ShardCost (*shardCost)(uint64 shardId, void *context); + void *context; +} RebalancePlanFunctions; + +/* External function declarations */ +extern Datum shard_placement_rebalance_array(PG_FUNCTION_ARGS); +extern Datum shard_placement_replication_array(PG_FUNCTION_ARGS); +extern Datum worker_node_responsive(PG_FUNCTION_ARGS); +extern Datum update_shard_placement(PG_FUNCTION_ARGS); +extern Datum init_rebalance_monitor(PG_FUNCTION_ARGS); +extern Datum finalize_rebalance_monitor(PG_FUNCTION_ARGS); +extern Datum get_rebalance_progress(PG_FUNCTION_ARGS); + +extern List * RebalancePlacementUpdates(List *workerNodeList, List *shardPlacementList, + double threshold, + int32 maxShardMoves, + bool drainOnly, + RebalancePlanFunctions *rebalancePlanFunctions); +extern List * ReplicationPlacementUpdates(List *workerNodeList, List *shardPlacementList, + int shardReplicationFactor); + + +#endif /* SHARD_REBALANCER_H */ diff --git a/src/test/regress/Makefile b/src/test/regress/Makefile index c9c171afc..d259da950 100644 --- a/src/test/regress/Makefile +++ b/src/test/regress/Makefile @@ -42,7 +42,7 @@ output_files := $(patsubst $(citus_abs_srcdir)/output/%.source,expected/%.out, $ # intermediate, for muscle memory backward compatibility. 
check: check-full # check-full triggers all tests that ought to be run routinely -check-full: check-multi check-multi-mx check-worker check-follower-cluster check-failure +check-full: check-multi check-multi-mx check-worker check-operations check-follower-cluster check-failure ISOLATION_DEPDIR=.deps/isolation @@ -161,6 +161,10 @@ check-follower-cluster: all $(pg_regress_multi_check) --load-extension=citus --follower-cluster \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/multi_follower_schedule $(EXTRA_TESTS) +check-operations: all + $(pg_regress_multi_check) --load-extension=citus \ + -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/operations_schedule $(EXTRA_TESTS) + check-columnar: $(pg_regress_multi_check) --load-extension=citus \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/columnar_am_schedule $(EXTRA_TESTS) diff --git a/src/test/regress/expected/foreign_key_to_reference_shard_rebalance.out b/src/test/regress/expected/foreign_key_to_reference_shard_rebalance.out new file mode 100644 index 000000000..7bffe0b6f --- /dev/null +++ b/src/test/regress/expected/foreign_key_to_reference_shard_rebalance.out @@ -0,0 +1,193 @@ +-- +-- FOREIGN_KEY_TO_REFERENCE_SHARD_REBALANCE +-- +SET citus.next_shard_id TO 15000000; +CREATE SCHEMA fkey_to_reference_shard_rebalance; +SET search_path to fkey_to_reference_shard_rebalance; +SET citus.shard_replication_factor TO 1; +SET citus.shard_count to 8; +CREATE TYPE foreign_details AS (name text, relid text, refd_relid text); +CREATE VIEW table_fkeys_in_workers AS +SELECT +(json_populate_record(NULL::foreign_details, + json_array_elements_text((run_command_on_workers( $$ + SELECT + COALESCE(json_agg(row_to_json(d)), '[]'::json) + FROM + ( + SELECT + distinct name, + relid::regclass::text, + refd_relid::regclass::text + FROM + table_fkey_cols + ) + d $$ )).RESULT::json )::json )).* ; +-- check if master_move_shard_placement with logical replication creates the +-- foreign constraints properly after moving the shard +CREATE TABLE referenced_table(test_column int, test_column2 int UNIQUE, PRIMARY KEY(test_column)); +CREATE TABLE referencing_table(id int PRIMARY KEY, ref_id int, FOREIGN KEY (id) REFERENCES referenced_table(test_column) ON DELETE CASCADE); +CREATE TABLE referencing_table2(id int, ref_id int, FOREIGN KEY (ref_id) REFERENCES referenced_table(test_column2) ON DELETE CASCADE, FOREIGN KEY (id) REFERENCES referencing_table(id) ON DELETE CASCADE); +SELECT create_reference_table('referenced_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('referencing_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('referencing_table2', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO referenced_table SELECT i,i FROM generate_series (0, 100) i; +INSERT INTO referencing_table SELECT i,i FROM generate_series (0, 100) i; +INSERT INTO referencing_table2 SELECT i,i FROM generate_series (0, 100) i; +SELECT master_move_shard_placement(15000009, 'localhost', :worker_1_port, 'localhost', :worker_2_port); + master_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM referencing_table2; + count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT * 
FROM table_fkeys_in_workers WHERE relid LIKE 'fkey_to_reference_shard_rebalance.%' AND refd_relid LIKE 'fkey_to_reference_shard_rebalance.%' ORDER BY 1,2,3; + name | relid | refd_relid +--------------------------------------------------------------------- + referencing_table2_id_fkey_15000009 | fkey_to_reference_shard_rebalance.referencing_table2_15000009 | fkey_to_reference_shard_rebalance.referencing_table_15000001 + referencing_table2_id_fkey_15000010 | fkey_to_reference_shard_rebalance.referencing_table2_15000010 | fkey_to_reference_shard_rebalance.referencing_table_15000002 + referencing_table2_id_fkey_15000011 | fkey_to_reference_shard_rebalance.referencing_table2_15000011 | fkey_to_reference_shard_rebalance.referencing_table_15000003 + referencing_table2_id_fkey_15000012 | fkey_to_reference_shard_rebalance.referencing_table2_15000012 | fkey_to_reference_shard_rebalance.referencing_table_15000004 + referencing_table2_id_fkey_15000013 | fkey_to_reference_shard_rebalance.referencing_table2_15000013 | fkey_to_reference_shard_rebalance.referencing_table_15000005 + referencing_table2_id_fkey_15000014 | fkey_to_reference_shard_rebalance.referencing_table2_15000014 | fkey_to_reference_shard_rebalance.referencing_table_15000006 + referencing_table2_id_fkey_15000015 | fkey_to_reference_shard_rebalance.referencing_table2_15000015 | fkey_to_reference_shard_rebalance.referencing_table_15000007 + referencing_table2_id_fkey_15000016 | fkey_to_reference_shard_rebalance.referencing_table2_15000016 | fkey_to_reference_shard_rebalance.referencing_table_15000008 + referencing_table2_ref_id_fkey_15000009 | fkey_to_reference_shard_rebalance.referencing_table2_15000009 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table2_ref_id_fkey_15000010 | fkey_to_reference_shard_rebalance.referencing_table2_15000010 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table2_ref_id_fkey_15000011 | fkey_to_reference_shard_rebalance.referencing_table2_15000011 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table2_ref_id_fkey_15000012 | fkey_to_reference_shard_rebalance.referencing_table2_15000012 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table2_ref_id_fkey_15000013 | fkey_to_reference_shard_rebalance.referencing_table2_15000013 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table2_ref_id_fkey_15000014 | fkey_to_reference_shard_rebalance.referencing_table2_15000014 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table2_ref_id_fkey_15000015 | fkey_to_reference_shard_rebalance.referencing_table2_15000015 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table2_ref_id_fkey_15000016 | fkey_to_reference_shard_rebalance.referencing_table2_15000016 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table_id_fkey_15000001 | fkey_to_reference_shard_rebalance.referencing_table_15000001 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table_id_fkey_15000002 | fkey_to_reference_shard_rebalance.referencing_table_15000002 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table_id_fkey_15000003 | fkey_to_reference_shard_rebalance.referencing_table_15000003 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table_id_fkey_15000004 | fkey_to_reference_shard_rebalance.referencing_table_15000004 | 
fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table_id_fkey_15000005 | fkey_to_reference_shard_rebalance.referencing_table_15000005 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table_id_fkey_15000006 | fkey_to_reference_shard_rebalance.referencing_table_15000006 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table_id_fkey_15000007 | fkey_to_reference_shard_rebalance.referencing_table_15000007 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table_id_fkey_15000008 | fkey_to_reference_shard_rebalance.referencing_table_15000008 | fkey_to_reference_shard_rebalance.referenced_table_15000000 +(24 rows) + +SELECT master_move_shard_placement(15000009, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes'); + master_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM referencing_table2; + count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT * FROM table_fkeys_in_workers WHERE relid LIKE 'fkey_to_reference_shard_rebalance.%' AND refd_relid LIKE 'fkey_to_reference_shard_rebalance.%' ORDER BY 1,2,3; + name | relid | refd_relid +--------------------------------------------------------------------- + referencing_table2_id_fkey_15000009 | fkey_to_reference_shard_rebalance.referencing_table2_15000009 | fkey_to_reference_shard_rebalance.referencing_table_15000001 + referencing_table2_id_fkey_15000010 | fkey_to_reference_shard_rebalance.referencing_table2_15000010 | fkey_to_reference_shard_rebalance.referencing_table_15000002 + referencing_table2_id_fkey_15000011 | fkey_to_reference_shard_rebalance.referencing_table2_15000011 | fkey_to_reference_shard_rebalance.referencing_table_15000003 + referencing_table2_id_fkey_15000012 | fkey_to_reference_shard_rebalance.referencing_table2_15000012 | fkey_to_reference_shard_rebalance.referencing_table_15000004 + referencing_table2_id_fkey_15000013 | fkey_to_reference_shard_rebalance.referencing_table2_15000013 | fkey_to_reference_shard_rebalance.referencing_table_15000005 + referencing_table2_id_fkey_15000014 | fkey_to_reference_shard_rebalance.referencing_table2_15000014 | fkey_to_reference_shard_rebalance.referencing_table_15000006 + referencing_table2_id_fkey_15000015 | fkey_to_reference_shard_rebalance.referencing_table2_15000015 | fkey_to_reference_shard_rebalance.referencing_table_15000007 + referencing_table2_id_fkey_15000016 | fkey_to_reference_shard_rebalance.referencing_table2_15000016 | fkey_to_reference_shard_rebalance.referencing_table_15000008 + referencing_table2_ref_id_fkey_15000009 | fkey_to_reference_shard_rebalance.referencing_table2_15000009 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table2_ref_id_fkey_15000010 | fkey_to_reference_shard_rebalance.referencing_table2_15000010 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table2_ref_id_fkey_15000011 | fkey_to_reference_shard_rebalance.referencing_table2_15000011 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table2_ref_id_fkey_15000012 | fkey_to_reference_shard_rebalance.referencing_table2_15000012 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table2_ref_id_fkey_15000013 | fkey_to_reference_shard_rebalance.referencing_table2_15000013 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + 
referencing_table2_ref_id_fkey_15000014 | fkey_to_reference_shard_rebalance.referencing_table2_15000014 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table2_ref_id_fkey_15000015 | fkey_to_reference_shard_rebalance.referencing_table2_15000015 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table2_ref_id_fkey_15000016 | fkey_to_reference_shard_rebalance.referencing_table2_15000016 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table_id_fkey_15000001 | fkey_to_reference_shard_rebalance.referencing_table_15000001 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table_id_fkey_15000002 | fkey_to_reference_shard_rebalance.referencing_table_15000002 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table_id_fkey_15000003 | fkey_to_reference_shard_rebalance.referencing_table_15000003 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table_id_fkey_15000004 | fkey_to_reference_shard_rebalance.referencing_table_15000004 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table_id_fkey_15000005 | fkey_to_reference_shard_rebalance.referencing_table_15000005 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table_id_fkey_15000006 | fkey_to_reference_shard_rebalance.referencing_table_15000006 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table_id_fkey_15000007 | fkey_to_reference_shard_rebalance.referencing_table_15000007 | fkey_to_reference_shard_rebalance.referenced_table_15000000 + referencing_table_id_fkey_15000008 | fkey_to_reference_shard_rebalance.referencing_table_15000008 | fkey_to_reference_shard_rebalance.referenced_table_15000000 +(24 rows) + +-- create a function to show the +CREATE FUNCTION get_foreign_key_to_reference_table_commands(Oid) + RETURNS SETOF text + LANGUAGE C STABLE STRICT + AS 'citus', $$get_foreign_key_to_reference_table_commands$$; +CREATE TABLE reference_table_commands (id int UNIQUE); +CREATE TABLE referenceing_dist_table (id int, col1 int, col2 int, col3 int); +SELECT create_reference_table('reference_table_commands'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('referenceing_dist_table', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +ALTER TABLE referenceing_dist_table ADD CONSTRAINT c1 FOREIGN KEY (col1) REFERENCES reference_table_commands(id) ON UPDATE CASCADE; +ALTER TABLE referenceing_dist_table ADD CONSTRAINT c2 FOREIGN KEY (col2) REFERENCES reference_table_commands(id) ON UPDATE CASCADE NOT VALID; +ALTER TABLE referenceing_dist_table ADD CONSTRAINT very_very_very_very_very_very_very_very_very_very_very_very_very_long FOREIGN KEY (col3) REFERENCES reference_table_commands(id) ON UPDATE CASCADE; +NOTICE: identifier "very_very_very_very_very_very_very_very_very_very_very_very_very_long" will be truncated to "very_very_very_very_very_very_very_very_very_very_very_very_ver" +SELECT * FROM get_foreign_key_to_reference_table_commands('referenceing_dist_table'::regclass); + get_foreign_key_to_reference_table_commands +--------------------------------------------------------------------- + SELECT worker_apply_inter_shard_ddl_command (15000018, 'fkey_to_reference_shard_rebalance', 15000017, 'fkey_to_reference_shard_rebalance', 'ALTER TABLE 
fkey_to_reference_shard_rebalance.referenceing_dist_table ADD CONSTRAINT c1 FOREIGN KEY (col1) REFERENCES fkey_to_reference_shard_rebalance.reference_table_commands(id) ON UPDATE CASCADE NOT VALID') + UPDATE pg_constraint SET convalidated = true WHERE conrelid = 'fkey_to_reference_shard_rebalance.referenceing_dist_table_15000018'::regclass AND conname = 'c1_15000018' + SELECT worker_apply_inter_shard_ddl_command (15000018, 'fkey_to_reference_shard_rebalance', 15000017, 'fkey_to_reference_shard_rebalance', 'ALTER TABLE fkey_to_reference_shard_rebalance.referenceing_dist_table ADD CONSTRAINT c2 FOREIGN KEY (col2) REFERENCES fkey_to_reference_shard_rebalance.reference_table_commands(id) ON UPDATE CASCADE NOT VALID') + SELECT worker_apply_inter_shard_ddl_command (15000018, 'fkey_to_reference_shard_rebalance', 15000017, 'fkey_to_reference_shard_rebalance', 'ALTER TABLE fkey_to_reference_shard_rebalance.referenceing_dist_table ADD CONSTRAINT very_very_very_very_very_very_very_very_very_very_very_very_ver FOREIGN KEY (col3) REFERENCES fkey_to_reference_shard_rebalance.reference_table_commands(id) ON UPDATE CASCADE NOT VALID') + UPDATE pg_constraint SET convalidated = true WHERE conrelid = 'fkey_to_reference_shard_rebalance.referenceing_dist_table_15000018'::regclass AND conname = 'very_very_very_very_very_very_very_very_very__754e8716_15000018' +(5 rows) + +-- and show that rebalancer works fine +SELECT master_move_shard_placement(15000018, 'localhost', :worker_1_port, 'localhost', :worker_2_port); + master_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +\c - - - :worker_2_port +SELECT conname, contype, convalidated FROM pg_constraint WHERE conrelid = 'fkey_to_reference_shard_rebalance.referenceing_dist_table_15000018'::regclass ORDER BY 1; + conname | contype | convalidated +--------------------------------------------------------------------- + c1_15000018 | f | t + c2_15000018 | f | f + very_very_very_very_very_very_very_very_very__754e8716_15000018 | f | t +(3 rows) + +\c - - - :master_port +DROP SCHEMA fkey_to_reference_shard_rebalance CASCADE; +NOTICE: drop cascades to 8 other objects +DETAIL: drop cascades to type fkey_to_reference_shard_rebalance.foreign_details +drop cascades to view fkey_to_reference_shard_rebalance.table_fkeys_in_workers +drop cascades to table fkey_to_reference_shard_rebalance.referenced_table +drop cascades to table fkey_to_reference_shard_rebalance.referencing_table +drop cascades to table fkey_to_reference_shard_rebalance.referencing_table2 +drop cascades to function fkey_to_reference_shard_rebalance.get_foreign_key_to_reference_table_commands(oid) +drop cascades to table fkey_to_reference_shard_rebalance.reference_table_commands +drop cascades to table fkey_to_reference_shard_rebalance.referenceing_dist_table diff --git a/src/test/regress/expected/isolation_blocking_move_multi_shard_commands.out b/src/test/regress/expected/isolation_blocking_move_multi_shard_commands.out new file mode 100644 index 000000000..4e09e34ad --- /dev/null +++ b/src/test/regress/expected/isolation_blocking_move_multi_shard_commands.out @@ -0,0 +1,305 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-begin s2-begin s2-insert s1-move-placement s2-end s1-end s1-select s1-get-shard-distribution +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s2-insert: + INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172); + +step s1-move-placement: + SELECT 
master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + +step s2-end: + COMMIT; + +step s1-move-placement: <... completed> +master_move_shard_placement + + +step s1-end: + COMMIT; + +step s1-select: + SELECT * FROM logical_replicate_placement order by y; + +x y + +15 15 +172 172 +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 + +starting permutation: s1-begin s2-begin s2-upsert s1-move-placement s2-end s1-end s1-select s1-get-shard-distribution +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s2-upsert: + INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172); + INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172) ON CONFLICT (x) DO UPDATE SET y = logical_replicate_placement.y + 1; + +step s1-move-placement: + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + +step s2-end: + COMMIT; + +step s1-move-placement: <... completed> +master_move_shard_placement + + +step s1-end: + COMMIT; + +step s1-select: + SELECT * FROM logical_replicate_placement order by y; + +x y + +15 16 +172 173 +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 + +starting permutation: s1-insert s1-begin s2-begin s2-update s1-move-placement s2-end s1-end s1-select s1-get-shard-distribution +step s1-insert: + INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172); + +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s2-update: + UPDATE logical_replicate_placement SET y = y + 1; + +step s1-move-placement: + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + +step s2-end: + COMMIT; + +step s1-move-placement: <... completed> +master_move_shard_placement + + +step s1-end: + COMMIT; + +step s1-select: + SELECT * FROM logical_replicate_placement order by y; + +x y + +15 16 +172 173 +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 + +starting permutation: s1-insert s1-begin s2-begin s2-delete s1-move-placement s2-end s1-end s1-select s1-get-shard-distribution +step s1-insert: + INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172); + +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s2-delete: + DELETE FROM logical_replicate_placement; + +step s1-move-placement: + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + +step s2-end: + COMMIT; + +step s1-move-placement: <... 
completed> +master_move_shard_placement + + +step s1-end: + COMMIT; + +step s1-select: + SELECT * FROM logical_replicate_placement order by y; + +x y + +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 + +starting permutation: s1-insert s1-begin s2-begin s2-select s1-move-placement s2-end s1-end s1-get-shard-distribution +step s1-insert: + INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172); + +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s2-select: + SELECT * FROM logical_replicate_placement ORDER BY y; + +x y + +15 15 +172 172 +step s1-move-placement: + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + +step s2-end: + COMMIT; + +step s1-move-placement: <... completed> +master_move_shard_placement + + +step s1-end: + COMMIT; + +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 + +starting permutation: s1-begin s2-begin s2-copy s1-move-placement s2-end s1-end s1-select s1-get-shard-distribution +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s2-copy: + COPY logical_replicate_placement FROM PROGRAM 'echo "1,1\n2,2\n3,3\n4,4\n5,5\n15,30"' WITH CSV; + +step s1-move-placement: + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + +step s2-end: + COMMIT; + +step s1-move-placement: <... completed> +master_move_shard_placement + + +step s1-end: + COMMIT; + +step s1-select: + SELECT * FROM logical_replicate_placement order by y; + +x y + +1 1 +2 2 +3 3 +4 4 +5 5 +15 30 +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 + +starting permutation: s1-insert s1-begin s2-begin s2-truncate s1-move-placement s2-end s1-end s1-select s1-get-shard-distribution +step s1-insert: + INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172); + +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s2-truncate: + TRUNCATE logical_replicate_placement; + +step s1-move-placement: + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + +step s2-end: + COMMIT; + +step s1-move-placement: <... 
completed> +master_move_shard_placement + + +step s1-end: + COMMIT; + +step s1-select: + SELECT * FROM logical_replicate_placement order by y; + +x y + +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 + +starting permutation: s1-begin s2-begin s2-alter-table s1-move-placement s2-end s1-end s1-select s1-get-shard-distribution +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s2-alter-table: + ALTER TABLE logical_replicate_placement ADD COLUMN z INT; + +step s1-move-placement: + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + +step s2-end: + COMMIT; + +step s1-move-placement: <... completed> +master_move_shard_placement + + +step s1-end: + COMMIT; + +step s1-select: + SELECT * FROM logical_replicate_placement order by y; + +x y z + +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 diff --git a/src/test/regress/expected/isolation_blocking_move_multi_shard_commands_on_mx.out b/src/test/regress/expected/isolation_blocking_move_multi_shard_commands_on_mx.out new file mode 100644 index 000000000..68cb2c1fb --- /dev/null +++ b/src/test/regress/expected/isolation_blocking_move_multi_shard_commands_on_mx.out @@ -0,0 +1,245 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-begin s2-start-session-level-connection s2-begin-on-worker s2-insert s1-move-placement s2-commit-worker s1-commit s1-select s1-get-shard-distribution s2-stop-connection +step s1-begin: + BEGIN; + +step s2-start-session-level-connection: + SELECT start_session_level_connection_to_node('localhost', 57638); + +start_session_level_connection_to_node + + +step s2-begin-on-worker: + SELECT run_commands_on_session_level_connection_to_node('BEGIN'); + +run_commands_on_session_level_connection_to_node + + +step s2-insert: + SELECT run_commands_on_session_level_connection_to_node('INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172)'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + +step s2-commit-worker: + SELECT run_commands_on_session_level_connection_to_node('COMMIT'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: <... 
completed> +master_move_shard_placement + + +step s1-commit: + COMMIT; + +step s1-select: + SELECT * FROM logical_replicate_placement order by y; + +x y + +15 15 +172 172 +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 +step s2-stop-connection: + SELECT stop_session_level_connection_to_node(); + +stop_session_level_connection_to_node + + +restore_isolation_tester_func + + + +starting permutation: s1-insert s1-begin s2-start-session-level-connection s2-begin-on-worker s2-update s1-move-placement s2-commit-worker s1-commit s1-select s1-get-shard-distribution s2-stop-connection +step s1-insert: + INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172); + +step s1-begin: + BEGIN; + +step s2-start-session-level-connection: + SELECT start_session_level_connection_to_node('localhost', 57638); + +start_session_level_connection_to_node + + +step s2-begin-on-worker: + SELECT run_commands_on_session_level_connection_to_node('BEGIN'); + +run_commands_on_session_level_connection_to_node + + +step s2-update: + SELECT run_commands_on_session_level_connection_to_node('UPDATE logical_replicate_placement SET y = y + 1'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + +step s2-commit-worker: + SELECT run_commands_on_session_level_connection_to_node('COMMIT'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: <... completed> +master_move_shard_placement + + +step s1-commit: + COMMIT; + +step s1-select: + SELECT * FROM logical_replicate_placement order by y; + +x y + +15 16 +172 173 +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 +step s2-stop-connection: + SELECT stop_session_level_connection_to_node(); + +stop_session_level_connection_to_node + + +restore_isolation_tester_func + + + +starting permutation: s1-insert s1-begin s2-start-session-level-connection s2-begin-on-worker s2-delete s1-move-placement s2-commit-worker s1-commit s1-select s1-get-shard-distribution s2-stop-connection +step s1-insert: + INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172); + +step s1-begin: + BEGIN; + +step s2-start-session-level-connection: + SELECT start_session_level_connection_to_node('localhost', 57638); + +start_session_level_connection_to_node + + +step s2-begin-on-worker: + SELECT run_commands_on_session_level_connection_to_node('BEGIN'); + +run_commands_on_session_level_connection_to_node + + +step s2-delete: + SELECT run_commands_on_session_level_connection_to_node('DELETE FROM logical_replicate_placement'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + +step s2-commit-worker: + SELECT run_commands_on_session_level_connection_to_node('COMMIT'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: <... 
completed> +master_move_shard_placement + + +step s1-commit: + COMMIT; + +step s1-select: + SELECT * FROM logical_replicate_placement order by y; + +x y + +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 +step s2-stop-connection: + SELECT stop_session_level_connection_to_node(); + +stop_session_level_connection_to_node + + +restore_isolation_tester_func + + + +starting permutation: s1-insert s1-begin s2-start-session-level-connection s2-begin-on-worker s2-select s1-move-placement s2-commit-worker s1-commit s1-get-shard-distribution s2-stop-connection +step s1-insert: + INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172); + +step s1-begin: + BEGIN; + +step s2-start-session-level-connection: + SELECT start_session_level_connection_to_node('localhost', 57638); + +start_session_level_connection_to_node + + +step s2-begin-on-worker: + SELECT run_commands_on_session_level_connection_to_node('BEGIN'); + +run_commands_on_session_level_connection_to_node + + +step s2-select: + SELECT run_commands_on_session_level_connection_to_node('SELECT * FROM logical_replicate_placement ORDER BY y'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; + +step s2-commit-worker: + SELECT run_commands_on_session_level_connection_to_node('COMMIT'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: <... completed> +master_move_shard_placement + + +step s1-commit: + COMMIT; + +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 +step s2-stop-connection: + SELECT stop_session_level_connection_to_node(); + +stop_session_level_connection_to_node + + +restore_isolation_tester_func + + diff --git a/src/test/regress/expected/isolation_blocking_move_single_shard_commands.out b/src/test/regress/expected/isolation_blocking_move_single_shard_commands.out new file mode 100644 index 000000000..800e41aca --- /dev/null +++ b/src/test/regress/expected/isolation_blocking_move_single_shard_commands.out @@ -0,0 +1,223 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-begin s2-begin s2-insert s1-move-placement s2-end s1-end s1-select s1-get-shard-distribution +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s2-insert: + INSERT INTO logical_replicate_placement VALUES (15, 15); + +step s1-move-placement: + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); + +step s2-end: + COMMIT; + +step s1-move-placement: <... 
completed> +master_move_shard_placement + + +step s1-end: + COMMIT; + +step s1-select: + SELECT * FROM logical_replicate_placement order by y; + +x y + +15 15 +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 + +starting permutation: s1-begin s2-begin s2-upsert s1-move-placement s2-end s1-end s1-select s1-get-shard-distribution +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s2-upsert: + INSERT INTO logical_replicate_placement VALUES (15, 15); + INSERT INTO logical_replicate_placement VALUES (15, 15) ON CONFLICT (x) DO UPDATE SET y = logical_replicate_placement.y + 1; + +step s1-move-placement: + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); + +step s2-end: + COMMIT; + +step s1-move-placement: <... completed> +master_move_shard_placement + + +step s1-end: + COMMIT; + +step s1-select: + SELECT * FROM logical_replicate_placement order by y; + +x y + +15 16 +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 + +starting permutation: s1-insert s1-begin s2-begin s2-update s1-move-placement s2-end s1-end s1-select s1-get-shard-distribution +step s1-insert: + INSERT INTO logical_replicate_placement VALUES (15, 15); + +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s2-update: + UPDATE logical_replicate_placement SET y = y + 1 WHERE x = 15; + +step s1-move-placement: + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); + +step s2-end: + COMMIT; + +step s1-move-placement: <... completed> +master_move_shard_placement + + +step s1-end: + COMMIT; + +step s1-select: + SELECT * FROM logical_replicate_placement order by y; + +x y + +15 16 +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 + +starting permutation: s1-insert s1-begin s2-begin s2-delete s1-move-placement s2-end s1-end s1-select s1-get-shard-distribution +step s1-insert: + INSERT INTO logical_replicate_placement VALUES (15, 15); + +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s2-delete: + DELETE FROM logical_replicate_placement WHERE x = 15; + +step s1-move-placement: + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); + +step s2-end: + COMMIT; + +step s1-move-placement: <... 
completed> +master_move_shard_placement + + +step s1-end: + COMMIT; + +step s1-select: + SELECT * FROM logical_replicate_placement order by y; + +x y + +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 + +starting permutation: s1-insert s1-begin s2-begin s2-select s1-move-placement s2-end s1-end s1-get-shard-distribution +step s1-insert: + INSERT INTO logical_replicate_placement VALUES (15, 15); + +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s2-select: + SELECT * FROM logical_replicate_placement ORDER BY y; + +x y + +15 15 +step s1-move-placement: + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); + +step s2-end: + COMMIT; + +step s1-move-placement: <... completed> +master_move_shard_placement + + +step s1-end: + COMMIT; + +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 + +starting permutation: s1-insert s1-begin s2-begin s2-select-for-update s1-move-placement s2-end s1-end s1-get-shard-distribution +step s1-insert: + INSERT INTO logical_replicate_placement VALUES (15, 15); + +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s2-select-for-update: + SELECT * FROM logical_replicate_placement WHERE x=15 FOR UPDATE; + +x y + +15 15 +step s1-move-placement: + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); + +step s2-end: + COMMIT; + +step s1-move-placement: <... 
completed> +master_move_shard_placement + + +step s1-end: + COMMIT; + +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 diff --git a/src/test/regress/expected/isolation_blocking_move_single_shard_commands_on_mx.out b/src/test/regress/expected/isolation_blocking_move_single_shard_commands_on_mx.out new file mode 100644 index 000000000..209275253 --- /dev/null +++ b/src/test/regress/expected/isolation_blocking_move_single_shard_commands_on_mx.out @@ -0,0 +1,300 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-begin s2-start-session-level-connection s2-begin-on-worker s2-insert s1-move-placement s2-commit-worker s1-commit s1-select s1-get-shard-distribution s2-stop-connection +step s1-begin: + BEGIN; + +step s2-start-session-level-connection: + SELECT start_session_level_connection_to_node('localhost', 57638); + +start_session_level_connection_to_node + + +step s2-begin-on-worker: + SELECT run_commands_on_session_level_connection_to_node('BEGIN'); + +run_commands_on_session_level_connection_to_node + + +step s2-insert: + SELECT run_commands_on_session_level_connection_to_node('INSERT INTO logical_replicate_placement VALUES (15, 15)'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); + +step s2-commit-worker: + SELECT run_commands_on_session_level_connection_to_node('COMMIT'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: <... completed> +master_move_shard_placement + + +step s1-commit: + COMMIT; + +step s1-select: + SELECT * FROM logical_replicate_placement order by y; + +x y + +15 15 +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 +step s2-stop-connection: + SELECT stop_session_level_connection_to_node(); + +stop_session_level_connection_to_node + + +restore_isolation_tester_func + + + +starting permutation: s1-insert s1-begin s2-start-session-level-connection s2-begin-on-worker s2-update s1-move-placement s2-commit-worker s1-commit s1-select s1-get-shard-distribution s2-stop-connection +step s1-insert: + INSERT INTO logical_replicate_placement VALUES (15, 15); + +step s1-begin: + BEGIN; + +step s2-start-session-level-connection: + SELECT start_session_level_connection_to_node('localhost', 57638); + +start_session_level_connection_to_node + + +step s2-begin-on-worker: + SELECT run_commands_on_session_level_connection_to_node('BEGIN'); + +run_commands_on_session_level_connection_to_node + + +step s2-update: + SELECT run_commands_on_session_level_connection_to_node('UPDATE logical_replicate_placement SET y = y + 1 WHERE x = 15'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); + +step s2-commit-worker: + SELECT run_commands_on_session_level_connection_to_node('COMMIT'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: <... 
completed> +master_move_shard_placement + + +step s1-commit: + COMMIT; + +step s1-select: + SELECT * FROM logical_replicate_placement order by y; + +x y + +15 16 +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 +step s2-stop-connection: + SELECT stop_session_level_connection_to_node(); + +stop_session_level_connection_to_node + + +restore_isolation_tester_func + + + +starting permutation: s1-insert s1-begin s2-start-session-level-connection s2-begin-on-worker s2-delete s1-move-placement s2-commit-worker s1-commit s1-select s1-get-shard-distribution s2-stop-connection +step s1-insert: + INSERT INTO logical_replicate_placement VALUES (15, 15); + +step s1-begin: + BEGIN; + +step s2-start-session-level-connection: + SELECT start_session_level_connection_to_node('localhost', 57638); + +start_session_level_connection_to_node + + +step s2-begin-on-worker: + SELECT run_commands_on_session_level_connection_to_node('BEGIN'); + +run_commands_on_session_level_connection_to_node + + +step s2-delete: + SELECT run_commands_on_session_level_connection_to_node('DELETE FROM logical_replicate_placement WHERE x = 15'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); + +step s2-commit-worker: + SELECT run_commands_on_session_level_connection_to_node('COMMIT'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: <... completed> +master_move_shard_placement + + +step s1-commit: + COMMIT; + +step s1-select: + SELECT * FROM logical_replicate_placement order by y; + +x y + +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 +step s2-stop-connection: + SELECT stop_session_level_connection_to_node(); + +stop_session_level_connection_to_node + + +restore_isolation_tester_func + + + +starting permutation: s1-insert s1-begin s2-start-session-level-connection s2-begin-on-worker s2-select s1-move-placement s2-commit-worker s1-commit s1-get-shard-distribution s2-stop-connection +step s1-insert: + INSERT INTO logical_replicate_placement VALUES (15, 15); + +step s1-begin: + BEGIN; + +step s2-start-session-level-connection: + SELECT start_session_level_connection_to_node('localhost', 57638); + +start_session_level_connection_to_node + + +step s2-begin-on-worker: + SELECT run_commands_on_session_level_connection_to_node('BEGIN'); + +run_commands_on_session_level_connection_to_node + + +step s2-select: + SELECT run_commands_on_session_level_connection_to_node('SELECT * FROM logical_replicate_placement ORDER BY y'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); + +step s2-commit-worker: + SELECT run_commands_on_session_level_connection_to_node('COMMIT'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: <... 
completed> +master_move_shard_placement + + +step s1-commit: + COMMIT; + +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 +step s2-stop-connection: + SELECT stop_session_level_connection_to_node(); + +stop_session_level_connection_to_node + + +restore_isolation_tester_func + + + +starting permutation: s1-insert s1-begin s2-start-session-level-connection s2-begin-on-worker s2-select-for-update s1-move-placement s2-commit-worker s1-commit s1-get-shard-distribution s2-stop-connection +step s1-insert: + INSERT INTO logical_replicate_placement VALUES (15, 15); + +step s1-begin: + BEGIN; + +step s2-start-session-level-connection: + SELECT start_session_level_connection_to_node('localhost', 57638); + +start_session_level_connection_to_node + + +step s2-begin-on-worker: + SELECT run_commands_on_session_level_connection_to_node('BEGIN'); + +run_commands_on_session_level_connection_to_node + + +step s2-select-for-update: + SELECT run_commands_on_session_level_connection_to_node('SELECT * FROM logical_replicate_placement WHERE x=15 FOR UPDATE'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); + +step s2-commit-worker: + SELECT run_commands_on_session_level_connection_to_node('COMMIT'); + +run_commands_on_session_level_connection_to_node + + +step s1-move-placement: <... completed> +master_move_shard_placement + + +step s1-commit: + COMMIT; + +step s1-get-shard-distribution: + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; + +nodeport + +57638 +step s2-stop-connection: + SELECT stop_session_level_connection_to_node(); + +stop_session_level_connection_to_node + + +restore_isolation_tester_func + + diff --git a/src/test/regress/expected/isolation_shard_rebalancer.out b/src/test/regress/expected/isolation_shard_rebalancer.out new file mode 100644 index 000000000..4bb980230 --- /dev/null +++ b/src/test/regress/expected/isolation_shard_rebalancer.out @@ -0,0 +1,449 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-rebalance-nc s2-rebalance-nc s1-commit +create_distributed_table + + +step s1-rebalance-nc: + BEGIN; + select rebalance_table_shards('non_colocated'); + +rebalance_table_shards + + +step s2-rebalance-nc: + select rebalance_table_shards('non_colocated'); + +ERROR: could not acquire the lock required to rebalance public.non_colocated +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-rebalance-nc s2-replicate-nc s1-commit +create_distributed_table + + +step s1-rebalance-nc: + BEGIN; + select rebalance_table_shards('non_colocated'); + +rebalance_table_shards + + +step s2-replicate-nc: + select replicate_table_shards('non_colocated'); + +ERROR: could not acquire the lock required to replicate public.non_colocated +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-replicate-nc s2-rebalance-nc s1-commit +create_distributed_table + + +step s1-replicate-nc: + BEGIN; + select replicate_table_shards('non_colocated'); + +replicate_table_shards + + +step s2-rebalance-nc: + select rebalance_table_shards('non_colocated'); + 
+ERROR: could not acquire the lock required to rebalance public.non_colocated +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-replicate-nc s2-replicate-nc s1-commit +create_distributed_table + + +step s1-replicate-nc: + BEGIN; + select replicate_table_shards('non_colocated'); + +replicate_table_shards + + +step s2-replicate-nc: + select replicate_table_shards('non_colocated'); + +ERROR: could not acquire the lock required to replicate public.non_colocated +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-rebalance-c1 s2-rebalance-c2 s1-commit +create_distributed_table + + +step s1-rebalance-c1: + BEGIN; + select rebalance_table_shards('colocated1'); + +rebalance_table_shards + + +step s2-rebalance-c2: + select rebalance_table_shards('colocated2'); + +ERROR: could not acquire the lock required to rebalance public.colocated2 +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-rebalance-c1 s2-replicate-c2 s1-commit +create_distributed_table + + +step s1-rebalance-c1: + BEGIN; + select rebalance_table_shards('colocated1'); + +rebalance_table_shards + + +step s2-replicate-c2: + select replicate_table_shards('colocated2'); + +ERROR: could not acquire the lock required to replicate public.colocated2 +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-replicate-c1 s2-rebalance-c2 s1-commit +create_distributed_table + + +step s1-replicate-c1: + BEGIN; + select replicate_table_shards('colocated1'); + +replicate_table_shards + + +step s2-rebalance-c2: + select rebalance_table_shards('colocated2'); + +ERROR: could not acquire the lock required to rebalance public.colocated2 +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-replicate-c1 s2-replicate-c2 s1-commit +create_distributed_table + + +step s1-replicate-c1: + BEGIN; + select replicate_table_shards('colocated1'); + +replicate_table_shards + + +step s2-replicate-c2: + select replicate_table_shards('colocated2'); + +ERROR: could not acquire the lock required to replicate public.colocated2 +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-rebalance-c1 s2-rebalance-nc s1-commit +create_distributed_table + + +step s1-rebalance-c1: + BEGIN; + select rebalance_table_shards('colocated1'); + +rebalance_table_shards + + +step s2-rebalance-nc: + select rebalance_table_shards('non_colocated'); + +rebalance_table_shards + + +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-rebalance-c1 s2-replicate-nc s1-commit +create_distributed_table + + +step s1-rebalance-c1: + BEGIN; + select rebalance_table_shards('colocated1'); + +rebalance_table_shards + + +step s2-replicate-nc: + select replicate_table_shards('non_colocated'); + +replicate_table_shards + + +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-replicate-c1 s2-rebalance-nc s1-commit +create_distributed_table + + +step s1-replicate-c1: + BEGIN; + select replicate_table_shards('colocated1'); + +replicate_table_shards + + +step s2-rebalance-nc: + select rebalance_table_shards('non_colocated'); + +rebalance_table_shards + + +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-replicate-c1 s2-replicate-nc s1-commit +create_distributed_table + + +step s1-replicate-c1: + BEGIN; + select replicate_table_shards('colocated1'); + +replicate_table_shards + + +step s2-replicate-nc: + select 
replicate_table_shards('non_colocated'); + +replicate_table_shards + + +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-rebalance-c1 s2-rebalance-all s1-commit +create_distributed_table + + +step s1-rebalance-c1: + BEGIN; + select rebalance_table_shards('colocated1'); + +rebalance_table_shards + + +step s2-rebalance-all: + select rebalance_table_shards(); + +ERROR: could not acquire the lock required to rebalance public.distributed_transaction_id_table +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-replicate-c1 s2-rebalance-all s1-commit +create_distributed_table + + +step s1-replicate-c1: + BEGIN; + select replicate_table_shards('colocated1'); + +replicate_table_shards + + +step s2-rebalance-all: + select rebalance_table_shards(); + +ERROR: could not acquire the lock required to rebalance public.distributed_transaction_id_table +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-rebalance-nc s2-rebalance-all s1-commit +create_distributed_table + + +step s1-rebalance-nc: + BEGIN; + select rebalance_table_shards('non_colocated'); + +rebalance_table_shards + + +step s2-rebalance-all: + select rebalance_table_shards(); + +ERROR: could not acquire the lock required to rebalance public.non_colocated +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-replicate-nc s2-rebalance-all s1-commit +create_distributed_table + + +step s1-replicate-nc: + BEGIN; + select replicate_table_shards('non_colocated'); + +replicate_table_shards + + +step s2-rebalance-all: + select rebalance_table_shards(); + +ERROR: could not acquire the lock required to rebalance public.non_colocated +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-rebalance-c1 s2-drain s1-commit +create_distributed_table + + +step s1-rebalance-c1: + BEGIN; + select rebalance_table_shards('colocated1'); + +rebalance_table_shards + + +step s2-drain: + select master_drain_node('localhost', 57638); + +ERROR: could not acquire the lock required to move public.distributed_transaction_id_table +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-replicate-c1 s2-drain s1-commit +create_distributed_table + + +step s1-replicate-c1: + BEGIN; + select replicate_table_shards('colocated1'); + +replicate_table_shards + + +step s2-drain: + select master_drain_node('localhost', 57638); + +ERROR: could not acquire the lock required to move public.distributed_transaction_id_table +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-rebalance-nc s2-drain s1-commit +create_distributed_table + + +step s1-rebalance-nc: + BEGIN; + select rebalance_table_shards('non_colocated'); + +rebalance_table_shards + + +step s2-drain: + select master_drain_node('localhost', 57638); + +ERROR: could not acquire the lock required to move public.non_colocated +step s1-commit: + COMMIT; + +master_set_node_property + + + +starting permutation: s1-replicate-nc s2-drain s1-commit +create_distributed_table + + +step s1-replicate-nc: + BEGIN; + select replicate_table_shards('non_colocated'); + +replicate_table_shards + + +step s2-drain: + select master_drain_node('localhost', 57638); + +ERROR: could not acquire the lock required to move public.non_colocated +step s1-commit: + COMMIT; + +master_set_node_property + + diff --git a/src/test/regress/expected/master_copy_shard_placement.out 
b/src/test/regress/expected/master_copy_shard_placement.out index f8e37fa39..65af97264 100644 --- a/src/test/regress/expected/master_copy_shard_placement.out +++ b/src/test/regress/expected/master_copy_shard_placement.out @@ -45,7 +45,8 @@ SELECT master_copy_shard_placement( get_shard_id_for_distribution_column('data', 'key-1'), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false); + do_repair := false, + transfer_mode := 'block_writes'); ERROR: could not find placement matching "localhost:xxxxx" HINT: Confirm the placement still exists and try again. -- verify we error out if source and destination are the same @@ -53,14 +54,16 @@ SELECT master_copy_shard_placement( get_shard_id_for_distribution_column('data', 'key-1'), 'localhost', :worker_2_port, 'localhost', :worker_2_port, - do_repair := false); + do_repair := false, + transfer_mode := 'block_writes'); ERROR: shard xxxxx already exists in the target node -- verify we error out if target already contains a healthy placement SELECT master_copy_shard_placement( (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false); + do_repair := false, + transfer_mode := 'block_writes'); ERROR: shard xxxxx already exists in the target node -- verify we error out if table has foreign key constraints INSERT INTO ref_table SELECT 1, value FROM data; @@ -70,16 +73,15 @@ SELECT master_copy_shard_placement( 'localhost', :worker_2_port, 'localhost', :worker_1_port, do_repair := false); -ERROR: cannot create foreign key constraint -DETAIL: This shard has foreign constraints on it. Citus currently supports foreign key constraints only for "citus.shard_replication_factor = 1". -HINT: Please change "citus.shard_replication_factor to 1". To learn more about using foreign keys with other replication factors, please contact us at https://citusdata.com/about/contact_us. +ERROR: cannot replicate shards with foreign keys ALTER TABLE data DROP CONSTRAINT distfk; -- replicate shard that contains key-1 SELECT master_copy_shard_placement( get_shard_id_for_distribution_column('data', 'key-1'), 'localhost', :worker_2_port, 'localhost', :worker_1_port, - do_repair := false); + do_repair := false, + transfer_mode := 'block_writes'); master_copy_shard_placement --------------------------------------------------------------------- @@ -123,7 +125,8 @@ SELECT master_copy_shard_placement( get_shard_id_for_distribution_column('mx_table', '1'), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false); + do_repair := false, + transfer_mode := 'block_writes'); ERROR: Table 'mx_table' is streaming replicated. 
Shards of streaming replicated tables cannot be copied SELECT stop_metadata_sync_to_node('localhost', :worker_1_port); stop_metadata_sync_to_node diff --git a/src/test/regress/expected/multi_colocated_shard_rebalance.out b/src/test/regress/expected/multi_colocated_shard_rebalance.out new file mode 100644 index 000000000..70c4d8f20 --- /dev/null +++ b/src/test/regress/expected/multi_colocated_shard_rebalance.out @@ -0,0 +1,639 @@ +-- +-- MULTI_COLOCATED_SHARD_REBALANCE +-- +ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 13000000; +SET citus.shard_count TO 6; +SET citus.shard_replication_factor TO 1; +-- create distributed tables +CREATE TABLE table1_group1 ( id int PRIMARY KEY); +SELECT create_distributed_table('table1_group1', 'id', 'hash'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE table2_group1 ( id int ); +SELECT create_distributed_table('table2_group1', 'id', 'hash'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SET citus.shard_count TO 8; +CREATE TABLE table5_groupX ( id int ); +SELECT create_distributed_table('table5_groupX', 'id', 'hash'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE table6_append ( id int ); +SELECT master_create_distributed_table('table6_append', 'id', 'append'); + master_create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT master_create_empty_shard('table6_append'); + master_create_empty_shard +--------------------------------------------------------------------- + 13000020 +(1 row) + +SELECT master_create_empty_shard('table6_append'); + master_create_empty_shard +--------------------------------------------------------------------- + 13000021 +(1 row) + +-- Mark tables as non-mx tables, in order to be able to test master_copy_shard_placement +UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN + ('table1_group1'::regclass, 'table2_group1'::regclass, 'table5_groupX'::regclass); +-- test copy +-- test copying colocated shards +-- status before shard copy +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) +ORDER BY s.shardid, sp.nodeport; + shardid | logicalrelid | nodeport +--------------------------------------------------------------------- + 13000000 | table1_group1 | 57637 + 13000001 | table1_group1 | 57638 + 13000002 | table1_group1 | 57637 + 13000003 | table1_group1 | 57638 + 13000004 | table1_group1 | 57637 + 13000005 | table1_group1 | 57638 + 13000006 | table2_group1 | 57637 + 13000007 | table2_group1 | 57638 + 13000008 | table2_group1 | 57637 + 13000009 | table2_group1 | 57638 + 13000010 | table2_group1 | 57637 + 13000011 | table2_group1 | 57638 +(12 rows) + +-- copy colocated shards +SELECT master_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false); + master_copy_shard_placement +--------------------------------------------------------------------- + +(1 row) + +-- status after shard copy +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = 
s.logicalrelid AND + s.shardid = sp.shardid AND + colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) +ORDER BY s.shardid, sp.nodeport; + shardid | logicalrelid | nodeport +--------------------------------------------------------------------- + 13000000 | table1_group1 | 57637 + 13000000 | table1_group1 | 57638 + 13000001 | table1_group1 | 57638 + 13000002 | table1_group1 | 57637 + 13000003 | table1_group1 | 57638 + 13000004 | table1_group1 | 57637 + 13000005 | table1_group1 | 57638 + 13000006 | table2_group1 | 57637 + 13000006 | table2_group1 | 57638 + 13000007 | table2_group1 | 57638 + 13000008 | table2_group1 | 57637 + 13000009 | table2_group1 | 57638 + 13000010 | table2_group1 | 57637 + 13000011 | table2_group1 | 57638 +(14 rows) + +-- also connect worker to verify we successfully copied given shard (and other colocated shards) +\c - - - :worker_2_port +SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table1_group1_13000000'::regclass; + Column | Type | Modifiers +--------------------------------------------------------------------- + id | integer | not null +(1 row) + +SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table2_group1_13000006'::regclass; + Column | Type | Modifiers +--------------------------------------------------------------------- + id | integer | +(1 row) + +\c - - - :master_port +-- copy colocated shards again to see error message +SELECT master_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical'); +ERROR: the force_logical transfer mode is currently unsupported +-- test copying NOT colocated shard +-- status before shard copy +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + p.logicalrelid = 'table5_groupX'::regclass +ORDER BY s.shardid, sp.nodeport; + shardid | logicalrelid | nodeport +--------------------------------------------------------------------- + 13000012 | table5_groupx | 57637 + 13000013 | table5_groupx | 57638 + 13000014 | table5_groupx | 57637 + 13000015 | table5_groupx | 57638 + 13000016 | table5_groupx | 57637 + 13000017 | table5_groupx | 57638 + 13000018 | table5_groupx | 57637 + 13000019 | table5_groupx | 57638 +(8 rows) + +-- copy NOT colocated shard +SELECT master_copy_shard_placement(13000012, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false); + master_copy_shard_placement +--------------------------------------------------------------------- + +(1 row) + +-- status after shard copy +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + p.logicalrelid = 'table5_groupX'::regclass +ORDER BY s.shardid, sp.nodeport; + shardid | logicalrelid | nodeport +--------------------------------------------------------------------- + 13000012 | table5_groupx | 57637 + 13000012 | table5_groupx | 57638 + 13000013 | table5_groupx | 57638 + 13000014 | table5_groupx | 57637 + 13000015 | table5_groupx | 57638 + 13000016 | table5_groupx | 57637 + 13000017 | table5_groupx | 57638 + 13000018 | table5_groupx | 57637 + 13000019 | table5_groupx | 57638 +(9 rows) + +-- test copying shard in append distributed table +-- status before shard copy +SELECT s.shardid, s.logicalrelid::regclass, 
sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + p.logicalrelid = 'table6_append'::regclass +ORDER BY s.shardid, sp.nodeport; + shardid | logicalrelid | nodeport +--------------------------------------------------------------------- + 13000020 | table6_append | 57638 + 13000021 | table6_append | 57637 +(2 rows) + +-- copy shard in append distributed table +SELECT master_copy_shard_placement(13000020, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false, 'force_logical'); +ERROR: the force_logical transfer mode is currently unsupported +-- status after shard copy +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + p.logicalrelid = 'table6_append'::regclass +ORDER BY s.shardid, sp.nodeport; + shardid | logicalrelid | nodeport +--------------------------------------------------------------------- + 13000020 | table6_append | 57638 + 13000021 | table6_append | 57637 +(2 rows) + +-- test move +-- test moving colocated shards +-- status before shard move +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) +ORDER BY s.shardid, sp.nodeport; + shardid | logicalrelid | nodeport +--------------------------------------------------------------------- + 13000000 | table1_group1 | 57637 + 13000000 | table1_group1 | 57638 + 13000001 | table1_group1 | 57638 + 13000002 | table1_group1 | 57637 + 13000003 | table1_group1 | 57638 + 13000004 | table1_group1 | 57637 + 13000005 | table1_group1 | 57638 + 13000006 | table2_group1 | 57637 + 13000006 | table2_group1 | 57638 + 13000007 | table2_group1 | 57638 + 13000008 | table2_group1 | 57637 + 13000009 | table2_group1 | 57638 + 13000010 | table2_group1 | 57637 + 13000011 | table2_group1 | 57638 +(14 rows) + +-- try force_logical +SELECT master_move_shard_placement(13000001, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'force_logical'); +ERROR: the force_logical transfer mode is currently unsupported +-- move colocated shards +SELECT master_move_shard_placement(13000001, 'localhost', :worker_2_port, 'localhost', :worker_1_port); + master_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +-- status after shard move +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) +ORDER BY s.shardid, sp.nodeport; + shardid | logicalrelid | nodeport +--------------------------------------------------------------------- + 13000000 | table1_group1 | 57637 + 13000000 | table1_group1 | 57638 + 13000001 | table1_group1 | 57637 + 13000002 | table1_group1 | 57637 + 13000003 | table1_group1 | 57638 + 13000004 | table1_group1 | 57637 + 13000005 | table1_group1 | 57638 + 13000006 | table2_group1 | 57637 + 13000006 | table2_group1 | 57638 + 13000007 | table2_group1 | 57637 + 13000008 | table2_group1 | 57637 + 13000009 | table2_group1 | 57638 + 
13000010 | table2_group1 | 57637 + 13000011 | table2_group1 | 57638 +(14 rows) + +-- also connect worker to verify we successfully moved given shard (and other colocated shards) +\c - - - :worker_1_port +SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table1_group1_13000001'::regclass; + Column | Type | Modifiers +--------------------------------------------------------------------- + id | integer | not null +(1 row) + +SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table2_group1_13000007'::regclass; + Column | Type | Modifiers +--------------------------------------------------------------------- + id | integer | +(1 row) + +\c - - - :master_port +-- test moving NOT colocated shard +-- status before shard move +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + p.logicalrelid = 'table5_groupX'::regclass +ORDER BY s.shardid, sp.nodeport; + shardid | logicalrelid | nodeport +--------------------------------------------------------------------- + 13000012 | table5_groupx | 57637 + 13000012 | table5_groupx | 57638 + 13000013 | table5_groupx | 57638 + 13000014 | table5_groupx | 57637 + 13000015 | table5_groupx | 57638 + 13000016 | table5_groupx | 57637 + 13000017 | table5_groupx | 57638 + 13000018 | table5_groupx | 57637 + 13000019 | table5_groupx | 57638 +(9 rows) + +-- move NOT colocated shard +SELECT master_move_shard_placement(13000013, 'localhost', :worker_2_port, 'localhost', :worker_1_port); + master_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +-- status after shard move +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + p.logicalrelid = 'table5_groupX'::regclass +ORDER BY s.shardid, sp.nodeport; + shardid | logicalrelid | nodeport +--------------------------------------------------------------------- + 13000012 | table5_groupx | 57637 + 13000012 | table5_groupx | 57638 + 13000013 | table5_groupx | 57637 + 13000014 | table5_groupx | 57637 + 13000015 | table5_groupx | 57638 + 13000016 | table5_groupx | 57637 + 13000017 | table5_groupx | 57638 + 13000018 | table5_groupx | 57637 + 13000019 | table5_groupx | 57638 +(9 rows) + +-- test moving shard in append distributed table +-- status before shard move +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + p.logicalrelid = 'table6_append'::regclass +ORDER BY s.shardid, sp.nodeport; + shardid | logicalrelid | nodeport +--------------------------------------------------------------------- + 13000020 | table6_append | 57638 + 13000021 | table6_append | 57637 +(2 rows) + +-- move shard in append distributed table +SELECT master_move_shard_placement(13000021, 'localhost', :worker_1_port, 'localhost', :worker_2_port); + master_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +-- status after shard move +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + p.logicalrelid = 
'table6_append'::regclass +ORDER BY s.shardid, sp.nodeport; + shardid | logicalrelid | nodeport +--------------------------------------------------------------------- + 13000020 | table6_append | 57638 + 13000021 | table6_append | 57638 +(2 rows) + +-- try to move shard from wrong node +SELECT master_move_shard_placement(13000021, 'localhost', :worker_1_port, 'localhost', :worker_2_port); +ERROR: could not find placement matching "localhost:xxxxx" +HINT: Confirm the placement still exists and try again. +-- test shard move with foreign constraints +DROP TABLE IF EXISTS table1_group1, table2_group1; +SET citus.shard_count TO 6; +SET citus.shard_replication_factor TO 1; +-- create distributed tables +CREATE TABLE table1_group1 ( id int PRIMARY KEY); +SELECT create_distributed_table('table1_group1', 'id', 'hash'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE table2_group1 ( id int, table1_id int, FOREIGN KEY(table1_id) REFERENCES table1_group1(id)); +SELECT create_distributed_table('table2_group1', 'table1_id', 'hash'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Mark the tables as non-mx tables +UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN + ('table1_group1'::regclass, 'table2_group1'::regclass); +-- status before shard rebalance +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) +ORDER BY s.shardid, sp.nodeport; + shardid | logicalrelid | nodeport +--------------------------------------------------------------------- + 13000022 | table1_group1 | 57637 + 13000023 | table1_group1 | 57638 + 13000024 | table1_group1 | 57637 + 13000025 | table1_group1 | 57638 + 13000026 | table1_group1 | 57637 + 13000027 | table1_group1 | 57638 + 13000028 | table2_group1 | 57637 + 13000029 | table2_group1 | 57638 + 13000030 | table2_group1 | 57637 + 13000031 | table2_group1 | 57638 + 13000032 | table2_group1 | 57637 + 13000033 | table2_group1 | 57638 +(12 rows) + +SELECT master_move_shard_placement(13000022, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'block_writes'); + master_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +-- status after shard rebalance +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) +ORDER BY s.shardid, sp.nodeport; + shardid | logicalrelid | nodeport +--------------------------------------------------------------------- + 13000022 | table1_group1 | 57638 + 13000023 | table1_group1 | 57638 + 13000024 | table1_group1 | 57637 + 13000025 | table1_group1 | 57638 + 13000026 | table1_group1 | 57637 + 13000027 | table1_group1 | 57638 + 13000028 | table2_group1 | 57638 + 13000029 | table2_group1 | 57638 + 13000030 | table2_group1 | 57637 + 13000031 | table2_group1 | 57638 + 13000032 | table2_group1 | 57637 + 13000033 | table2_group1 | 57638 +(12 rows) + +-- also connect worker to verify we successfully moved given shard (and other colocated 
shards) +\c - - - :worker_2_port +SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table1_group1_13000022'::regclass; + Column | Type | Modifiers +--------------------------------------------------------------------- + id | integer | not null +(1 row) + +SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table2_group1_13000028'::regclass; + Column | Type | Modifiers +--------------------------------------------------------------------- + id | integer | + table1_id | integer | +(2 rows) + +-- make sure that we've created the foreign keys +SELECT "Constraint", "Definition" FROM table_fkeys + WHERE "Constraint" LIKE 'table2_group%' OR "Constraint" LIKE 'table1_group%'; + Constraint | Definition +--------------------------------------------------------------------- + table2_group1_table1_id_fkey_13000028 | FOREIGN KEY (table1_id) REFERENCES table1_group1_13000022(id) + table2_group1_table1_id_fkey_13000029 | FOREIGN KEY (table1_id) REFERENCES table1_group1_13000023(id) + table2_group1_table1_id_fkey_13000031 | FOREIGN KEY (table1_id) REFERENCES table1_group1_13000025(id) + table2_group1_table1_id_fkey_13000033 | FOREIGN KEY (table1_id) REFERENCES table1_group1_13000027(id) +(4 rows) + +\c - - - :master_port +-- test shard copy with foreign constraints +-- we expect it to error out because we do not support foreign constraints with replication factor > 1 +SELECT master_copy_shard_placement(13000022, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false); +ERROR: cannot replicate shards with foreign keys +-- lets also test that master_move_shard_placement doesn't break serials +CREATE TABLE serial_move_test (key int, other_val serial); +SET citus.shard_replication_factor TO 1; +SELECT create_distributed_table('serial_move_test', 'key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- key 15 goes to shard xxxxx +INSERT INTO serial_move_test (key) VALUES (15) RETURNING *; + key | other_val +--------------------------------------------------------------------- + 15 | 1 +(1 row) + +INSERT INTO serial_move_test (key) VALUES (15) RETURNING *; + key | other_val +--------------------------------------------------------------------- + 15 | 2 +(1 row) + +-- confirm the shard id +SELECT * FROM run_command_on_placements('serial_move_test', 'SELECT DISTINCT key FROM %s WHERE key = 15') WHERE result = '15' AND shardid = 13000034; + nodename | nodeport | shardid | success | result +--------------------------------------------------------------------- + localhost | 57637 | 13000034 | t | 15 +(1 row) + +SELECT master_move_shard_placement(13000034, 'localhost', :worker_1_port, 'localhost', :worker_2_port); + master_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +-- confirm the successfull move +SELECT * FROM run_command_on_placements('serial_move_test', 'SELECT DISTINCT key FROM %s WHERE key = 15') WHERE result = '15' AND shardid = 13000034; + nodename | nodeport | shardid | success | result +--------------------------------------------------------------------- + localhost | 57638 | 13000034 | t | 15 +(1 row) + +-- finally show that serials work fine afterwards +INSERT INTO serial_move_test (key) VALUES (15) RETURNING *; + key | other_val +--------------------------------------------------------------------- + 15 | 3 +(1 row) + +INSERT INTO serial_move_test (key) VALUES (15) RETURNING *; + key | other_val 
+--------------------------------------------------------------------- + 15 | 4 +(1 row) + +-- we should be able to move shard placements of partitioend tables +CREATE SCHEMA move_partitions; +CREATE TABLE move_partitions.events ( + id serial, + t timestamptz default now(), + payload text +) +PARTITION BY RANGE(t); +SET citus.shard_count TO 6; +SELECT create_distributed_table('move_partitions.events', 'id', colocate_with := 'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE move_partitions.events_1 PARTITION OF move_partitions.events +FOR VALUES FROM ('2015-01-01') TO ('2016-01-01'); +INSERT INTO move_partitions.events (t, payload) +SELECT '2015-01-01'::date + (interval '1 day' * s), s FROM generate_series(1, 100) s; +SELECT count(*) FROM move_partitions.events; + count +--------------------------------------------------------------------- + 100 +(1 row) + +-- try to move automatically +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port) +FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port +ORDER BY shardid LIMIT 1; + master_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM move_partitions.events; + count +--------------------------------------------------------------------- + 100 +(1 row) + +-- add a primary key to the partition +ALTER TABLE move_partitions.events_1 ADD CONSTRAINT e_1_pk PRIMARY KEY (id); +-- should be able to move automatically now +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port) +FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port +ORDER BY shardid LIMIT 1; + master_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM move_partitions.events; + count +--------------------------------------------------------------------- + 100 +(1 row) + +-- should also be able to move with block writes +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes') +FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port +ORDER BY shardid LIMIT 1; + master_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM move_partitions.events; + count +--------------------------------------------------------------------- + 100 +(1 row) + +-- should have moved all shards to node 1 (2*6 = 12) +SELECT count(*) +FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid::text LIKE 'move_partitions.events%' AND nodeport = :worker_1_port; + count +--------------------------------------------------------------------- + 12 +(1 row) + +DROP TABLE move_partitions.events; diff --git a/src/test/regress/expected/multi_move_mx.out b/src/test/regress/expected/multi_move_mx.out new file mode 100644 index 000000000..3d67c97de --- /dev/null +++ b/src/test/regress/expected/multi_move_mx.out @@ -0,0 +1,235 @@ +-- +-- MULTI_MOVE_MX +-- +ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1550000; +SELECT start_metadata_sync_to_node('localhost', 
:worker_2_port); + start_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +-- Create mx test tables +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +SET citus.replication_model TO 'streaming'; +CREATE TABLE mx_table_1 (a int); +SELECT create_distributed_table('mx_table_1', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE mx_table_2 (a int); +SELECT create_distributed_table('mx_table_2', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE mx_table_3 (a text); +SELECT create_distributed_table('mx_table_3', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Check that the first two tables are colocated +SELECT + logicalrelid, repmodel +FROM + pg_dist_partition +WHERE + logicalrelid = 'mx_table_1'::regclass + OR logicalrelid = 'mx_table_2'::regclass + OR logicalrelid = 'mx_table_3'::regclass +ORDER BY + logicalrelid; + logicalrelid | repmodel +--------------------------------------------------------------------- + mx_table_1 | s + mx_table_2 | s + mx_table_3 | s +(3 rows) + +-- Check the list of shards +SELECT + logicalrelid, shardid, nodename, nodeport +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_1'::regclass + OR logicalrelid = 'mx_table_2'::regclass + OR logicalrelid = 'mx_table_3'::regclass +ORDER BY + logicalrelid, shardid; + logicalrelid | shardid | nodename | nodeport +--------------------------------------------------------------------- + mx_table_1 | 1550000 | localhost | 57637 + mx_table_1 | 1550001 | localhost | 57638 + mx_table_1 | 1550002 | localhost | 57637 + mx_table_1 | 1550003 | localhost | 57638 + mx_table_2 | 1550004 | localhost | 57637 + mx_table_2 | 1550005 | localhost | 57638 + mx_table_2 | 1550006 | localhost | 57637 + mx_table_2 | 1550007 | localhost | 57638 + mx_table_3 | 1550008 | localhost | 57637 + mx_table_3 | 1550009 | localhost | 57638 + mx_table_3 | 1550010 | localhost | 57637 + mx_table_3 | 1550011 | localhost | 57638 +(12 rows) + +-- Check the data on the worker +\c - - - :worker_2_port +SELECT + logicalrelid, shardid, nodename, nodeport +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_1'::regclass + OR logicalrelid = 'mx_table_2'::regclass + OR logicalrelid = 'mx_table_3'::regclass +ORDER BY + logicalrelid, shardid; + logicalrelid | shardid | nodename | nodeport +--------------------------------------------------------------------- + mx_table_1 | 1550000 | localhost | 57637 + mx_table_1 | 1550001 | localhost | 57638 + mx_table_1 | 1550002 | localhost | 57637 + mx_table_1 | 1550003 | localhost | 57638 + mx_table_2 | 1550004 | localhost | 57637 + mx_table_2 | 1550005 | localhost | 57638 + mx_table_2 | 1550006 | localhost | 57637 + mx_table_2 | 1550007 | localhost | 57638 + mx_table_3 | 1550008 | localhost | 57637 + mx_table_3 | 1550009 | localhost | 57638 + mx_table_3 | 1550010 | localhost | 57637 + mx_table_3 | 1550011 | localhost | 57638 +(12 rows) + +\c - - - :master_port +-- Check that master_copy_shard_placement cannot be run with MX tables +SELECT + master_copy_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical') +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 
'mx_table_1'::regclass + AND nodeport = :worker_1_port +ORDER BY + shardid +LIMIT 1; +ERROR: the force_logical transfer mode is currently unsupported +-- Move a shard from worker 1 to worker 2 +SELECT + master_move_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port) +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_1'::regclass + AND nodeport = :worker_1_port +ORDER BY + shardid +LIMIT 1; + master_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +-- Check that the shard and its colocated shard is moved, but not the other shards +SELECT + logicalrelid, shardid, nodename, nodeport +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_1'::regclass + OR logicalrelid = 'mx_table_2'::regclass + OR logicalrelid = 'mx_table_3'::regclass +ORDER BY + logicalrelid, shardid; + logicalrelid | shardid | nodename | nodeport +--------------------------------------------------------------------- + mx_table_1 | 1550000 | localhost | 57638 + mx_table_1 | 1550001 | localhost | 57638 + mx_table_1 | 1550002 | localhost | 57637 + mx_table_1 | 1550003 | localhost | 57638 + mx_table_2 | 1550004 | localhost | 57638 + mx_table_2 | 1550005 | localhost | 57638 + mx_table_2 | 1550006 | localhost | 57637 + mx_table_2 | 1550007 | localhost | 57638 + mx_table_3 | 1550008 | localhost | 57637 + mx_table_3 | 1550009 | localhost | 57638 + mx_table_3 | 1550010 | localhost | 57637 + mx_table_3 | 1550011 | localhost | 57638 +(12 rows) + +-- Check that the changes are made in the worker as well +\c - - - :worker_2_port +SELECT + logicalrelid, shardid, nodename, nodeport +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_1'::regclass + OR logicalrelid = 'mx_table_2'::regclass + OR logicalrelid = 'mx_table_3'::regclass +ORDER BY + logicalrelid, shardid; + logicalrelid | shardid | nodename | nodeport +--------------------------------------------------------------------- + mx_table_1 | 1550000 | localhost | 57638 + mx_table_1 | 1550001 | localhost | 57638 + mx_table_1 | 1550002 | localhost | 57637 + mx_table_1 | 1550003 | localhost | 57638 + mx_table_2 | 1550004 | localhost | 57638 + mx_table_2 | 1550005 | localhost | 57638 + mx_table_2 | 1550006 | localhost | 57637 + mx_table_2 | 1550007 | localhost | 57638 + mx_table_3 | 1550008 | localhost | 57637 + mx_table_3 | 1550009 | localhost | 57638 + mx_table_3 | 1550010 | localhost | 57637 + mx_table_3 | 1550011 | localhost | 57638 +(12 rows) + +-- Check that the UDFs cannot be called from the workers +SELECT + master_copy_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false, 'force_logical') +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_1'::regclass + AND nodeport = :worker_2_port +ORDER BY + shardid +LIMIT 1 OFFSET 1; +ERROR: operation is not allowed on this node +HINT: Connect to the coordinator and run it again. +SELECT + master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'force_logical') +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_1'::regclass + AND nodeport = :worker_2_port +ORDER BY + shardid +LIMIT 1 OFFSET 1; +ERROR: operation is not allowed on this node +HINT: Connect to the coordinator and run it again. 
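The two errors above show the transfer UDFs being rejected on a worker node; for contrast, here is a minimal sketch (not part of the expected output file, and assuming the usual regression ports 57637/57638 and the mx_table_1 table created above) of issuing the same kind of move from the coordinator, where it is permitted:
\c - - - :master_port
-- pick one placement of mx_table_1 currently on worker 2 and move it to worker 1
SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port,
                                   'localhost', :worker_1_port)
FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement
WHERE logicalrelid = 'mx_table_1'::regclass AND nodeport = :worker_2_port
ORDER BY shardid
LIMIT 1;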
+-- Cleanup +\c - - - :master_port +DROP TABLE mx_table_1; +DROP TABLE mx_table_2; +DROP TABLE mx_table_3; +SELECT stop_metadata_sync_to_node('localhost', :worker_2_port); + stop_metadata_sync_to_node +--------------------------------------------------------------------- + +(1 row) + +\c - - - :worker_2_port +DELETE FROM pg_dist_node; +DELETE FROM pg_dist_partition; +DELETE FROM pg_dist_shard; +DELETE FROM pg_dist_shard_placement; +\c - - - :master_port +RESET citus.replication_model; diff --git a/src/test/regress/expected/multi_test_helpers_superuser.out b/src/test/regress/expected/multi_test_helpers_superuser.out index b631814f8..eca214309 100644 --- a/src/test/regress/expected/multi_test_helpers_superuser.out +++ b/src/test/regress/expected/multi_test_helpers_superuser.out @@ -1,3 +1,9 @@ +CREATE OR REPLACE FUNCTION master_defer_delete_shards() + RETURNS int + LANGUAGE C STRICT + AS 'citus', $$master_defer_delete_shards$$; +COMMENT ON FUNCTION master_defer_delete_shards() + IS 'remove orphaned shards'; CREATE OR REPLACE FUNCTION wait_until_metadata_sync(timeout INTEGER DEFAULT 15000) RETURNS void LANGUAGE C STRICT diff --git a/src/test/regress/expected/multi_utility_warnings.out b/src/test/regress/expected/multi_utility_warnings.out index 6a417ef96..76d5a8325 100644 --- a/src/test/regress/expected/multi_utility_warnings.out +++ b/src/test/regress/expected/multi_utility_warnings.out @@ -25,7 +25,3 @@ ERROR: cannot write to pg_dist_poolinfo DETAIL: Citus Community Edition does not support the use of pooler options. HINT: To learn more about using advanced pooling schemes with Citus, please contact us at https://citusdata.com/about/contact_us ROLLBACK; -INSERT INTO pg_dist_rebalance_strategy VALUES ('should fail', false, 'citus_shard_cost_1', 'citus_node_capacity_1', 'citus_shard_allowed_on_node_true', 0, 0); -ERROR: cannot write to pg_dist_rebalance_strategy -DETAIL: Citus Community Edition does not support the use of custom rebalance strategies. 
-HINT: To learn more about using advanced rebalancing schemes with Citus, please contact us at https://citusdata.com/about/contact_us diff --git a/src/test/regress/expected/shard_move_deferred_delete.out b/src/test/regress/expected/shard_move_deferred_delete.out new file mode 100644 index 000000000..ddb2be9d8 --- /dev/null +++ b/src/test/regress/expected/shard_move_deferred_delete.out @@ -0,0 +1,111 @@ +-- +-- SHARD_MOVE_DEFERRED_DELETE +-- +SET citus.next_shard_id TO 20000000; +SET citus.shard_count TO 6; +SET citus.shard_replication_factor TO 1; +SET citus.defer_drop_after_shard_move TO on; +CREATE SCHEMA shard_move_deferred_delete; +SET search_path TO shard_move_deferred_delete; +CREATE TABLE t1 ( id int PRIMARY KEY); +SELECT create_distributed_table('t1', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- by counting how ofter we see the specific shard on all workers we can verify is the shard is there +SELECT run_command_on_workers($cmd$ + SELECT count(*) FROM pg_class WHERE relname = 't1_20000000'; +$cmd$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,1) + (localhost,57638,t,0) +(2 rows) + +-- move shard +SELECT master_move_shard_placement(20000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port); + master_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +-- we expect the shard to be on both workers now +SELECT run_command_on_workers($cmd$ + SELECT count(*) FROM pg_class WHERE relname = 't1_20000000'; +$cmd$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,1) + (localhost,57638,t,1) +(2 rows) + +-- execute delayed removal +SELECT public.master_defer_delete_shards(); + master_defer_delete_shards +--------------------------------------------------------------------- + 1 +(1 row) + +-- we expect the shard to be on only the second worker +SELECT run_command_on_workers($cmd$ + SELECT count(*) FROM pg_class WHERE relname = 't1_20000000'; +$cmd$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,0) + (localhost,57638,t,1) +(2 rows) + +SELECT master_move_shard_placement(20000000, 'localhost', :worker_2_port, 'localhost', :worker_1_port); + master_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +-- we expect the shard to be on both workers now +SELECT run_command_on_workers($cmd$ + SELECT count(*) FROM pg_class WHERE relname = 't1_20000000'; +$cmd$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,1) + (localhost,57638,t,1) +(2 rows) + +-- enable auto delete +ALTER SYSTEM SET citus.defer_shard_delete_interval TO 10; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +-- Sleep 1 second to give Valgrind enough time to clear transactions +SELECT pg_sleep(1); + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +-- we expect the shard to be on only the first worker +SELECT run_command_on_workers($cmd$ + SELECT count(*) FROM pg_class WHERE relname = 't1_20000000'; +$cmd$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,1) + (localhost,57638,t,0) 
+(2 rows) + +-- reset test suite +ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +DROP SCHEMA shard_move_deferred_delete CASCADE; +NOTICE: drop cascades to table t1 diff --git a/src/test/regress/expected/shard_rebalancer.out b/src/test/regress/expected/shard_rebalancer.out new file mode 100644 index 000000000..b86713993 --- /dev/null +++ b/src/test/regress/expected/shard_rebalancer.out @@ -0,0 +1,2116 @@ +-- +-- MUTLI_SHARD_REBALANCER +-- +CREATE TABLE dist_table_test(a int primary key); +SELECT create_distributed_table('dist_table_test', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE ref_table_test(a int primary key); +SELECT create_reference_table('ref_table_test'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +-- make sure that all rebalance operations works fine when +-- reference tables are replicated to the coordinator +SELECT 1 FROM master_add_node('localhost', :master_port, groupId=>0); +NOTICE: Replicating reference table "ref_table_test" to the node localhost:xxxxx + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +-- should just be noops even if we add the coordinator to the pg_dist_node +SELECT rebalance_table_shards('dist_table_test'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT rebalance_table_shards(); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +-- test that calling rebalance_table_shards without specifying relation +-- wouldn't move shard of the citus local table. 
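A citus local table keeps its single shard on the coordinator (group 0), which is why the rebalancer calls below are expected to leave it untouched. As an illustrative sketch only (not part of the regression output), the placement of that shard can be read from the metadata catalogs once the table below exists:

-- where does the citus local table's shard live? (the coordinator has groupid 0)
SELECT s.shardid, n.nodename, n.nodeport, n.groupid
FROM pg_dist_shard s
JOIN pg_dist_placement p USING (shardid)
JOIN pg_dist_node n ON n.groupid = p.groupid
WHERE s.logicalrelid = 'citus_local_table'::regclass;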
+CREATE TABLE citus_local_table(a int, b int); +SELECT create_citus_local_table('citus_local_table'); + create_citus_local_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO citus_local_table VALUES (1, 2); +SELECT rebalance_table_shards(); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +-- show that citus local table shard is still on the coordinator +SELECT tablename FROM pg_catalog.pg_tables where tablename like 'citus_local_table_%'; + tablename +--------------------------------------------------------------------- + citus_local_table_102047 +(1 row) + +-- also check that we still can access shard relation, not the shell table +SELECT count(*) FROM citus_local_table; + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT master_drain_node('localhost', :master_port); + master_drain_node +--------------------------------------------------------------------- + +(1 row) + +-- show that citus local table shard is still on the coordinator +SELECT tablename FROM pg_catalog.pg_tables where tablename like 'citus_local_table_%'; + tablename +--------------------------------------------------------------------- + citus_local_table_102047 +(1 row) + +-- also check that we still can access shard relation, not the shell table +SELECT count(*) FROM citus_local_table; + count +--------------------------------------------------------------------- + 1 +(1 row) + +-- show that we do not create a shard rebalancing plan for citus local table +SELECT get_rebalance_table_shards_plan(); + get_rebalance_table_shards_plan +--------------------------------------------------------------------- +(0 rows) + +DROP TABLE citus_local_table; +CREATE TABLE dist_table_test_2(a int); +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +SET citus.replication_model TO "statement"; +SELECT create_distributed_table('dist_table_test_2', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- replicate reference table should ignore the coordinator +SET citus.shard_replication_factor TO 2; +SELECT replicate_table_shards('dist_table_test_2', max_shard_copies := 4, shard_transfer_mode:='block_writes'); +NOTICE: Copying shard xxxxx from localhost:xxxxx to localhost:xxxxx ... +NOTICE: Copying shard xxxxx from localhost:xxxxx to localhost:xxxxx ... +NOTICE: Copying shard xxxxx from localhost:xxxxx to localhost:xxxxx ... +NOTICE: Copying shard xxxxx from localhost:xxxxx to localhost:xxxxx ... + replicate_table_shards +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE dist_table_test, dist_table_test_2, ref_table_test; +RESET citus.shard_count; +RESET citus.shard_replication_factor; +RESET citus.replication_model; +-- Create a user to test multiuser usage of rebalancer functions +CREATE USER testrole; +NOTICE: not propagating CREATE ROLE/USER commands to worker nodes +HINT: Connect to worker nodes directly to manually create all necessary users and roles. 
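CREATE ROLE/USER is not propagated to the workers here, so the GRANT that follows fails on them; the test relies on that, since testrole is later used under SET ROLE to check the behaviour when the role cannot connect to the workers. Purely as a sketch of what the HINT suggests (the test deliberately does not do this), the role could be created on the workers through the existing helper:

SELECT run_command_on_workers($cmd$ CREATE USER testrole; $cmd$);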
+GRANT ALL ON SCHEMA public TO testrole; +ERROR: role "testrole" does not exist +CONTEXT: while executing command on localhost:xxxxx +CREATE OR REPLACE FUNCTION shard_placement_rebalance_array( + worker_node_list json[], + shard_placement_list json[], + threshold float4 DEFAULT 0, + max_shard_moves int DEFAULT 1000000, + drain_only bool DEFAULT false +) +RETURNS json[] +AS 'citus' +LANGUAGE C STRICT VOLATILE; +CREATE FUNCTION shard_placement_replication_array(worker_node_list json[], + shard_placement_list json[], + shard_replication_factor int) +RETURNS json[] +AS 'citus' +LANGUAGE C STRICT VOLATILE; +CREATE FUNCTION worker_node_responsive(worker_node_name text, worker_node_port int) +RETURNS boolean +AS 'citus' +LANGUAGE C STRICT VOLATILE; +SET citus.next_shard_id TO 123000; +SELECT worker_node_responsive(node_name, node_port::int) + FROM master_get_active_worker_nodes() + ORDER BY node_name, node_port ASC; + worker_node_responsive +--------------------------------------------------------------------- + t + t +(2 rows) + +-- Check that worker_node_responsive returns false for dead nodes +-- Note that PostgreSQL tries all possible resolutions of localhost on failing +-- connections. This causes different error details to be printed on different +-- environments. Therefore, we first set verbosity to terse. +\set VERBOSITY terse +SELECT worker_node_responsive('localhost', 1); + worker_node_responsive +--------------------------------------------------------------------- + f +(1 row) + +\set VERBOSITY default +-- Check that with threshold=0.0 shard_placement_rebalance_array returns enough +-- moves to make the cluster completely balanced. +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}']::json[], + 0.0 +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} + {"updatetype":1,"shardid":2,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} +(2 rows) + +-- Check that with two nodes and threshold=1.0 shard_placement_rebalance_array +-- doesn't return any moves, even if it is completely unbalanced. 
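The threshold can be read as the allowed relative deviation from the average node utilization: with three placements spread over two nodes the average is 1.5, and a threshold of 1.0 tolerates up to twice that, i.e. three placements on a single node, so no moves are produced. A trivial arithmetic sketch of that bound (illustrative only):

SELECT 3 / 2.0             AS average_utilization,
       3 / 2.0 * (1 + 1.0) AS allowed_upper_bound; -- a node holding all 3 placements stays within the bound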
+SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}']::json[], + 1.0 +)); + unnest +--------------------------------------------------------------------- +(0 rows) + +-- Check that with three nodes and threshold=1.0 +-- shard_placement_rebalance_array returns moves when it is completely unbalanced +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}']::json[], + 1.0 +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} +(1 row) + +-- Check that with with three nodes and threshold=2.0 +-- shard_placement_rebalance_array doesn't return any moves, even if it is +-- completely unbalanced. (with three nodes) +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}']::json[], + 2.0 +)); + unnest +--------------------------------------------------------------------- +(0 rows) + +-- Check that with threshold=0.0 shard_placement_rebalance_array doesn't return +-- any moves if the cluster is already balanced. 
+SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}']::json[], + 0.0 +)); + unnest +--------------------------------------------------------------------- +(0 rows) + +-- Check that shard_placement_replication_array returns a shard copy operation +-- for each of the shards in an inactive node. +SELECT unnest(shard_placement_replication_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432}', + '{"placementid":4, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432}']::json[], + 2 +)); + unnest +--------------------------------------------------------------------- + {"updatetype":2,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} + {"updatetype":2,"shardid":2,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname1","targetport":5432} +(2 rows) + +-- Check that shard_placement_replication_array returns a shard copy operation +-- for each of the inactive shards. +SELECT unnest(shard_placement_replication_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":3, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":1, "shardstate":3, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}']::json[], + 2 +)); + unnest +--------------------------------------------------------------------- + {"updatetype":2,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} + {"updatetype":2,"shardid":2,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname1","targetport":5432} +(2 rows) + +-- Check that shard_placement_replication_array errors out if all placements of +-- a shard are placed on inactive nodes. 
+SELECT unnest(shard_placement_replication_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":2, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432}']::json[], + 2 +)); +ERROR: could not find a source for shard xxxxx +-- Check that shard_placement_replication_array errors out if replication factor +-- is more than number of active nodes. +SELECT unnest(shard_placement_replication_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}']::json[], + 2 +)); +ERROR: could not find a target for shard xxxxx +-- Ensure that shard_replication_factor is 2 during replicate_table_shards +-- and rebalance_table_shards tests +SET citus.shard_replication_factor TO 2; +-- Turn off NOTICE messages +SET client_min_messages TO WARNING; +-- Create a single-row test data for shard rebalancer test shards +CREATE TABLE shard_rebalancer_test_data AS SELECT 1::int as int_column; +-- Test replicate_table_shards, which will in turn test update_shard_placement +-- in copy mode. +CREATE TABLE replication_test_table(int_column int); +SELECT master_create_distributed_table('replication_test_table', 'int_column', 'append'); + master_create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE VIEW replication_test_table_placements_per_node AS + SELECT count(*) FROM pg_dist_shard_placement NATURAL JOIN pg_dist_shard + WHERE logicalrelid = 'replication_test_table'::regclass + GROUP BY nodename, nodeport + ORDER BY nodename, nodeport; +-- Create four shards with replication factor 2, and delete the placements +-- with smaller port number to simulate under-replicated shards. 
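Once those placements are deleted, each affected shard is left with fewer copies than the desired factor of 2. A small catalog query in the same style as the views used in this file can list such under-replicated shards (a sketch, not part of the test itself):

-- shards of replication_test_table with fewer than 2 placements
SELECT shardid, count(*) AS placement_count
FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement
WHERE logicalrelid = 'replication_test_table'::regclass
GROUP BY shardid
HAVING count(*) < 2;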
+SELECT count(master_create_empty_shard('replication_test_table')) + FROM generate_series(1, 4); + count +--------------------------------------------------------------------- + 4 +(1 row) + +DELETE FROM pg_dist_shard_placement WHERE placementid in ( + SELECT pg_dist_shard_placement.placementid + FROM pg_dist_shard_placement NATURAL JOIN pg_dist_shard + WHERE logicalrelid = 'replication_test_table'::regclass + AND (nodename, nodeport) = (SELECT nodename, nodeport FROM pg_dist_shard_placement + ORDER BY nodename, nodeport limit 1) +); +-- Upload the test data to the shards +SELECT count(master_append_table_to_shard(shardid, 'shard_rebalancer_test_data', + host(inet_server_addr()), inet_server_port())) + FROM pg_dist_shard + WHERE logicalrelid = 'replication_test_table'::regclass; + count +--------------------------------------------------------------------- + 4 +(1 row) + +-- Verify that there is one node with all placements +SELECT * FROM replication_test_table_placements_per_node; + count +--------------------------------------------------------------------- + 4 +(1 row) + +-- Check excluded_shard_list by excluding three shards with smaller ids +SELECT replicate_table_shards('replication_test_table', + excluded_shard_list := excluded_shard_list, + shard_transfer_mode:='block_writes') + FROM ( + SELECT (array_agg(DISTINCT shardid ORDER BY shardid))[1:3] AS excluded_shard_list + FROM pg_dist_shard + WHERE logicalrelid = 'replication_test_table'::regclass + ) T; + replicate_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM replication_test_table_placements_per_node; + count +--------------------------------------------------------------------- + 1 + 4 +(2 rows) + +-- Check that with shard_replication_factor=1 we don't do any copies +SELECT replicate_table_shards('replication_test_table', + shard_replication_factor := 1, + shard_transfer_mode:='block_writes'); + replicate_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM replication_test_table_placements_per_node; + count +--------------------------------------------------------------------- + 1 + 4 +(2 rows) + +-- Check that max_shard_copies limits number of copy operations +SELECT replicate_table_shards('replication_test_table', + max_shard_copies := 2, + shard_transfer_mode:='block_writes'); + replicate_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM replication_test_table_placements_per_node; + count +--------------------------------------------------------------------- + 3 + 4 +(2 rows) + +-- Replicate the remaining under-replicated shards +SELECT replicate_table_shards('replication_test_table'); + replicate_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM replication_test_table_placements_per_node; + count +--------------------------------------------------------------------- + 4 + 4 +(2 rows) + +-- Check that querying the table doesn't error out +SELECT count(*) FROM replication_test_table; + count +--------------------------------------------------------------------- + 4 +(1 row) + +DROP TABLE public.replication_test_table CASCADE; +-- Test rebalance_table_shards, which will in turn test update_shard_placement +-- in move mode. 
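Move mode is driven by the same planner whose output is inspectable up front, so the pending moves can be previewed before they are executed. A minimal sketch of that preview-then-execute pattern, assuming the rebalance_test_table created just below:

SELECT * FROM get_rebalance_table_shards_plan('rebalance_test_table', threshold := 0);
SELECT rebalance_table_shards('rebalance_test_table', threshold := 0,
                              shard_transfer_mode := 'block_writes');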
+CREATE TABLE rebalance_test_table(int_column int); +SELECT master_create_distributed_table('rebalance_test_table', 'int_column', 'append'); + master_create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE VIEW table_placements_per_node AS +SELECT nodeport, logicalrelid::regclass, count(*) +FROM pg_dist_shard_placement NATURAL JOIN pg_dist_shard +GROUP BY logicalrelid::regclass, nodename, nodeport +ORDER BY logicalrelid::regclass, nodename, nodeport; +-- Create six shards with replication factor 1 and move them to the same +-- node to create an unbalanced cluster. +CREATE PROCEDURE create_unbalanced_shards(rel text) +LANGUAGE SQL +AS $$ + SET citus.shard_replication_factor TO 1; + + SELECT count(master_create_empty_shard(rel)) + FROM generate_series(1, 6); + + SELECT count(master_move_shard_placement(shardid, + src.nodename, src.nodeport::int, + dst.nodename, dst.nodeport::int, + shard_transfer_mode:='block_writes')) + FROM pg_dist_shard s JOIN + pg_dist_shard_placement src USING (shardid), + (SELECT nodename, nodeport FROM pg_dist_shard_placement ORDER BY nodeport DESC LIMIT 1) dst + WHERE src.nodeport < dst.nodeport AND s.logicalrelid = rel::regclass; +$$; +CALL create_unbalanced_shards('rebalance_test_table'); +SET citus.shard_replication_factor TO 2; +-- Upload the test data to the shards +SELECT count(master_append_table_to_shard(shardid, 'shard_rebalancer_test_data', + host(inet_server_addr()), inet_server_port())) +FROM pg_dist_shard +WHERE logicalrelid = 'rebalance_test_table'::regclass; + count +--------------------------------------------------------------------- + 6 +(1 row) + +-- Verify that there is one node with all placements +SELECT * FROM table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57638 | rebalance_test_table | 6 +(1 row) + +-- Check excluded_shard_list by excluding four shards with smaller ids +SELECT rebalance_table_shards('rebalance_test_table', + excluded_shard_list := excluded_shard_list, + threshold := 0, + shard_transfer_mode:='block_writes') +FROM ( + SELECT (array_agg(DISTINCT shardid ORDER BY shardid))[1:4] AS excluded_shard_list + FROM pg_dist_shard + WHERE logicalrelid = 'rebalance_test_table'::regclass +) T; + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | rebalance_test_table | 1 + 57638 | rebalance_test_table | 5 +(2 rows) + +-- Check that max_shard_moves limits number of move operations +-- First check that we error if not table owner +SET ROLE testrole; +SELECT rebalance_table_shards('rebalance_test_table', + threshold := 0, max_shard_moves := 1, + shard_transfer_mode:='block_writes'); +WARNING: localhost:xxxxx is not responsive + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +RESET ROLE; +SELECT rebalance_table_shards('rebalance_test_table', + threshold := 0, max_shard_moves := 1, + shard_transfer_mode:='block_writes'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | rebalance_test_table | 2 + 57638 | 
rebalance_test_table | 4 +(2 rows) + +-- Check that threshold=1 doesn't move any shards +SELECT rebalance_table_shards('rebalance_test_table', threshold := 1, shard_transfer_mode:='block_writes'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | rebalance_test_table | 2 + 57638 | rebalance_test_table | 4 +(2 rows) + +-- Move the remaining shards using threshold=0 +SELECT rebalance_table_shards('rebalance_test_table', threshold := 0); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | rebalance_test_table | 3 + 57638 | rebalance_test_table | 3 +(2 rows) + +-- Check that shard is completely balanced and rebalancing again doesn't have +-- any effects. +SELECT rebalance_table_shards('rebalance_test_table', threshold := 0, shard_transfer_mode:='block_writes'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | rebalance_test_table | 3 + 57638 | rebalance_test_table | 3 +(2 rows) + +-- Check that querying the table doesn't error out +SELECT count(*) FROM rebalance_test_table; + count +--------------------------------------------------------------------- + 6 +(1 row) + +DROP TABLE rebalance_test_table; +-- Test schema support +CREATE SCHEMA test_schema_support; +SELECT COUNT(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 0 +(1 row) + +CREATE TABLE test_schema_support.nation_hash ( + n_nationkey integer not null, + n_name char(25) not null, + n_regionkey integer not null, + n_comment varchar(152) +); +SELECT master_create_distributed_table('test_schema_support.nation_hash', 'n_nationkey', 'hash'); + master_create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT master_create_worker_shards('test_schema_support.nation_hash', 4, 1); + master_create_worker_shards +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE test_schema_support.nation_hash2 ( + n_nationkey integer not null, + n_name char(25) not null, + n_regionkey integer not null, + n_comment varchar(152) +); +SELECT master_create_distributed_table('test_schema_support.nation_hash2', 'n_nationkey', 'hash'); + master_create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT master_create_worker_shards('test_schema_support.nation_hash2', 4, 1); + master_create_worker_shards +--------------------------------------------------------------------- + +(1 row) + +-- Shard count before replication +SELECT COUNT(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 8 +(1 row) + +SET search_path TO public; +SELECT replicate_table_shards('test_schema_support.nation_hash', shard_transfer_mode:='block_writes'); + replicate_table_shards +--------------------------------------------------------------------- + +(1 row) + +-- Confirm 
replication +SELECT COUNT(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 12 +(1 row) + +-- Test with search_path is set +SET search_path TO test_schema_support; +SELECT replicate_table_shards('nation_hash2', shard_transfer_mode:='block_writes'); + replicate_table_shards +--------------------------------------------------------------------- + +(1 row) + +-- Confirm replication +SELECT COUNT(*) FROM pg_dist_shard_placement; + count +--------------------------------------------------------------------- + 16 +(1 row) + +DROP TABLE test_schema_support.nation_hash; +DROP TABLE test_schema_support.nation_hash2; +-- Test rebalancer with schema +-- Next few operations is to create imbalanced distributed table +CREATE TABLE test_schema_support.imbalanced_table_local ( + id integer not null +); +INSERT INTO test_schema_support.imbalanced_table_local VALUES(1); +INSERT INTO test_schema_support.imbalanced_table_local VALUES(2); +INSERT INTO test_schema_support.imbalanced_table_local VALUES(3); +INSERT INTO test_schema_support.imbalanced_table_local VALUES(4); +CREATE TABLE test_schema_support.imbalanced_table ( + id integer not null +); +SELECT master_create_distributed_table('test_schema_support.imbalanced_table', 'id', 'append'); + master_create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SET citus.shard_replication_factor TO 1; +SELECT * from master_create_empty_shard('test_schema_support.imbalanced_table'); + master_create_empty_shard +--------------------------------------------------------------------- + 123018 +(1 row) + +SELECT master_append_table_to_shard(123018, 'test_schema_support.imbalanced_table_local', 'localhost', :master_port); + master_append_table_to_shard +--------------------------------------------------------------------- + 0.00533333 +(1 row) + +SET citus.shard_replication_factor TO 2; +SELECT * from master_create_empty_shard('test_schema_support.imbalanced_table'); + master_create_empty_shard +--------------------------------------------------------------------- + 123019 +(1 row) + +SELECT master_append_table_to_shard(123019, 'test_schema_support.imbalanced_table_local', 'localhost', :master_port); + master_append_table_to_shard +--------------------------------------------------------------------- + 0.00533333 +(1 row) + +SET citus.shard_replication_factor TO 1; +SELECT * from master_create_empty_shard('test_schema_support.imbalanced_table'); + master_create_empty_shard +--------------------------------------------------------------------- + 123020 +(1 row) + +SELECT master_append_table_to_shard(123020, 'test_schema_support.imbalanced_table_local', 'localhost', :master_port); + master_append_table_to_shard +--------------------------------------------------------------------- + 0.00533333 +(1 row) + +-- imbalanced_table is now imbalanced +-- Shard counts in each node before rebalance +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | imbalanced_table | 1 + 57638 | imbalanced_table | 3 +(2 rows) + +-- Row count in imbalanced table before rebalance +SELECT COUNT(*) FROM imbalanced_table; + count +--------------------------------------------------------------------- + 12 +(1 row) + +-- Try force_logical +SELECT rebalance_table_shards('imbalanced_table', threshold:=0, shard_transfer_mode:='force_logical'); +ERROR: the force_logical transfer mode 
is currently unsupported +-- Test rebalance operation +SELECT rebalance_table_shards('imbalanced_table', threshold:=0, shard_transfer_mode:='block_writes'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +-- Confirm rebalance +-- Shard counts in each node after rebalance +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | imbalanced_table | 2 + 57638 | imbalanced_table | 2 +(2 rows) + +-- Row count in imbalanced table after rebalance +SELECT COUNT(*) FROM imbalanced_table; + count +--------------------------------------------------------------------- + 12 +(1 row) + +DROP TABLE public.shard_rebalancer_test_data; +DROP TABLE test_schema_support.imbalanced_table; +DROP TABLE test_schema_support.imbalanced_table_local; +SET citus.shard_replication_factor TO 1; +CREATE TABLE colocated_rebalance_test(id integer); +CREATE TABLE colocated_rebalance_test2(id integer); +SELECT create_distributed_table('colocated_rebalance_test', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Move all shards to worker1 +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes') +FROM pg_dist_shard_placement +WHERE nodeport = :worker_2_port; + master_move_shard_placement +--------------------------------------------------------------------- + + +(2 rows) + +SELECT create_distributed_table('colocated_rebalance_test2', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- Confirm all shards for both tables are on worker1 +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | colocated_rebalance_test | 4 + 57637 | colocated_rebalance_test2 | 4 +(2 rows) + +-- Confirm that the plan for drain_only doesn't show any moves +SELECT * FROM get_rebalance_table_shards_plan('colocated_rebalance_test', threshold := 0, drain_only := true); + table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport +--------------------------------------------------------------------- +(0 rows) + +-- Running with drain_only shouldn't do anything +SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes', drain_only := true); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +-- Confirm that nothing changed +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | colocated_rebalance_test | 4 + 57637 | colocated_rebalance_test2 | 4 +(2 rows) + +-- Confirm that the plan shows 2 shards of both tables moving back to worker2 +SELECT * FROM get_rebalance_table_shards_plan('colocated_rebalance_test', threshold := 0); + table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport +--------------------------------------------------------------------- + colocated_rebalance_test | 123021 | 0 | localhost | 57637 | localhost | 57638 + colocated_rebalance_test2 | 123025 | 0 | localhost | 57637 | localhost | 57638 + colocated_rebalance_test | 123022 | 0 | localhost | 57637 | localhost | 57638 + 
colocated_rebalance_test2 | 123026 | 0 | localhost | 57637 | localhost | 57638 +(4 rows) + +-- Confirm that this also happens when using rebalancing by disk size even if the tables are empty +SELECT * FROM get_rebalance_table_shards_plan('colocated_rebalance_test', rebalance_strategy := 'by_disk_size'); + table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport +--------------------------------------------------------------------- + colocated_rebalance_test | 123021 | 0 | localhost | 57637 | localhost | 57638 + colocated_rebalance_test2 | 123025 | 0 | localhost | 57637 | localhost | 57638 + colocated_rebalance_test | 123022 | 0 | localhost | 57637 | localhost | 57638 + colocated_rebalance_test2 | 123026 | 0 | localhost | 57637 | localhost | 57638 +(4 rows) + +-- Check that we can call this function +SELECT * FROM get_rebalance_progress(); + sessionid | table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport | progress +--------------------------------------------------------------------- +(0 rows) + +-- Actually do the rebalance +SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +-- Check that we can call this function without a crash +SELECT * FROM get_rebalance_progress(); + sessionid | table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport | progress +--------------------------------------------------------------------- +(0 rows) + +-- Confirm that the nodes are now there +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | colocated_rebalance_test | 2 + 57638 | colocated_rebalance_test | 2 + 57637 | colocated_rebalance_test2 | 2 + 57638 | colocated_rebalance_test2 | 2 +(4 rows) + +CREATE TABLE non_colocated_rebalance_test(id integer); +SELECT create_distributed_table('non_colocated_rebalance_test', 'id', colocate_with := 'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- confirm that both colocation groups are balanced +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | colocated_rebalance_test | 2 + 57638 | colocated_rebalance_test | 2 + 57637 | colocated_rebalance_test2 | 2 + 57638 | colocated_rebalance_test2 | 2 + 57637 | non_colocated_rebalance_test | 2 + 57638 | non_colocated_rebalance_test | 2 +(6 rows) + +-- testing behaviour when setting isdatanode to 'marked for draining' +SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', false); + master_set_node_property +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM get_rebalance_table_shards_plan('colocated_rebalance_test', threshold := 0); + table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport +--------------------------------------------------------------------- + colocated_rebalance_test | 123021 | 0 | localhost | 57638 | localhost | 57637 + colocated_rebalance_test2 | 123025 | 0 | localhost | 57638 | localhost | 57637 + colocated_rebalance_test | 123022 | 0 | localhost | 57638 | localhost | 57637 + colocated_rebalance_test2 | 123026 | 0 | localhost | 57638 | 
localhost | 57637 +(4 rows) + +SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | colocated_rebalance_test | 4 + 57637 | colocated_rebalance_test2 | 4 + 57637 | non_colocated_rebalance_test | 2 + 57638 | non_colocated_rebalance_test | 2 +(4 rows) + +SELECT * FROM get_rebalance_table_shards_plan('non_colocated_rebalance_test', threshold := 0); + table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport +--------------------------------------------------------------------- + non_colocated_rebalance_test | 123030 | 0 | localhost | 57638 | localhost | 57637 + non_colocated_rebalance_test | 123032 | 0 | localhost | 57638 | localhost | 57637 +(2 rows) + +SELECT * FROM rebalance_table_shards('non_colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | colocated_rebalance_test | 4 + 57637 | colocated_rebalance_test2 | 4 + 57637 | non_colocated_rebalance_test | 4 +(3 rows) + +-- Put shards back +SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', true); + master_set_node_property +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | colocated_rebalance_test | 2 + 57638 | colocated_rebalance_test | 2 + 57637 | colocated_rebalance_test2 | 2 + 57638 | colocated_rebalance_test2 | 2 + 57637 | non_colocated_rebalance_test | 4 +(5 rows) + +SELECT * FROM rebalance_table_shards('non_colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | colocated_rebalance_test | 2 + 57638 | colocated_rebalance_test | 2 + 57637 | colocated_rebalance_test2 | 2 + 57638 | colocated_rebalance_test2 | 2 + 57637 | non_colocated_rebalance_test | 2 + 57638 | non_colocated_rebalance_test | 2 +(6 rows) + +-- testing behaviour when setting shouldhaveshards to false and rebalancing all +-- colocation groups with drain_only=true +SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', false); + master_set_node_property +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM get_rebalance_table_shards_plan(threshold := 0, drain_only := true); + table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport 
+--------------------------------------------------------------------- + colocated_rebalance_test | 123021 | 0 | localhost | 57638 | localhost | 57637 + colocated_rebalance_test2 | 123025 | 0 | localhost | 57638 | localhost | 57637 + colocated_rebalance_test | 123022 | 0 | localhost | 57638 | localhost | 57637 + colocated_rebalance_test2 | 123026 | 0 | localhost | 57638 | localhost | 57637 + non_colocated_rebalance_test | 123029 | 0 | localhost | 57638 | localhost | 57637 + non_colocated_rebalance_test | 123030 | 0 | localhost | 57638 | localhost | 57637 +(6 rows) + +SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'block_writes', drain_only := true); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | colocated_rebalance_test | 4 + 57637 | colocated_rebalance_test2 | 4 + 57637 | non_colocated_rebalance_test | 4 +(3 rows) + +-- Put shards back +SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', true); + master_set_node_property +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'block_writes'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | colocated_rebalance_test | 2 + 57638 | colocated_rebalance_test | 2 + 57637 | colocated_rebalance_test2 | 2 + 57638 | colocated_rebalance_test2 | 2 + 57637 | non_colocated_rebalance_test | 2 + 57638 | non_colocated_rebalance_test | 2 +(6 rows) + +-- testing behaviour when setting shouldhaveshards to false and rebalancing all +-- colocation groups with drain_only=false +SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', false); + master_set_node_property +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM get_rebalance_table_shards_plan(threshold := 0); + table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport +--------------------------------------------------------------------- + colocated_rebalance_test | 123021 | 0 | localhost | 57638 | localhost | 57637 + colocated_rebalance_test2 | 123025 | 0 | localhost | 57638 | localhost | 57637 + colocated_rebalance_test | 123022 | 0 | localhost | 57638 | localhost | 57637 + colocated_rebalance_test2 | 123026 | 0 | localhost | 57638 | localhost | 57637 + non_colocated_rebalance_test | 123029 | 0 | localhost | 57638 | localhost | 57637 + non_colocated_rebalance_test | 123030 | 0 | localhost | 57638 | localhost | 57637 +(6 rows) + +SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'block_writes'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | colocated_rebalance_test | 4 + 57637 | colocated_rebalance_test2 | 4 + 57637 | non_colocated_rebalance_test | 4 +(3 rows) + +-- Put shards back +SELECT * from master_set_node_property('localhost', 
:worker_2_port, 'shouldhaveshards', true); + master_set_node_property +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'block_writes'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | colocated_rebalance_test | 2 + 57638 | colocated_rebalance_test | 2 + 57637 | colocated_rebalance_test2 | 2 + 57638 | colocated_rebalance_test2 | 2 + 57637 | non_colocated_rebalance_test | 2 + 57638 | non_colocated_rebalance_test | 2 +(6 rows) + +-- Make it a data node again +SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', true); + master_set_node_property +--------------------------------------------------------------------- + +(1 row) + +-- testing behaviour of master_drain_node +SELECT * from master_drain_node('localhost', :worker_2_port, shard_transfer_mode := 'block_writes'); + master_drain_node +--------------------------------------------------------------------- + +(1 row) + +select shouldhaveshards from pg_dist_node where nodeport = :worker_2_port; + shouldhaveshards +--------------------------------------------------------------------- + f +(1 row) + +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | colocated_rebalance_test | 4 + 57637 | colocated_rebalance_test2 | 4 + 57637 | non_colocated_rebalance_test | 4 +(3 rows) + +-- Put shards back +SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', true); + master_set_node_property +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'block_writes'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | colocated_rebalance_test | 2 + 57638 | colocated_rebalance_test | 2 + 57637 | colocated_rebalance_test2 | 2 + 57638 | colocated_rebalance_test2 | 2 + 57637 | non_colocated_rebalance_test | 2 + 57638 | non_colocated_rebalance_test | 2 +(6 rows) + +-- Drop some tables for clear consistent error +DROP TABLE test_schema_support.colocated_rebalance_test2; +-- Leave no trace on workers +RESET search_path; +\set VERBOSITY terse +DROP SCHEMA test_schema_support CASCADE; +\set VERBOSITY default +REVOKE ALL ON SCHEMA public FROM testrole; +ERROR: role "testrole" does not exist +CONTEXT: while executing command on localhost:xxxxx +DROP USER testrole; +-- Test costs +set citus.shard_count = 4; +CREATE TABLE tab (x int); +SELECT create_distributed_table('tab','x'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- The following numbers are chosen such that they are placed on different +-- shards. 
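The distribution values 1, 2, 3 and 6 hash into four distinct shards of tab. Which shard a given value lands on can be checked directly (a sketch using the table created above):

SELECT get_shard_id_for_distribution_column('tab', 1);
SELECT get_shard_id_for_distribution_column('tab', 6);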
+INSERT INTO tab SELECT 1 from generate_series(1, 30000); +INSERT INTO tab SELECT 2 from generate_series(1, 10000); +INSERT INTO tab SELECT 3 from generate_series(1, 10000); +INSERT INTO tab SELECT 6 from generate_series(1, 10000); +ANALYZE tab; +\c - - - :worker_1_port +SELECT table_schema, table_name, row_estimate, total_bytes + FROM ( + SELECT *, total_bytes-index_bytes-COALESCE(toast_bytes,0) AS table_bytes FROM ( + SELECT c.oid,nspname AS table_schema, relname AS TABLE_NAME + , c.reltuples AS row_estimate + , pg_total_relation_size(c.oid) AS total_bytes + , pg_indexes_size(c.oid) AS index_bytes + , pg_total_relation_size(reltoastrelid) AS toast_bytes + FROM pg_class c + LEFT JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE relkind = 'r' + ) a +WHERE table_schema = 'public' +) a ORDER BY table_name; + table_schema | table_name | row_estimate | total_bytes +--------------------------------------------------------------------- + public | tab_123033 | 30000 | 1114112 + public | tab_123035 | 10000 | 393216 +(2 rows) + +\c - - - :worker_2_port +SELECT table_schema, table_name, row_estimate, total_bytes + FROM ( + SELECT *, total_bytes-index_bytes-COALESCE(toast_bytes,0) AS table_bytes FROM ( + SELECT c.oid,nspname AS table_schema, relname AS TABLE_NAME + , c.reltuples AS row_estimate + , pg_total_relation_size(c.oid) AS total_bytes + , pg_indexes_size(c.oid) AS index_bytes + , pg_total_relation_size(reltoastrelid) AS toast_bytes + FROM pg_class c + LEFT JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE relkind = 'r' + ) a +WHERE table_schema = 'public' +) a ORDER BY table_name; + table_schema | table_name | row_estimate | total_bytes +--------------------------------------------------------------------- + public | tab_123034 | 10000 | 393216 + public | tab_123036 | 10000 | 393216 +(2 rows) + +\c - - - :master_port +SELECT * FROM get_rebalance_table_shards_plan('tab'); + table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport +--------------------------------------------------------------------- +(0 rows) + +SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'by_disk_size'); + table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport +--------------------------------------------------------------------- + tab | 123035 | 0 | localhost | 57637 | localhost | 57638 +(1 row) + +SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'by_disk_size', threshold := 0); +WARNING: the given threshold is lower than the minimum threshold allowed by the rebalance strategy, using the minimum allowed threshold instead +DETAIL: Using threshold of 0.01 + table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport +--------------------------------------------------------------------- + tab | 123035 | 0 | localhost | 57637 | localhost | 57638 +(1 row) + +SELECT * FROM rebalance_table_shards('tab', shard_transfer_mode:='block_writes'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | tab | 2 + 57638 | tab | 2 +(2 rows) + +SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'by_disk_size', shard_transfer_mode:='block_writes'); +NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... 
+ rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | tab | 1 + 57638 | tab | 3 +(2 rows) + +SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'by_disk_size', shard_transfer_mode:='block_writes', threshold := 0); +WARNING: the given threshold is lower than the minimum threshold allowed by the rebalance strategy, using the minimum allowed threshold instead +DETAIL: Using threshold of 0.01 + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | tab | 1 + 57638 | tab | 3 +(2 rows) + +-- Check that sizes of colocated tables are added together for rebalances +set citus.shard_count = 4; +SET citus.next_shard_id TO 123050; +CREATE TABLE tab2 (x int); +SELECT create_distributed_table('tab2','x', colocate_with := 'tab'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO tab2 SELECT 1 from generate_series(1, 0); +INSERT INTO tab2 SELECT 2 from generate_series(1, 60000); +INSERT INTO tab2 SELECT 3 from generate_series(1, 10000); +INSERT INTO tab2 SELECT 6 from generate_series(1, 10000); +ANALYZE tab, tab2; +\c - - - :worker_1_port +SELECT table_schema, table_name, row_estimate, total_bytes + FROM ( + SELECT *, total_bytes-index_bytes-COALESCE(toast_bytes,0) AS table_bytes FROM ( + SELECT c.oid,nspname AS table_schema, relname AS TABLE_NAME + , c.reltuples AS row_estimate + , pg_total_relation_size(c.oid) AS total_bytes + , pg_indexes_size(c.oid) AS index_bytes + , pg_total_relation_size(reltoastrelid) AS toast_bytes + FROM pg_class c + LEFT JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE relkind = 'r' + ) a +WHERE table_schema = 'public' +) a ORDER BY table_name; + table_schema | table_name | row_estimate | total_bytes +--------------------------------------------------------------------- + public | tab2_123050 | 0 | 0 + public | tab_123033 | 30000 | 1114112 +(2 rows) + +\c - - - :worker_2_port +SELECT table_schema, table_name, row_estimate, total_bytes + FROM ( + SELECT *, total_bytes-index_bytes-COALESCE(toast_bytes,0) AS table_bytes FROM ( + SELECT c.oid,nspname AS table_schema, relname AS TABLE_NAME + , c.reltuples AS row_estimate + , pg_total_relation_size(c.oid) AS total_bytes + , pg_indexes_size(c.oid) AS index_bytes + , pg_total_relation_size(reltoastrelid) AS toast_bytes + FROM pg_class c + LEFT JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE relkind = 'r' + ) a +WHERE table_schema = 'public' +) a ORDER BY table_name; + table_schema | table_name | row_estimate | total_bytes +--------------------------------------------------------------------- + public | tab2_123051 | 10000 | 393216 + public | tab2_123052 | 10000 | 393216 + public | tab2_123053 | 60000 | 2203648 + public | tab_123034 | 10000 | 393216 + public | tab_123035 | 10000 | 368640 + public | tab_123036 | 10000 | 393216 +(6 rows) + +\c - - - :master_port +SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'by_disk_size'); + table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport +--------------------------------------------------------------------- + tab 
| 123036 | 0 | localhost | 57638 | localhost | 57637 + tab2 | 123053 | 0 | localhost | 57638 | localhost | 57637 + tab | 123033 | 0 | localhost | 57637 | localhost | 57638 + tab2 | 123050 | 0 | localhost | 57637 | localhost | 57638 +(4 rows) + +SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'by_disk_size', shard_transfer_mode:='block_writes'); +NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... +NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | tab | 1 + 57638 | tab | 3 + 57637 | tab2 | 1 + 57638 | tab2 | 3 +(4 rows) + +ANALYZE tab, tab2; +\c - - - :worker_1_port +SELECT table_schema, table_name, row_estimate, total_bytes + FROM ( + SELECT *, total_bytes-index_bytes-COALESCE(toast_bytes,0) AS table_bytes FROM ( + SELECT c.oid,nspname AS table_schema, relname AS TABLE_NAME + , c.reltuples AS row_estimate + , pg_total_relation_size(c.oid) AS total_bytes + , pg_indexes_size(c.oid) AS index_bytes + , pg_total_relation_size(reltoastrelid) AS toast_bytes + FROM pg_class c + LEFT JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE relkind = 'r' + ) a +WHERE table_schema = 'public' +) a ORDER BY table_name; + table_schema | table_name | row_estimate | total_bytes +--------------------------------------------------------------------- + public | tab2_123053 | 60000 | 2179072 + public | tab_123036 | 10000 | 368640 +(2 rows) + +\c - - - :worker_2_port +SELECT table_schema, table_name, row_estimate, total_bytes + FROM ( + SELECT *, total_bytes-index_bytes-COALESCE(toast_bytes,0) AS table_bytes FROM ( + SELECT c.oid,nspname AS table_schema, relname AS TABLE_NAME + , c.reltuples AS row_estimate + , pg_total_relation_size(c.oid) AS total_bytes + , pg_indexes_size(c.oid) AS index_bytes + , pg_total_relation_size(reltoastrelid) AS toast_bytes + FROM pg_class c + LEFT JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE relkind = 'r' + ) a +WHERE table_schema = 'public' +) a ORDER BY table_name; + table_schema | table_name | row_estimate | total_bytes +--------------------------------------------------------------------- + public | tab2_123050 | 0 | 0 + public | tab2_123051 | 10000 | 393216 + public | tab2_123052 | 10000 | 393216 + public | tab_123033 | 30000 | 1089536 + public | tab_123034 | 10000 | 393216 + public | tab_123035 | 10000 | 368640 +(6 rows) + +\c - - - :master_port +DROP TABLE tab2; +CREATE OR REPLACE FUNCTION capacity_high_worker_1(nodeidarg int) + RETURNS real AS $$ + SELECT + (CASE WHEN nodeport = 57637 THEN 1000 ELSE 1 END)::real + FROM pg_dist_node where nodeid = nodeidarg + $$ LANGUAGE sql; +SELECT citus_add_rebalance_strategy( + 'capacity_high_worker_1', + 'citus_shard_cost_1', + 'capacity_high_worker_1', + 'citus_shard_allowed_on_node_true', + 0 + ); + citus_add_rebalance_strategy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'capacity_high_worker_1'); + table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport +--------------------------------------------------------------------- + tab | 123033 | 0 | localhost | 57638 | localhost | 57637 + tab | 123034 | 0 | localhost | 57638 | localhost | 57637 + tab | 123035 | 0 | 
localhost | 57638 | localhost | 57637 +(3 rows) + +SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'capacity_high_worker_1', shard_transfer_mode:='block_writes'); +NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... +NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... +NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | tab | 4 +(1 row) + +SELECT citus_set_default_rebalance_strategy('capacity_high_worker_1'); + citus_set_default_rebalance_strategy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM get_rebalance_table_shards_plan('tab'); + table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport +--------------------------------------------------------------------- +(0 rows) + +SELECT * FROM rebalance_table_shards('tab', shard_transfer_mode:='block_writes'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57637 | tab | 4 +(1 row) + +CREATE FUNCTION only_worker_2(shardid bigint, nodeidarg int) + RETURNS boolean AS $$ + SELECT + (CASE WHEN nodeport = 57638 THEN TRUE ELSE FALSE END) + FROM pg_dist_node where nodeid = nodeidarg + $$ LANGUAGE sql; +SELECT citus_add_rebalance_strategy( + 'only_worker_2', + 'citus_shard_cost_1', + 'citus_node_capacity_1', + 'only_worker_2', + 0 + ); + citus_add_rebalance_strategy +--------------------------------------------------------------------- + +(1 row) + +SELECT citus_set_default_rebalance_strategy('only_worker_2'); + citus_set_default_rebalance_strategy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM get_rebalance_table_shards_plan('tab'); + table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport +--------------------------------------------------------------------- + tab | 123033 | 0 | localhost | 57637 | localhost | 57638 + tab | 123034 | 0 | localhost | 57637 | localhost | 57638 + tab | 123035 | 0 | localhost | 57637 | localhost | 57638 + tab | 123036 | 0 | localhost | 57637 | localhost | 57638 +(4 rows) + +SELECT * FROM rebalance_table_shards('tab', shard_transfer_mode:='block_writes'); +NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... +NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... +NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... +NOTICE: Moving shard xxxxx from localhost:xxxxx to localhost:xxxxx ... 
+ rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM public.table_placements_per_node; + nodeport | logicalrelid | count +--------------------------------------------------------------------- + 57638 | tab | 4 +(1 row) + +SELECT citus_set_default_rebalance_strategy('by_shard_count'); + citus_set_default_rebalance_strategy +--------------------------------------------------------------------- + +(1 row) + +SELECT * FROM get_rebalance_table_shards_plan('tab'); + table_name | shardid | shard_size | sourcename | sourceport | targetname | targetport +--------------------------------------------------------------------- + tab | 123033 | 0 | localhost | 57638 | localhost | 57637 + tab | 123034 | 0 | localhost | 57638 | localhost | 57637 +(2 rows) + +-- Check all the error handling cases +SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'non_existing'); +ERROR: could not find rebalance strategy with name non_existing +SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'non_existing'); +ERROR: could not find rebalance strategy with name non_existing +SELECT * FROM master_drain_node('localhost', :worker_2_port, rebalance_strategy := 'non_existing'); +ERROR: could not find rebalance strategy with name non_existing +SELECT citus_set_default_rebalance_strategy('non_existing'); +ERROR: strategy with specified name does not exist +CONTEXT: PL/pgSQL function citus_set_default_rebalance_strategy(text) line 5 at RAISE +UPDATE pg_dist_rebalance_strategy SET default_strategy=false; +SELECT * FROM get_rebalance_table_shards_plan('tab'); +ERROR: no rebalance_strategy was provided, but there is also no default strategy set +SELECT * FROM rebalance_table_shards('tab'); +ERROR: no rebalance_strategy was provided, but there is also no default strategy set +SELECT * FROM master_drain_node('localhost', :worker_2_port); +ERROR: no rebalance_strategy was provided, but there is also no default strategy set +UPDATE pg_dist_rebalance_strategy SET default_strategy=true WHERE name='by_shard_count'; +CREATE OR REPLACE FUNCTION shard_cost_no_arguments() + RETURNS real AS $$ SELECT 1.0::real $$ LANGUAGE sql; +CREATE OR REPLACE FUNCTION shard_cost_bad_arg_type(text) + RETURNS real AS $$ SELECT 1.0::real $$ LANGUAGE sql; +CREATE OR REPLACE FUNCTION shard_cost_bad_return_type(bigint) + RETURNS int AS $$ SELECT 1 $$ LANGUAGE sql; +CREATE OR REPLACE FUNCTION node_capacity_no_arguments() + RETURNS real AS $$ SELECT 1.0::real $$ LANGUAGE sql; +CREATE OR REPLACE FUNCTION node_capacity_bad_arg_type(text) + RETURNS real AS $$ SELECT 1.0::real $$ LANGUAGE sql; +CREATE OR REPLACE FUNCTION node_capacity_bad_return_type(int) + RETURNS int AS $$ SELECT 1 $$ LANGUAGE sql; +CREATE OR REPLACE FUNCTION shard_allowed_on_node_no_arguments() + RETURNS boolean AS $$ SELECT true $$ LANGUAGE sql; +CREATE OR REPLACE FUNCTION shard_allowed_on_node_bad_arg1(text, int) + RETURNS boolean AS $$ SELECT true $$ LANGUAGE sql; +CREATE OR REPLACE FUNCTION shard_allowed_on_node_bad_arg2(bigint, text) + RETURNS boolean AS $$ SELECT true $$ LANGUAGE sql; +CREATE OR REPLACE FUNCTION shard_allowed_on_node_bad_return_type(bigint, int) + RETURNS int AS $$ SELECT 1 $$ LANGUAGE sql; +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'shard_cost_no_arguments', + 'citus_node_capacity_1', + 'citus_shard_allowed_on_node_true', + 0 + ); +ERROR: signature for shard_cost_function is incorrect +DETAIL: number of arguments of shard_cost_no_arguments should 
be 1, not 0 +CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( + NEW.shard_cost_function, + NEW.node_capacity_function, + NEW.shard_allowed_on_node_function)" +PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM +SQL function "citus_add_rebalance_strategy" statement 1 +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'shard_cost_bad_arg_type', + 'citus_node_capacity_1', + 'citus_shard_allowed_on_node_true', + 0 + ); +ERROR: signature for shard_cost_function is incorrect +DETAIL: argument type of shard_cost_bad_arg_type should be bigint +CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( + NEW.shard_cost_function, + NEW.node_capacity_function, + NEW.shard_allowed_on_node_function)" +PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM +SQL function "citus_add_rebalance_strategy" statement 1 +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'shard_cost_bad_return_type', + 'citus_node_capacity_1', + 'citus_shard_allowed_on_node_true', + 0 + ); +ERROR: signature for shard_cost_function is incorrect +DETAIL: return type of shard_cost_bad_return_type should be real +CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( + NEW.shard_cost_function, + NEW.node_capacity_function, + NEW.shard_allowed_on_node_function)" +PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM +SQL function "citus_add_rebalance_strategy" statement 1 +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 0, + 'citus_node_capacity_1', + 'citus_shard_allowed_on_node_true', + 0 + ); +ERROR: cache lookup failed for shard_cost_function with oid 0 +CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( + NEW.shard_cost_function, + NEW.node_capacity_function, + NEW.shard_allowed_on_node_function)" +PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM +SQL function "citus_add_rebalance_strategy" statement 1 +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'citus_shard_cost_1', + 'node_capacity_no_arguments', + 'citus_shard_allowed_on_node_true', + 0 + ); +ERROR: signature for node_capacity_function is incorrect +DETAIL: number of arguments of node_capacity_no_arguments should be 1, not 0 +CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( + NEW.shard_cost_function, + NEW.node_capacity_function, + NEW.shard_allowed_on_node_function)" +PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM +SQL function "citus_add_rebalance_strategy" statement 1 +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'citus_shard_cost_1', + 'node_capacity_bad_arg_type', + 'citus_shard_allowed_on_node_true', + 0 + ); +ERROR: signature for node_capacity_function is incorrect +DETAIL: argument type of node_capacity_bad_arg_type should be int +CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( + NEW.shard_cost_function, + NEW.node_capacity_function, + NEW.shard_allowed_on_node_function)" +PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM +SQL function "citus_add_rebalance_strategy" statement 1 +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'citus_shard_cost_1', + 'node_capacity_bad_return_type', + 'citus_shard_allowed_on_node_true', + 0 + ); +ERROR: signature for node_capacity_function is incorrect 
+DETAIL: return type of node_capacity_bad_return_type should be real +CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( + NEW.shard_cost_function, + NEW.node_capacity_function, + NEW.shard_allowed_on_node_function)" +PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM +SQL function "citus_add_rebalance_strategy" statement 1 +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'citus_shard_cost_1', + 0, + 'citus_shard_allowed_on_node_true', + 0 + ); +ERROR: cache lookup failed for node_capacity_function with oid 0 +CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( + NEW.shard_cost_function, + NEW.node_capacity_function, + NEW.shard_allowed_on_node_function)" +PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM +SQL function "citus_add_rebalance_strategy" statement 1 +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'citus_shard_cost_1', + 'citus_node_capacity_1', + 'shard_allowed_on_node_no_arguments', + 0 + ); +ERROR: signature for shard_allowed_on_node_function is incorrect +DETAIL: number of arguments of shard_allowed_on_node_no_arguments should be 2, not 0 +CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( + NEW.shard_cost_function, + NEW.node_capacity_function, + NEW.shard_allowed_on_node_function)" +PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM +SQL function "citus_add_rebalance_strategy" statement 1 +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'citus_shard_cost_1', + 'citus_node_capacity_1', + 'shard_allowed_on_node_bad_arg1', + 0 + ); +ERROR: signature for shard_allowed_on_node_function is incorrect +DETAIL: type of first argument of shard_allowed_on_node_bad_arg1 should be bigint +CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( + NEW.shard_cost_function, + NEW.node_capacity_function, + NEW.shard_allowed_on_node_function)" +PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM +SQL function "citus_add_rebalance_strategy" statement 1 +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'citus_shard_cost_1', + 'citus_node_capacity_1', + 'shard_allowed_on_node_bad_arg2', + 0 + ); +ERROR: signature for shard_allowed_on_node_function is incorrect +DETAIL: type of second argument of shard_allowed_on_node_bad_arg2 should be int +CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( + NEW.shard_cost_function, + NEW.node_capacity_function, + NEW.shard_allowed_on_node_function)" +PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM +SQL function "citus_add_rebalance_strategy" statement 1 +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'citus_shard_cost_1', + 'citus_node_capacity_1', + 'shard_allowed_on_node_bad_return_type', + 0 + ); +ERROR: signature for shard_allowed_on_node_function is incorrect +DETAIL: return type of shard_allowed_on_node_bad_return_type should be boolean +CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( + NEW.shard_cost_function, + NEW.node_capacity_function, + NEW.shard_allowed_on_node_function)" +PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM +SQL function "citus_add_rebalance_strategy" statement 1 +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'citus_shard_cost_1', + 
'citus_node_capacity_1', + 0, + 0 + ); +ERROR: cache lookup failed for shard_allowed_on_node_function with oid 0 +CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( + NEW.shard_cost_function, + NEW.node_capacity_function, + NEW.shard_allowed_on_node_function)" +PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM +SQL function "citus_add_rebalance_strategy" statement 1 +-- Confirm that manual insert/update has the same checks +INSERT INTO + pg_catalog.pg_dist_rebalance_strategy( + name, + shard_cost_function, + node_capacity_function, + shard_allowed_on_node_function, + default_threshold + ) VALUES ( + 'shard_cost_no_arguments', + 'shard_cost_no_arguments', + 'citus_node_capacity_1', + 'citus_shard_allowed_on_node_true', + 0 + ); +ERROR: signature for shard_cost_function is incorrect +DETAIL: number of arguments of shard_cost_no_arguments should be 1, not 0 +CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( + NEW.shard_cost_function, + NEW.node_capacity_function, + NEW.shard_allowed_on_node_function)" +PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM +UPDATE pg_dist_rebalance_strategy SET shard_cost_function='shard_cost_no_arguments' WHERE name='by_disk_size'; +ERROR: signature for shard_cost_function is incorrect +DETAIL: number of arguments of shard_cost_no_arguments should be 1, not 0 +CONTEXT: SQL statement "SELECT citus_validate_rebalance_strategy_functions( + NEW.shard_cost_function, + NEW.node_capacity_function, + NEW.shard_allowed_on_node_function)" +PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 5 at PERFORM +-- Confirm that only a single default strategy can exist +INSERT INTO + pg_catalog.pg_dist_rebalance_strategy( + name, + default_strategy, + shard_cost_function, + node_capacity_function, + shard_allowed_on_node_function, + default_threshold + ) VALUES ( + 'second_default', + true, + 'citus_shard_cost_1', + 'citus_node_capacity_1', + 'citus_shard_allowed_on_node_true', + 0 + ); +ERROR: there cannot be two default strategies +CONTEXT: PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 19 at RAISE +UPDATE pg_dist_rebalance_strategy SET default_strategy=true WHERE name='by_disk_size'; +ERROR: there cannot be two default strategies +CONTEXT: PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 19 at RAISE +-- ensure the trigger allows updating the default strategy +UPDATE pg_dist_rebalance_strategy SET default_strategy=true WHERE name='by_shard_count'; +-- Confirm that default strategy should be higher than minimum strategy +SELECT citus_add_rebalance_strategy( + 'default_threshold_too_low', + 'citus_shard_cost_1', + 'capacity_high_worker_1', + 'citus_shard_allowed_on_node_true', + 0, + 0.1 + ); +ERROR: default_threshold cannot be smaller than minimum_threshold +CONTEXT: PL/pgSQL function citus_internal.pg_dist_rebalance_strategy_trigger_func() line 10 at RAISE +SQL function "citus_add_rebalance_strategy" statement 1 +-- Make it a data node again +SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', true); + master_set_node_property +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE tab; +-- we don't need the coordinator on pg_dist_node anymore +SELECT 1 FROM master_remove_node('localhost', :master_port); + ?column? 
+--------------------------------------------------------------------- + 1 +(1 row) + +-- +-- Make sure that rebalance_table_shards() and replicate_table_shards() replicate +-- reference tables to the coordinator when replicate_reference_tables_on_activate +-- is off. +-- +SET citus.replicate_reference_tables_on_activate TO off; +SET client_min_messages TO WARNING; +CREATE TABLE dist_table_test_3(a int); +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +SET citus.replication_model TO "statement"; +SELECT create_distributed_table('dist_table_test_3', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE ref_table(a int); +SELECT create_reference_table('ref_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +SELECT 1 FROM master_add_node('localhost', :master_port, groupId=>0); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE logicalrelid = 'ref_table'::regclass; + count +--------------------------------------------------------------------- + 2 +(1 row) + +SET citus.shard_replication_factor TO 2; +SELECT replicate_table_shards('dist_table_test_3', max_shard_copies := 4, shard_transfer_mode:='block_writes'); + replicate_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE logicalrelid = 'ref_table'::regclass; + count +--------------------------------------------------------------------- + 3 +(1 row) + +SELECT 1 FROM master_remove_node('localhost', :master_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +CREATE TABLE rebalance_test_table(int_column int); +SELECT master_create_distributed_table('rebalance_test_table', 'int_column', 'append'); + master_create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CALL create_unbalanced_shards('rebalance_test_table'); +SELECT 1 FROM master_add_node('localhost', :master_port, groupId=>0); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE logicalrelid = 'ref_table'::regclass; + count +--------------------------------------------------------------------- + 2 +(1 row) + +SELECT rebalance_table_shards('rebalance_test_table', shard_transfer_mode:='block_writes'); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +SELECT count(*) FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE logicalrelid = 'ref_table'::regclass; + count +--------------------------------------------------------------------- + 3 +(1 row) + +DROP TABLE dist_table_test_3, rebalance_test_table, ref_table; +SELECT 1 FROM master_remove_node('localhost', :master_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +-- reference table 2 will not have a replica identity, causing the rebalancer to not work +-- when ran in the default mode. Instead we need to change the shard transfer mode to make +-- it work. This verifies the shard transfer mode used in the rebalancer is used for the +-- ensurance of reference table existence. 
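The comment above leans on r2 having neither a primary key nor an explicit REPLICA IDENTITY, which is the property the rebalancer's shard transfer mode has to cope with here. As a side note (a sketch, not part of this patch, assuming only the standard pg_class / pg_index / pg_dist_partition catalogs), tables in that state could be listed with a query along these lines:

-- tables registered with Citus that have no usable replica identity:
-- either REPLICA IDENTITY NOTHING, or the default identity with no primary key
SELECT p.logicalrelid::regclass AS table_name
FROM pg_dist_partition p
JOIN pg_class c ON c.oid = p.logicalrelid
WHERE c.relreplident = 'n'
   OR (c.relreplident = 'd'
       AND NOT EXISTS (SELECT 1 FROM pg_index i
                       WHERE i.indrelid = c.oid AND i.indisprimary));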
+CREATE TABLE t1 (a int PRIMARY KEY, b int); +CREATE TABLE r1 (a int PRIMARY KEY, b int); +CREATE TABLE r2 (a int, b int); +-- we remove worker 2 before creating the tables, this will allow us to have an active +-- node without the reference tables +SELECT 1 from master_remove_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT create_distributed_table('t1','a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('r1'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('r2'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +-- add data so to actually copy data when forcing logical replication for reference tables +INSERT INTO r1 VALUES (1,2), (3,4); +INSERT INTO r2 VALUES (1,2), (3,4); +SELECT 1 from master_add_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT rebalance_table_shards(); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +DROP TABLE t1, r1, r2; +-- verify there are no distributed tables before we perform the following tests. Preceding +-- test suites should clean up their distributed tables. +SELECT count(*) FROM pg_dist_partition; + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- verify a system having only reference tables will copy the reference tables when +-- executing the rebalancer +SELECT 1 from master_remove_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +CREATE TABLE r1 (a int PRIMARY KEY, b int); +SELECT create_reference_table('r1'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +SELECT 1 from master_add_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +-- count the number of placements for the reference table to verify it is not available on +-- all nodes +SELECT count(*) +FROM pg_dist_shard +JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'r1'::regclass; + count +--------------------------------------------------------------------- + 1 +(1 row) + +-- rebalance with _only_ a reference table, this should trigger the copy +SELECT rebalance_table_shards(); + rebalance_table_shards +--------------------------------------------------------------------- + +(1 row) + +-- verify the reference table is on all nodes after the rebalance +SELECT count(*) +FROM pg_dist_shard +JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'r1'::regclass; + count +--------------------------------------------------------------------- + 2 +(1 row) + +-- cleanup tables +DROP TABLE r1; +-- lastly we need to verify that reference tables are copied before the replication factor +-- of other tables is increased. Without the copy of reference tables the replication might +-- fail. +SELECT 1 from master_remove_node('localhost', :worker_2_port); + ?column? 
+--------------------------------------------------------------------- + 1 +(1 row) + +CREATE TABLE t1 (a int PRIMARY KEY, b int); +CREATE TABLE r1 (a int PRIMARY KEY, b int); +SELECT create_distributed_table('t1', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_reference_table('r1'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +SELECT 1 from master_add_node('localhost', :worker_2_port); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +-- count the number of placements for the reference table to verify it is not available on +-- all nodes +SELECT count(*) +FROM pg_dist_shard +JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'r1'::regclass; + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT replicate_table_shards('t1', shard_replication_factor := 2); + replicate_table_shards +--------------------------------------------------------------------- + +(1 row) + +-- verify the reference table is on all nodes after replicate_table_shards +SELECT count(*) +FROM pg_dist_shard +JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'r1'::regclass; + count +--------------------------------------------------------------------- + 2 +(1 row) + +DROP TABLE t1, r1; diff --git a/src/test/regress/expected/shard_rebalancer_unit.out b/src/test/regress/expected/shard_rebalancer_unit.out new file mode 100644 index 000000000..3308dfc3e --- /dev/null +++ b/src/test/regress/expected/shard_rebalancer_unit.out @@ -0,0 +1,502 @@ +CREATE OR REPLACE FUNCTION shard_placement_rebalance_array( + worker_node_list json[], + shard_placement_list json[], + threshold float4 DEFAULT 0, + max_shard_moves int DEFAULT 1000000, + drain_only bool DEFAULT false +) +RETURNS json[] +AS 'citus' +LANGUAGE C STRICT VOLATILE; +-- Check that even with threshold=0.0 shard_placement_rebalance_array returns +-- something when there's no completely balanced solution. 
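For readers skimming the unit tests that follow, the JSON fields they exercise appear to be (an informal summary inferred from the calls themselves, not an authoritative API reference): node entries take node_name and node_port, plus optional capacity (relative size, defaulting to 1) and disallowed_shards (a comma-separated list of shard ids); placement entries take placementid, shardid, shardstate, shardlength, nodename and nodeport, plus optional cost (relative weight, defaulting to 1) and next_colocation (which appears to start a new colocation group). A minimal illustrative call in the same shape, with made-up host names and its output omitted:

SELECT unnest(shard_placement_rebalance_array(
    -- nodeA can hold twice as much as nodeB; shard 2 may not live on nodeB
    ARRAY['{"node_name": "nodeA", "node_port": 5432, "capacity": 2}',
          '{"node_name": "nodeB", "node_port": 5432, "disallowed_shards": "2"}']::json[],
    -- shard 1 is twice as expensive as shard 2
    ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"nodeB", "nodeport":5432, "cost": 2}',
          '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"nodeB", "nodeport":5432}']::json[]
));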
+SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}']::json[] +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} +(1 row) + +-- Check that a node can be drained in a balanced cluster +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4"}', + '{"node_name": "hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[] +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} + {"updatetype":1,"shardid":2,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} +(2 rows) + +-- Check that an already drained node won't be filled again after a second +-- rebalance +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4"}', + '{"node_name": "hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[] +)); + unnest +--------------------------------------------------------------------- +(0 rows) + +-- Check that even when shards are already balanced, but shard xxxxx is on a node +-- where it is not allowed it will be moved and there will be rebalancing later +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,5,6"}', + '{"node_name": "hostname2", "node_port": 5432, "disallowed_shards": "4"}', + '{"node_name": "hostname3", "node_port": 5432, "disallowed_shards": "4"}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, 
"shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432}' + ]::json[] +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} + {"updatetype":1,"shardid":2,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname3","targetport":5432} + {"updatetype":1,"shardid":4,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname1","targetport":5432} +(3 rows) + +-- Check that even when shards are already balanced, disallowed shards will be +-- moved away from hostname1 and the only shard that is allowed there will be +-- moved there +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,5,6"}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432}' + ]::json[] +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} + {"updatetype":1,"shardid":2,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname3","targetport":5432} + {"updatetype":1,"shardid":4,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname1","targetport":5432} +(3 rows) + +-- Check that an error is returned when a shard is not allowed anywhere +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "2,4"}', + '{"node_name": "hostname2", "node_port": 5432, "disallowed_shards": "1,4"}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[] +)); +WARNING: Not allowed to move shard xxxxx anywhere from hostname2:5432 + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":2,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} + {"updatetype":1,"shardid":3,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname1","targetport":5432} +(2 rows) + +-- Check that cost is taken into account when rebalancing +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": 
"hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432, "cost": 3}']::json[] +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":4,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} +(1 row) + +-- Check that cost is taken into account when rebalancing disallowed placements +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4"}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432, "cost": 3}']::json[] +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":4,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} + {"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname3","targetport":5432} + {"updatetype":1,"shardid":2,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname3","targetport":5432} + {"updatetype":1,"shardid":3,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname3","targetport":5432} +(4 rows) + +-- Check that node capacacity is taken into account. +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432, "capacity": 3}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}']::json[] +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} + {"updatetype":1,"shardid":2,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} + {"updatetype":1,"shardid":3,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} +(3 rows) + +-- Check that shards are not moved when target utilization stays the same and +-- the source utilization goes below the original target utilization. hostname1 +-- has utilization of 1, after move hostname2 would have a utilization of 1 as +-- well. hostname1 would have utilization of 1 while hostname2 has utilization +-- of 2/3 now. 
Since load is spread more fairly with utilization 2/3 than 0 it +-- should choose that distribution. +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432, "capacity": 3}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}']::json[] +)); + unnest +--------------------------------------------------------------------- +(0 rows) + +-- Check that shards are moved even when target utilization stays the same, but +-- source utilization goes below the original target utilization. hostname2 +-- has utilization of 1, after move hostname1 would have a utilization of 1 as +-- well. hostname2 would have utilization of 2/3 while hostname1 now has +-- utilization of 0 now. Since load is spread more fairly with utilization 2/3 +-- than 0 it should choose that distribution. +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432, "capacity": 3}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}']::json[] +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":1,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname1","targetport":5432} +(1 row) + +-- Check that shards are moved even when target utilization stays the same, but +-- source utilization goes below the original target utilization. hostname2 +-- has utilization of 2, after move hostname1 would have a utilization of 2 as +-- well. hostname2 would have utilization of 1.5 while hostname1 now has +-- utilization of 1. Since load is spread more fairly with utilization 1.5 than +-- 1 it should choose that distribution. +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432, "capacity": 2}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}']::json[] +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":2,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname1","targetport":5432} +(1 row) + +-- Check that shards are moved even when target utilization stays the same, but +-- source utilization goes below the original target utilization. hostname1 +-- has utilization of 2, after move hostname2 would have a utilization of 2 as +-- well. 
hostname1 would have utilization of 1 while hostname2 now has +-- utilization of 1.5. Since load is spread more fairly with utilization 1.5 +-- than 1 it should choose that distribution. +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432, "capacity": 2}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}']::json[] +)); + unnest +--------------------------------------------------------------------- +(0 rows) + +-- Check that all shards will be moved to 1 node if its capacity is big enough +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432, "capacity": 4}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}']::json[] +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} + {"updatetype":1,"shardid":2,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} + {"updatetype":1,"shardid":3,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} +(3 rows) + +-- Check that shards will be moved to a smaller node node if utilization improves +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432, "capacity": 3}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}']::json[] +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":1,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname1","targetport":5432} +(1 row) + +-- Check that node capacity works with different shard costs +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432, "capacity": 3}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432, "cost": 3}']::json[] +)); + unnest 
+--------------------------------------------------------------------- + {"updatetype":1,"shardid":1,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname1","targetport":5432} +(1 row) + +-- Check that node capacity works with different shard costs again +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432, "capacity": 3}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432, "cost": 2}']::json[] +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":3,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} + {"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} +(2 rows) + +-- Check that max_shard_moves works and that we get a NOTICE that it is hit +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432, "capacity": 3}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432, "cost": 2}']::json[], + max_shard_moves := 1 +)); +NOTICE: Stopped searching before we were out of moves. Please rerun the rebalancer after it's finished for a more optimal placement. + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":3,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} +(1 row) + +-- Check that node capacity works with different shard costs and disallowed_shards +-- NOTE: these moves are not optimal, once we implement merging of updates this +-- output should change. 
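To make the NOTE above concrete: in the plan returned below, shard 1 takes two hops (hostname3 to hostname1, then hostname1 to hostname2). Merging updates would presumably collapse those into a single hostname3-to-hostname2 move, shrinking the plan from three moves to two.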
+SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432, "capacity": 5}', + '{"node_name": "hostname3", "node_port": 5432, "disallowed_shards": "1,2"}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432, "cost": 2}']::json[] +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":2,"sourcename":"hostname3","sourceport":5432,"targetname":"hostname2","targetport":5432} + {"updatetype":1,"shardid":1,"sourcename":"hostname3","sourceport":5432,"targetname":"hostname1","targetport":5432} + {"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname2","targetport":5432} +(3 rows) + +-- Check that draining + rebalancing nodes works +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4,5,6", "capacity": 0}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[] +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname3","targetport":5432} + {"updatetype":1,"shardid":2,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname3","targetport":5432} + {"updatetype":1,"shardid":3,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname3","targetport":5432} +(3 rows) + +-- Check that draining nodes with drain only works +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4,5,6", "capacity": 0}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[], + drain_only := true +)); + unnest +--------------------------------------------------------------------- + 
{"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname3","targetport":5432} +(1 row) + +-- Check that draining nodes has priority over max_shard_moves +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4,5,6", "capacity": 0}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[], + max_shard_moves := 0 +)); +NOTICE: Stopped searching before we were out of moves. Please rerun the rebalancer after it's finished for a more optimal placement. + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname3","targetport":5432} +(1 row) + +-- Check that drained moves are counted towards shard moves and thus use up the +-- limit when doing normal rebalance moves +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4,5,6", "capacity": 0}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[], + max_shard_moves := 2 +)); +NOTICE: Stopped searching before we were out of moves. Please rerun the rebalancer after it's finished for a more optimal placement. 
+ unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname3","targetport":5432} + {"updatetype":1,"shardid":2,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname3","targetport":5432} +(2 rows) + +-- Check that draining for all colocation groups is done before rebalancing +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4,5,6,7,8,9,10,11,12", "capacity": 0}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":7, "shardid":7, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432, "next_colocation": true}', + '{"placementid":8, "shardid":8, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":9, "shardid":9, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":10, "shardid":10, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":11, "shardid":11, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":12, "shardid":12, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[] +)); + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname3","targetport":5432} + {"updatetype":1,"shardid":7,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname3","targetport":5432} + {"updatetype":1,"shardid":2,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname3","targetport":5432} + {"updatetype":1,"shardid":3,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname3","targetport":5432} + {"updatetype":1,"shardid":8,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname3","targetport":5432} + {"updatetype":1,"shardid":9,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname3","targetport":5432} +(6 rows) + +-- Check that max_shard_moves warning is only shown once even if more than one +-- colocation group its placement updates are ignored because of it +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4,5,6,7,8,9,10,11,12", "capacity": 0}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, 
"shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":7, "shardid":7, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432, "next_colocation": true}', + '{"placementid":8, "shardid":8, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":9, "shardid":9, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":10, "shardid":10, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":11, "shardid":11, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":12, "shardid":12, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[], + max_shard_moves := 1 +)); +NOTICE: Stopped searching before we were out of moves. Please rerun the rebalancer after it's finished for a more optimal placement. + unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname3","targetport":5432} + {"updatetype":1,"shardid":7,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname3","targetport":5432} +(2 rows) + +-- Check that moves for different colocation groups are added together when +-- taking into account max_shard_moves +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4,5,6,7,8,9,10,11,12", "capacity": 0}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":7, "shardid":7, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432, "next_colocation": true}', + '{"placementid":8, "shardid":8, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":9, "shardid":9, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":10, "shardid":10, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":11, "shardid":11, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":12, "shardid":12, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[], + max_shard_moves := 5 +)); +NOTICE: Stopped searching before we were out of moves. Please rerun the rebalancer after it's finished for a more optimal placement. 
+ unnest +--------------------------------------------------------------------- + {"updatetype":1,"shardid":1,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname3","targetport":5432} + {"updatetype":1,"shardid":7,"sourcename":"hostname1","sourceport":5432,"targetname":"hostname3","targetport":5432} + {"updatetype":1,"shardid":2,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname3","targetport":5432} + {"updatetype":1,"shardid":3,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname3","targetport":5432} + {"updatetype":1,"shardid":8,"sourcename":"hostname2","sourceport":5432,"targetname":"hostname3","targetport":5432} +(5 rows) + diff --git a/src/test/regress/isolation_schedule b/src/test/regress/isolation_schedule index e734870ee..39ce4402f 100644 --- a/src/test/regress/isolation_schedule +++ b/src/test/regress/isolation_schedule @@ -66,6 +66,13 @@ test: shared_connection_waits test: isolation_cancellation test: isolation_undistribute_table +# Rebalancer +test: isolation_blocking_move_single_shard_commands +test: isolation_blocking_move_multi_shard_commands +test: isolation_blocking_move_single_shard_commands_on_mx +test: isolation_blocking_move_multi_shard_commands_on_mx +test: isolation_shard_rebalancer + # MX tests test: isolation_reference_on_mx test: isolation_ref2ref_foreign_keys_on_mx diff --git a/src/test/regress/operations_schedule b/src/test/regress/operations_schedule new file mode 100644 index 000000000..4e526e19d --- /dev/null +++ b/src/test/regress/operations_schedule @@ -0,0 +1,9 @@ +test: multi_cluster_management +test: multi_test_helpers multi_test_helpers_superuser +test: multi_test_catalog_views +test: shard_rebalancer_unit +test: shard_rebalancer +test: foreign_key_to_reference_shard_rebalance +test: multi_move_mx +test: shard_move_deferred_delete +test: multi_colocated_shard_rebalance diff --git a/src/test/regress/spec/isolation_blocking_move_multi_shard_commands.spec b/src/test/regress/spec/isolation_blocking_move_multi_shard_commands.spec new file mode 100644 index 000000000..ba534046b --- /dev/null +++ b/src/test/regress/spec/isolation_blocking_move_multi_shard_commands.spec @@ -0,0 +1,121 @@ +// we use 15 as partition key values through out the test +// so setting the corresponding shard here is useful + +setup +{ + SELECT citus_internal.replace_isolation_tester_func(); + SELECT citus_internal.refresh_isolation_tester_prepared_statement(); + + SET citus.shard_count TO 8; + SET citus.shard_replication_factor TO 1; + CREATE TABLE logical_replicate_placement (x int PRIMARY KEY, y int); + SELECT create_distributed_table('logical_replicate_placement', 'x'); + + SELECT get_shard_id_for_distribution_column('logical_replicate_placement', 15) INTO selected_shard; + +} + +teardown +{ + SELECT citus_internal.restore_isolation_tester_func(); + + DROP TABLE selected_shard; + DROP TABLE logical_replicate_placement; +} + + +session "s1" + +step "s1-begin" +{ + BEGIN; +} + +step "s1-move-placement" +{ + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; +} + +step "s1-end" +{ + COMMIT; +} + +step "s1-select" +{ + SELECT * FROM logical_replicate_placement order by y; +} + +step "s1-insert" +{ + INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172); +} + +step "s1-get-shard-distribution" +{ + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where 
shardid in (SELECT * FROM selected_shard) order by nodeport; +} + +session "s2" + +step "s2-begin" +{ + BEGIN; +} + +step "s2-select" +{ + SELECT * FROM logical_replicate_placement ORDER BY y; +} + +step "s2-insert" +{ + INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172); +} + +step "s2-delete" +{ + DELETE FROM logical_replicate_placement; +} + +step "s2-update" +{ + UPDATE logical_replicate_placement SET y = y + 1; +} + +step "s2-upsert" +{ + INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172); + + INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172) ON CONFLICT (x) DO UPDATE SET y = logical_replicate_placement.y + 1; +} + +step "s2-copy" +{ + COPY logical_replicate_placement FROM PROGRAM 'echo "1,1\n2,2\n3,3\n4,4\n5,5\n15,30"' WITH CSV; +} + +step "s2-truncate" +{ + TRUNCATE logical_replicate_placement; +} + +step "s2-alter-table" +{ + ALTER TABLE logical_replicate_placement ADD COLUMN z INT; +} + +step "s2-end" +{ + COMMIT; +} + +permutation "s1-begin" "s2-begin" "s2-insert" "s1-move-placement" "s2-end" "s1-end" "s1-select" "s1-get-shard-distribution" +permutation "s1-begin" "s2-begin" "s2-upsert" "s1-move-placement" "s2-end" "s1-end" "s1-select" "s1-get-shard-distribution" +permutation "s1-insert" "s1-begin" "s2-begin" "s2-update" "s1-move-placement" "s2-end" "s1-end" "s1-select" "s1-get-shard-distribution" +permutation "s1-insert" "s1-begin" "s2-begin" "s2-delete" "s1-move-placement" "s2-end" "s1-end" "s1-select" "s1-get-shard-distribution" +permutation "s1-insert" "s1-begin" "s2-begin" "s2-select" "s1-move-placement" "s2-end" "s1-end" "s1-get-shard-distribution" +permutation "s1-begin" "s2-begin" "s2-copy" "s1-move-placement" "s2-end" "s1-end" "s1-select" "s1-get-shard-distribution" +permutation "s1-insert" "s1-begin" "s2-begin" "s2-truncate" "s1-move-placement" "s2-end" "s1-end" "s1-select" "s1-get-shard-distribution" +permutation "s1-begin" "s2-begin" "s2-alter-table" "s1-move-placement" "s2-end" "s1-end" "s1-select" "s1-get-shard-distribution" + diff --git a/src/test/regress/spec/isolation_blocking_move_multi_shard_commands_on_mx.spec b/src/test/regress/spec/isolation_blocking_move_multi_shard_commands_on_mx.spec new file mode 100644 index 000000000..ac26a5f2c --- /dev/null +++ b/src/test/regress/spec/isolation_blocking_move_multi_shard_commands_on_mx.spec @@ -0,0 +1,132 @@ +// we use 15 as partition key values through out the test +// so setting the corresponding shard here is useful + +setup +{ + CREATE OR REPLACE FUNCTION start_session_level_connection_to_node(text, integer) + RETURNS void + LANGUAGE C STRICT VOLATILE + AS 'citus', $$start_session_level_connection_to_node$$; + + CREATE OR REPLACE FUNCTION run_commands_on_session_level_connection_to_node(text) + RETURNS void + LANGUAGE C STRICT VOLATILE + AS 'citus', $$run_commands_on_session_level_connection_to_node$$; + + CREATE OR REPLACE FUNCTION stop_session_level_connection_to_node() + RETURNS void + LANGUAGE C STRICT VOLATILE + AS 'citus', $$stop_session_level_connection_to_node$$; + + SELECT citus_internal.replace_isolation_tester_func(); + SELECT citus_internal.refresh_isolation_tester_prepared_statement(); + + -- start_metadata_sync_to_node can not be run inside a transaction block + -- following is a workaround to overcome that + -- port numbers are hard coded at the moment + SELECT master_run_on_worker( + ARRAY['localhost']::text[], + ARRAY[57636]::int[], + ARRAY[format('SELECT start_metadata_sync_to_node(''%s'', %s)', nodename, nodeport)]::text[], + false) + FROM 
pg_dist_node; + + SET citus.replication_model to streaming; + SET citus.shard_replication_factor TO 1; + + SET citus.shard_count TO 8; + SET citus.shard_replication_factor TO 1; + CREATE TABLE logical_replicate_placement (x int PRIMARY KEY, y int); + SELECT create_distributed_table('logical_replicate_placement', 'x'); + + SELECT get_shard_id_for_distribution_column('logical_replicate_placement', 15) INTO selected_shard; +} + +teardown +{ + DROP TABLE selected_shard; + DROP TABLE logical_replicate_placement; + + SELECT citus_internal.restore_isolation_tester_func(); +} + + +session "s1" + +step "s1-begin" +{ + BEGIN; +} + +step "s1-move-placement" +{ + SELECT master_move_shard_placement(get_shard_id_for_distribution_column, 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes') FROM selected_shard; +} + +step "s1-commit" +{ + COMMIT; +} + +step "s1-select" +{ + SELECT * FROM logical_replicate_placement order by y; +} + +step "s1-insert" +{ + INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172); +} + +step "s1-get-shard-distribution" +{ + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; +} + +session "s2" + +step "s2-start-session-level-connection" +{ + SELECT start_session_level_connection_to_node('localhost', 57638); +} + +step "s2-begin-on-worker" +{ + SELECT run_commands_on_session_level_connection_to_node('BEGIN'); +} + +step "s2-select" +{ + SELECT run_commands_on_session_level_connection_to_node('SELECT * FROM logical_replicate_placement ORDER BY y'); +} + +step "s2-insert" +{ + SELECT run_commands_on_session_level_connection_to_node('INSERT INTO logical_replicate_placement VALUES (15, 15), (172, 172)'); +} + +step "s2-delete" +{ + SELECT run_commands_on_session_level_connection_to_node('DELETE FROM logical_replicate_placement'); +} + +step "s2-update" +{ + SELECT run_commands_on_session_level_connection_to_node('UPDATE logical_replicate_placement SET y = y + 1'); +} + +step "s2-commit-worker" +{ + SELECT run_commands_on_session_level_connection_to_node('COMMIT'); +} + +step "s2-stop-connection" +{ + SELECT stop_session_level_connection_to_node(); +} + +permutation "s1-begin" "s2-start-session-level-connection" "s2-begin-on-worker" "s2-insert" "s1-move-placement" "s2-commit-worker" "s1-commit" "s1-select" "s1-get-shard-distribution" "s2-stop-connection" +permutation "s1-insert" "s1-begin" "s2-start-session-level-connection" "s2-begin-on-worker" "s2-update" "s1-move-placement" "s2-commit-worker" "s1-commit" "s1-select" "s1-get-shard-distribution" "s2-stop-connection" +permutation "s1-insert" "s1-begin" "s2-start-session-level-connection" "s2-begin-on-worker" "s2-delete" "s1-move-placement" "s2-commit-worker" "s1-commit" "s1-select" "s1-get-shard-distribution" "s2-stop-connection" +permutation "s1-insert" "s1-begin" "s2-start-session-level-connection" "s2-begin-on-worker" "s2-select" "s1-move-placement" "s2-commit-worker" "s1-commit" "s1-get-shard-distribution" "s2-stop-connection" + diff --git a/src/test/regress/spec/isolation_blocking_move_single_shard_commands.spec b/src/test/regress/spec/isolation_blocking_move_single_shard_commands.spec new file mode 100644 index 000000000..f1250010f --- /dev/null +++ b/src/test/regress/spec/isolation_blocking_move_single_shard_commands.spec @@ -0,0 +1,107 @@ +// we use 15 as the partition key value through out the test +// so setting the corresponding shard here is useful +setup 
+{ + SELECT citus_internal.replace_isolation_tester_func(); + SELECT citus_internal.refresh_isolation_tester_prepared_statement(); + + SET citus.shard_count TO 8; + SET citus.shard_replication_factor TO 1; + CREATE TABLE logical_replicate_placement (x int PRIMARY KEY, y int); + SELECT create_distributed_table('logical_replicate_placement', 'x'); + + SELECT get_shard_id_for_distribution_column('logical_replicate_placement', 15) INTO selected_shard; +} + +teardown +{ + SELECT citus_internal.restore_isolation_tester_func(); + + DROP TABLE selected_shard; + DROP TABLE logical_replicate_placement; +} + + +session "s1" + +step "s1-begin" +{ + BEGIN; +} + +step "s1-move-placement" +{ + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); +} + +step "s1-end" +{ + COMMIT; +} + +step "s1-select" +{ + SELECT * FROM logical_replicate_placement order by y; +} + +step "s1-insert" +{ + INSERT INTO logical_replicate_placement VALUES (15, 15); +} + +step "s1-get-shard-distribution" +{ + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; +} + +session "s2" + +step "s2-begin" +{ + BEGIN; +} + +step "s2-select" +{ + SELECT * FROM logical_replicate_placement ORDER BY y; +} + +step "s2-insert" +{ + INSERT INTO logical_replicate_placement VALUES (15, 15); +} + +step "s2-select-for-update" +{ + SELECT * FROM logical_replicate_placement WHERE x=15 FOR UPDATE; +} + +step "s2-delete" +{ + DELETE FROM logical_replicate_placement WHERE x = 15; +} + +step "s2-update" +{ + UPDATE logical_replicate_placement SET y = y + 1 WHERE x = 15; +} + +step "s2-upsert" +{ + INSERT INTO logical_replicate_placement VALUES (15, 15); + + INSERT INTO logical_replicate_placement VALUES (15, 15) ON CONFLICT (x) DO UPDATE SET y = logical_replicate_placement.y + 1; +} + +step "s2-end" +{ + COMMIT; +} + +permutation "s1-begin" "s2-begin" "s2-insert" "s1-move-placement" "s2-end" "s1-end" "s1-select" "s1-get-shard-distribution" +permutation "s1-begin" "s2-begin" "s2-upsert" "s1-move-placement" "s2-end" "s1-end" "s1-select" "s1-get-shard-distribution" +permutation "s1-insert" "s1-begin" "s2-begin" "s2-update" "s1-move-placement" "s2-end" "s1-end" "s1-select" "s1-get-shard-distribution" +permutation "s1-insert" "s1-begin" "s2-begin" "s2-delete" "s1-move-placement" "s2-end" "s1-end" "s1-select" "s1-get-shard-distribution" +permutation "s1-insert" "s1-begin" "s2-begin" "s2-select" "s1-move-placement" "s2-end" "s1-end" "s1-get-shard-distribution" +permutation "s1-insert" "s1-begin" "s2-begin" "s2-select-for-update" "s1-move-placement" "s2-end" "s1-end" "s1-get-shard-distribution" + diff --git a/src/test/regress/spec/isolation_blocking_move_single_shard_commands_on_mx.spec b/src/test/regress/spec/isolation_blocking_move_single_shard_commands_on_mx.spec new file mode 100644 index 000000000..d0a3f323f --- /dev/null +++ b/src/test/regress/spec/isolation_blocking_move_single_shard_commands_on_mx.spec @@ -0,0 +1,136 @@ +// 15 as the partition key value through out the test +// so setting the corresponding shard here is useful +setup +{ + CREATE OR REPLACE FUNCTION start_session_level_connection_to_node(text, integer) + RETURNS void + LANGUAGE C STRICT VOLATILE + AS 'citus', $$start_session_level_connection_to_node$$; + + CREATE OR REPLACE FUNCTION run_commands_on_session_level_connection_to_node(text) + RETURNS void + LANGUAGE C 
STRICT VOLATILE + AS 'citus', $$run_commands_on_session_level_connection_to_node$$; + + CREATE OR REPLACE FUNCTION stop_session_level_connection_to_node() + RETURNS void + LANGUAGE C STRICT VOLATILE + AS 'citus', $$stop_session_level_connection_to_node$$; + + SELECT citus_internal.replace_isolation_tester_func(); + SELECT citus_internal.refresh_isolation_tester_prepared_statement(); + + -- start_metadata_sync_to_node can not be run inside a transaction block + -- following is a workaround to overcome that + -- port numbers are hard coded at the moment + SELECT master_run_on_worker( + ARRAY['localhost']::text[], + ARRAY[57636]::int[], + ARRAY[format('SELECT start_metadata_sync_to_node(''%s'', %s)', nodename, nodeport)]::text[], + false) + FROM pg_dist_node; + + SET citus.replication_model to streaming; + SET citus.shard_replication_factor TO 1; + + SET citus.shard_count TO 8; + CREATE TABLE logical_replicate_placement (x int PRIMARY KEY, y int); + SELECT create_distributed_table('logical_replicate_placement', 'x'); + + SELECT get_shard_id_for_distribution_column('logical_replicate_placement', 15) INTO selected_shard; +} + +teardown +{ + DROP TABLE selected_shard; + DROP TABLE logical_replicate_placement; + + SELECT citus_internal.restore_isolation_tester_func(); +} + + +session "s1" + +step "s1-begin" +{ + BEGIN; +} + +step "s1-move-placement" +{ + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638, shard_transfer_mode:='block_writes'); +} + +step "s1-commit" +{ + COMMIT; +} + +step "s1-select" +{ + SELECT * FROM logical_replicate_placement order by y; +} + +step "s1-insert" +{ + INSERT INTO logical_replicate_placement VALUES (15, 15); +} + +step "s1-get-shard-distribution" +{ + select nodeport from pg_dist_placement inner join pg_dist_node on(pg_dist_placement.groupid = pg_dist_node.groupid) where shardid in (SELECT * FROM selected_shard) order by nodeport; +} + +session "s2" + +step "s2-start-session-level-connection" +{ + SELECT start_session_level_connection_to_node('localhost', 57638); +} + +step "s2-begin-on-worker" +{ + SELECT run_commands_on_session_level_connection_to_node('BEGIN'); +} + +step "s2-select" +{ + SELECT run_commands_on_session_level_connection_to_node('SELECT * FROM logical_replicate_placement ORDER BY y'); +} + +step "s2-insert" +{ + SELECT run_commands_on_session_level_connection_to_node('INSERT INTO logical_replicate_placement VALUES (15, 15)'); +} + +step "s2-select-for-update" +{ + SELECT run_commands_on_session_level_connection_to_node('SELECT * FROM logical_replicate_placement WHERE x=15 FOR UPDATE'); +} + +step "s2-delete" +{ + SELECT run_commands_on_session_level_connection_to_node('DELETE FROM logical_replicate_placement WHERE x = 15'); +} + +step "s2-update" +{ + SELECT run_commands_on_session_level_connection_to_node('UPDATE logical_replicate_placement SET y = y + 1 WHERE x = 15'); +} + +step "s2-commit-worker" +{ + SELECT run_commands_on_session_level_connection_to_node('COMMIT'); +} + +step "s2-stop-connection" +{ + SELECT stop_session_level_connection_to_node(); +} + +permutation "s1-begin" "s2-start-session-level-connection" "s2-begin-on-worker" "s2-insert" "s1-move-placement" "s2-commit-worker" "s1-commit" "s1-select" "s1-get-shard-distribution" "s2-stop-connection" +permutation "s1-insert" "s1-begin" "s2-start-session-level-connection" "s2-begin-on-worker" "s2-update" "s1-move-placement" "s2-commit-worker" "s1-commit" "s1-select" "s1-get-shard-distribution" "s2-stop-connection" +permutation 
"s1-insert" "s1-begin" "s2-start-session-level-connection" "s2-begin-on-worker" "s2-delete" "s1-move-placement" "s2-commit-worker" "s1-commit" "s1-select" "s1-get-shard-distribution" "s2-stop-connection" +permutation "s1-insert" "s1-begin" "s2-start-session-level-connection" "s2-begin-on-worker" "s2-select" "s1-move-placement" "s2-commit-worker" "s1-commit" "s1-get-shard-distribution" "s2-stop-connection" +permutation "s1-insert" "s1-begin" "s2-start-session-level-connection" "s2-begin-on-worker" "s2-select-for-update" "s1-move-placement" "s2-commit-worker" "s1-commit" "s1-get-shard-distribution" "s2-stop-connection" + diff --git a/src/test/regress/spec/isolation_shard_rebalancer.spec b/src/test/regress/spec/isolation_shard_rebalancer.spec new file mode 100644 index 000000000..ab3e0e6fe --- /dev/null +++ b/src/test/regress/spec/isolation_shard_rebalancer.spec @@ -0,0 +1,114 @@ +setup +{ + SELECT 1 FROM master_add_node('localhost', 57637); + SELECT 1 FROM master_add_node('localhost', 57638); + CREATE TABLE colocated1 (test_id integer NOT NULL, data text); + SELECT create_distributed_table('colocated1', 'test_id', 'hash'); + CREATE TABLE colocated2 (test_id integer NOT NULL, data text); + SELECT create_distributed_table('colocated2', 'test_id', 'hash'); + CREATE TABLE non_colocated (test_id integer NOT NULL, data text); + SELECT create_distributed_table('non_colocated', 'test_id', 'hash', 'none'); +} + +teardown +{ + DROP TABLE non_colocated; + DROP TABLE colocated2; + DROP TABLE colocated1; + SELECT master_set_node_property('localhost', 57638, 'shouldhaveshards', true); +} + +session "s1" + +step "s1-rebalance-c1" +{ + BEGIN; + select rebalance_table_shards('colocated1'); +} + +step "s1-replicate-c1" +{ + BEGIN; + select replicate_table_shards('colocated1'); +} + +step "s1-rebalance-nc" +{ + BEGIN; + select rebalance_table_shards('non_colocated'); +} + +step "s1-replicate-nc" +{ + BEGIN; + select replicate_table_shards('non_colocated'); +} + +step "s1-commit" +{ + COMMIT; +} + +session "s2" + + +step "s2-rebalance-c2" +{ + select rebalance_table_shards('colocated2'); +} + +step "s2-replicate-c2" +{ + select replicate_table_shards('colocated2'); +} + +step "s2-rebalance-nc" +{ + select rebalance_table_shards('non_colocated'); +} + +step "s2-replicate-nc" +{ + select replicate_table_shards('non_colocated'); +} + +step "s2-rebalance-all" +{ + select rebalance_table_shards(); +} + +step "s2-drain" +{ + select master_drain_node('localhost', 57638); +} + + +// disallowed because it's the same table +permutation "s1-rebalance-nc" "s2-rebalance-nc" "s1-commit" +permutation "s1-rebalance-nc" "s2-replicate-nc" "s1-commit" +permutation "s1-replicate-nc" "s2-rebalance-nc" "s1-commit" +permutation "s1-replicate-nc" "s2-replicate-nc" "s1-commit" + +// disallowed because it's the same colocation group +permutation "s1-rebalance-c1" "s2-rebalance-c2" "s1-commit" +permutation "s1-rebalance-c1" "s2-replicate-c2" "s1-commit" +permutation "s1-replicate-c1" "s2-rebalance-c2" "s1-commit" +permutation "s1-replicate-c1" "s2-replicate-c2" "s1-commit" + +// allowed because it's a different colocation group +permutation "s1-rebalance-c1" "s2-rebalance-nc" "s1-commit" +permutation "s1-rebalance-c1" "s2-replicate-nc" "s1-commit" +permutation "s1-replicate-c1" "s2-rebalance-nc" "s1-commit" +permutation "s1-replicate-c1" "s2-replicate-nc" "s1-commit" + +// disallowed because we because colocated1 is part of all +permutation "s1-rebalance-c1" "s2-rebalance-all" "s1-commit" +permutation "s1-replicate-c1" "s2-rebalance-all" 
"s1-commit" +permutation "s1-rebalance-nc" "s2-rebalance-all" "s1-commit" +permutation "s1-replicate-nc" "s2-rebalance-all" "s1-commit" + +// disallowed because we because draining is rebalancing +permutation "s1-rebalance-c1" "s2-drain" "s1-commit" +permutation "s1-replicate-c1" "s2-drain" "s1-commit" +permutation "s1-rebalance-nc" "s2-drain" "s1-commit" +permutation "s1-replicate-nc" "s2-drain" "s1-commit" diff --git a/src/test/regress/sql/foreign_key_to_reference_shard_rebalance.sql b/src/test/regress/sql/foreign_key_to_reference_shard_rebalance.sql new file mode 100644 index 000000000..390ad7357 --- /dev/null +++ b/src/test/regress/sql/foreign_key_to_reference_shard_rebalance.sql @@ -0,0 +1,79 @@ +-- +-- FOREIGN_KEY_TO_REFERENCE_SHARD_REBALANCE +-- + +SET citus.next_shard_id TO 15000000; +CREATE SCHEMA fkey_to_reference_shard_rebalance; +SET search_path to fkey_to_reference_shard_rebalance; +SET citus.shard_replication_factor TO 1; +SET citus.shard_count to 8; + +CREATE TYPE foreign_details AS (name text, relid text, refd_relid text); + +CREATE VIEW table_fkeys_in_workers AS +SELECT +(json_populate_record(NULL::foreign_details, + json_array_elements_text((run_command_on_workers( $$ + SELECT + COALESCE(json_agg(row_to_json(d)), '[]'::json) + FROM + ( + SELECT + distinct name, + relid::regclass::text, + refd_relid::regclass::text + FROM + table_fkey_cols + ) + d $$ )).RESULT::json )::json )).* ; + +-- check if master_move_shard_placement with logical replication creates the +-- foreign constraints properly after moving the shard +CREATE TABLE referenced_table(test_column int, test_column2 int UNIQUE, PRIMARY KEY(test_column)); +CREATE TABLE referencing_table(id int PRIMARY KEY, ref_id int, FOREIGN KEY (id) REFERENCES referenced_table(test_column) ON DELETE CASCADE); +CREATE TABLE referencing_table2(id int, ref_id int, FOREIGN KEY (ref_id) REFERENCES referenced_table(test_column2) ON DELETE CASCADE, FOREIGN KEY (id) REFERENCES referencing_table(id) ON DELETE CASCADE); +SELECT create_reference_table('referenced_table'); +SELECT create_distributed_table('referencing_table', 'id'); +SELECT create_distributed_table('referencing_table2', 'id'); + +INSERT INTO referenced_table SELECT i,i FROM generate_series (0, 100) i; +INSERT INTO referencing_table SELECT i,i FROM generate_series (0, 100) i; +INSERT INTO referencing_table2 SELECT i,i FROM generate_series (0, 100) i; + +SELECT master_move_shard_placement(15000009, 'localhost', :worker_1_port, 'localhost', :worker_2_port); + +SELECT count(*) FROM referencing_table2; + +SELECT * FROM table_fkeys_in_workers WHERE relid LIKE 'fkey_to_reference_shard_rebalance.%' AND refd_relid LIKE 'fkey_to_reference_shard_rebalance.%' ORDER BY 1,2,3; + +SELECT master_move_shard_placement(15000009, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes'); + +SELECT count(*) FROM referencing_table2; + +SELECT * FROM table_fkeys_in_workers WHERE relid LIKE 'fkey_to_reference_shard_rebalance.%' AND refd_relid LIKE 'fkey_to_reference_shard_rebalance.%' ORDER BY 1,2,3; + +-- create a function to show the +CREATE FUNCTION get_foreign_key_to_reference_table_commands(Oid) + RETURNS SETOF text + LANGUAGE C STABLE STRICT + AS 'citus', $$get_foreign_key_to_reference_table_commands$$; + +CREATE TABLE reference_table_commands (id int UNIQUE); +CREATE TABLE referenceing_dist_table (id int, col1 int, col2 int, col3 int); +SELECT create_reference_table('reference_table_commands'); +SELECT create_distributed_table('referenceing_dist_table', 'id'); +ALTER TABLE 
referenceing_dist_table ADD CONSTRAINT c1 FOREIGN KEY (col1) REFERENCES reference_table_commands(id) ON UPDATE CASCADE; +ALTER TABLE referenceing_dist_table ADD CONSTRAINT c2 FOREIGN KEY (col2) REFERENCES reference_table_commands(id) ON UPDATE CASCADE NOT VALID; +ALTER TABLE referenceing_dist_table ADD CONSTRAINT very_very_very_very_very_very_very_very_very_very_very_very_very_long FOREIGN KEY (col3) REFERENCES reference_table_commands(id) ON UPDATE CASCADE; +SELECT * FROM get_foreign_key_to_reference_table_commands('referenceing_dist_table'::regclass); + +-- and show that rebalancer works fine +SELECT master_move_shard_placement(15000018, 'localhost', :worker_1_port, 'localhost', :worker_2_port); + +\c - - - :worker_2_port + +SELECT conname, contype, convalidated FROM pg_constraint WHERE conrelid = 'fkey_to_reference_shard_rebalance.referenceing_dist_table_15000018'::regclass ORDER BY 1; + +\c - - - :master_port + +DROP SCHEMA fkey_to_reference_shard_rebalance CASCADE; diff --git a/src/test/regress/sql/master_copy_shard_placement.sql b/src/test/regress/sql/master_copy_shard_placement.sql index 6d0f2234c..37396cdf6 100644 --- a/src/test/regress/sql/master_copy_shard_placement.sql +++ b/src/test/regress/sql/master_copy_shard_placement.sql @@ -36,21 +36,24 @@ SELECT master_copy_shard_placement( get_shard_id_for_distribution_column('data', 'key-1'), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false); + do_repair := false, + transfer_mode := 'block_writes'); -- verify we error out if source and destination are the same SELECT master_copy_shard_placement( get_shard_id_for_distribution_column('data', 'key-1'), 'localhost', :worker_2_port, 'localhost', :worker_2_port, - do_repair := false); + do_repair := false, + transfer_mode := 'block_writes'); -- verify we error out if target already contains a healthy placement SELECT master_copy_shard_placement( (SELECT shardid FROM pg_dist_shard WHERE logicalrelid='ref_table'::regclass::oid), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false); + do_repair := false, + transfer_mode := 'block_writes'); -- verify we error out if table has foreign key constraints INSERT INTO ref_table SELECT 1, value FROM data; @@ -69,7 +72,8 @@ SELECT master_copy_shard_placement( get_shard_id_for_distribution_column('data', 'key-1'), 'localhost', :worker_2_port, 'localhost', :worker_1_port, - do_repair := false); + do_repair := false, + transfer_mode := 'block_writes'); -- forcefully mark the old replica as inactive UPDATE pg_dist_shard_placement SET shardstate = 3 @@ -95,7 +99,8 @@ SELECT master_copy_shard_placement( get_shard_id_for_distribution_column('mx_table', '1'), 'localhost', :worker_1_port, 'localhost', :worker_2_port, - do_repair := false); + do_repair := false, + transfer_mode := 'block_writes'); SELECT stop_metadata_sync_to_node('localhost', :worker_1_port); diff --git a/src/test/regress/sql/multi_colocated_shard_rebalance.sql b/src/test/regress/sql/multi_colocated_shard_rebalance.sql new file mode 100644 index 000000000..46de57776 --- /dev/null +++ b/src/test/regress/sql/multi_colocated_shard_rebalance.sql @@ -0,0 +1,336 @@ +-- +-- MULTI_COLOCATED_SHARD_REBALANCE +-- + +ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 13000000; + +SET citus.shard_count TO 6; +SET citus.shard_replication_factor TO 1; + +-- create distributed tables +CREATE TABLE table1_group1 ( id int PRIMARY KEY); +SELECT create_distributed_table('table1_group1', 'id', 'hash'); + +CREATE TABLE table2_group1 ( id int ); +SELECT 
create_distributed_table('table2_group1', 'id', 'hash'); + +SET citus.shard_count TO 8; +CREATE TABLE table5_groupX ( id int ); +SELECT create_distributed_table('table5_groupX', 'id', 'hash'); + +CREATE TABLE table6_append ( id int ); +SELECT master_create_distributed_table('table6_append', 'id', 'append'); +SELECT master_create_empty_shard('table6_append'); +SELECT master_create_empty_shard('table6_append'); + +-- Mark tables as non-mx tables, in order to be able to test master_copy_shard_placement +UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN + ('table1_group1'::regclass, 'table2_group1'::regclass, 'table5_groupX'::regclass); + +-- test copy + +-- test copying colocated shards +-- status before shard copy +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) +ORDER BY s.shardid, sp.nodeport; + +-- copy colocated shards +SELECT master_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false); + +-- status after shard copy +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) +ORDER BY s.shardid, sp.nodeport; + +-- also connect worker to verify we successfully copied given shard (and other colocated shards) +\c - - - :worker_2_port +SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table1_group1_13000000'::regclass; +SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table2_group1_13000006'::regclass; +\c - - - :master_port + +-- copy colocated shards again to see error message +SELECT master_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical'); + + +-- test copying NOT colocated shard +-- status before shard copy +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + p.logicalrelid = 'table5_groupX'::regclass +ORDER BY s.shardid, sp.nodeport; + +-- copy NOT colocated shard +SELECT master_copy_shard_placement(13000012, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false); + +-- status after shard copy +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + p.logicalrelid = 'table5_groupX'::regclass +ORDER BY s.shardid, sp.nodeport; + + +-- test copying shard in append distributed table +-- status before shard copy +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + p.logicalrelid = 'table6_append'::regclass +ORDER BY s.shardid, sp.nodeport; + +-- copy shard in append distributed table +SELECT master_copy_shard_placement(13000020, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false, 'force_logical'); + +-- status after shard copy +SELECT s.shardid, 
s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + p.logicalrelid = 'table6_append'::regclass +ORDER BY s.shardid, sp.nodeport; + + +-- test move + +-- test moving colocated shards +-- status before shard move +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) +ORDER BY s.shardid, sp.nodeport; + +-- try force_logical +SELECT master_move_shard_placement(13000001, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'force_logical'); + +-- move colocated shards +SELECT master_move_shard_placement(13000001, 'localhost', :worker_2_port, 'localhost', :worker_1_port); + +-- status after shard move +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) +ORDER BY s.shardid, sp.nodeport; + +-- also connect worker to verify we successfully moved given shard (and other colocated shards) +\c - - - :worker_1_port +SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table1_group1_13000001'::regclass; +SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table2_group1_13000007'::regclass; +\c - - - :master_port + + +-- test moving NOT colocated shard +-- status before shard move +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + p.logicalrelid = 'table5_groupX'::regclass +ORDER BY s.shardid, sp.nodeport; + +-- move NOT colocated shard +SELECT master_move_shard_placement(13000013, 'localhost', :worker_2_port, 'localhost', :worker_1_port); + +-- status after shard move +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + p.logicalrelid = 'table5_groupX'::regclass +ORDER BY s.shardid, sp.nodeport; + + +-- test moving shard in append distributed table +-- status before shard move +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + p.logicalrelid = 'table6_append'::regclass +ORDER BY s.shardid, sp.nodeport; + +-- move shard in append distributed table +SELECT master_move_shard_placement(13000021, 'localhost', :worker_1_port, 'localhost', :worker_2_port); + +-- status after shard move +SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport +FROM + pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp +WHERE + p.logicalrelid = s.logicalrelid AND + s.shardid = sp.shardid AND + p.logicalrelid = 'table6_append'::regclass +ORDER BY s.shardid, sp.nodeport; + + +-- try to move shard from wrong node +SELECT master_move_shard_placement(13000021, 'localhost', :worker_1_port, 'localhost', :worker_2_port); + + +-- test shard move with foreign constraints +DROP TABLE IF 
EXISTS table1_group1, table2_group1;
+
+SET citus.shard_count TO 6;
+SET citus.shard_replication_factor TO 1;
+
+-- create distributed tables
+CREATE TABLE table1_group1 ( id int PRIMARY KEY);
+SELECT create_distributed_table('table1_group1', 'id', 'hash');
+
+CREATE TABLE table2_group1 ( id int, table1_id int, FOREIGN KEY(table1_id) REFERENCES table1_group1(id));
+SELECT create_distributed_table('table2_group1', 'table1_id', 'hash');
+
+-- Mark the tables as non-mx tables
+UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN
+    ('table1_group1'::regclass, 'table2_group1'::regclass);
+
+-- status before shard rebalance
+SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport
+FROM
+    pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp
+WHERE
+    p.logicalrelid = s.logicalrelid AND
+    s.shardid = sp.shardid AND
+    colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass)
+ORDER BY s.shardid, sp.nodeport;
+
+SELECT master_move_shard_placement(13000022, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'block_writes');
+
+-- status after shard rebalance
+SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport
+FROM
+    pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp
+WHERE
+    p.logicalrelid = s.logicalrelid AND
+    s.shardid = sp.shardid AND
+    colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass)
+ORDER BY s.shardid, sp.nodeport;
+
+-- also connect worker to verify we successfully moved given shard (and other colocated shards)
+\c - - - :worker_2_port
+SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table1_group1_13000022'::regclass;
+SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table2_group1_13000028'::regclass;
+
+-- make sure that we've created the foreign keys
+SELECT "Constraint", "Definition" FROM table_fkeys
+    WHERE "Constraint" LIKE 'table2_group%' OR "Constraint" LIKE 'table1_group%';
+
+\c - - - :master_port
+
+
+-- test shard copy with foreign constraints
+-- we expect it to error out because we do not support foreign constraints with replication factor > 1
+SELECT master_copy_shard_placement(13000022, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false);
+
+
+-- let's also test that master_move_shard_placement doesn't break serials
+CREATE TABLE serial_move_test (key int, other_val serial);
+SET citus.shard_replication_factor TO 1;
+
+SELECT create_distributed_table('serial_move_test', 'key');
+
+-- key 15 goes to shard 13000035
+INSERT INTO serial_move_test (key) VALUES (15) RETURNING *;
+INSERT INTO serial_move_test (key) VALUES (15) RETURNING *;
+
+-- confirm the shard id
+SELECT * FROM run_command_on_placements('serial_move_test', 'SELECT DISTINCT key FROM %s WHERE key = 15') WHERE result = '15' AND shardid = 13000034;
+
+SELECT master_move_shard_placement(13000034, 'localhost', :worker_1_port, 'localhost', :worker_2_port);
+
+-- confirm the successful move
+SELECT * FROM run_command_on_placements('serial_move_test', 'SELECT DISTINCT key FROM %s WHERE key = 15') WHERE result = '15' AND shardid = 13000034;
+
+-- finally show that serials work fine afterwards
+INSERT INTO serial_move_test (key) VALUES (15) RETURNING *;
+INSERT INTO serial_move_test (key) VALUES (15) RETURNING *;
+
+-- we should be able to move shard placements of partitioned tables
+CREATE SCHEMA move_partitions;
+CREATE TABLE move_partitions.events (
+    id serial,
+    t timestamptz default now(),
+    payload text
+) +PARTITION BY RANGE(t); + +SET citus.shard_count TO 6; +SELECT create_distributed_table('move_partitions.events', 'id', colocate_with := 'none'); + +CREATE TABLE move_partitions.events_1 PARTITION OF move_partitions.events +FOR VALUES FROM ('2015-01-01') TO ('2016-01-01'); + +INSERT INTO move_partitions.events (t, payload) +SELECT '2015-01-01'::date + (interval '1 day' * s), s FROM generate_series(1, 100) s; + +SELECT count(*) FROM move_partitions.events; + +-- try to move automatically +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port) +FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port +ORDER BY shardid LIMIT 1; + +SELECT count(*) FROM move_partitions.events; + +-- add a primary key to the partition +ALTER TABLE move_partitions.events_1 ADD CONSTRAINT e_1_pk PRIMARY KEY (id); + +-- should be able to move automatically now +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port) +FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port +ORDER BY shardid LIMIT 1; + +SELECT count(*) FROM move_partitions.events; + +-- should also be able to move with block writes +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes') +FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port +ORDER BY shardid LIMIT 1; + +SELECT count(*) FROM move_partitions.events; + +-- should have moved all shards to node 1 (2*6 = 12) +SELECT count(*) +FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid::text LIKE 'move_partitions.events%' AND nodeport = :worker_1_port; + +DROP TABLE move_partitions.events; diff --git a/src/test/regress/sql/multi_move_mx.sql b/src/test/regress/sql/multi_move_mx.sql new file mode 100644 index 000000000..c317a08d7 --- /dev/null +++ b/src/test/regress/sql/multi_move_mx.sql @@ -0,0 +1,144 @@ +-- +-- MULTI_MOVE_MX +-- +ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1550000; + +SELECT start_metadata_sync_to_node('localhost', :worker_2_port); + +-- Create mx test tables +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +SET citus.replication_model TO 'streaming'; + +CREATE TABLE mx_table_1 (a int); +SELECT create_distributed_table('mx_table_1', 'a'); + +CREATE TABLE mx_table_2 (a int); +SELECT create_distributed_table('mx_table_2', 'a'); + +CREATE TABLE mx_table_3 (a text); +SELECT create_distributed_table('mx_table_3', 'a'); + +-- Check that the first two tables are colocated +SELECT + logicalrelid, repmodel +FROM + pg_dist_partition +WHERE + logicalrelid = 'mx_table_1'::regclass + OR logicalrelid = 'mx_table_2'::regclass + OR logicalrelid = 'mx_table_3'::regclass +ORDER BY + logicalrelid; + +-- Check the list of shards +SELECT + logicalrelid, shardid, nodename, nodeport +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_1'::regclass + OR logicalrelid = 'mx_table_2'::regclass + OR logicalrelid = 'mx_table_3'::regclass +ORDER BY + logicalrelid, shardid; + +-- Check the data on the worker +\c - - - :worker_2_port +SELECT + logicalrelid, shardid, nodename, nodeport +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 
'mx_table_1'::regclass + OR logicalrelid = 'mx_table_2'::regclass + OR logicalrelid = 'mx_table_3'::regclass +ORDER BY + logicalrelid, shardid; + +\c - - - :master_port +-- Check that master_copy_shard_placement cannot be run with MX tables +SELECT + master_copy_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical') +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_1'::regclass + AND nodeport = :worker_1_port +ORDER BY + shardid +LIMIT 1; + +-- Move a shard from worker 1 to worker 2 +SELECT + master_move_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port) +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_1'::regclass + AND nodeport = :worker_1_port +ORDER BY + shardid +LIMIT 1; + +-- Check that the shard and its colocated shard is moved, but not the other shards +SELECT + logicalrelid, shardid, nodename, nodeport +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_1'::regclass + OR logicalrelid = 'mx_table_2'::regclass + OR logicalrelid = 'mx_table_3'::regclass +ORDER BY + logicalrelid, shardid; + +-- Check that the changes are made in the worker as well +\c - - - :worker_2_port +SELECT + logicalrelid, shardid, nodename, nodeport +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_1'::regclass + OR logicalrelid = 'mx_table_2'::regclass + OR logicalrelid = 'mx_table_3'::regclass +ORDER BY + logicalrelid, shardid; + +-- Check that the UDFs cannot be called from the workers +SELECT + master_copy_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false, 'force_logical') +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_1'::regclass + AND nodeport = :worker_2_port +ORDER BY + shardid +LIMIT 1 OFFSET 1; + +SELECT + master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'force_logical') +FROM + pg_dist_shard NATURAL JOIN pg_dist_shard_placement +WHERE + logicalrelid = 'mx_table_1'::regclass + AND nodeport = :worker_2_port +ORDER BY + shardid +LIMIT 1 OFFSET 1; + +-- Cleanup +\c - - - :master_port +DROP TABLE mx_table_1; +DROP TABLE mx_table_2; +DROP TABLE mx_table_3; +SELECT stop_metadata_sync_to_node('localhost', :worker_2_port); +\c - - - :worker_2_port +DELETE FROM pg_dist_node; +DELETE FROM pg_dist_partition; +DELETE FROM pg_dist_shard; +DELETE FROM pg_dist_shard_placement; +\c - - - :master_port +RESET citus.replication_model; diff --git a/src/test/regress/sql/multi_test_helpers_superuser.sql b/src/test/regress/sql/multi_test_helpers_superuser.sql index aa7b3ee66..a50d1d3cd 100644 --- a/src/test/regress/sql/multi_test_helpers_superuser.sql +++ b/src/test/regress/sql/multi_test_helpers_superuser.sql @@ -1,3 +1,10 @@ +CREATE OR REPLACE FUNCTION master_defer_delete_shards() + RETURNS int + LANGUAGE C STRICT + AS 'citus', $$master_defer_delete_shards$$; +COMMENT ON FUNCTION master_defer_delete_shards() + IS 'remove orphaned shards'; + CREATE OR REPLACE FUNCTION wait_until_metadata_sync(timeout INTEGER DEFAULT 15000) RETURNS void LANGUAGE C STRICT diff --git a/src/test/regress/sql/multi_utility_warnings.sql b/src/test/regress/sql/multi_utility_warnings.sql index 296e4f3c2..3a7b0a910 100644 --- a/src/test/regress/sql/multi_utility_warnings.sql +++ b/src/test/regress/sql/multi_utility_warnings.sql @@ -21,4 +21,3 @@ BEGIN; INSERT INTO 
pg_dist_node VALUES (1234567890, 1234567890, 'localhost', 5432);
 INSERT INTO pg_dist_poolinfo VALUES (1234567890, 'port=1234');
 ROLLBACK;
-INSERT INTO pg_dist_rebalance_strategy VALUES ('should fail', false, 'citus_shard_cost_1', 'citus_node_capacity_1', 'citus_shard_allowed_on_node_true', 0, 0);
diff --git a/src/test/regress/sql/shard_move_deferred_delete.sql b/src/test/regress/sql/shard_move_deferred_delete.sql
new file mode 100644
index 000000000..1d5d38ffa
--- /dev/null
+++ b/src/test/regress/sql/shard_move_deferred_delete.sql
@@ -0,0 +1,61 @@
+--
+-- SHARD_MOVE_DEFERRED_DELETE
+--
+
+SET citus.next_shard_id TO 20000000;
+
+SET citus.shard_count TO 6;
+SET citus.shard_replication_factor TO 1;
+SET citus.defer_drop_after_shard_move TO on;
+
+CREATE SCHEMA shard_move_deferred_delete;
+SET search_path TO shard_move_deferred_delete;
+
+CREATE TABLE t1 ( id int PRIMARY KEY);
+SELECT create_distributed_table('t1', 'id');
+
+-- by counting how often we see the specific shard on all workers we can verify whether the shard is there
+SELECT run_command_on_workers($cmd$
+    SELECT count(*) FROM pg_class WHERE relname = 't1_20000000';
+$cmd$);
+
+-- move shard
+SELECT master_move_shard_placement(20000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port);
+
+-- we expect the shard to be on both workers now
+SELECT run_command_on_workers($cmd$
+    SELECT count(*) FROM pg_class WHERE relname = 't1_20000000';
+$cmd$);
+
+-- execute delayed removal
+SELECT public.master_defer_delete_shards();
+
+-- we expect the shard to be on only the second worker
+SELECT run_command_on_workers($cmd$
+    SELECT count(*) FROM pg_class WHERE relname = 't1_20000000';
+$cmd$);
+
+SELECT master_move_shard_placement(20000000, 'localhost', :worker_2_port, 'localhost', :worker_1_port);
+
+-- we expect the shard to be on both workers now
+SELECT run_command_on_workers($cmd$
+    SELECT count(*) FROM pg_class WHERE relname = 't1_20000000';
+$cmd$);
+
+-- enable auto delete
+ALTER SYSTEM SET citus.defer_shard_delete_interval TO 10;
+SELECT pg_reload_conf();
+
+-- Sleep 1 second to give Valgrind enough time to clear transactions
+SELECT pg_sleep(1);
+
+-- we expect the shard to be on only the first worker
+SELECT run_command_on_workers($cmd$
+    SELECT count(*) FROM pg_class WHERE relname = 't1_20000000';
+$cmd$);
+
+-- reset test suite
+ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1;
+SELECT pg_reload_conf();
+
+DROP SCHEMA shard_move_deferred_delete CASCADE;
diff --git a/src/test/regress/sql/shard_rebalancer.sql b/src/test/regress/sql/shard_rebalancer.sql
new file mode 100644
index 000000000..02c56b103
--- /dev/null
+++ b/src/test/regress/sql/shard_rebalancer.sql
@@ -0,0 +1,1148 @@
+--
+-- MULTI_SHARD_REBALANCER
+--
+
+CREATE TABLE dist_table_test(a int primary key);
+SELECT create_distributed_table('dist_table_test', 'a');
+CREATE TABLE ref_table_test(a int primary key);
+SELECT create_reference_table('ref_table_test');
+
+-- make sure that all rebalance operations work fine when
+-- reference tables are replicated to the coordinator
+SELECT 1 FROM master_add_node('localhost', :master_port, groupId=>0);
+
+-- should just be no-ops even if we add the coordinator to pg_dist_node
+SELECT rebalance_table_shards('dist_table_test');
+SELECT rebalance_table_shards();
+
+-- test that calling rebalance_table_shards without specifying relation
+-- wouldn't move the shard of the citus local table.
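+-- (citus local tables have a single shard that lives on the coordinator, so both the
+-- rebalancer and master_drain_node are expected to leave it in place, as checked below)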
+CREATE TABLE citus_local_table(a int, b int); +SELECT create_citus_local_table('citus_local_table'); +INSERT INTO citus_local_table VALUES (1, 2); + +SELECT rebalance_table_shards(); + +-- show that citus local table shard is still on the coordinator +SELECT tablename FROM pg_catalog.pg_tables where tablename like 'citus_local_table_%'; +-- also check that we still can access shard relation, not the shell table +SELECT count(*) FROM citus_local_table; + +SELECT master_drain_node('localhost', :master_port); + +-- show that citus local table shard is still on the coordinator +SELECT tablename FROM pg_catalog.pg_tables where tablename like 'citus_local_table_%'; +-- also check that we still can access shard relation, not the shell table +SELECT count(*) FROM citus_local_table; + +-- show that we do not create a shard rebalancing plan for citus local table +SELECT get_rebalance_table_shards_plan(); + +DROP TABLE citus_local_table; + +CREATE TABLE dist_table_test_2(a int); +SET citus.shard_count TO 4; + +SET citus.shard_replication_factor TO 1; +SET citus.replication_model TO "statement"; +SELECT create_distributed_table('dist_table_test_2', 'a'); + +-- replicate reference table should ignore the coordinator +SET citus.shard_replication_factor TO 2; +SELECT replicate_table_shards('dist_table_test_2', max_shard_copies := 4, shard_transfer_mode:='block_writes'); + +DROP TABLE dist_table_test, dist_table_test_2, ref_table_test; +RESET citus.shard_count; +RESET citus.shard_replication_factor; +RESET citus.replication_model; + +-- Create a user to test multiuser usage of rebalancer functions +CREATE USER testrole; +GRANT ALL ON SCHEMA public TO testrole; + +CREATE OR REPLACE FUNCTION shard_placement_rebalance_array( + worker_node_list json[], + shard_placement_list json[], + threshold float4 DEFAULT 0, + max_shard_moves int DEFAULT 1000000, + drain_only bool DEFAULT false +) +RETURNS json[] +AS 'citus' +LANGUAGE C STRICT VOLATILE; + + +CREATE FUNCTION shard_placement_replication_array(worker_node_list json[], + shard_placement_list json[], + shard_replication_factor int) +RETURNS json[] +AS 'citus' +LANGUAGE C STRICT VOLATILE; + +CREATE FUNCTION worker_node_responsive(worker_node_name text, worker_node_port int) +RETURNS boolean +AS 'citus' +LANGUAGE C STRICT VOLATILE; + +SET citus.next_shard_id TO 123000; + +SELECT worker_node_responsive(node_name, node_port::int) + FROM master_get_active_worker_nodes() + ORDER BY node_name, node_port ASC; + +-- Check that worker_node_responsive returns false for dead nodes +-- Note that PostgreSQL tries all possible resolutions of localhost on failing +-- connections. This causes different error details to be printed on different +-- environments. Therefore, we first set verbosity to terse. + +\set VERBOSITY terse + +SELECT worker_node_responsive('localhost', 1); + +\set VERBOSITY default + +-- Check that with threshold=0.0 shard_placement_rebalance_array returns enough +-- moves to make the cluster completely balanced. 
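+-- (the threshold argument controls how much node imbalance is tolerated: the checks
+-- below show that 0.0 rebalances completely, while 1.0 and 2.0 can leave even a
+-- completely unbalanced cluster untouched)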
+ +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}']::json[], + 0.0 +)); + +-- Check that with two nodes and threshold=1.0 shard_placement_rebalance_array +-- doesn't return any moves, even if it is completely unbalanced. + +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}']::json[], + 1.0 +)); + +-- Check that with three nodes and threshold=1.0 +-- shard_placement_rebalance_array returns moves when it is completely unbalanced +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}']::json[], + 1.0 +)); + + +-- Check that with with three nodes and threshold=2.0 +-- shard_placement_rebalance_array doesn't return any moves, even if it is +-- completely unbalanced. (with three nodes) + + +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}']::json[], + 2.0 +)); + +-- Check that with threshold=0.0 shard_placement_rebalance_array doesn't return +-- any moves if the cluster is already balanced. 
+ +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}']::json[], + 0.0 +)); + +-- Check that shard_placement_replication_array returns a shard copy operation +-- for each of the shards in an inactive node. + +SELECT unnest(shard_placement_replication_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432}', + '{"placementid":4, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432}']::json[], + 2 +)); + +-- Check that shard_placement_replication_array returns a shard copy operation +-- for each of the inactive shards. + +SELECT unnest(shard_placement_replication_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":3, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":1, "shardstate":3, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}']::json[], + 2 +)); + +-- Check that shard_placement_replication_array errors out if all placements of +-- a shard are placed on inactive nodes. + +SELECT unnest(shard_placement_replication_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":2, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432}']::json[], + 2 +)); + +-- Check that shard_placement_replication_array errors out if replication factor +-- is more than number of active nodes. 
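+-- (this is expected to fail because each replica of a shard must be placed on a
+-- distinct active node)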
+ +SELECT unnest(shard_placement_replication_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}']::json[], + 2 +)); + +-- Ensure that shard_replication_factor is 2 during replicate_table_shards +-- and rebalance_table_shards tests + +SET citus.shard_replication_factor TO 2; + +-- Turn off NOTICE messages + +SET client_min_messages TO WARNING; + +-- Create a single-row test data for shard rebalancer test shards + +CREATE TABLE shard_rebalancer_test_data AS SELECT 1::int as int_column; + +-- Test replicate_table_shards, which will in turn test update_shard_placement +-- in copy mode. + +CREATE TABLE replication_test_table(int_column int); +SELECT master_create_distributed_table('replication_test_table', 'int_column', 'append'); + +CREATE VIEW replication_test_table_placements_per_node AS + SELECT count(*) FROM pg_dist_shard_placement NATURAL JOIN pg_dist_shard + WHERE logicalrelid = 'replication_test_table'::regclass + GROUP BY nodename, nodeport + ORDER BY nodename, nodeport; + +-- Create four shards with replication factor 2, and delete the placements +-- with smaller port number to simulate under-replicated shards. + +SELECT count(master_create_empty_shard('replication_test_table')) + FROM generate_series(1, 4); + +DELETE FROM pg_dist_shard_placement WHERE placementid in ( + SELECT pg_dist_shard_placement.placementid + FROM pg_dist_shard_placement NATURAL JOIN pg_dist_shard + WHERE logicalrelid = 'replication_test_table'::regclass + AND (nodename, nodeport) = (SELECT nodename, nodeport FROM pg_dist_shard_placement + ORDER BY nodename, nodeport limit 1) +); + +-- Upload the test data to the shards + +SELECT count(master_append_table_to_shard(shardid, 'shard_rebalancer_test_data', + host(inet_server_addr()), inet_server_port())) + FROM pg_dist_shard + WHERE logicalrelid = 'replication_test_table'::regclass; + +-- Verify that there is one node with all placements + +SELECT * FROM replication_test_table_placements_per_node; + +-- Check excluded_shard_list by excluding three shards with smaller ids + +SELECT replicate_table_shards('replication_test_table', + excluded_shard_list := excluded_shard_list, + shard_transfer_mode:='block_writes') + FROM ( + SELECT (array_agg(DISTINCT shardid ORDER BY shardid))[1:3] AS excluded_shard_list + FROM pg_dist_shard + WHERE logicalrelid = 'replication_test_table'::regclass + ) T; + +SELECT * FROM replication_test_table_placements_per_node; + +-- Check that with shard_replication_factor=1 we don't do any copies + +SELECT replicate_table_shards('replication_test_table', + shard_replication_factor := 1, + shard_transfer_mode:='block_writes'); + +SELECT * FROM replication_test_table_placements_per_node; + +-- Check that max_shard_copies limits number of copy operations + +SELECT replicate_table_shards('replication_test_table', + max_shard_copies := 2, + shard_transfer_mode:='block_writes'); + +SELECT * FROM replication_test_table_placements_per_node; + +-- Replicate the remaining under-replicated shards + +SELECT replicate_table_shards('replication_test_table'); + +SELECT * FROM replication_test_table_placements_per_node; + +-- Check that querying the table doesn't error out + +SELECT count(*) FROM replication_test_table; + +DROP TABLE public.replication_test_table CASCADE; + +-- Test rebalance_table_shards, which will in turn test update_shard_placement +-- in move mode. 
+ +CREATE TABLE rebalance_test_table(int_column int); +SELECT master_create_distributed_table('rebalance_test_table', 'int_column', 'append'); + +CREATE VIEW table_placements_per_node AS +SELECT nodeport, logicalrelid::regclass, count(*) +FROM pg_dist_shard_placement NATURAL JOIN pg_dist_shard +GROUP BY logicalrelid::regclass, nodename, nodeport +ORDER BY logicalrelid::regclass, nodename, nodeport; + +-- Create six shards with replication factor 1 and move them to the same +-- node to create an unbalanced cluster. + +CREATE PROCEDURE create_unbalanced_shards(rel text) +LANGUAGE SQL +AS $$ + SET citus.shard_replication_factor TO 1; + + SELECT count(master_create_empty_shard(rel)) + FROM generate_series(1, 6); + + SELECT count(master_move_shard_placement(shardid, + src.nodename, src.nodeport::int, + dst.nodename, dst.nodeport::int, + shard_transfer_mode:='block_writes')) + FROM pg_dist_shard s JOIN + pg_dist_shard_placement src USING (shardid), + (SELECT nodename, nodeport FROM pg_dist_shard_placement ORDER BY nodeport DESC LIMIT 1) dst + WHERE src.nodeport < dst.nodeport AND s.logicalrelid = rel::regclass; +$$; + +CALL create_unbalanced_shards('rebalance_test_table'); + +SET citus.shard_replication_factor TO 2; + +-- Upload the test data to the shards + +SELECT count(master_append_table_to_shard(shardid, 'shard_rebalancer_test_data', + host(inet_server_addr()), inet_server_port())) +FROM pg_dist_shard +WHERE logicalrelid = 'rebalance_test_table'::regclass; + +-- Verify that there is one node with all placements + +SELECT * FROM table_placements_per_node; + +-- Check excluded_shard_list by excluding four shards with smaller ids + +SELECT rebalance_table_shards('rebalance_test_table', + excluded_shard_list := excluded_shard_list, + threshold := 0, + shard_transfer_mode:='block_writes') +FROM ( + SELECT (array_agg(DISTINCT shardid ORDER BY shardid))[1:4] AS excluded_shard_list + FROM pg_dist_shard + WHERE logicalrelid = 'rebalance_test_table'::regclass +) T; + +SELECT * FROM table_placements_per_node; + +-- Check that max_shard_moves limits number of move operations + +-- First check that we error if not table owner +SET ROLE testrole; +SELECT rebalance_table_shards('rebalance_test_table', + threshold := 0, max_shard_moves := 1, + shard_transfer_mode:='block_writes'); +RESET ROLE; + +SELECT rebalance_table_shards('rebalance_test_table', + threshold := 0, max_shard_moves := 1, + shard_transfer_mode:='block_writes'); + +SELECT * FROM table_placements_per_node; + +-- Check that threshold=1 doesn't move any shards + +SELECT rebalance_table_shards('rebalance_test_table', threshold := 1, shard_transfer_mode:='block_writes'); + +SELECT * FROM table_placements_per_node; + +-- Move the remaining shards using threshold=0 + +SELECT rebalance_table_shards('rebalance_test_table', threshold := 0); + +SELECT * FROM table_placements_per_node; + +-- Check that shard is completely balanced and rebalancing again doesn't have +-- any effects. 
+
+SELECT rebalance_table_shards('rebalance_test_table', threshold := 0, shard_transfer_mode:='block_writes');
+
+SELECT * FROM table_placements_per_node;
+
+-- Check that querying the table doesn't error out
+
+SELECT count(*) FROM rebalance_test_table;
+
+DROP TABLE rebalance_test_table;
+
+-- Test schema support
+
+
+CREATE SCHEMA test_schema_support;
+
+SELECT COUNT(*) FROM pg_dist_shard_placement;
+
+CREATE TABLE test_schema_support.nation_hash (
+    n_nationkey integer not null,
+    n_name char(25) not null,
+    n_regionkey integer not null,
+    n_comment varchar(152)
+);
+
+SELECT master_create_distributed_table('test_schema_support.nation_hash', 'n_nationkey', 'hash');
+SELECT master_create_worker_shards('test_schema_support.nation_hash', 4, 1);
+
+CREATE TABLE test_schema_support.nation_hash2 (
+    n_nationkey integer not null,
+    n_name char(25) not null,
+    n_regionkey integer not null,
+    n_comment varchar(152)
+);
+
+SELECT master_create_distributed_table('test_schema_support.nation_hash2', 'n_nationkey', 'hash');
+SELECT master_create_worker_shards('test_schema_support.nation_hash2', 4, 1);
+
+-- Shard count before replication
+SELECT COUNT(*) FROM pg_dist_shard_placement;
+
+SET search_path TO public;
+SELECT replicate_table_shards('test_schema_support.nation_hash', shard_transfer_mode:='block_writes');
+
+-- Confirm replication
+SELECT COUNT(*) FROM pg_dist_shard_placement;
+
+-- Test with search_path set
+SET search_path TO test_schema_support;
+SELECT replicate_table_shards('nation_hash2', shard_transfer_mode:='block_writes');
+
+-- Confirm replication
+SELECT COUNT(*) FROM pg_dist_shard_placement;
+
+DROP TABLE test_schema_support.nation_hash;
+DROP TABLE test_schema_support.nation_hash2;
+
+-- Test rebalancer with schema
+-- The next few operations create an imbalanced distributed table
+
+CREATE TABLE test_schema_support.imbalanced_table_local (
+    id integer not null
+);
+INSERT INTO test_schema_support.imbalanced_table_local VALUES(1);
+INSERT INTO test_schema_support.imbalanced_table_local VALUES(2);
+INSERT INTO test_schema_support.imbalanced_table_local VALUES(3);
+INSERT INTO test_schema_support.imbalanced_table_local VALUES(4);
+
+CREATE TABLE test_schema_support.imbalanced_table (
+    id integer not null
+);
+
+SELECT master_create_distributed_table('test_schema_support.imbalanced_table', 'id', 'append');
+
+SET citus.shard_replication_factor TO 1;
+SELECT * from master_create_empty_shard('test_schema_support.imbalanced_table');
+SELECT master_append_table_to_shard(123018, 'test_schema_support.imbalanced_table_local', 'localhost', :master_port);
+
+SET citus.shard_replication_factor TO 2;
+SELECT * from master_create_empty_shard('test_schema_support.imbalanced_table');
+SELECT master_append_table_to_shard(123019, 'test_schema_support.imbalanced_table_local', 'localhost', :master_port);
+
+SET citus.shard_replication_factor TO 1;
+SELECT * from master_create_empty_shard('test_schema_support.imbalanced_table');
+SELECT master_append_table_to_shard(123020, 'test_schema_support.imbalanced_table_local', 'localhost', :master_port);
+
+-- imbalanced_table is now imbalanced
+
+-- Shard counts in each node before rebalance
+SELECT * FROM public.table_placements_per_node;
+
+-- Row count in imbalanced table before rebalance
+SELECT COUNT(*) FROM imbalanced_table;
+
+-- Try force_logical
+SELECT rebalance_table_shards('imbalanced_table', threshold:=0, shard_transfer_mode:='force_logical');
+
+-- Test rebalance operation
+SELECT rebalance_table_shards('imbalanced_table', threshold:=0, 
shard_transfer_mode:='block_writes'); + +-- Confirm rebalance +-- Shard counts in each node after rebalance +SELECT * FROM public.table_placements_per_node; + +-- Row count in imbalanced table after rebalance +SELECT COUNT(*) FROM imbalanced_table; + +DROP TABLE public.shard_rebalancer_test_data; +DROP TABLE test_schema_support.imbalanced_table; +DROP TABLE test_schema_support.imbalanced_table_local; + +SET citus.shard_replication_factor TO 1; + +CREATE TABLE colocated_rebalance_test(id integer); +CREATE TABLE colocated_rebalance_test2(id integer); +SELECT create_distributed_table('colocated_rebalance_test', 'id'); + +-- Move all shards to worker1 +SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes') +FROM pg_dist_shard_placement +WHERE nodeport = :worker_2_port; + + +SELECT create_distributed_table('colocated_rebalance_test2', 'id'); + +-- Confirm all shards for both tables are on worker1 +SELECT * FROM public.table_placements_per_node; + +-- Confirm that the plan for drain_only doesn't show any moves +SELECT * FROM get_rebalance_table_shards_plan('colocated_rebalance_test', threshold := 0, drain_only := true); +-- Running with drain_only shouldn't do anything +SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes', drain_only := true); + +-- Confirm that nothing changed +SELECT * FROM public.table_placements_per_node; + +-- Confirm that the plan shows 2 shards of both tables moving back to worker2 +SELECT * FROM get_rebalance_table_shards_plan('colocated_rebalance_test', threshold := 0); +-- Confirm that this also happens when using rebalancing by disk size even if the tables are empty +SELECT * FROM get_rebalance_table_shards_plan('colocated_rebalance_test', rebalance_strategy := 'by_disk_size'); +-- Check that we can call this function +SELECT * FROM get_rebalance_progress(); +-- Actually do the rebalance +SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes'); +-- Check that we can call this function without a crash +SELECT * FROM get_rebalance_progress(); + +-- Confirm that the nodes are now there +SELECT * FROM public.table_placements_per_node; + + +CREATE TABLE non_colocated_rebalance_test(id integer); +SELECT create_distributed_table('non_colocated_rebalance_test', 'id', colocate_with := 'none'); +-- confirm that both colocation groups are balanced +SELECT * FROM public.table_placements_per_node; + +-- testing behaviour when setting isdatanode to 'marked for draining' +SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', false); + +SELECT * FROM get_rebalance_table_shards_plan('colocated_rebalance_test', threshold := 0); +SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes'); +SELECT * FROM public.table_placements_per_node; + +SELECT * FROM get_rebalance_table_shards_plan('non_colocated_rebalance_test', threshold := 0); +SELECT * FROM rebalance_table_shards('non_colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes'); +SELECT * FROM public.table_placements_per_node; + +-- Put shards back +SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', true); + +SELECT * FROM rebalance_table_shards('colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes'); +SELECT * FROM public.table_placements_per_node; +SELECT * FROM 
rebalance_table_shards('non_colocated_rebalance_test', threshold := 0, shard_transfer_mode := 'block_writes'); +SELECT * FROM public.table_placements_per_node; + +-- testing behaviour when setting shouldhaveshards to false and rebalancing all +-- colocation groups with drain_only=true +SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', false); +SELECT * FROM get_rebalance_table_shards_plan(threshold := 0, drain_only := true); +SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'block_writes', drain_only := true); +SELECT * FROM public.table_placements_per_node; + +-- Put shards back +SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', true); +SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'block_writes'); +SELECT * FROM public.table_placements_per_node; + +-- testing behaviour when setting shouldhaveshards to false and rebalancing all +-- colocation groups with drain_only=false +SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', false); +SELECT * FROM get_rebalance_table_shards_plan(threshold := 0); +SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'block_writes'); +SELECT * FROM public.table_placements_per_node; + +-- Put shards back +SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', true); +SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'block_writes'); +SELECT * FROM public.table_placements_per_node; + +-- Make it a data node again +SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', true); + +-- testing behaviour of master_drain_node +SELECT * from master_drain_node('localhost', :worker_2_port, shard_transfer_mode := 'block_writes'); +select shouldhaveshards from pg_dist_node where nodeport = :worker_2_port; +SELECT * FROM public.table_placements_per_node; + +-- Put shards back +SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', true); +SELECT * FROM rebalance_table_shards(threshold := 0, shard_transfer_mode := 'block_writes'); +SELECT * FROM public.table_placements_per_node; + + +-- Drop some tables for clear consistent error +DROP TABLE test_schema_support.colocated_rebalance_test2; + +-- Leave no trace on workers +RESET search_path; + +\set VERBOSITY terse +DROP SCHEMA test_schema_support CASCADE; +\set VERBOSITY default + +REVOKE ALL ON SCHEMA public FROM testrole; +DROP USER testrole; + +-- Test costs +set citus.shard_count = 4; +CREATE TABLE tab (x int); +SELECT create_distributed_table('tab','x'); +-- The following numbers are chosen such that they are placed on different +-- shards. 
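+-- (One way to double-check that, shown only as an illustration and kept
+-- commented out so the expected test output stays unchanged:
+--   SELECT x, get_shard_id_for_distribution_column('tab', x)
+--   FROM (VALUES (1), (2), (3), (6)) AS v(x);
+-- each of the four values should map to a different shard id.)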
+INSERT INTO tab SELECT 1 from generate_series(1, 30000); +INSERT INTO tab SELECT 2 from generate_series(1, 10000); +INSERT INTO tab SELECT 3 from generate_series(1, 10000); +INSERT INTO tab SELECT 6 from generate_series(1, 10000); +ANALYZE tab; + +\c - - - :worker_1_port +SELECT table_schema, table_name, row_estimate, total_bytes + FROM ( + SELECT *, total_bytes-index_bytes-COALESCE(toast_bytes,0) AS table_bytes FROM ( + SELECT c.oid,nspname AS table_schema, relname AS TABLE_NAME + , c.reltuples AS row_estimate + , pg_total_relation_size(c.oid) AS total_bytes + , pg_indexes_size(c.oid) AS index_bytes + , pg_total_relation_size(reltoastrelid) AS toast_bytes + FROM pg_class c + LEFT JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE relkind = 'r' + ) a +WHERE table_schema = 'public' +) a ORDER BY table_name; +\c - - - :worker_2_port +SELECT table_schema, table_name, row_estimate, total_bytes + FROM ( + SELECT *, total_bytes-index_bytes-COALESCE(toast_bytes,0) AS table_bytes FROM ( + SELECT c.oid,nspname AS table_schema, relname AS TABLE_NAME + , c.reltuples AS row_estimate + , pg_total_relation_size(c.oid) AS total_bytes + , pg_indexes_size(c.oid) AS index_bytes + , pg_total_relation_size(reltoastrelid) AS toast_bytes + FROM pg_class c + LEFT JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE relkind = 'r' + ) a +WHERE table_schema = 'public' +) a ORDER BY table_name; + +\c - - - :master_port + +SELECT * FROM get_rebalance_table_shards_plan('tab'); +SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'by_disk_size'); +SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'by_disk_size', threshold := 0); + +SELECT * FROM rebalance_table_shards('tab', shard_transfer_mode:='block_writes'); +SELECT * FROM public.table_placements_per_node; + +SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'by_disk_size', shard_transfer_mode:='block_writes'); +SELECT * FROM public.table_placements_per_node; + +SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'by_disk_size', shard_transfer_mode:='block_writes', threshold := 0); +SELECT * FROM public.table_placements_per_node; + +-- Check that sizes of colocated tables are added together for rebalances +set citus.shard_count = 4; +SET citus.next_shard_id TO 123050; +CREATE TABLE tab2 (x int); +SELECT create_distributed_table('tab2','x', colocate_with := 'tab'); +INSERT INTO tab2 SELECT 1 from generate_series(1, 0); +INSERT INTO tab2 SELECT 2 from generate_series(1, 60000); +INSERT INTO tab2 SELECT 3 from generate_series(1, 10000); +INSERT INTO tab2 SELECT 6 from generate_series(1, 10000); +ANALYZE tab, tab2; + +\c - - - :worker_1_port +SELECT table_schema, table_name, row_estimate, total_bytes + FROM ( + SELECT *, total_bytes-index_bytes-COALESCE(toast_bytes,0) AS table_bytes FROM ( + SELECT c.oid,nspname AS table_schema, relname AS TABLE_NAME + , c.reltuples AS row_estimate + , pg_total_relation_size(c.oid) AS total_bytes + , pg_indexes_size(c.oid) AS index_bytes + , pg_total_relation_size(reltoastrelid) AS toast_bytes + FROM pg_class c + LEFT JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE relkind = 'r' + ) a +WHERE table_schema = 'public' +) a ORDER BY table_name; +\c - - - :worker_2_port +SELECT table_schema, table_name, row_estimate, total_bytes + FROM ( + SELECT *, total_bytes-index_bytes-COALESCE(toast_bytes,0) AS table_bytes FROM ( + SELECT c.oid,nspname AS table_schema, relname AS TABLE_NAME + , c.reltuples AS row_estimate + , pg_total_relation_size(c.oid) AS 
total_bytes + , pg_indexes_size(c.oid) AS index_bytes + , pg_total_relation_size(reltoastrelid) AS toast_bytes + FROM pg_class c + LEFT JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE relkind = 'r' + ) a +WHERE table_schema = 'public' +) a ORDER BY table_name; + +\c - - - :master_port +SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'by_disk_size'); +SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'by_disk_size', shard_transfer_mode:='block_writes'); +SELECT * FROM public.table_placements_per_node; +ANALYZE tab, tab2; + +\c - - - :worker_1_port +SELECT table_schema, table_name, row_estimate, total_bytes + FROM ( + SELECT *, total_bytes-index_bytes-COALESCE(toast_bytes,0) AS table_bytes FROM ( + SELECT c.oid,nspname AS table_schema, relname AS TABLE_NAME + , c.reltuples AS row_estimate + , pg_total_relation_size(c.oid) AS total_bytes + , pg_indexes_size(c.oid) AS index_bytes + , pg_total_relation_size(reltoastrelid) AS toast_bytes + FROM pg_class c + LEFT JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE relkind = 'r' + ) a +WHERE table_schema = 'public' +) a ORDER BY table_name; +\c - - - :worker_2_port +SELECT table_schema, table_name, row_estimate, total_bytes + FROM ( + SELECT *, total_bytes-index_bytes-COALESCE(toast_bytes,0) AS table_bytes FROM ( + SELECT c.oid,nspname AS table_schema, relname AS TABLE_NAME + , c.reltuples AS row_estimate + , pg_total_relation_size(c.oid) AS total_bytes + , pg_indexes_size(c.oid) AS index_bytes + , pg_total_relation_size(reltoastrelid) AS toast_bytes + FROM pg_class c + LEFT JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE relkind = 'r' + ) a +WHERE table_schema = 'public' +) a ORDER BY table_name; +\c - - - :master_port + +DROP TABLE tab2; + +CREATE OR REPLACE FUNCTION capacity_high_worker_1(nodeidarg int) + RETURNS real AS $$ + SELECT + (CASE WHEN nodeport = 57637 THEN 1000 ELSE 1 END)::real + FROM pg_dist_node where nodeid = nodeidarg + $$ LANGUAGE sql; + +SELECT citus_add_rebalance_strategy( + 'capacity_high_worker_1', + 'citus_shard_cost_1', + 'capacity_high_worker_1', + 'citus_shard_allowed_on_node_true', + 0 + ); + +SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 'capacity_high_worker_1'); +SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'capacity_high_worker_1', shard_transfer_mode:='block_writes'); +SELECT * FROM public.table_placements_per_node; + +SELECT citus_set_default_rebalance_strategy('capacity_high_worker_1'); +SELECT * FROM get_rebalance_table_shards_plan('tab'); +SELECT * FROM rebalance_table_shards('tab', shard_transfer_mode:='block_writes'); +SELECT * FROM public.table_placements_per_node; + +CREATE FUNCTION only_worker_2(shardid bigint, nodeidarg int) + RETURNS boolean AS $$ + SELECT + (CASE WHEN nodeport = 57638 THEN TRUE ELSE FALSE END) + FROM pg_dist_node where nodeid = nodeidarg + $$ LANGUAGE sql; + +SELECT citus_add_rebalance_strategy( + 'only_worker_2', + 'citus_shard_cost_1', + 'citus_node_capacity_1', + 'only_worker_2', + 0 + ); + +SELECT citus_set_default_rebalance_strategy('only_worker_2'); +SELECT * FROM get_rebalance_table_shards_plan('tab'); +SELECT * FROM rebalance_table_shards('tab', shard_transfer_mode:='block_writes'); +SELECT * FROM public.table_placements_per_node; + +SELECT citus_set_default_rebalance_strategy('by_shard_count'); +SELECT * FROM get_rebalance_table_shards_plan('tab'); + +-- Check all the error handling cases +SELECT * FROM get_rebalance_table_shards_plan('tab', rebalance_strategy := 
'non_existing'); +SELECT * FROM rebalance_table_shards('tab', rebalance_strategy := 'non_existing'); +SELECT * FROM master_drain_node('localhost', :worker_2_port, rebalance_strategy := 'non_existing'); +SELECT citus_set_default_rebalance_strategy('non_existing'); + + +UPDATE pg_dist_rebalance_strategy SET default_strategy=false; +SELECT * FROM get_rebalance_table_shards_plan('tab'); +SELECT * FROM rebalance_table_shards('tab'); +SELECT * FROM master_drain_node('localhost', :worker_2_port); +UPDATE pg_dist_rebalance_strategy SET default_strategy=true WHERE name='by_shard_count'; + +CREATE OR REPLACE FUNCTION shard_cost_no_arguments() + RETURNS real AS $$ SELECT 1.0::real $$ LANGUAGE sql; + +CREATE OR REPLACE FUNCTION shard_cost_bad_arg_type(text) + RETURNS real AS $$ SELECT 1.0::real $$ LANGUAGE sql; + +CREATE OR REPLACE FUNCTION shard_cost_bad_return_type(bigint) + RETURNS int AS $$ SELECT 1 $$ LANGUAGE sql; + +CREATE OR REPLACE FUNCTION node_capacity_no_arguments() + RETURNS real AS $$ SELECT 1.0::real $$ LANGUAGE sql; + +CREATE OR REPLACE FUNCTION node_capacity_bad_arg_type(text) + RETURNS real AS $$ SELECT 1.0::real $$ LANGUAGE sql; + +CREATE OR REPLACE FUNCTION node_capacity_bad_return_type(int) + RETURNS int AS $$ SELECT 1 $$ LANGUAGE sql; + +CREATE OR REPLACE FUNCTION shard_allowed_on_node_no_arguments() + RETURNS boolean AS $$ SELECT true $$ LANGUAGE sql; + +CREATE OR REPLACE FUNCTION shard_allowed_on_node_bad_arg1(text, int) + RETURNS boolean AS $$ SELECT true $$ LANGUAGE sql; + +CREATE OR REPLACE FUNCTION shard_allowed_on_node_bad_arg2(bigint, text) + RETURNS boolean AS $$ SELECT true $$ LANGUAGE sql; + +CREATE OR REPLACE FUNCTION shard_allowed_on_node_bad_return_type(bigint, int) + RETURNS int AS $$ SELECT 1 $$ LANGUAGE sql; + +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'shard_cost_no_arguments', + 'citus_node_capacity_1', + 'citus_shard_allowed_on_node_true', + 0 + ); +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'shard_cost_bad_arg_type', + 'citus_node_capacity_1', + 'citus_shard_allowed_on_node_true', + 0 + ); +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'shard_cost_bad_return_type', + 'citus_node_capacity_1', + 'citus_shard_allowed_on_node_true', + 0 + ); +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 0, + 'citus_node_capacity_1', + 'citus_shard_allowed_on_node_true', + 0 + ); + +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'citus_shard_cost_1', + 'node_capacity_no_arguments', + 'citus_shard_allowed_on_node_true', + 0 + ); +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'citus_shard_cost_1', + 'node_capacity_bad_arg_type', + 'citus_shard_allowed_on_node_true', + 0 + ); +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'citus_shard_cost_1', + 'node_capacity_bad_return_type', + 'citus_shard_allowed_on_node_true', + 0 + ); +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'citus_shard_cost_1', + 0, + 'citus_shard_allowed_on_node_true', + 0 + ); + +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'citus_shard_cost_1', + 'citus_node_capacity_1', + 'shard_allowed_on_node_no_arguments', + 0 + ); +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'citus_shard_cost_1', + 'citus_node_capacity_1', + 'shard_allowed_on_node_bad_arg1', + 0 + ); +SELECT citus_add_rebalance_strategy( + 'insert_should_fail', + 'citus_shard_cost_1', + 'citus_node_capacity_1', + 'shard_allowed_on_node_bad_arg2', + 0 + ); +SELECT 
citus_add_rebalance_strategy(
+    'insert_should_fail',
+    'citus_shard_cost_1',
+    'citus_node_capacity_1',
+    'shard_allowed_on_node_bad_return_type',
+    0
+    );
+SELECT citus_add_rebalance_strategy(
+    'insert_should_fail',
+    'citus_shard_cost_1',
+    'citus_node_capacity_1',
+    0,
+    0
+    );
+
+
+-- Confirm that manual insert/update has the same checks
+INSERT INTO
+    pg_catalog.pg_dist_rebalance_strategy(
+        name,
+        shard_cost_function,
+        node_capacity_function,
+        shard_allowed_on_node_function,
+        default_threshold
+    ) VALUES (
+        'shard_cost_no_arguments',
+        'shard_cost_no_arguments',
+        'citus_node_capacity_1',
+        'citus_shard_allowed_on_node_true',
+        0
+    );
+UPDATE pg_dist_rebalance_strategy SET shard_cost_function='shard_cost_no_arguments' WHERE name='by_disk_size';
+
+-- Confirm that only a single default strategy can exist
+INSERT INTO
+    pg_catalog.pg_dist_rebalance_strategy(
+        name,
+        default_strategy,
+        shard_cost_function,
+        node_capacity_function,
+        shard_allowed_on_node_function,
+        default_threshold
+    ) VALUES (
+        'second_default',
+        true,
+        'citus_shard_cost_1',
+        'citus_node_capacity_1',
+        'citus_shard_allowed_on_node_true',
+        0
+    );
+UPDATE pg_dist_rebalance_strategy SET default_strategy=true WHERE name='by_disk_size';
+-- ensure the trigger allows updating the default strategy
+UPDATE pg_dist_rebalance_strategy SET default_strategy=true WHERE name='by_shard_count';
+
+-- Confirm that the default threshold cannot be lower than the minimum threshold
+SELECT citus_add_rebalance_strategy(
+    'default_threshold_too_low',
+    'citus_shard_cost_1',
+    'capacity_high_worker_1',
+    'citus_shard_allowed_on_node_true',
+    0,
+    0.1
+    );
+
+-- Make it a data node again
+SELECT * from master_set_node_property('localhost', :worker_2_port, 'shouldhaveshards', true);
+DROP TABLE tab;
+
+
+-- we don't need the coordinator on pg_dist_node anymore
+SELECT 1 FROM master_remove_node('localhost', :master_port);
+
+
+--
+-- Make sure that rebalance_table_shards() and replicate_table_shards() replicate
+-- reference tables to the coordinator when replicate_reference_tables_on_activate
+-- is off.
+
+--
+
+SET citus.replicate_reference_tables_on_activate TO off;
+SET client_min_messages TO WARNING;
+
+CREATE TABLE dist_table_test_3(a int);
+SET citus.shard_count TO 4;
+
+SET citus.shard_replication_factor TO 1;
+SET citus.replication_model TO "statement";
+SELECT create_distributed_table('dist_table_test_3', 'a');
+
+CREATE TABLE ref_table(a int);
+SELECT create_reference_table('ref_table');
+
+SELECT 1 FROM master_add_node('localhost', :master_port, groupId=>0);
+
+SELECT count(*) FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE logicalrelid = 'ref_table'::regclass;
+
+SET citus.shard_replication_factor TO 2;
+SELECT replicate_table_shards('dist_table_test_3', max_shard_copies := 4, shard_transfer_mode:='block_writes');
+
+SELECT count(*) FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE logicalrelid = 'ref_table'::regclass;
+
+SELECT 1 FROM master_remove_node('localhost', :master_port);
+
+CREATE TABLE rebalance_test_table(int_column int);
+SELECT master_create_distributed_table('rebalance_test_table', 'int_column', 'append');
+
+CALL create_unbalanced_shards('rebalance_test_table');
+
+SELECT 1 FROM master_add_node('localhost', :master_port, groupId=>0);
+
+SELECT count(*) FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE logicalrelid = 'ref_table'::regclass;
+
+SELECT rebalance_table_shards('rebalance_test_table', shard_transfer_mode:='block_writes');
+
+SELECT count(*) FROM pg_dist_shard NATURAL JOIN pg_dist_shard_placement WHERE logicalrelid = 'ref_table'::regclass;
+
+DROP TABLE dist_table_test_3, rebalance_test_table, ref_table;
+
+SELECT 1 FROM master_remove_node('localhost', :master_port);
+
+-- reference table r2 will not have a replica identity, causing the rebalancer to not work
+-- when run in the default mode. Instead we need to change the shard transfer mode to make
+-- it work. This verifies that the shard transfer mode passed to the rebalancer is also used
+-- when ensuring that reference tables exist on all nodes.
+
+CREATE TABLE t1 (a int PRIMARY KEY, b int);
+CREATE TABLE r1 (a int PRIMARY KEY, b int);
+CREATE TABLE r2 (a int, b int);
+
+-- we remove worker 2 before creating the tables; this allows us to have an active
+-- node without the reference tables
+
+SELECT 1 from master_remove_node('localhost', :worker_2_port);
+
+SELECT create_distributed_table('t1','a');
+SELECT create_reference_table('r1');
+SELECT create_reference_table('r2');
+
+-- add data so that we actually copy data when forcing logical replication for reference tables
+INSERT INTO r1 VALUES (1,2), (3,4);
+INSERT INTO r2 VALUES (1,2), (3,4);
+
+SELECT 1 from master_add_node('localhost', :worker_2_port);
+
+SELECT rebalance_table_shards();
+
+DROP TABLE t1, r1, r2;
+
+-- verify there are no distributed tables before we perform the following tests. Preceding
+-- test suites should clean up their distributed tables.
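+-- (pg_dist_partition has one row per distributed or reference table, so a count
+-- of 0 below means earlier test suites left no tables behind.)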
+SELECT count(*) FROM pg_dist_partition; + +-- verify a system having only reference tables will copy the reference tables when +-- executing the rebalancer + +SELECT 1 from master_remove_node('localhost', :worker_2_port); + +CREATE TABLE r1 (a int PRIMARY KEY, b int); +SELECT create_reference_table('r1'); + +SELECT 1 from master_add_node('localhost', :worker_2_port); + +-- count the number of placements for the reference table to verify it is not available on +-- all nodes +SELECT count(*) +FROM pg_dist_shard +JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'r1'::regclass; + +-- rebalance with _only_ a reference table, this should trigger the copy +SELECT rebalance_table_shards(); + +-- verify the reference table is on all nodes after the rebalance +SELECT count(*) +FROM pg_dist_shard +JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'r1'::regclass; + +-- cleanup tables +DROP TABLE r1; + + +-- lastly we need to verify that reference tables are copied before the replication factor +-- of other tables is increased. Without the copy of reference tables the replication might +-- fail. + +SELECT 1 from master_remove_node('localhost', :worker_2_port); + +CREATE TABLE t1 (a int PRIMARY KEY, b int); +CREATE TABLE r1 (a int PRIMARY KEY, b int); +SELECT create_distributed_table('t1', 'a'); +SELECT create_reference_table('r1'); + +SELECT 1 from master_add_node('localhost', :worker_2_port); + +-- count the number of placements for the reference table to verify it is not available on +-- all nodes +SELECT count(*) +FROM pg_dist_shard +JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'r1'::regclass; + +SELECT replicate_table_shards('t1', shard_replication_factor := 2); + +-- verify the reference table is on all nodes after replicate_table_shards +SELECT count(*) +FROM pg_dist_shard +JOIN pg_dist_shard_placement USING (shardid) +WHERE logicalrelid = 'r1'::regclass; + +DROP TABLE t1, r1; diff --git a/src/test/regress/sql/shard_rebalancer_unit.sql b/src/test/regress/sql/shard_rebalancer_unit.sql new file mode 100644 index 000000000..d6159cbd2 --- /dev/null +++ b/src/test/regress/sql/shard_rebalancer_unit.sql @@ -0,0 +1,383 @@ +CREATE OR REPLACE FUNCTION shard_placement_rebalance_array( + worker_node_list json[], + shard_placement_list json[], + threshold float4 DEFAULT 0, + max_shard_moves int DEFAULT 1000000, + drain_only bool DEFAULT false +) +RETURNS json[] +AS 'citus' +LANGUAGE C STRICT VOLATILE; + +-- Check that even with threshold=0.0 shard_placement_rebalance_array returns +-- something when there's no completely balanced solution. 
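+-- (With 3 shards and 2 nodes a perfect split is impossible; the best achievable
+-- outcome is presumably a 2/1 split, so a single move away from hostname1 is
+-- still expected despite threshold=0.0.)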
+ + +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}']::json[] +)); + +-- Check that a node can be drained in a balanced cluster + +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4"}', + '{"node_name": "hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[] +)); + +-- Check that an already drained node won't be filled again after a second +-- rebalance + +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4"}', + '{"node_name": "hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[] +)); + + +-- Check that even when shards are already balanced, but shard 4 is on a node +-- where it is not allowed it will be moved and there will be rebalancing later + +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,5,6"}', + '{"node_name": "hostname2", "node_port": 5432, "disallowed_shards": "4"}', + '{"node_name": "hostname3", "node_port": 5432, "disallowed_shards": "4"}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432}' + ]::json[] +)); + +-- Check that even when shards are already balanced, disallowed shards will be +-- moved away from hostname1 and the only shard that is allowed there will be +-- moved there + +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,5,6"}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", 
"node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432}' + ]::json[] +)); + +-- Check that an error is returned when a shard is not allowed anywhere + +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "2,4"}', + '{"node_name": "hostname2", "node_port": 5432, "disallowed_shards": "1,4"}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[] +)); + +-- Check that cost is taken into account when rebalancing + +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432, "cost": 3}']::json[] +)); + + +-- Check that cost is taken into account when rebalancing disallowed placements + +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4"}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432, "cost": 3}']::json[] +)); + + +-- Check that node capacacity is taken into account. 
+
+SELECT unnest(shard_placement_rebalance_array(
+    ARRAY['{"node_name": "hostname1", "node_port": 5432}',
+          '{"node_name": "hostname2", "node_port": 5432, "capacity": 3}']::json[],
+    ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}',
+          '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}',
+          '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}',
+          '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}']::json[]
+));
+
+-- Check that shards are not moved when a move would keep the maximum
+-- utilization the same but lower the minimum utilization. hostname1 has
+-- utilization of 1 and hostname2 has utilization of 2/3 now; after a move
+-- hostname2 would have utilization of 1 while hostname1 would drop to 0.
+-- Since load is spread more fairly with utilization 2/3 than with 0, the
+-- current distribution should be kept.
+SELECT unnest(shard_placement_rebalance_array(
+    ARRAY['{"node_name": "hostname1", "node_port": 5432}',
+          '{"node_name": "hostname2", "node_port": 5432, "capacity": 3}']::json[],
+    ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}',
+          '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}',
+          '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}']::json[]
+));
+
+
+-- Check that shards are moved when a move keeps the maximum utilization the
+-- same but improves the minimum utilization. hostname2 has utilization of 1
+-- and hostname1 has utilization of 0 now; after a move hostname1 would have
+-- utilization of 1 while hostname2 would drop to 2/3. Since load is spread
+-- more fairly with utilization 2/3 than with 0, the new distribution should
+-- be chosen.
+SELECT unnest(shard_placement_rebalance_array(
+    ARRAY['{"node_name": "hostname1", "node_port": 5432}',
+          '{"node_name": "hostname2", "node_port": 5432, "capacity": 3}']::json[],
+    ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}',
+          '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}',
+          '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}']::json[]
+));
+
+-- Check that shards are moved when a move keeps the maximum utilization the
+-- same but improves the minimum utilization. hostname2 has utilization of 2
+-- and hostname1 has utilization of 1 now; after a move hostname1 would have
+-- utilization of 2 while hostname2 would drop to 1.5. Since load is spread
+-- more fairly with utilization 1.5 than with 1, the new distribution should
+-- be chosen.
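+-- (The utilizations above follow from the capacities in the placements below:
+-- hostname1 holds 1 shard at capacity 1 (1/1 = 1) and hostname2 holds 4 shards
+-- at capacity 2 (4/2 = 2); after one move they become 2/1 = 2 and 3/2 = 1.5.)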
+SELECT unnest(shard_placement_rebalance_array(
+    ARRAY['{"node_name": "hostname1", "node_port": 5432}',
+          '{"node_name": "hostname2", "node_port": 5432, "capacity": 2}']::json[],
+    ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}',
+          '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}',
+          '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}',
+          '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}',
+          '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}']::json[]
+));
+
+-- Check that shards are not moved when a move would keep the maximum
+-- utilization the same but lower the minimum utilization. hostname1 has
+-- utilization of 2 and hostname2 has utilization of 1.5 now; after a move
+-- hostname2 would have utilization of 2 while hostname1 would drop to 1.
+-- Since load is spread more fairly with utilization 1.5 than with 1, the
+-- current distribution should be kept.
+SELECT unnest(shard_placement_rebalance_array(
+    ARRAY['{"node_name": "hostname1", "node_port": 5432}',
+          '{"node_name": "hostname2", "node_port": 5432, "capacity": 2}']::json[],
+    ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}',
+          '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}',
+          '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}',
+          '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}',
+          '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}']::json[]
+));
+
+
+-- Check that all shards will be moved to 1 node if its capacity is big enough
+SELECT unnest(shard_placement_rebalance_array(
+    ARRAY['{"node_name": "hostname1", "node_port": 5432}',
+          '{"node_name": "hostname2", "node_port": 5432, "capacity": 4}']::json[],
+    ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}',
+          '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}',
+          '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}']::json[]
+));
+
+-- Check that shards will be moved to a smaller node if utilization improves
+SELECT unnest(shard_placement_rebalance_array(
+    ARRAY['{"node_name": "hostname1", "node_port": 5432}',
+          '{"node_name": "hostname2", "node_port": 5432, "capacity": 3}']::json[],
+    ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}',
+          '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}',
+          '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}',
+          '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}']::json[]
+));
+
+-- Check that node capacity works with different shard costs
+SELECT unnest(shard_placement_rebalance_array(
+    ARRAY['{"node_name": "hostname1", "node_port": 5432}',
+          '{"node_name": "hostname2", "node_port": 5432, "capacity": 3}']::json[],
+    ARRAY['{"placementid":1, 
"shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432, "cost": 3}']::json[] +)); + +-- Check that node capacity works with different shard costs again +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432, "capacity": 3}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432, "cost": 2}']::json[] +)); + +-- Check that max_shard_moves works and that we get a NOTICE that it is hit +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432, "capacity": 3}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432, "cost": 2}']::json[], + max_shard_moves := 1 +)); + + +-- Check that node capacity works with different shard costs and disallowed_shards +-- NOTE: these moves are not optimal, once we implement merging of updates this +-- output should change. +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432}', + '{"node_name": "hostname2", "node_port": 5432, "capacity": 5}', + '{"node_name": "hostname3", "node_port": 5432, "disallowed_shards": "1,2"}']::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname3", "nodeport":5432, "cost": 2}']::json[] +)); + +-- Check that draining + rebalancing nodes works +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4,5,6", "capacity": 0}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[] +)); + + +-- Check that draining nodes with drain only works +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4,5,6", "capacity": 0}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, 
"shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[], + drain_only := true +)); + +-- Check that draining nodes has priority over max_shard_moves +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4,5,6", "capacity": 0}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[], + max_shard_moves := 0 +)); + +-- Check that drained moves are counted towards shard moves and thus use up the +-- limit when doing normal rebalance moves +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4,5,6", "capacity": 0}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[], + max_shard_moves := 2 +)); + +-- Check that draining for all colocation groups is done before rebalancing +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4,5,6,7,8,9,10,11,12", "capacity": 0}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, 
"shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":7, "shardid":7, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432, "next_colocation": true}', + '{"placementid":8, "shardid":8, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":9, "shardid":9, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":10, "shardid":10, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":11, "shardid":11, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":12, "shardid":12, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[] +)); + +-- Check that max_shard_moves warning is only shown once even if more than one +-- colocation group its placement updates are ignored because of it +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4,5,6,7,8,9,10,11,12", "capacity": 0}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":7, "shardid":7, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432, "next_colocation": true}', + '{"placementid":8, "shardid":8, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":9, "shardid":9, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":10, "shardid":10, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":11, "shardid":11, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":12, "shardid":12, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[], + max_shard_moves := 1 +)); + +-- Check that moves for different colocation groups are added together when +-- taking into account max_shard_moves +SELECT unnest(shard_placement_rebalance_array( + ARRAY['{"node_name": "hostname1", "node_port": 5432, "disallowed_shards": "1,2,3,4,5,6,7,8,9,10,11,12", "capacity": 0}', + '{"node_name": "hostname2", "node_port": 5432}', + '{"node_name": "hostname3", "node_port": 5432}' + ]::json[], + ARRAY['{"placementid":1, "shardid":1, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432}', + '{"placementid":2, "shardid":2, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":3, "shardid":3, "shardstate":1, "shardlength":1, 
"nodename":"hostname2", "nodeport":5432}', + '{"placementid":4, "shardid":4, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":5, "shardid":5, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":6, "shardid":6, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":7, "shardid":7, "shardstate":1, "shardlength":1, "nodename":"hostname1", "nodeport":5432, "next_colocation": true}', + '{"placementid":8, "shardid":8, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":9, "shardid":9, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":10, "shardid":10, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":11, "shardid":11, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}', + '{"placementid":12, "shardid":12, "shardstate":1, "shardlength":1, "nodename":"hostname2", "nodeport":5432}' + ]::json[], + max_shard_moves := 5 +));