Cleanup orphaned shards before moving when necessary

A shard move would fail if there was an orphaned version of the shard on the target node. With this change before actually fail, we try to clean up orphaned shards to see if that fixes the issue.
2021-06-03 11:38:27 +02:00 · 2021-06-03 11:38:27 +02:00 · 503c70b619
parent 280b9ae018
commit 503c70b619
5 changed files with 92 additions and 10 deletions
--- a/src/backend/distributed/operations/repair_shards.c
+++ b/src/backend/distributed/operations/repair_shards.c
@ -1063,12 +1063,29 @@ EnsureShardCanBeCopied(int64 shardId, const char *sourceNodeName, int32 sourceNo
 	{
 		if (targetPlacement->shardState == SHARD_STATE_TO_DELETE)
 		{
-			ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-							errmsg(
-								"shard " INT64_FORMAT " already exists in the target node",
-								shardId),
-							errdetail(
-								"The existing shard is marked for deletion, but could not be deleted because there are still active queries on it")));
+			/*
+			 * Trigger deletion of orphaned shards and hope that this removes
+			 * the shard.
+			 */
+			DropMarkedShardsInDifferentTransaction();
+			shardPlacementList = ShardPlacementList(shardId);
+			targetPlacement = SearchShardPlacementInList(shardPlacementList,
+														 targetNodeName,
+														 targetNodePort);
+
+			/*
+			 * If it still doesn't remove the shard, then we error.
+			 */
+			if (targetPlacement != NULL)
+			{
+				ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+								errmsg(
+									"shard " INT64_FORMAT
+									" still exists on the target node as an orphaned shard",
+									shardId),
+								errdetail(
+									"The existing shard is orphaned, but could not be deleted because there are still active queries on it")));
+			}
 		}
 		else
 		{
--- a/src/test/regress/expected/isolation_rebalancer_deferred_drop.out
+++ b/src/test/regress/expected/isolation_rebalancer_deferred_drop.out
@ -90,6 +90,57 @@ stop_session_level_connection_to_node



+starting permutation: s1-begin s1-move-placement s2-start-session-level-connection s2-lock-table-on-worker s1-commit s1-begin s1-move-placement-back s1-commit s2-stop-connection
+step s1-begin:
+    BEGIN;
+
+step s1-move-placement:
+    SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638);
+
+master_move_shard_placement
+
+
+step s2-start-session-level-connection:
+        SELECT start_session_level_connection_to_node('localhost', 57637);
+
+start_session_level_connection_to_node
+
+
+step s2-lock-table-on-worker:
+    SELECT run_commands_on_session_level_connection_to_node('BEGIN;');
+    SELECT run_commands_on_session_level_connection_to_node('LOCK TABLE t1_120000');
+
+run_commands_on_session_level_connection_to_node
+
+
+run_commands_on_session_level_connection_to_node
+
+
+step s1-commit:
+    COMMIT;
+
+step s1-begin:
+    BEGIN;
+
+step s1-move-placement-back:
+    SET client_min_messages to NOTICE;
+    SHOW log_error_verbosity;
+    SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57638, 'localhost', 57637);
+
+log_error_verbosity
+
+verbose
+ERROR:  shard xxxxx still exists on the target node as an orphaned shard
+step s1-commit:
+    COMMIT;
+
+step s2-stop-connection:
+ SELECT stop_session_level_connection_to_node();
+
+stop_session_level_connection_to_node
+
+
+
 starting permutation: s1-begin s1-lock-pg-dist-placement s2-drop-old-shards s1-commit
 step s1-begin:
    BEGIN;
--- a/src/test/regress/expected/shard_move_deferred_delete.out
+++ b/src/test/regress/expected/shard_move_deferred_delete.out
@ -134,10 +134,14 @@ $cmd$);
 (localhost,57638,t,1)
 (2 rows)

-- we expect to get an error since the old placement is still there
+-- master_move_shard_placement automatically cleans up orphaned shards if
+-- needed.
 SELECT master_move_shard_placement(20000000, 'localhost', :worker_2_port, 'localhost', :worker_1_port);
-ERROR:  shard xxxxx already exists in the target node
-DETAIL:  The existing shard is marked for deletion, but could not be deleted because there are still active queries on it
+ master_move_shard_placement
+---------------------------------------------------------------------
+
+(1 row)
+
 SELECT run_command_on_workers($cmd$
    -- override the function for testing purpose
    create or replace function pg_catalog.citus_local_disk_space_stats(OUT available_disk_size bigint, OUT total_disk_size bigint)
--- a/src/test/regress/spec/isolation_rebalancer_deferred_drop.spec
+++ b/src/test/regress/spec/isolation_rebalancer_deferred_drop.spec
@ -62,6 +62,13 @@ step "s1-move-placement"
    SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638);
 }

+step "s1-move-placement-back"
+{
+    SET client_min_messages to NOTICE;
+    SHOW log_error_verbosity;
+    SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57638, 'localhost', 57637);
+}
+
 step "s1-move-placement-without-deferred" {
    SET citus.defer_drop_after_shard_move TO OFF;
    SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638);
@ -127,6 +134,8 @@ step "s2-commit" {
 permutation "s1-begin" "s1-move-placement" "s1-drop-marked-shards" "s2-drop-marked-shards" "s1-commit"
 permutation "s1-begin" "s1-move-placement" "s2-drop-marked-shards" "s1-drop-marked-shards" "s1-commit"
 permutation "s1-begin" "s1-move-placement" "s2-start-session-level-connection" "s2-lock-table-on-worker" "s1-drop-marked-shards" "s1-commit" "s2-stop-connection"
+// make sure we give a clear error when we try to replace an orphaned shard that is still in use
+permutation "s1-begin" "s1-move-placement" "s2-start-session-level-connection" "s2-lock-table-on-worker" "s1-commit" "s1-begin" "s1-move-placement-back" "s1-commit" "s2-stop-connection"
 // make sure we error if we cannot get the lock on pg_dist_placement
 permutation "s1-begin" "s1-lock-pg-dist-placement" "s2-drop-old-shards" "s1-commit"
 permutation "s1-begin" "s2-begin" "s2-select" "s1-move-placement-without-deferred" "s2-commit" "s1-commit"
--- a/src/test/regress/sql/shard_move_deferred_delete.sql
+++ b/src/test/regress/sql/shard_move_deferred_delete.sql
@ -75,7 +75,8 @@ SELECT run_command_on_workers($cmd$
    SELECT count(*) FROM pg_class WHERE relname = 't1_20000000';
 $cmd$);

-- we expect to get an error since the old placement is still there
+-- master_move_shard_placement automatically cleans up orphaned shards if
+-- needed.
 SELECT master_move_shard_placement(20000000, 'localhost', :worker_2_port, 'localhost', :worker_1_port);