diff --git a/src/backend/distributed/operations/repair_shards.c b/src/backend/distributed/operations/repair_shards.c index 59e87f74a..7938a954f 100644 --- a/src/backend/distributed/operations/repair_shards.c +++ b/src/backend/distributed/operations/repair_shards.c @@ -1063,12 +1063,29 @@ EnsureShardCanBeCopied(int64 shardId, const char *sourceNodeName, int32 sourceNo { if (targetPlacement->shardState == SHARD_STATE_TO_DELETE) { - ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg( - "shard " INT64_FORMAT " already exists in the target node", - shardId), - errdetail( - "The existing shard is marked for deletion, but could not be deleted because there are still active queries on it"))); + /* + * Trigger deletion of orphaned shards and hope that this removes + * the shard. + */ + DropMarkedShardsInDifferentTransaction(); + shardPlacementList = ShardPlacementList(shardId); + targetPlacement = SearchShardPlacementInList(shardPlacementList, + targetNodeName, + targetNodePort); + + /* + * If it still doesn't remove the shard, then we error. + */ + if (targetPlacement != NULL) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg( + "shard " INT64_FORMAT + " still exists on the target node as an orphaned shard", + shardId), + errdetail( + "The existing shard is orphaned, but could not be deleted because there are still active queries on it"))); + } } else { diff --git a/src/test/regress/expected/isolation_rebalancer_deferred_drop.out b/src/test/regress/expected/isolation_rebalancer_deferred_drop.out index 187d33632..0a982936e 100644 --- a/src/test/regress/expected/isolation_rebalancer_deferred_drop.out +++ b/src/test/regress/expected/isolation_rebalancer_deferred_drop.out @@ -90,6 +90,57 @@ stop_session_level_connection_to_node +starting permutation: s1-begin s1-move-placement s2-start-session-level-connection s2-lock-table-on-worker s1-commit s1-begin s1-move-placement-back s1-commit s2-stop-connection +step s1-begin: + BEGIN; + +step s1-move-placement: + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); + +master_move_shard_placement + + +step s2-start-session-level-connection: + SELECT start_session_level_connection_to_node('localhost', 57637); + +start_session_level_connection_to_node + + +step s2-lock-table-on-worker: + SELECT run_commands_on_session_level_connection_to_node('BEGIN;'); + SELECT run_commands_on_session_level_connection_to_node('LOCK TABLE t1_120000'); + +run_commands_on_session_level_connection_to_node + + +run_commands_on_session_level_connection_to_node + + +step s1-commit: + COMMIT; + +step s1-begin: + BEGIN; + +step s1-move-placement-back: + SET client_min_messages to NOTICE; + SHOW log_error_verbosity; + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57638, 'localhost', 57637); + +log_error_verbosity + +verbose +ERROR: shard xxxxx still exists on the target node as an orphaned shard +step s1-commit: + COMMIT; + +step s2-stop-connection: + SELECT stop_session_level_connection_to_node(); + +stop_session_level_connection_to_node + + + starting permutation: s1-begin s1-lock-pg-dist-placement s2-drop-old-shards s1-commit step s1-begin: BEGIN; diff --git a/src/test/regress/expected/shard_move_deferred_delete.out b/src/test/regress/expected/shard_move_deferred_delete.out index ae3fbfa00..ed0d46500 100644 --- a/src/test/regress/expected/shard_move_deferred_delete.out +++ b/src/test/regress/expected/shard_move_deferred_delete.out @@ -134,10 +134,14 @@ $cmd$); (localhost,57638,t,1) (2 rows) --- we expect to get an error since the old placement is still there +-- master_move_shard_placement automatically cleans up orphaned shards if +-- needed. SELECT master_move_shard_placement(20000000, 'localhost', :worker_2_port, 'localhost', :worker_1_port); -ERROR: shard xxxxx already exists in the target node -DETAIL: The existing shard is marked for deletion, but could not be deleted because there are still active queries on it + master_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + SELECT run_command_on_workers($cmd$ -- override the function for testing purpose create or replace function pg_catalog.citus_local_disk_space_stats(OUT available_disk_size bigint, OUT total_disk_size bigint) diff --git a/src/test/regress/spec/isolation_rebalancer_deferred_drop.spec b/src/test/regress/spec/isolation_rebalancer_deferred_drop.spec index 150776aba..bf8a10eb7 100644 --- a/src/test/regress/spec/isolation_rebalancer_deferred_drop.spec +++ b/src/test/regress/spec/isolation_rebalancer_deferred_drop.spec @@ -62,6 +62,13 @@ step "s1-move-placement" SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); } +step "s1-move-placement-back" +{ + SET client_min_messages to NOTICE; + SHOW log_error_verbosity; + SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57638, 'localhost', 57637); +} + step "s1-move-placement-without-deferred" { SET citus.defer_drop_after_shard_move TO OFF; SELECT master_move_shard_placement((SELECT * FROM selected_shard), 'localhost', 57637, 'localhost', 57638); @@ -127,6 +134,8 @@ step "s2-commit" { permutation "s1-begin" "s1-move-placement" "s1-drop-marked-shards" "s2-drop-marked-shards" "s1-commit" permutation "s1-begin" "s1-move-placement" "s2-drop-marked-shards" "s1-drop-marked-shards" "s1-commit" permutation "s1-begin" "s1-move-placement" "s2-start-session-level-connection" "s2-lock-table-on-worker" "s1-drop-marked-shards" "s1-commit" "s2-stop-connection" +// make sure we give a clear error when we try to replace an orphaned shard that is still in use +permutation "s1-begin" "s1-move-placement" "s2-start-session-level-connection" "s2-lock-table-on-worker" "s1-commit" "s1-begin" "s1-move-placement-back" "s1-commit" "s2-stop-connection" // make sure we error if we cannot get the lock on pg_dist_placement permutation "s1-begin" "s1-lock-pg-dist-placement" "s2-drop-old-shards" "s1-commit" permutation "s1-begin" "s2-begin" "s2-select" "s1-move-placement-without-deferred" "s2-commit" "s1-commit" diff --git a/src/test/regress/sql/shard_move_deferred_delete.sql b/src/test/regress/sql/shard_move_deferred_delete.sql index e33bc3f82..a052590d5 100644 --- a/src/test/regress/sql/shard_move_deferred_delete.sql +++ b/src/test/regress/sql/shard_move_deferred_delete.sql @@ -75,7 +75,8 @@ SELECT run_command_on_workers($cmd$ SELECT count(*) FROM pg_class WHERE relname = 't1_20000000'; $cmd$); --- we expect to get an error since the old placement is still there +-- master_move_shard_placement automatically cleans up orphaned shards if +-- needed. SELECT master_move_shard_placement(20000000, 'localhost', :worker_2_port, 'localhost', :worker_1_port);