Drop cleanup on failure (#6584)

DESCRIPTION: Defers cleanup after a failure in shard move or split

We no longer need to do a cleanup when a shard transfer or split fails, because:
* The maintenance daemon will clean the leftovers up anyway.
* We already trigger a cleanup at the beginning of shard transfers/splits.
* The cleanup-on-failure logic can itself fail, in which case we throw the error raised by the cleanup procedure instead of the original error, which causes confusion.
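
For illustration only (not part of this commit): roughly what the deferred-cleanup behavior looks like from SQL. The pg_dist_cleanup columns match the regression-test output further below; the explicit cleanup procedure is an assumption about what is available in your Citus version.

-- After a shard move/split errors out, the failing backend no longer drops the
-- resources it created; they remain registered as cleanup records:
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup;

-- The maintenance daemon drops these leftovers in the background; cleanup can
-- also be requested explicitly (assuming the procedure exists in your version):
CALL citus_cleanup_orphaned_resources();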
Ahmet Gedemenli 2022-12-28 15:48:44 +03:00 committed by GitHub
parent cfc17385e9
commit 1b1e737e51
13 changed files with 347 additions and 493 deletions

View File

@@ -478,73 +478,6 @@ RegisterOperationNeedingCleanup(void)
}
/*
* FinalizeOperationNeedingCleanupOnFailure is called by an operation to signal
* completion with failure. This will trigger cleanup of appropriate resources.
*/
void
FinalizeOperationNeedingCleanupOnFailure(const char *operationName)
{
/* We must have a valid OperationId. Any operation requiring cleanup
* will call RegisterOperationNeedingCleanup.
*/
Assert(CurrentOperationId != INVALID_OPERATION_ID);
List *currentOperationRecordList = ListCleanupRecordsForCurrentOperation();
/*
* We sort the records before cleaning up by their types, because of dependencies.
* For example, a subscription might depend on a publication.
*/
currentOperationRecordList = SortList(currentOperationRecordList,
CompareCleanupRecordsByObjectType);
int failedShardCountOnComplete = 0;
CleanupRecord *record = NULL;
foreach_ptr(record, currentOperationRecordList)
{
if (record->policy == CLEANUP_ALWAYS || record->policy == CLEANUP_ON_FAILURE)
{
WorkerNode *workerNode = LookupNodeForGroup(record->nodeGroupId);
/*
* For all resources of CurrentOperationId that are marked as 'CLEANUP_ALWAYS' or
* 'CLEANUP_ON_FAILURE', drop resource and cleanup records.
*/
if (TryDropResourceByCleanupRecordOutsideTransaction(record,
workerNode->workerName,
workerNode->workerPort))
{
/*
* Given the operation is failing and we will abort its transaction, we cannot delete
* records in the current transaction. Delete these records outside of the
* current transaction via a localhost connection.
*/
DeleteCleanupRecordByRecordIdOutsideTransaction(record->recordId);
}
else if (record->objectType == CLEANUP_OBJECT_SHARD_PLACEMENT)
{
/*
* We log failures at the end, since they occur repeatedly
* for a large number of objects.
*/
failedShardCountOnComplete++;
}
}
}
if (failedShardCountOnComplete > 0)
{
ereport(WARNING, (errmsg("failed to clean up %d orphaned shards out of %d after "
"a %s operation failed",
failedShardCountOnComplete,
list_length(currentOperationRecordList),
operationName)));
}
}
/*
* FinalizeOperationNeedingCleanupOnSuccess is called by an operation to signal
* completion with success. This will trigger cleanup of appropriate resources.
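
An aside on the "sort the records ... because of dependencies" comment in the removed function above: the cleanup records are ordered by object type because some temporary replication objects can only be dropped after the objects that depend on them. A hypothetical SQL illustration (object names made up; the corresponding failure appears verbatim in the shard-move test output further below):

-- Names are invented for illustration.
-- Dropping the subscription's owner role first fails:
DROP ROLE citus_shard_move_subscription_role_10_20;
-- ERROR:  role "citus_shard_move_subscription_role_10_20" cannot be dropped
--         because some objects depend on it
-- DETAIL: owner of subscription citus_shard_move_subscription_10_20

-- Dropping in dependency order works:
DROP SUBSCRIPTION citus_shard_move_subscription_10_20;
DROP ROLE citus_shard_move_subscription_role_10_20;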

View File

@@ -596,8 +596,6 @@ BlockingShardSplit(SplitOperation splitOperation,
WorkerNode *sourceShardNode =
ActiveShardPlacementWorkerNode(firstShard->shardId);
PG_TRY();
{
ereport(LOG, (errmsg("creating child shards for %s", operationName)));
/* Physically create split children. */
@@ -662,19 +660,6 @@ BlockingShardSplit(SplitOperation splitOperation,
*/
CreateForeignKeyConstraints(shardGroupSplitIntervalListList,
workersForPlacementList);
}
PG_CATCH();
{
/* end ongoing transactions to enable us to clean up */
ShutdownAllConnections();
/* Do a best effort cleanup of shards created on workers in the above block */
FinalizeOperationNeedingCleanupOnFailure(operationName);
PG_RE_THROW();
}
PG_END_TRY();
CitusInvalidateRelcacheByRelid(DistShardRelationId());
}
@@ -1508,8 +1493,7 @@ NonBlockingShardSplit(SplitOperation splitOperation,
sourceShardToCopyNode->workerPort);
/* Non-Blocking shard split workflow starts here */
PG_TRY();
{
ereport(LOG, (errmsg("creating child shards for %s",
operationName)));
@@ -1715,21 +1699,6 @@ NonBlockingShardSplit(SplitOperation splitOperation,
/* 18) Close connection of template replication slot */
CloseConnection(sourceReplicationConnection);
}
PG_CATCH();
{
/* end ongoing transactions to enable us to clean up */
ShutdownAllConnections();
/*
* Drop temporary objects that were marked as CLEANUP_ON_FAILURE
* or CLEANUP_ALWAYS.
*/
FinalizeOperationNeedingCleanupOnFailure(operationName);
PG_RE_THROW();
}
PG_END_TRY();
}
/*

View File

@@ -90,8 +90,7 @@ static void CopyShardTablesViaLogicalReplication(List *shardIntervalList,
char *sourceNodeName,
int32 sourceNodePort,
char *targetNodeName,
int32 targetNodePort,
char *operationName);
int32 targetNodePort);
static void CopyShardTablesViaBlockWrites(List *shardIntervalList, char *sourceNodeName,
int32 sourceNodePort,
@@ -1170,7 +1169,7 @@ CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodeP
{
CopyShardTablesViaLogicalReplication(shardIntervalList, sourceNodeName,
sourceNodePort, targetNodeName,
targetNodePort, operationName);
targetNodePort);
}
else
{
@@ -1192,7 +1191,7 @@ CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodeP
static void
CopyShardTablesViaLogicalReplication(List *shardIntervalList, char *sourceNodeName,
int32 sourceNodePort, char *targetNodeName,
int32 targetNodePort, char *operationName)
int32 targetNodePort)
{
MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext,
"CopyShardTablesViaLogicalReplication",
@@ -1231,8 +1230,7 @@ CopyShardTablesViaLogicalReplication(List *shardIntervalList, char *sourceNodeNa
/* data copy is done separately when logical replication is used */
LogicallyReplicateShards(shardIntervalList, sourceNodeName,
sourceNodePort, targetNodeName, targetNodePort,
operationName);
sourceNodePort, targetNodeName, targetNodePort);
}

View File

@@ -152,7 +152,7 @@ static void WaitForGroupedLogicalRepTargetsToCatchUp(XLogRecPtr sourcePosition,
*/
void
LogicallyReplicateShards(List *shardList, char *sourceNodeName, int sourceNodePort,
char *targetNodeName, int targetNodePort, char *operationName)
char *targetNodeName, int targetNodePort)
{
AcquireLogicalReplicationLock();
char *superUser = CitusExtensionOwnerName();
@@ -193,8 +193,6 @@ LogicallyReplicateShards(List *shardList, char *sourceNodeName, int sourceNodePo
CreateGroupedLogicalRepTargetsConnections(groupedLogicalRepTargetsHash, superUser,
databaseName);
PG_TRY();
{
MultiConnection *sourceReplicationConnection =
GetReplicationConnection(sourceConnection->hostname, sourceConnection->port);
@@ -269,20 +267,6 @@ LogicallyReplicateShards(List *shardList, char *sourceNodeName, int sourceNodePo
CloseGroupedLogicalRepTargetsConnections(groupedLogicalRepTargetsHash);
CloseConnection(sourceConnection);
}
PG_CATCH();
{
/* We don't need to UnclaimConnections since we're already erroring out */
/*
* Drop temporary objects that were marked as CLEANUP_ON_FAILURE
* or CLEANUP_ALWAYS.
*/
FinalizeOperationNeedingCleanupOnFailure(operationName);
PG_RE_THROW();
}
PG_END_TRY();
}
/*

View File

@@ -130,7 +130,7 @@ typedef enum LogicalRepType
extern void LogicallyReplicateShards(List *shardList, char *sourceNodeName,
int sourceNodePort, char *targetNodeName,
int targetNodePort, char *operationName);
int targetNodePort);
extern void ConflictWithIsolationTestingBeforeCopy(void);
extern void ConflictWithIsolationTestingAfterCopy(void);

View File

@@ -103,13 +103,6 @@ extern void InsertCleanupRecordInSubtransaction(CleanupObject objectType,
int nodeGroupId,
CleanupPolicy policy);
/*
* FinalizeOperationNeedingCleanupOnFailure is called by an operation to signal
* completion on failure. This will trigger cleanup of appropriate resources
* and cleanup records.
*/
extern void FinalizeOperationNeedingCleanupOnFailure(const char *operationName);
/*
* FinalizeOperationNeedingCleanupOnSuccess is called by an operation to signal
* completion on success. This will trigger cleanup of appropriate resources

View File

@@ -74,6 +74,12 @@ SELECT pg_catalog.citus_split_shard_by_split_points(
'block_writes');
ERROR: relation citus_split_failure_test_schema.sensors_8981002 already exists on worker localhost:xxxxx
-- BEGIN : Split Shard, which is expected to fail.
SELECT public.wait_for_resource_cleanup();
wait_for_resource_cleanup
---------------------------------------------------------------------
(1 row)
-- BEGIN : Ensure tables were cleaned from worker
\c - - - :worker_1_port
SET search_path TO "citus_split_failure_test_schema";

View File

@@ -54,10 +54,6 @@ SELECT citus.mitmproxy('conn.onQuery(query="ALTER SUBSCRIPTION").kill()');
(1 row)
SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port);
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
ERROR: connection not open
CONTEXT: while executing command on localhost:xxxxx
-- cleanup leftovers

View File

@@ -122,13 +122,6 @@ SELECT citus.mitmproxy('conn.onQuery(query="^ALTER SUBSCRIPTION .* (ENABLE|DISAB
(1 row)
SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port);
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: role "citus_shard_move_subscription_role_xxxxxxx_xxxxxxx" cannot be dropped because some objects depend on it
DETAIL: owner of subscription citus_shard_move_subscription_xxxxxxx_xxxxxxx
CONTEXT: while executing command on localhost:xxxxx
ERROR: connection not open
CONTEXT: while executing command on localhost:xxxxx
-- cleanup leftovers
@@ -397,9 +390,6 @@ SELECT citus.mitmproxy('conn.onQuery(query="t_pkey").cancel(' || :pid || ')');
(1 row)
SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port);
WARNING: canceling statement due to lock timeout
CONTEXT: while executing command on localhost:xxxxx
WARNING: failed to clean up 1 orphaned shards out of 5 after a citus_move_shard_placement operation failed
ERROR: canceling statement due to user request
-- cleanup leftovers
SELECT citus.mitmproxy('conn.allow()');
@@ -422,9 +412,6 @@ SELECT citus.mitmproxy('conn.matches(b"CREATE INDEX").killall()');
(1 row)
SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port);
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: failed to clean up 1 orphaned shards out of 5 after a citus_move_shard_placement operation failed
ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-- cleanup leftovers
SELECT citus.mitmproxy('conn.allow()');
@@ -460,9 +447,6 @@ SELECT citus.mitmproxy('conn.matches(b"CREATE INDEX").killall()');
(1 row)
SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port);
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: failed to clean up 1 orphaned shards out of 5 after a citus_move_shard_placement operation failed
ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-- failure on parallel create index
ALTER SYSTEM RESET citus.max_adaptive_executor_pool_size;
@@ -479,9 +463,6 @@ SELECT citus.mitmproxy('conn.matches(b"CREATE INDEX").killall()');
(1 row)
SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port);
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: failed to clean up 1 orphaned shards out of 5 after a citus_move_shard_placement operation failed
ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-- Verify that the shard is not moved and the number of rows are still 100k
SELECT citus.mitmproxy('conn.allow()');

View File

@@ -51,17 +51,18 @@ SELECT create_distributed_table('table_to_split', 'id');
ARRAY['-100000'],
ARRAY[:worker_1_node, :worker_2_node],
'force_logical');
WARNING: failed to clean up 2 orphaned shards out of 5 after a citus_split_shard_by_split_points operation failed
ERROR: connection not open
CONTEXT: while executing command on localhost:xxxxx
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
operation_id | object_type | object_name | node_group_id | policy_type
---------------------------------------------------------------------
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1
777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
(3 rows)
(5 rows)
-- we need to allow connection so that we can connect to proxy
SELECT citus.mitmproxy('conn.allow()');
@@ -165,11 +166,13 @@ ERROR: Failed to run worker_split_shard_replication_setup UDF. It should succes
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
operation_id | object_type | object_name | node_group_id | policy_type
---------------------------------------------------------------------
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1
777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
(4 rows)
(6 rows)
-- we need to allow connection so that we can connect to proxy
SELECT citus.mitmproxy('conn.allow()');
@@ -271,21 +274,20 @@ WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: connection not open
CONTEXT: while executing command on localhost:xxxxx
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: failed to clean up 2 orphaned shards out of 7 after a citus_split_shard_by_split_points operation failed
ERROR: connection not open
CONTEXT: while executing command on localhost:xxxxx
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
operation_id | object_type | object_name | node_group_id | policy_type
---------------------------------------------------------------------
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1
777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
777 | 3 | citus_shard_split_slot_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
(5 rows)
(7 rows)
-- we need to allow connection so that we can connect to proxy
SELECT citus.mitmproxy('conn.allow()');
@@ -383,28 +385,25 @@ CONTEXT: while executing command on localhost:xxxxx
ARRAY['-100000'],
ARRAY[:worker_1_node, :worker_2_node],
'force_logical');
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: failed to clean up 2 orphaned shards out of 12 after a citus_split_shard_by_split_points operation failed
ERROR: connection not open
CONTEXT: while executing command on localhost:xxxxx
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
operation_id | object_type | object_name | node_group_id | policy_type
---------------------------------------------------------------------
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1
777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
777 | 3 | citus_shard_split_slot_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
777 | 3 | citus_shard_split_slot_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
777 | 2 | citus_shard_split_subscription_xxxxxxx_xxxxxxx | 1 | 0
777 | 2 | citus_shard_split_subscription_xxxxxxx_xxxxxxx | 2 | 0
777 | 5 | citus_shard_split_subscription_role_xxxxxxx_xxxxxxx | 1 | 0
777 | 5 | citus_shard_split_subscription_role_xxxxxxx_xxxxxxx | 2 | 0
(8 rows)
(12 rows)
-- we need to allow connection so that we can connect to proxy
SELECT citus.mitmproxy('conn.allow()');
@@ -505,28 +504,25 @@ CONTEXT: while executing command on localhost:xxxxx
ARRAY['-100000'],
ARRAY[:worker_1_node, :worker_2_node],
'force_logical');
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
WARNING: failed to clean up 2 orphaned shards out of 12 after a citus_split_shard_by_split_points operation failed
ERROR: connection not open
CONTEXT: while executing command on localhost:xxxxx
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
operation_id | object_type | object_name | node_group_id | policy_type
---------------------------------------------------------------------
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1
777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
777 | 3 | citus_shard_split_slot_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
777 | 3 | citus_shard_split_slot_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
777 | 2 | citus_shard_split_subscription_xxxxxxx_xxxxxxx | 1 | 0
777 | 2 | citus_shard_split_subscription_xxxxxxx_xxxxxxx | 2 | 0
777 | 5 | citus_shard_split_subscription_role_xxxxxxx_xxxxxxx | 1 | 0
777 | 5 | citus_shard_split_subscription_role_xxxxxxx_xxxxxxx | 2 | 0
(8 rows)
(12 rows)
-- we need to allow connection so that we can connect to proxy
SELECT citus.mitmproxy('conn.allow()');

View File

@@ -766,8 +766,6 @@ SET citus.override_table_visibility TO false;
SET search_path to "Tenant Isolation";
\set VERBOSITY terse
SELECT isolate_tenant_to_new_shard('orders_streaming', 104, 'CASCADE', shard_transfer_mode => 'block_writes');
WARNING: command DROP TABLE is disabled
WARNING: failed to clean up 1 orphaned shards out of 1 after a isolate_tenant_to_new_shard operation failed
ERROR: command CREATE TABLE is disabled
\set VERBOSITY default
\c - postgres - :worker_1_port

View File

@@ -814,8 +814,6 @@ SET citus.override_table_visibility TO false;
SET search_path to "Tenant Isolation";
\set VERBOSITY terse
SELECT isolate_tenant_to_new_shard('orders_streaming', 104, 'CASCADE', shard_transfer_mode => 'force_logical');
WARNING: command DROP TABLE is disabled
WARNING: failed to clean up 1 orphaned shards out of 1 after a isolate_tenant_to_new_shard operation failed
ERROR: command CREATE TABLE is disabled
\set VERBOSITY default
\c - postgres - :worker_1_port

View File

@@ -64,6 +64,8 @@ SELECT pg_catalog.citus_split_shard_by_split_points(
'block_writes');
-- BEGIN : Split Shard, which is expected to fail.
SELECT public.wait_for_resource_cleanup();
-- BEGIN : Ensure tables were cleaned from worker
\c - - - :worker_1_port
SET search_path TO "citus_split_failure_test_schema";