Drop cleanup on failure (#6584)

DESCRIPTION: Defers cleanup after a failure in shard move or split

We no longer need to clean up when a shard transfer or shard split fails, because:
* The maintenance daemon will clean up the leftover resources anyway.
* We already trigger a cleanup at the beginning of shard transfers/splits.
* The cleanup-on-failure logic can itself fail, and when it does we raise the error from the cleanup procedure instead of the original error, which causes confusion.

A sketch of the deferred-cleanup flow these points rely on follows below.
Ahmet Gedemenli 2022-12-28 15:48:44 +03:00 committed by GitHub
parent cfc17385e9
commit 1b1e737e51
13 changed files with 347 additions and 493 deletions


@@ -478,73 +478,6 @@ RegisterOperationNeedingCleanup(void)
 }
 
 
-/*
- * FinalizeOperationNeedingCleanupOnFailure is be called by an operation to signal
- * completion with failure. This will trigger cleanup of appropriate resources.
- */
-void
-FinalizeOperationNeedingCleanupOnFailure(const char *operationName)
-{
-    /* We must have a valid OperationId. Any operation requring cleanup
-     * will call RegisterOperationNeedingCleanup.
-     */
-    Assert(CurrentOperationId != INVALID_OPERATION_ID);
-
-    List *currentOperationRecordList = ListCleanupRecordsForCurrentOperation();
-
-    /*
-     * We sort the records before cleaning up by their types, because of dependencies.
-     * For example, a subscription might depend on a publication.
-     */
-    currentOperationRecordList = SortList(currentOperationRecordList,
-                                          CompareCleanupRecordsByObjectType);
-
-    int failedShardCountOnComplete = 0;
-
-    CleanupRecord *record = NULL;
-    foreach_ptr(record, currentOperationRecordList)
-    {
-        if (record->policy == CLEANUP_ALWAYS || record->policy == CLEANUP_ON_FAILURE)
-        {
-            WorkerNode *workerNode = LookupNodeForGroup(record->nodeGroupId);
-
-            /*
-             * For all resources of CurrentOperationId that are marked as 'CLEANUP_ALWAYS' or
-             * 'CLEANUP_ON_FAILURE', drop resource and cleanup records.
-             */
-            if (TryDropResourceByCleanupRecordOutsideTransaction(record,
-                                                                 workerNode->workerName,
-                                                                 workerNode->workerPort))
-            {
-                /*
-                 * Given the operation is failing and we will abort its transaction, we cannot delete
-                 * records in the current transaction. Delete these records outside of the
-                 * current transaction via a localhost connection.
-                 */
-                DeleteCleanupRecordByRecordIdOutsideTransaction(record->recordId);
-            }
-            else if (record->objectType == CLEANUP_OBJECT_SHARD_PLACEMENT)
-            {
-                /*
-                 * We log failures at the end, since they occur repeatedly
-                 * for a large number of objects.
-                 */
-                failedShardCountOnComplete++;
-            }
-        }
-    }
-
-    if (failedShardCountOnComplete > 0)
-    {
-        ereport(WARNING, (errmsg("failed to clean up %d orphaned shards out of %d after "
-                                 "a %s operation failed",
-                                 failedShardCountOnComplete,
-                                 list_length(currentOperationRecordList),
-                                 operationName)));
-    }
-}
-
-
 /*
  * FinalizeOperationNeedingCleanupOnSuccess is be called by an operation to signal
  * completion with success. This will trigger cleanup of appropriate resources.


@@ -596,8 +596,6 @@ BlockingShardSplit(SplitOperation splitOperation,
     WorkerNode *sourceShardNode =
         ActiveShardPlacementWorkerNode(firstShard->shardId);
 
-    PG_TRY();
-    {
     ereport(LOG, (errmsg("creating child shards for %s", operationName)));
 
     /* Physically create split children. */
@@ -662,19 +660,6 @@ BlockingShardSplit(SplitOperation splitOperation,
      */
     CreateForeignKeyConstraints(shardGroupSplitIntervalListList,
                                 workersForPlacementList);
-    }
-    PG_CATCH();
-    {
-        /* end ongoing transactions to enable us to clean up */
-        ShutdownAllConnections();
-
-        /* Do a best effort cleanup of shards created on workers in the above block */
-        FinalizeOperationNeedingCleanupOnFailure(operationName);
-
-        PG_RE_THROW();
-    }
-    PG_END_TRY();
 
     CitusInvalidateRelcacheByRelid(DistShardRelationId());
 }
@@ -1508,8 +1493,7 @@ NonBlockingShardSplit(SplitOperation splitOperation,
                                           sourceShardToCopyNode->workerPort);
 
     /* Non-Blocking shard split workflow starts here */
-    PG_TRY();
-    {
     ereport(LOG, (errmsg("creating child shards for %s",
                          operationName)));
@@ -1715,21 +1699,6 @@ NonBlockingShardSplit(SplitOperation splitOperation,
     /* 18) Close connection of template replication slot */
     CloseConnection(sourceReplicationConnection);
     }
-    PG_CATCH();
-    {
-        /* end ongoing transactions to enable us to clean up */
-        ShutdownAllConnections();
-
-        /*
-         * Drop temporary objects that were marked as CLEANUP_ON_FAILURE
-         * or CLEANUP_ALWAYS.
-         */
-        FinalizeOperationNeedingCleanupOnFailure(operationName);
-
-        PG_RE_THROW();
-    }
-    PG_END_TRY();
-}
 
 
 /*


@@ -90,8 +90,7 @@ static void CopyShardTablesViaLogicalReplication(List *shardIntervalList,
                                                  char *sourceNodeName,
                                                  int32 sourceNodePort,
                                                  char *targetNodeName,
-                                                 int32 targetNodePort,
-                                                 char *operationName);
+                                                 int32 targetNodePort);
 static void CopyShardTablesViaBlockWrites(List *shardIntervalList, char *sourceNodeName,
                                           int32 sourceNodePort,
@@ -1170,7 +1169,7 @@ CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodeP
     {
         CopyShardTablesViaLogicalReplication(shardIntervalList, sourceNodeName,
                                              sourceNodePort, targetNodeName,
-                                             targetNodePort, operationName);
+                                             targetNodePort);
     }
     else
     {
@@ -1192,7 +1191,7 @@ CopyShardTables(List *shardIntervalList, char *sourceNodeName, int32 sourceNodeP
 static void
 CopyShardTablesViaLogicalReplication(List *shardIntervalList, char *sourceNodeName,
                                      int32 sourceNodePort, char *targetNodeName,
-                                     int32 targetNodePort, char *operationName)
+                                     int32 targetNodePort)
 {
     MemoryContext localContext = AllocSetContextCreate(CurrentMemoryContext,
                                                        "CopyShardTablesViaLogicalReplication",
@@ -1231,8 +1230,7 @@ CopyShardTablesViaLogicalReplication(List *shardIntervalList, char *sourceNodeNa
     /* data copy is done seperately when logical replication is used */
     LogicallyReplicateShards(shardIntervalList, sourceNodeName,
-                             sourceNodePort, targetNodeName, targetNodePort,
-                             operationName);
+                             sourceNodePort, targetNodeName, targetNodePort);
 }


@@ -152,7 +152,7 @@ static void WaitForGroupedLogicalRepTargetsToCatchUp(XLogRecPtr sourcePosition,
  */
 void
 LogicallyReplicateShards(List *shardList, char *sourceNodeName, int sourceNodePort,
-                         char *targetNodeName, int targetNodePort, char *operationName)
+                         char *targetNodeName, int targetNodePort)
 {
     AcquireLogicalReplicationLock();
     char *superUser = CitusExtensionOwnerName();
@@ -193,8 +193,6 @@ LogicallyReplicateShards(List *shardList, char *sourceNodeName, int sourceNodePo
     CreateGroupedLogicalRepTargetsConnections(groupedLogicalRepTargetsHash, superUser,
                                               databaseName);
 
-    PG_TRY();
-    {
     MultiConnection *sourceReplicationConnection =
         GetReplicationConnection(sourceConnection->hostname, sourceConnection->port);
@@ -269,20 +267,6 @@ LogicallyReplicateShards(List *shardList, char *sourceNodeName, int sourceNodePo
     CloseGroupedLogicalRepTargetsConnections(groupedLogicalRepTargetsHash);
     CloseConnection(sourceConnection);
     }
-    PG_CATCH();
-    {
-        /* We don't need to UnclaimConnections since we're already erroring out */
-
-        /*
-         * Drop temporary objects that were marked as CLEANUP_ON_FAILURE
-         * or CLEANUP_ALWAYS.
-         */
-        FinalizeOperationNeedingCleanupOnFailure(operationName);
-
-        PG_RE_THROW();
-    }
-    PG_END_TRY();
-}
 
 
 /*


@@ -130,7 +130,7 @@ typedef enum LogicalRepType
 extern void LogicallyReplicateShards(List *shardList, char *sourceNodeName,
                                      int sourceNodePort, char *targetNodeName,
-                                     int targetNodePort, char *operationName);
+                                     int targetNodePort);
 
 extern void ConflictWithIsolationTestingBeforeCopy(void);
 extern void ConflictWithIsolationTestingAfterCopy(void);


@@ -103,13 +103,6 @@ extern void InsertCleanupRecordInSubtransaction(CleanupObject objectType,
                                                 int nodeGroupId,
                                                 CleanupPolicy policy);
 
-/*
- * FinalizeOperationNeedingCleanupOnFailure is be called by an operation to signal
- * completion on failure. This will trigger cleanup of appropriate resources
- * and cleanup records.
- */
-extern void FinalizeOperationNeedingCleanupOnFailure(const char *operationName);
-
 /*
  * FinalizeOperationNeedingCleanupOnSuccess is be called by an operation to signal
  * completion on success. This will trigger cleanup of appropriate resources


@@ -74,6 +74,12 @@ SELECT pg_catalog.citus_split_shard_by_split_points(
     'block_writes');
 ERROR: relation citus_split_failure_test_schema.sensors_8981002 already exists on worker localhost:xxxxx
 -- BEGIN : Split Shard, which is expected to fail.
+SELECT public.wait_for_resource_cleanup();
+ wait_for_resource_cleanup
+---------------------------------------------------------------------
+
+(1 row)
+
 -- BEGIN : Ensure tables were cleaned from worker
 \c - - - :worker_1_port
 SET search_path TO "citus_split_failure_test_schema";


@@ -54,10 +54,6 @@ SELECT citus.mitmproxy('conn.onQuery(query="ALTER SUBSCRIPTION").kill()');
 (1 row)
 
 SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port);
-WARNING: connection not open
-CONTEXT: while executing command on localhost:xxxxx
-WARNING: connection not open
-CONTEXT: while executing command on localhost:xxxxx
 ERROR: connection not open
 CONTEXT: while executing command on localhost:xxxxx
 -- cleanup leftovers


@@ -122,13 +122,6 @@ SELECT citus.mitmproxy('conn.onQuery(query="^ALTER SUBSCRIPTION .* (ENABLE|DISAB
 (1 row)
 
 SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port);
-WARNING: connection not open
-CONTEXT: while executing command on localhost:xxxxx
-WARNING: connection not open
-CONTEXT: while executing command on localhost:xxxxx
-WARNING: role "citus_shard_move_subscription_role_xxxxxxx_xxxxxxx" cannot be dropped because some objects depend on it
-DETAIL: owner of subscription citus_shard_move_subscription_xxxxxxx_xxxxxxx
-CONTEXT: while executing command on localhost:xxxxx
 ERROR: connection not open
 CONTEXT: while executing command on localhost:xxxxx
 -- cleanup leftovers
@@ -397,9 +390,6 @@ SELECT citus.mitmproxy('conn.onQuery(query="t_pkey").cancel(' || :pid || ')');
 (1 row)
 
 SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port);
-WARNING: canceling statement due to lock timeout
-CONTEXT: while executing command on localhost:xxxxx
-WARNING: failed to clean up 1 orphaned shards out of 5 after a citus_move_shard_placement operation failed
 ERROR: canceling statement due to user request
 -- cleanup leftovers
 SELECT citus.mitmproxy('conn.allow()');
@@ -422,9 +412,6 @@ SELECT citus.mitmproxy('conn.matches(b"CREATE INDEX").killall()');
 (1 row)
 
 SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port);
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: failed to clean up 1 orphaned shards out of 5 after a citus_move_shard_placement operation failed
 ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open
 -- cleanup leftovers
 SELECT citus.mitmproxy('conn.allow()');
@@ -460,9 +447,6 @@ SELECT citus.mitmproxy('conn.matches(b"CREATE INDEX").killall()');
 (1 row)
 
 SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port);
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: failed to clean up 1 orphaned shards out of 5 after a citus_move_shard_placement operation failed
 ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open
 -- failure on parallel create index
 ALTER SYSTEM RESET citus.max_adaptive_executor_pool_size;
@@ -479,9 +463,6 @@ SELECT citus.mitmproxy('conn.matches(b"CREATE INDEX").killall()');
 (1 row)
 
 SELECT master_move_shard_placement(101, 'localhost', :worker_1_port, 'localhost', :worker_2_proxy_port);
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: failed to clean up 1 orphaned shards out of 5 after a citus_move_shard_placement operation failed
 ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open
 -- Verify that the shard is not moved and the number of rows are still 100k
 SELECT citus.mitmproxy('conn.allow()');


@@ -51,17 +51,18 @@ SELECT create_distributed_table('table_to_split', 'id');
     ARRAY['-100000'],
     ARRAY[:worker_1_node, :worker_2_node],
     'force_logical');
-WARNING: failed to clean up 2 orphaned shards out of 5 after a citus_split_shard_by_split_points operation failed
 ERROR: connection not open
 CONTEXT: while executing command on localhost:xxxxx
 SELECT operation_id, object_type, object_name, node_group_id, policy_type
 FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
  operation_id | object_type | object_name | node_group_id | policy_type
 ---------------------------------------------------------------------
+ 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0
+ 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1
  777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0
  777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1
  777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
-(3 rows)
+(5 rows)
 
 -- we need to allow connection so that we can connect to proxy
 SELECT citus.mitmproxy('conn.allow()');
@@ -165,11 +166,13 @@ ERROR: Failed to run worker_split_shard_replication_setup UDF. It should succes
 FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
  operation_id | object_type | object_name | node_group_id | policy_type
 ---------------------------------------------------------------------
+ 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0
+ 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1
  777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0
  777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1
  777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
  777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
-(4 rows)
+(6 rows)
 
 -- we need to allow connection so that we can connect to proxy
 SELECT citus.mitmproxy('conn.allow()');
@@ -271,21 +274,20 @@ WARNING: connection not open
 CONTEXT: while executing command on localhost:xxxxx
 WARNING: connection not open
 CONTEXT: while executing command on localhost:xxxxx
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: failed to clean up 2 orphaned shards out of 7 after a citus_split_shard_by_split_points operation failed
 ERROR: connection not open
 CONTEXT: while executing command on localhost:xxxxx
 SELECT operation_id, object_type, object_name, node_group_id, policy_type
 FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
  operation_id | object_type | object_name | node_group_id | policy_type
 ---------------------------------------------------------------------
+ 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0
  777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0
+ 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1
  777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1
  777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
  777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
  777 | 3 | citus_shard_split_slot_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
-(5 rows)
+(7 rows)
 
 -- we need to allow connection so that we can connect to proxy
 SELECT citus.mitmproxy('conn.allow()');
@@ -383,28 +385,25 @@ CONTEXT: while executing command on localhost:xxxxx
     ARRAY['-100000'],
     ARRAY[:worker_1_node, :worker_2_node],
     'force_logical');
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: failed to clean up 2 orphaned shards out of 12 after a citus_split_shard_by_split_points operation failed
 ERROR: connection not open
 CONTEXT: while executing command on localhost:xxxxx
 SELECT operation_id, object_type, object_name, node_group_id, policy_type
 FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
  operation_id | object_type | object_name | node_group_id | policy_type
 ---------------------------------------------------------------------
+ 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0
+ 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1
  777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0
  777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1
  777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
  777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
  777 | 3 | citus_shard_split_slot_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
  777 | 3 | citus_shard_split_slot_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
+ 777 | 2 | citus_shard_split_subscription_xxxxxxx_xxxxxxx | 1 | 0
  777 | 2 | citus_shard_split_subscription_xxxxxxx_xxxxxxx | 2 | 0
+ 777 | 5 | citus_shard_split_subscription_role_xxxxxxx_xxxxxxx | 1 | 0
  777 | 5 | citus_shard_split_subscription_role_xxxxxxx_xxxxxxx | 2 | 0
-(8 rows)
+(12 rows)
 
 -- we need to allow connection so that we can connect to proxy
 SELECT citus.mitmproxy('conn.allow()');
@@ -505,28 +504,25 @@ CONTEXT: while executing command on localhost:xxxxx
     ARRAY['-100000'],
     ARRAY[:worker_1_node, :worker_2_node],
     'force_logical');
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: connection to the remote node localhost:xxxxx failed with the following error: connection not open
-WARNING: failed to clean up 2 orphaned shards out of 12 after a citus_split_shard_by_split_points operation failed
 ERROR: connection not open
 CONTEXT: while executing command on localhost:xxxxx
 SELECT operation_id, object_type, object_name, node_group_id, policy_type
 FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
  operation_id | object_type | object_name | node_group_id | policy_type
 ---------------------------------------------------------------------
+ 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0
+ 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1
  777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0
  777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1
  777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
  777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
  777 | 3 | citus_shard_split_slot_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
  777 | 3 | citus_shard_split_slot_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
+ 777 | 2 | citus_shard_split_subscription_xxxxxxx_xxxxxxx | 1 | 0
  777 | 2 | citus_shard_split_subscription_xxxxxxx_xxxxxxx | 2 | 0
+ 777 | 5 | citus_shard_split_subscription_role_xxxxxxx_xxxxxxx | 1 | 0
  777 | 5 | citus_shard_split_subscription_role_xxxxxxx_xxxxxxx | 2 | 0
-(8 rows)
+(12 rows)
 
 -- we need to allow connection so that we can connect to proxy
 SELECT citus.mitmproxy('conn.allow()');


@@ -766,8 +766,6 @@ SET citus.override_table_visibility TO false;
 SET search_path to "Tenant Isolation";
 \set VERBOSITY terse
 SELECT isolate_tenant_to_new_shard('orders_streaming', 104, 'CASCADE', shard_transfer_mode => 'block_writes');
-WARNING: command DROP TABLE is disabled
-WARNING: failed to clean up 1 orphaned shards out of 1 after a isolate_tenant_to_new_shard operation failed
 ERROR: command CREATE TABLE is disabled
 \set VERBOSITY default
 \c - postgres - :worker_1_port


@@ -814,8 +814,6 @@ SET citus.override_table_visibility TO false;
 SET search_path to "Tenant Isolation";
 \set VERBOSITY terse
 SELECT isolate_tenant_to_new_shard('orders_streaming', 104, 'CASCADE', shard_transfer_mode => 'force_logical');
-WARNING: command DROP TABLE is disabled
-WARNING: failed to clean up 1 orphaned shards out of 1 after a isolate_tenant_to_new_shard operation failed
 ERROR: command CREATE TABLE is disabled
 \set VERBOSITY default
 \c - postgres - :worker_1_port


@@ -64,6 +64,8 @@ SELECT pg_catalog.citus_split_shard_by_split_points(
     'block_writes');
 -- BEGIN : Split Shard, which is expected to fail.
+SELECT public.wait_for_resource_cleanup();
+
 -- BEGIN : Ensure tables were cleaned from worker
 \c - - - :worker_1_port
 SET search_path TO "citus_split_failure_test_schema";
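
As this last hunk shows, failure tests now follow a deferred pattern: let the operation fail, wait for the maintenance daemon's cleanup pass, then check the worker. A hedged sketch of that sequence, using only the schema and helper names from the test above:

    -- block until the background cleanup pass has processed the records
    SELECT public.wait_for_resource_cleanup();
    -- connect to the worker and confirm the split leftovers are gone
    \c - - - :worker_1_port
    SET search_path TO "citus_split_failure_test_schema";
    \d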