niupre/TestDeferredDropAndCleanup
Nitish Upreti 2022-08-25 18:45:41 -07:00
parent 919e44eab6
commit 3d46860fbb
4 changed files with 49 additions and 40 deletions

View File

@ -69,7 +69,8 @@ PG_FUNCTION_INFO_V1(citus_cleanup_orphaned_shards);
PG_FUNCTION_INFO_V1(isolation_cleanup_orphaned_shards); PG_FUNCTION_INFO_V1(isolation_cleanup_orphaned_shards);
static int DropOrphanedShardsForMove(bool waitForLocks); static int DropOrphanedShardsForMove(bool waitForLocks);
static bool TryDropShardOutsideTransaction(char *qualifiedTableName, char *nodeName, int nodePort); static bool TryDropShardOutsideTransaction(char *qualifiedTableName, char *nodeName, int
nodePort);
static bool TryLockRelationAndPlacementCleanup(Oid relationId, LOCKMODE lockmode); static bool TryLockRelationAndPlacementCleanup(Oid relationId, LOCKMODE lockmode);
/* Functions for cleanup infrastructure */ /* Functions for cleanup infrastructure */
@ -209,7 +210,7 @@ DropOrphanedShardsForCleanup()
if (record->objectType != CLEANUP_SHARD_PLACEMENT) if (record->objectType != CLEANUP_SHARD_PLACEMENT)
{ {
ereport(WARNING, (errmsg("Invalid object type %d for cleanup record ", ereport(WARNING, (errmsg("Invalid object type %d for cleanup record ",
record->objectType))); record->objectType)));
continue; continue;
} }
@ -231,7 +232,7 @@ DropOrphanedShardsForCleanup()
} }
if (TryDropShardOutsideTransaction(qualifiedTableName, workerNode->workerName, if (TryDropShardOutsideTransaction(qualifiedTableName, workerNode->workerName,
workerNode->workerPort)) workerNode->workerPort))
{ {
/* delete the cleanup record */ /* delete the cleanup record */
DeleteCleanupRecordByRecordId(record->recordId); DeleteCleanupRecordByRecordId(record->recordId);
@ -246,7 +247,8 @@ DropOrphanedShardsForCleanup()
if (failedShardCountForCleanup > 0) if (failedShardCountForCleanup > 0)
{ {
ereport(WARNING, (errmsg("Failed to drop %d cleanup shards out of %d", ereport(WARNING, (errmsg("Failed to drop %d cleanup shards out of %d",
failedShardCountForCleanup, list_length(cleanupRecordList)))); failedShardCountForCleanup, list_length(
cleanupRecordList))));
} }
return removedShardCountForCleanup; return removedShardCountForCleanup;
@ -321,7 +323,7 @@ DropOrphanedShardsForMove(bool waitForLocks)
char *qualifiedTableName = ConstructQualifiedShardName(shardInterval); char *qualifiedTableName = ConstructQualifiedShardName(shardInterval);
if (TryDropShardOutsideTransaction(qualifiedTableName, shardPlacement->nodeName, if (TryDropShardOutsideTransaction(qualifiedTableName, shardPlacement->nodeName,
shardPlacement->nodePort)) shardPlacement->nodePort))
{ {
/* delete the actual placement */ /* delete the actual placement */
DeleteShardPlacementRow(placement->placementId); DeleteShardPlacementRow(placement->placementId);
@ -359,13 +361,13 @@ CompleteNewOperationNeedingCleanup(bool isSuccess)
{ {
/* /*
* As part of operation completion: * As part of operation completion:
* 1. Drop all resources of CurrentOperationId that are marked with 'CLEANUP_ALWAYS' policy and * 1. Drop all resources of CurrentOperationId that are marked with 'CLEANUP_ALWAYS' policy and
* the respective cleanup records in a separate transaction. * the respective cleanup records in a separate transaction.
* *
* 2. For all resources of CurrentOperationId that are marked with 'CLEANUP_ON_FAILURE': * 2. For all resources of CurrentOperationId that are marked with 'CLEANUP_ON_FAILURE':
* a) If isSuccess = true, drop cleanup records, as the operation is nearing * a) If isSuccess = true, drop cleanup records, as the operation is nearing
* successful completion. This is done as part of the same transaction, so it * successful completion. This is done as part of the same transaction, so it
* will roll back in case of a potential failure later. * will roll back in case of a potential failure later.
* *
* b) If isSuccess = false, drop the resource and cleanup records in a separate transaction. * b) If isSuccess = false, drop the resource and cleanup records in a separate transaction.
*/ */
@ -387,18 +389,18 @@ CompleteNewOperationNeedingCleanup(bool isSuccess)
if (record->objectType != CLEANUP_SHARD_PLACEMENT) if (record->objectType != CLEANUP_SHARD_PLACEMENT)
{ {
ereport(WARNING, (errmsg("Invalid object type %d for cleanup record ", ereport(WARNING, (errmsg("Invalid object type %d for cleanup record ",
record->objectType))); record->objectType)));
continue; continue;
} }
if (record->policy == CLEANUP_ALWAYS || if (record->policy == CLEANUP_ALWAYS ||
(record->policy == CLEANUP_ON_FAILURE && !isSuccess)) (record->policy == CLEANUP_ON_FAILURE && !isSuccess))
{ {
char *qualifiedTableName = record->objectName; char *qualifiedTableName = record->objectName;
WorkerNode *workerNode = LookupNodeForGroup(record->nodeGroupId); WorkerNode *workerNode = LookupNodeForGroup(record->nodeGroupId);
if (TryDropShardOutsideTransaction(qualifiedTableName, workerNode->workerName, if (TryDropShardOutsideTransaction(qualifiedTableName, workerNode->workerName,
workerNode->workerPort)) workerNode->workerPort))
{ {
DeleteCleanupRecordByRecordIdOutsideTransaction(record->recordId); DeleteCleanupRecordByRecordIdOutsideTransaction(record->recordId);
removedShardCountOnComplete++; removedShardCountOnComplete++;
@ -417,12 +419,14 @@ CompleteNewOperationNeedingCleanup(bool isSuccess)
if (list_length(currentOperationRecordList) > 0) if (list_length(currentOperationRecordList) > 0)
{ {
ereport(LOG, (errmsg("Removed %d orphaned shards out of %d", ereport(LOG, (errmsg("Removed %d orphaned shards out of %d",
removedShardCountOnComplete, list_length(currentOperationRecordList)))); removedShardCountOnComplete, list_length(
currentOperationRecordList))));
if (failedShardCountOnComplete > 0) if (failedShardCountOnComplete > 0)
{ {
ereport(WARNING, (errmsg("Failed to drop %d cleanup shards out of %d", ereport(WARNING, (errmsg("Failed to drop %d cleanup shards out of %d",
failedShardCountOnComplete, list_length(currentOperationRecordList)))); failedShardCountOnComplete, list_length(
currentOperationRecordList))));
} }
} }
} }
@ -459,7 +463,7 @@ InsertCleanupRecordInCurrentTransaction(CleanupObject objectType,
values[Anum_pg_dist_cleanup_object_type - 1] = Int32GetDatum(objectType); values[Anum_pg_dist_cleanup_object_type - 1] = Int32GetDatum(objectType);
values[Anum_pg_dist_cleanup_object_name - 1] = CStringGetTextDatum(objectName); values[Anum_pg_dist_cleanup_object_name - 1] = CStringGetTextDatum(objectName);
values[Anum_pg_dist_cleanup_node_group_id - 1] = Int32GetDatum(nodeGroupId); values[Anum_pg_dist_cleanup_node_group_id - 1] = Int32GetDatum(nodeGroupId);
values[Anum_pg_dist_cleanup_policy_type -1] = Int32GetDatum(policy); values[Anum_pg_dist_cleanup_policy_type - 1] = Int32GetDatum(policy);
/* open cleanup relation and insert new tuple */ /* open cleanup relation and insert new tuple */
Oid relationId = DistCleanupRelationId(); Oid relationId = DistCleanupRelationId();
@ -507,9 +511,9 @@ InsertCleanupRecordInSubtransaction(CleanupObject objectType,
policy); policy);
SendCommandListToWorkerOutsideTransaction(LocalHostName, SendCommandListToWorkerOutsideTransaction(LocalHostName,
PostPortNumber, PostPortNumber,
CitusExtensionOwnerName(), CitusExtensionOwnerName(),
list_make1(command->data)); list_make1(command->data));
} }
@ -525,9 +529,9 @@ DeleteCleanupRecordByRecordIdOutsideTransaction(uint64 recordId)
recordId); recordId);
SendCommandListToWorkerOutsideTransaction(LocalHostName, SendCommandListToWorkerOutsideTransaction(LocalHostName,
PostPortNumber, PostPortNumber,
CitusExtensionOwnerName(), CitusExtensionOwnerName(),
list_make1(command->data)); list_make1(command->data));
} }
@ -621,8 +625,8 @@ GetNextOperationId()
/* token location, or -1 if unknown */ /* token location, or -1 if unknown */
const int location = -1; const int location = -1;
RangeVar *sequenceName = makeRangeVar(PG_CATALOG, RangeVar *sequenceName = makeRangeVar(PG_CATALOG,
OPERATIONID_SEQUENCE_NAME, OPERATIONID_SEQUENCE_NAME,
location); location);
bool missingOK = false; bool missingOK = false;
Oid sequenceId = RangeVarGetRelid(sequenceName, NoLock, missingOK); Oid sequenceId = RangeVarGetRelid(sequenceName, NoLock, missingOK);
@ -680,7 +684,8 @@ ListCleanupRecordsForCurrentOperation(void)
int scanKeyCount = 1; int scanKeyCount = 1;
Oid scanIndexId = InvalidOid; Oid scanIndexId = InvalidOid;
bool useIndex = false; bool useIndex = false;
SysScanDesc scanDescriptor = systable_beginscan(pgDistCleanup, scanIndexId, useIndex, NULL, SysScanDesc scanDescriptor = systable_beginscan(pgDistCleanup, scanIndexId, useIndex,
NULL,
scanKeyCount, scanKey); scanKeyCount, scanKey);
HeapTuple heapTuple = NULL; HeapTuple heapTuple = NULL;
@ -697,6 +702,7 @@ ListCleanupRecordsForCurrentOperation(void)
return recordList; return recordList;
} }
/* /*
* TupleToCleanupRecord converts a pg_dist_cleanup record tuple into a CleanupRecord struct. * TupleToCleanupRecord converts a pg_dist_cleanup record tuple into a CleanupRecord struct.
*/ */
@ -736,7 +742,7 @@ static bool
CleanupRecordExists(uint64 recordId) CleanupRecordExists(uint64 recordId)
{ {
Relation pgDistCleanup = table_open(DistCleanupRelationId(), Relation pgDistCleanup = table_open(DistCleanupRelationId(),
AccessShareLock); AccessShareLock);
const int scanKeyCount = 1; const int scanKeyCount = 1;
ScanKeyData scanKey[1]; ScanKeyData scanKey[1];
@ -769,7 +775,7 @@ static void
DeleteCleanupRecordByRecordId(uint64 recordId) DeleteCleanupRecordByRecordId(uint64 recordId)
{ {
Relation pgDistCleanup = table_open(DistCleanupRelationId(), Relation pgDistCleanup = table_open(DistCleanupRelationId(),
RowExclusiveLock); RowExclusiveLock);
const int scanKeyCount = 1; const int scanKeyCount = 1;
ScanKeyData scanKey[1]; ScanKeyData scanKey[1];
@ -808,6 +814,7 @@ static uint64
GetNextCleanupRecordId(void) GetNextCleanupRecordId(void)
{ {
uint64 recordId = INVALID_CLEANUP_RECORD_ID; uint64 recordId = INVALID_CLEANUP_RECORD_ID;
/* /*
* In regression tests, we would like to generate record IDs consistently * In regression tests, we would like to generate record IDs consistently
* even if the tests run in parallel. Instead of the sequence, we can use * even if the tests run in parallel. Instead of the sequence, we can use
@ -844,6 +851,7 @@ LockOperationId(OperationId operationId)
(void) LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait); (void) LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait);
} }
static bool static bool
TryLockOperationId(OperationId operationId) TryLockOperationId(OperationId operationId)
{ {

View File

@ -75,11 +75,10 @@ static void ErrorIfCannotSplitShardExtended(SplitOperation splitOperation,
ShardInterval *shardIntervalToSplit, ShardInterval *shardIntervalToSplit,
List *shardSplitPointsList, List *shardSplitPointsList,
List *nodeIdsForPlacementList); List *nodeIdsForPlacementList);
static void CreateAndCopySplitShardsForShardGroup( static void CreateAndCopySplitShardsForShardGroup(WorkerNode *sourceShardNode,
WorkerNode *sourceShardNode, List *sourceColocatedShardIntervalList,
List *sourceColocatedShardIntervalList, List *shardGroupSplitIntervalListList,
List *shardGroupSplitIntervalListList, List *workersForPlacementList);
List *workersForPlacementList);
static void CreateSplitShardsForShardGroup(List *shardGroupSplitIntervalListList, static void CreateSplitShardsForShardGroup(List *shardGroupSplitIntervalListList,
List *workersForPlacementList); List *workersForPlacementList);
static void CreateDummyShardsForShardGroup(HTAB *mapOfPlacementToDummyShardList, static void CreateDummyShardsForShardGroup(HTAB *mapOfPlacementToDummyShardList,
@ -133,7 +132,8 @@ static List * ExecuteSplitShardReplicationSetupUDF(WorkerNode *sourceWorkerNode,
List *sourceColocatedShardIntervalList, List *sourceColocatedShardIntervalList,
List *shardGroupSplitIntervalListList, List *shardGroupSplitIntervalListList,
List *destinationWorkerNodesList); List *destinationWorkerNodesList);
static void AddDummyShardEntryInMap(HTAB *mapOfPlacementToDummyShardList, uint32 targetNodeId, static void AddDummyShardEntryInMap(HTAB *mapOfPlacementToDummyShardList, uint32
targetNodeId,
ShardInterval *shardInterval); ShardInterval *shardInterval);
static uint64 GetNextShardIdForSplitChild(void); static uint64 GetNextShardIdForSplitChild(void);
@ -581,7 +581,8 @@ CreateSplitShardsForShardGroup(List *shardGroupSplitIntervalListList,
/* Log resource for cleanup in case of failure only. */ /* Log resource for cleanup in case of failure only. */
CleanupPolicy policy = CLEANUP_ON_FAILURE; CleanupPolicy policy = CLEANUP_ON_FAILURE;
InsertCleanupRecordInSubtransaction(CLEANUP_SHARD_PLACEMENT, InsertCleanupRecordInSubtransaction(CLEANUP_SHARD_PLACEMENT,
ConstructQualifiedShardName(shardInterval), ConstructQualifiedShardName(
shardInterval),
workerPlacementNode->groupId, workerPlacementNode->groupId,
policy); policy);
@ -1132,7 +1133,6 @@ DropShardList(List *shardIntervalList)
if (DeferShardDeleteOnSplit) if (DeferShardDeleteOnSplit)
{ {
/* Log shard in pg_dist_cleanup. /* Log shard in pg_dist_cleanup.
* Parent shards are to be dropped only on success after split workflow is complete, * Parent shards are to be dropped only on success after split workflow is complete,
* so mark the policy as 'CLEANUP_DEFERRED_ON_SUCCESS'. * so mark the policy as 'CLEANUP_DEFERRED_ON_SUCCESS'.
@ -1151,12 +1151,12 @@ DropShardList(List *shardIntervalList)
if (storageType == SHARD_STORAGE_TABLE) if (storageType == SHARD_STORAGE_TABLE)
{ {
appendStringInfo(dropQuery, DROP_REGULAR_TABLE_COMMAND, appendStringInfo(dropQuery, DROP_REGULAR_TABLE_COMMAND,
qualifiedShardName); qualifiedShardName);
} }
else if (storageType == SHARD_STORAGE_FOREIGN) else if (storageType == SHARD_STORAGE_FOREIGN)
{ {
appendStringInfo(dropQuery, DROP_FOREIGN_TABLE_COMMAND, appendStringInfo(dropQuery, DROP_FOREIGN_TABLE_COMMAND,
qualifiedShardName); qualifiedShardName);
} }
/* drop old shard */ /* drop old shard */
@ -1232,7 +1232,7 @@ NonBlockingShardSplit(SplitOperation splitOperation,
* information. * information.
*/ */
HTAB *mapOfPlacementToDummyShardList = CreateSimpleHash(NodeAndOwner, HTAB *mapOfPlacementToDummyShardList = CreateSimpleHash(NodeAndOwner,
GroupedShardSplitInfos); GroupedShardSplitInfos);
CreateDummyShardsForShardGroup( CreateDummyShardsForShardGroup(
mapOfPlacementToDummyShardList, mapOfPlacementToDummyShardList,
sourceColocatedShardIntervalList, sourceColocatedShardIntervalList,
@ -1465,7 +1465,8 @@ CreateDummyShardsForShardGroup(HTAB *mapOfPlacementToDummyShardList,
*/ */
CleanupPolicy policy = CLEANUP_ALWAYS; CleanupPolicy policy = CLEANUP_ALWAYS;
InsertCleanupRecordInSubtransaction(CLEANUP_SHARD_PLACEMENT, InsertCleanupRecordInSubtransaction(CLEANUP_SHARD_PLACEMENT,
ConstructQualifiedShardName(shardInterval), ConstructQualifiedShardName(
shardInterval),
workerPlacementNode->groupId, workerPlacementNode->groupId,
policy); policy);
@ -1509,7 +1510,8 @@ CreateDummyShardsForShardGroup(HTAB *mapOfPlacementToDummyShardList,
*/ */
CleanupPolicy policy = CLEANUP_ALWAYS; CleanupPolicy policy = CLEANUP_ALWAYS;
InsertCleanupRecordInSubtransaction(CLEANUP_SHARD_PLACEMENT, InsertCleanupRecordInSubtransaction(CLEANUP_SHARD_PLACEMENT,
ConstructQualifiedShardName(shardInterval), ConstructQualifiedShardName(
shardInterval),
workerPlacementNode->groupId, workerPlacementNode->groupId,
policy); policy);

View File

@ -31,4 +31,3 @@
#define CLEANUPRECORDID_SEQUENCE_NAME "pg_dist_cleanup_recordid_seq" #define CLEANUPRECORDID_SEQUENCE_NAME "pg_dist_cleanup_recordid_seq"
#endif /* PG_DIST_CLEANUP_H */ #endif /* PG_DIST_CLEANUP_H */

View File

@ -50,13 +50,13 @@ typedef enum CleanupPolicy
/* /*
* Resources that are cleaned up only on failure. * Resources that are cleaned up only on failure.
* (Example: Split Children for Blocking/Non-Blocking splits) * (Example: Split Children for Blocking/Non-Blocking splits)
*/ */
CLEANUP_ON_FAILURE = 1, CLEANUP_ON_FAILURE = 1,
/* /*
* Resources that need 'deferred' clean up only on success. * Resources that need 'deferred' clean up only on success.
* (Example: Parent shard being split for Blocking/Non-Blocking splits) * (Example: Parent shard being split for Blocking/Non-Blocking splits)
*/ */
CLEANUP_DEFERRED_ON_SUCCESS = 2, CLEANUP_DEFERRED_ON_SUCCESS = 2,
} CleanupPolicy; } CleanupPolicy;