diff --git a/src/backend/distributed/metadata/metadata_cache.c b/src/backend/distributed/metadata/metadata_cache.c index 7779c5e07..be2685c53 100644 --- a/src/backend/distributed/metadata/metadata_cache.c +++ b/src/backend/distributed/metadata/metadata_cache.c @@ -147,6 +147,8 @@ typedef struct MetadataCacheData Oid distLocalGroupRelationId; Oid distObjectRelationId; Oid distObjectPrimaryKeyIndexId; + Oid distCleanupRelationId; + Oid distCleanupPrimaryKeyIndexId; Oid distColocationRelationId; Oid distColocationConfigurationIndexId; Oid distPartitionRelationId; @@ -2591,6 +2593,28 @@ DistObjectPrimaryKeyIndexId(void) } +/* return oid of pg_dist_cleanup relation */ +Oid +DistCleanupRelationId(void) +{ + CachedRelationLookup("pg_dist_cleanup", + &MetadataCache.distCleanupRelationId); + + return MetadataCache.distCleanupRelationId; +} + + +/* return oid of pg_dist_cleanup primary key index */ +Oid +DistCleanupPrimaryKeyIndexId(void) +{ + CachedRelationLookup("pg_dist_cleanup_pkey", + &MetadataCache.distCleanupPrimaryKeyIndexId); + + return MetadataCache.distCleanupPrimaryKeyIndexId; +} + + /* return oid of pg_dist_colocation relation */ Oid DistColocationRelationId(void) diff --git a/src/backend/distributed/operations/shard_cleaner.c b/src/backend/distributed/operations/shard_cleaner.c index 0c9e7903c..9775099eb 100644 --- a/src/backend/distributed/operations/shard_cleaner.c +++ b/src/backend/distributed/operations/shard_cleaner.c @@ -11,10 +11,19 @@ */ #include "postgres.h" - +#include "miscadmin.h" +#include "access/genam.h" #include "access/xact.h" +#include "catalog/namespace.h" +#include "commands/dbcommands.h" +#include "commands/sequence.h" #include "postmaster/postmaster.h" +#include "nodes/makefuncs.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "distributed/citus_safe_lib.h" +#include "distributed/listutils.h" #include "distributed/coordinator_protocol.h" #include "distributed/metadata_cache.h" #include "distributed/shard_cleaner.h" @@ -22,15 +31,67 @@ #include "distributed/remote_commands.h" #include "distributed/resource_lock.h" #include "distributed/worker_transaction.h" +#include "distributed/pg_dist_cleanup.h" +/* GUC configuration for shard cleaner */ +int NextOperationId = 0; +int NextCleanupRecordId = 0; + +/* Data structure for cleanup operation */ + +/* + * CleanupRecord represents a record from pg_dist_cleanup. + */ +typedef struct CleanupRecord +{ + /* unique identifier of the record */ + uint64 recordId; + + /* identifier of the operation that generated the record */ + OperationId operationId; + + /* type of the object (e.g. 
shard) */ + CleanupObject objectType; + + /* fully qualified name of the object */ + char *objectName; + + /* node group ID on which the object is located */ + int nodeGroupId; + + /* cleanup policy that determines when object is cleaned */ + CleanupPolicy policy; +} CleanupRecord; + +/* operation ID set by RegisterOperationNeedingCleanup */ +OperationId CurrentOperationId = INVALID_OPERATION_ID; /* declarations for dynamic loading */ PG_FUNCTION_INFO_V1(citus_cleanup_orphaned_shards); PG_FUNCTION_INFO_V1(isolation_cleanup_orphaned_shards); +PG_FUNCTION_INFO_V1(citus_cleanup_orphaned_resources); -static bool TryDropShard(GroupShardPlacement *placement); +static int DropOrphanedShardsForMove(bool waitForLocks); +static bool TryDropShardOutsideTransaction(OperationId operationId, + char *qualifiedTableName, + char *nodeName, + int nodePort); static bool TryLockRelationAndPlacementCleanup(Oid relationId, LOCKMODE lockmode); +/* Functions for cleanup infrastructure */ +static CleanupRecord * TupleToCleanupRecord(HeapTuple heapTuple, + TupleDesc + tupleDescriptor); +static OperationId GetNextOperationId(void); +static uint64 GetNextCleanupRecordId(void); +static void LockOperationId(OperationId operationId); +static bool TryLockOperationId(OperationId operationId); +static void DeleteCleanupRecordByRecordId(uint64 recordId); +static void DeleteCleanupRecordByRecordIdOutsideTransaction(uint64 recordId); +static bool CleanupRecordExists(uint64 recordId); +static List * ListCleanupRecords(void); +static List * ListCleanupRecordsForCurrentOperation(void); +static int DropOrphanedShardsForCleanup(void); /* * citus_cleanup_orphaned_shards implements a user-facing UDF to delete @@ -56,7 +117,7 @@ citus_cleanup_orphaned_shards(PG_FUNCTION_ARGS) PreventInTransactionBlock(true, "citus_cleanup_orphaned_shards"); bool waitForLocks = true; - int droppedShardCount = DropOrphanedShards(waitForLocks); + int droppedShardCount = DropOrphanedShardsForMove(waitForLocks); if (droppedShardCount > 0) { ereport(NOTICE, (errmsg("cleaned up %d orphaned shards", droppedShardCount))); @@ -78,7 +139,7 @@ isolation_cleanup_orphaned_shards(PG_FUNCTION_ARGS) EnsureCoordinator(); bool waitForLocks = true; - int droppedShardCount = DropOrphanedShards(waitForLocks); + int droppedShardCount = DropOrphanedShardsForMove(waitForLocks); if (droppedShardCount > 0) { ereport(NOTICE, (errmsg("cleaned up %d orphaned shards", droppedShardCount))); @@ -88,10 +149,40 @@ isolation_cleanup_orphaned_shards(PG_FUNCTION_ARGS) } +/* + * citus_cleanup_orphaned_resources implements a user-facing UDF to delete + * orphaned resources that are present in the system. These resources are + * orphaned by previous actions that either failed or marked the resources + * for deferred cleanup. + * The UDF only supports dropping shards at the moment but will be extended in + * near future to clean any type of resource. + * + * The function takes no arguments and runs on co-ordinator. It cannot be run in a + * transaction, because holding the locks it takes for a long time is not good. + * While the locks are held, it is impossible for the background daemon to + * perform concurrent cleanup. 
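For reference, a minimal usage sketch (not part of this patch's code): the procedure and the catalog it drains are created by the migration later in this diff, and the row count shown in the output is illustrative only.

    -- inspect resources recorded by failed or deferred operations
    SELECT record_id, operation_id, object_type, object_name, node_group_id, policy_type
    FROM pg_catalog.pg_dist_cleanup;

    -- run on the coordinator, outside a transaction block
    CALL pg_catalog.citus_cleanup_orphaned_resources();
    -- NOTICE:  cleaned up 2 orphaned resources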
+ */ +Datum +citus_cleanup_orphaned_resources(PG_FUNCTION_ARGS) +{ + CheckCitusVersion(ERROR); + EnsureCoordinator(); + PreventInTransactionBlock(true, "citus_cleanup_orphaned_resources"); + + int droppedCount = DropOrphanedShardsForCleanup(); + if (droppedCount > 0) + { + ereport(NOTICE, (errmsg("cleaned up %d orphaned resources", droppedCount))); + } + + PG_RETURN_VOID(); +} + + /* * DropOrphanedShardsInSeparateTransaction cleans up orphaned shards by * connecting to localhost. This is done, so that the locks that - * DropOrphanedShards takes are only held for a short time. + * DropOrphanedShardsForMove takes are only held for a short time. */ void DropOrphanedShardsInSeparateTransaction(void) @@ -101,8 +192,9 @@ DropOrphanedShardsInSeparateTransaction(void) /* - * TryDropOrphanedShards is a wrapper around DropOrphanedShards that catches - * any errors to make it safe to use in the maintenance daemon. + * TryDropOrphanedShards is a wrapper around DropOrphanedShardsForMove and + * DropOrphanedShardsForCleanup that catches any errors to make it safe to + * use in the maintenance daemon. * * If dropping any of the shards failed this function returns -1, otherwise it * returns the number of dropped shards. @@ -112,9 +204,24 @@ TryDropOrphanedShards(bool waitForLocks) { int droppedShardCount = 0; MemoryContext savedContext = CurrentMemoryContext; + + /* + * Start a subtransaction so we can rollback database's state to it in case + * of error. + */ + BeginInternalSubTransaction(NULL); + PG_TRY(); { - droppedShardCount = DropOrphanedShards(waitForLocks); + droppedShardCount = DropOrphanedShardsForMove(waitForLocks); + droppedShardCount += DropOrphanedShardsForCleanup(); + + /* + * Releasing a subtransaction doesn't free its memory context, since the + * data it contains will be needed at upper commit. See the comments for + * AtSubCommit_Memory() at postgres/src/backend/access/transam/xact.c. + */ + ReleaseCurrentSubTransaction(); } PG_CATCH(); { @@ -122,6 +229,8 @@ TryDropOrphanedShards(bool waitForLocks) ErrorData *edata = CopyErrorData(); FlushErrorState(); + RollbackAndReleaseCurrentSubTransaction(); + /* rethrow as WARNING */ edata->elevel = WARNING; ThrowErrorData(edata); @@ -133,7 +242,90 @@ TryDropOrphanedShards(bool waitForLocks) /* - * DropOrphanedShards removes shards that were marked SHARD_STATE_TO_DELETE before. + * DropOrphanedShardsForCleanup removes resources that were marked for cleanup by operation. + * It does so by trying to take an exclusive lock on the resources. If the lock cannot be + * obtained it skips the resource and continues with others. + * The resource that has been skipped will be removed at a later iteration when there are no + * locks held anymore. 
+ */ +static int +DropOrphanedShardsForCleanup() +{ + /* Only runs on Coordinator */ + if (!IsCoordinator()) + { + return 0; + } + + List *cleanupRecordList = ListCleanupRecords(); + + int removedShardCountForCleanup = 0; + int failedShardCountForCleanup = 0; + CleanupRecord *record = NULL; + + foreach_ptr(record, cleanupRecordList) + { + /* We only support one resource type at the moment */ + if (record->objectType != CLEANUP_OBJECT_SHARD_PLACEMENT) + { + ereport(WARNING, (errmsg("Invalid object type %d for cleanup record ", + record->objectType))); + continue; + } + + if (!PrimaryNodeForGroup(record->nodeGroupId, NULL)) + { + continue; + } + + /* Advisory locks are reentrant */ + if (!TryLockOperationId(record->operationId)) + { + /* operation that the cleanup record is part of is still running */ + continue; + } + + char *qualifiedTableName = record->objectName; + WorkerNode *workerNode = LookupNodeForGroup(record->nodeGroupId); + + /* + * Now that we have the lock, check if record exists. + * The operation could have completed successfully just after we called + * ListCleanupRecords in which case the record will be now gone. + */ + if (!CleanupRecordExists(record->recordId)) + { + continue; + } + + if (TryDropShardOutsideTransaction(record->operationId, + qualifiedTableName, + workerNode->workerName, + workerNode->workerPort)) + { + /* delete the cleanup record */ + DeleteCleanupRecordByRecordId(record->recordId); + removedShardCountForCleanup++; + } + else + { + failedShardCountForCleanup++; + } + } + + if (failedShardCountForCleanup > 0) + { + ereport(WARNING, (errmsg("Failed to cleanup %d shards out of %d", + failedShardCountForCleanup, list_length( + cleanupRecordList)))); + } + + return removedShardCountForCleanup; +} + + +/* + * DropOrphanedShardsForMove removes shards that were marked SHARD_STATE_TO_DELETE before. * * It does so by trying to take an exclusive lock on the shard and its * colocated placements before removing. If the lock cannot be obtained it @@ -151,11 +343,10 @@ TryDropOrphanedShards(bool waitForLocks) * wait for this lock or not. * */ -int -DropOrphanedShards(bool waitForLocks) +static int +DropOrphanedShardsForMove(bool waitForLocks) { int removedShardCount = 0; - ListCell *shardPlacementCell = NULL; /* * We should try to take the highest lock that we take @@ -185,19 +376,28 @@ DropOrphanedShards(bool waitForLocks) int failedShardDropCount = 0; List *shardPlacementList = AllShardPlacementsWithShardPlacementState( SHARD_STATE_TO_DELETE); - foreach(shardPlacementCell, shardPlacementList) - { - GroupShardPlacement *placement = (GroupShardPlacement *) lfirst( - shardPlacementCell); + GroupShardPlacement *placement = NULL; + foreach_ptr(placement, shardPlacementList) + { if (!PrimaryNodeForGroup(placement->groupId, NULL) || !ShardExists(placement->shardId)) { continue; } - if (TryDropShard(placement)) + ShardPlacement *shardPlacement = LoadShardPlacement(placement->shardId, + placement->placementId); + ShardInterval *shardInterval = LoadShardInterval(placement->shardId); + char *qualifiedTableName = ConstructQualifiedShardName(shardInterval); + + if (TryDropShardOutsideTransaction(INVALID_OPERATION_ID, + qualifiedTableName, + shardPlacement->nodeName, + shardPlacement->nodePort)) { + /* delete the actual placement */ + DeleteShardPlacementRow(placement->placementId); removedShardCount++; } else @@ -216,6 +416,288 @@ DropOrphanedShards(bool waitForLocks) } +/* + * RegisterOperationNeedingCleanup is be called by an operation to register + * for cleanup. 
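For orientation, a rough sketch of how an operation is expected to combine this registration API with the insert and finalize helpers declared above; the caller and its arguments are hypothetical and not part of this patch.

    static void
    PerformOperationWithCleanup(char *qualifiedShardName, int nodeGroupId)
    {
        /* allocate an operation id and take the advisory lock guarding it */
        RegisterOperationNeedingCleanup();

        PG_TRY();
        {
            /* record the resource before creating it, so a crash still leaves a record behind */
            InsertCleanupRecordInSubtransaction(CLEANUP_OBJECT_SHARD_PLACEMENT,
                                                qualifiedShardName,
                                                nodeGroupId,
                                                CLEANUP_ON_FAILURE);

            /* ... create the placement and do the actual work ... */

            /* drop CLEANUP_ALWAYS resources and forget CLEANUP_ON_FAILURE records */
            FinalizeOperationNeedingCleanupOnSuccess();
        }
        PG_CATCH();
        {
            /* drop CLEANUP_ON_FAILURE and CLEANUP_ALWAYS resources right away */
            FinalizeOperationNeedingCleanupOnFailure();
            PG_RE_THROW();
        }
        PG_END_TRY();
    }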
+ */ +OperationId +RegisterOperationNeedingCleanup(void) +{ + CurrentOperationId = GetNextOperationId(); + + LockOperationId(CurrentOperationId); + + return CurrentOperationId; +} + + +/* + * FinalizeOperationNeedingCleanupOnFailure is be called by an operation to signal + * completion with failure. This will trigger cleanup of appropriate resources. + */ +void +FinalizeOperationNeedingCleanupOnFailure() +{ + /* We must have a valid OperationId. Any operation requring cleanup + * will call RegisterOperationNeedingCleanup. + */ + Assert(CurrentOperationId != INVALID_OPERATION_ID); + + List *currentOperationRecordList = ListCleanupRecordsForCurrentOperation(); + + int removedShardCountOnComplete = 0; + int failedShardCountOnComplete = 0; + + CleanupRecord *record = NULL; + foreach_ptr(record, currentOperationRecordList) + { + /* We only supporting cleaning shards right now */ + if (record->objectType != CLEANUP_OBJECT_SHARD_PLACEMENT) + { + ereport(WARNING, (errmsg("Invalid object type %d for cleanup record ", + record->objectType))); + continue; + } + + if (record->policy == CLEANUP_ALWAYS || record->policy == CLEANUP_ON_FAILURE) + { + char *qualifiedTableName = record->objectName; + WorkerNode *workerNode = LookupNodeForGroup(record->nodeGroupId); + + /* + * For all resources of CurrentOperationId that are marked as 'CLEANUP_ALWAYS' or + * 'CLEANUP_ON_FAILURE', drop resource and cleanup records. + */ + if (TryDropShardOutsideTransaction(CurrentOperationId, + qualifiedTableName, + workerNode->workerName, + workerNode->workerPort)) + { + /* + * Given the operation is failing and we will abort its transaction, we cannot delete + * records in the current transaction. Delete these records outside of the + * current transaction via a localhost connection. + */ + DeleteCleanupRecordByRecordIdOutsideTransaction(record->recordId); + removedShardCountOnComplete++; + } + else + { + failedShardCountOnComplete++; + } + } + } + + if (list_length(currentOperationRecordList) > 0) + { + ereport(LOG, (errmsg("Removed %d orphaned shards out of %d", + removedShardCountOnComplete, list_length( + currentOperationRecordList)))); + + if (failedShardCountOnComplete > 0) + { + ereport(WARNING, (errmsg("Failed to cleanup %d shards out of %d", + failedShardCountOnComplete, list_length( + currentOperationRecordList)))); + } + } +} + + +/* + * FinalizeOperationNeedingCleanupOnSuccess is be called by an operation to signal + * completion with success. This will trigger cleanup of appropriate resources. + */ +void +FinalizeOperationNeedingCleanupOnSuccess() +{ + /* We must have a valid OperationId. Any operation requring cleanup + * will call RegisterOperationNeedingCleanup. + */ + Assert(CurrentOperationId != INVALID_OPERATION_ID); + + List *currentOperationRecordList = ListCleanupRecordsForCurrentOperation(); + + int removedShardCountOnComplete = 0; + int failedShardCountOnComplete = 0; + + CleanupRecord *record = NULL; + foreach_ptr(record, currentOperationRecordList) + { + /* We only supporting cleaning shards right now */ + if (record->objectType != CLEANUP_OBJECT_SHARD_PLACEMENT) + { + ereport(WARNING, (errmsg("Invalid object type %d for cleanup record ", + record->objectType))); + continue; + } + + if (record->policy == CLEANUP_ALWAYS) + { + char *qualifiedTableName = record->objectName; + WorkerNode *workerNode = LookupNodeForGroup(record->nodeGroupId); + + /* + * For all resources of CurrentOperationId that are marked as 'CLEANUP_ALWAYS' + * drop resource and cleanup records. 
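In summary, the two finalize paths and the deferred path treat the policies as follows; an illustrative helper, not part of this patch, encoding the same rules:

    static bool
    ShouldDropAtFinalize(CleanupPolicy policy, bool operationFailed)
    {
        if (policy == CLEANUP_ALWAYS)
        {
            /* transient objects such as dummy shards: dropped on success and on failure */
            return true;
        }

        if (policy == CLEANUP_ON_FAILURE)
        {
            /* newly created split children: dropped only when the operation failed */
            return operationFailed;
        }

        /* CLEANUP_DEFERRED_ON_SUCCESS: left for citus_cleanup_orphaned_resources or the maintenance daemon */
        return false;
    }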
+ */ + if (TryDropShardOutsideTransaction(CurrentOperationId, + qualifiedTableName, + workerNode->workerName, + workerNode->workerPort)) + { + /* + * Delete cleanup records outside transaction as: + * The resources are marked as 'CLEANUP_ALWAYS' and should be cleaned no matter + * the operation succeeded or failed. + */ + DeleteCleanupRecordByRecordIdOutsideTransaction(record->recordId); + removedShardCountOnComplete++; + } + else + { + failedShardCountOnComplete++; + } + } + else if (record->policy == CLEANUP_ON_FAILURE) + { + /* Delete cleanup records (and not the actual resource) in same transaction as: + * The resources are marked as 'CLEANUP_ON_FAILURE' and we are approaching a successful + * completion of the operation. However, we cannot guarentee that operation will succeed + * so we tie the Delete with parent transaction. + */ + DeleteCleanupRecordByRecordId(record->recordId); + } + } + + if (list_length(currentOperationRecordList) > 0) + { + ereport(LOG, (errmsg("Removed %d orphaned shards out of %d", + removedShardCountOnComplete, list_length( + currentOperationRecordList)))); + + if (failedShardCountOnComplete > 0) + { + ereport(WARNING, (errmsg("Failed to cleanup %d shards out of %d", + failedShardCountOnComplete, list_length( + currentOperationRecordList)))); + } + } +} + + +/* + * InsertCleanupRecordInCurrentTransaction inserts a new pg_dist_cleanup_record entry + * as part of the current transaction. This is primarily useful for deferred drop scenarios, + * since these records would roll back in case of operation failure. + */ +void +InsertCleanupRecordInCurrentTransaction(CleanupObject objectType, + char *objectName, + int nodeGroupId, + CleanupPolicy policy) +{ + /* We must have a valid OperationId. Any operation requring cleanup + * will call RegisterOperationNeedingCleanup. + */ + Assert(CurrentOperationId != INVALID_OPERATION_ID); + + Datum values[Natts_pg_dist_cleanup]; + bool isNulls[Natts_pg_dist_cleanup]; + + /* form new shard tuple */ + memset(values, 0, sizeof(values)); + memset(isNulls, false, sizeof(isNulls)); + + uint64 recordId = GetNextCleanupRecordId(); + OperationId operationId = CurrentOperationId; + + values[Anum_pg_dist_cleanup_record_id - 1] = UInt64GetDatum(recordId); + values[Anum_pg_dist_cleanup_operation_id - 1] = UInt64GetDatum(operationId); + values[Anum_pg_dist_cleanup_object_type - 1] = Int32GetDatum(objectType); + values[Anum_pg_dist_cleanup_object_name - 1] = CStringGetTextDatum(objectName); + values[Anum_pg_dist_cleanup_node_group_id - 1] = Int32GetDatum(nodeGroupId); + values[Anum_pg_dist_cleanup_policy_type - 1] = Int32GetDatum(policy); + + /* open cleanup relation and insert new tuple */ + Oid relationId = DistCleanupRelationId(); + Relation pgDistCleanup = table_open(relationId, RowExclusiveLock); + + TupleDesc tupleDescriptor = RelationGetDescr(pgDistCleanup); + HeapTuple heapTuple = heap_form_tuple(tupleDescriptor, values, isNulls); + + CatalogTupleInsert(pgDistCleanup, heapTuple); + + CommandCounterIncrement(); + table_close(pgDistCleanup, NoLock); +} + + +/* + * InsertCleanupRecordInSeparateTransaction inserts a new pg_dist_cleanup_record entry + * in a separate transaction to ensure the record persists after rollback. We should + * delete these records if the operation completes successfully. + * + * For failure scenarios, use a subtransaction (direct insert via localhost). 
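Concretely, InsertCleanupRecordInSubtransaction (defined just below) sends a command of roughly the following shape over the localhost connection; the values, including the numeric object type and policy codes, are placeholders for illustration.

    INSERT INTO pg_catalog.pg_dist_cleanup
        (record_id, operation_id, object_type, object_name, node_group_id, policy_type)
    VALUES (nextval('pg_catalog.pg_dist_cleanup_recordid_seq'), 980000, 1, 'public.sensors_102008', 2, 1);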
+ */ +void +InsertCleanupRecordInSubtransaction(CleanupObject objectType, + char *objectName, + int nodeGroupId, + CleanupPolicy policy) +{ + /* We must have a valid OperationId. Any operation requring cleanup + * will call RegisterOperationNeedingCleanup. + */ + Assert(CurrentOperationId != INVALID_OPERATION_ID); + + StringInfo sequenceName = makeStringInfo(); + appendStringInfo(sequenceName, "%s.%s", + PG_CATALOG, + CLEANUPRECORDID_SEQUENCE_NAME); + + StringInfo command = makeStringInfo(); + appendStringInfo(command, + "INSERT INTO %s.%s " + " (record_id, operation_id, object_type, object_name, node_group_id, policy_type) " + " VALUES ( nextval('%s'), " UINT64_FORMAT ", %d, %s, %d, %d)", + PG_CATALOG, + PG_DIST_CLEANUP, + sequenceName->data, + CurrentOperationId, + objectType, + quote_literal_cstr(objectName), + nodeGroupId, + policy); + + SendCommandListToWorkerOutsideTransaction(LocalHostName, + PostPortNumber, + CitusExtensionOwnerName(), + list_make1(command->data)); +} + + +/* + * DeleteCleanupRecordByRecordId deletes a cleanup record by record id. + */ +static void +DeleteCleanupRecordByRecordIdOutsideTransaction(uint64 recordId) +{ + StringInfo command = makeStringInfo(); + appendStringInfo(command, + "DELETE FROM %s.%s " + "WHERE record_id = %lu", + PG_CATALOG, + PG_DIST_CLEANUP, + recordId); + + SendCommandListToWorkerOutsideTransaction(LocalHostName, + PostPortNumber, + CitusExtensionOwnerName(), + list_make1(command->data)); +} + + /* * TryLockRelationAndPlacementCleanup tries to lock the given relation * and the placement cleanup. If it cannot, it returns false. @@ -245,20 +727,20 @@ TryLockRelationAndPlacementCleanup(Oid relationId, LOCKMODE lockmode) * true on success. */ static bool -TryDropShard(GroupShardPlacement *placement) +TryDropShardOutsideTransaction(OperationId operationId, char *qualifiedTableName, + char *nodeName, int nodePort) { - ShardPlacement *shardPlacement = LoadShardPlacement(placement->shardId, - placement->placementId); - ShardInterval *shardInterval = LoadShardInterval(shardPlacement->shardId); + char *operation = (operationId == INVALID_OPERATION_ID) ? "move" : "cleanup"; - ereport(LOG, (errmsg("dropping shard placement " INT64_FORMAT " of shard " - INT64_FORMAT " on %s:%d after it was moved away", - shardPlacement->placementId, shardPlacement->shardId, - shardPlacement->nodeName, shardPlacement->nodePort))); + ereport(LOG, (errmsg("cleaning up %s on %s:%d which was left " + "after a %s", + qualifiedTableName, + nodeName, + nodePort, + operation))); /* prepare sql query to execute to drop the shard */ StringInfo dropQuery = makeStringInfo(); - char *qualifiedTableName = ConstructQualifiedShardName(shardInterval); appendStringInfo(dropQuery, DROP_REGULAR_TABLE_COMMAND, qualifiedTableName); /* @@ -274,15 +756,316 @@ TryDropShard(GroupShardPlacement *placement) dropQuery->data); /* remove the shard from the node */ - bool success = - SendOptionalCommandListToWorkerOutsideTransaction(shardPlacement->nodeName, - shardPlacement->nodePort, - NULL, dropCommandList); - if (success) - { - /* delete the actual placement */ - DeleteShardPlacementRow(placement->placementId); - } + bool success = SendOptionalCommandListToWorkerOutsideTransaction(nodeName, + nodePort, + NULL, + dropCommandList); return success; } + + +/* + * GetNextOperationId allocates and returns a unique operationId for an operation + * requiring potential cleanup. 
This allocation occurs both in shared memory and + * in write ahead logs; writing to logs avoids the risk of having operationId collisions. + */ +static OperationId +GetNextOperationId() +{ + OperationId operationdId = INVALID_OPERATION_ID; + + /* + * In regression tests, we would like to generate operation IDs consistently + * even if the tests run in parallel. Instead of the sequence, we can use + * the next_operation_id GUC to specify which operation ID the current session should + * generate next. The GUC is automatically increased by 1 every time a new + * operation ID is generated. + */ + if (NextOperationId > 0) + { + operationdId = NextOperationId; + NextOperationId += 1; + + return operationdId; + } + + /* Generate sequence using a subtransaction. else we can hold replication slot creation for operations */ + StringInfo sequenceName = makeStringInfo(); + appendStringInfo(sequenceName, "%s.%s", + PG_CATALOG, + OPERATIONID_SEQUENCE_NAME); + + StringInfo nextValueCommand = makeStringInfo(); + appendStringInfo(nextValueCommand, "SELECT nextval(%s);", + quote_literal_cstr(sequenceName->data)); + + int connectionFlag = FORCE_NEW_CONNECTION; + MultiConnection *connection = GetNodeUserDatabaseConnection(connectionFlag, + LocalHostName, + PostPortNumber, + CitusExtensionOwnerName(), + get_database_name( + MyDatabaseId)); + + PGresult *result = NULL; + int queryResult = ExecuteOptionalRemoteCommand(connection, nextValueCommand->data, + &result); + if (queryResult != RESPONSE_OKAY || !IsResponseOK(result) || PQntuples(result) != 1 || + PQnfields(result) != 1) + { + ReportResultError(connection, result, ERROR); + } + + operationdId = SafeStringToUint64(PQgetvalue(result, 0, 0 /* nodeId column*/)); + + PQclear(result); + ForgetResults(connection); + CloseConnection(connection); + + return operationdId; +} + + +/* + * ListCleanupRecords lists all the current cleanup records. + */ +static List * +ListCleanupRecords(void) +{ + Relation pgDistCleanup = table_open(DistCleanupRelationId(), AccessShareLock); + TupleDesc tupleDescriptor = RelationGetDescr(pgDistCleanup); + + List *recordList = NIL; + int scanKeyCount = 0; + bool indexOK = false; + + SysScanDesc scanDescriptor = systable_beginscan(pgDistCleanup, InvalidOid, + indexOK, NULL, scanKeyCount, NULL); + + HeapTuple heapTuple = NULL; + while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) + { + CleanupRecord *record = TupleToCleanupRecord(heapTuple, tupleDescriptor); + recordList = lappend(recordList, record); + } + + systable_endscan(scanDescriptor); + table_close(pgDistCleanup, NoLock); + + return recordList; +} + + +/* + * ListCleanupRecordsForCurrentOperation lists all the cleanup records for + * current operation. + */ +static List * +ListCleanupRecordsForCurrentOperation(void) +{ + /* We must have a valid OperationId. Any operation requring cleanup + * will call RegisterOperationNeedingCleanup. 
+ */ + Assert(CurrentOperationId != INVALID_OPERATION_ID); + + Relation pgDistCleanup = table_open(DistCleanupRelationId(), AccessShareLock); + TupleDesc tupleDescriptor = RelationGetDescr(pgDistCleanup); + + ScanKeyData scanKey[1]; + ScanKeyInit(&scanKey[0], Anum_pg_dist_cleanup_operation_id, BTEqualStrategyNumber, + F_INT8EQ, UInt64GetDatum(CurrentOperationId)); + + int scanKeyCount = 1; + Oid scanIndexId = InvalidOid; + bool useIndex = false; + SysScanDesc scanDescriptor = systable_beginscan(pgDistCleanup, scanIndexId, useIndex, + NULL, + scanKeyCount, scanKey); + + HeapTuple heapTuple = NULL; + List *recordList = NIL; + while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) + { + CleanupRecord *record = TupleToCleanupRecord(heapTuple, tupleDescriptor); + recordList = lappend(recordList, record); + } + + systable_endscan(scanDescriptor); + table_close(pgDistCleanup, NoLock); + + return recordList; +} + + +/* + * TupleToCleanupRecord converts a pg_dist_cleanup record tuple into a CleanupRecord struct. + */ +static CleanupRecord * +TupleToCleanupRecord(HeapTuple heapTuple, TupleDesc tupleDescriptor) +{ + Datum datumArray[Natts_pg_dist_cleanup]; + bool isNullArray[Natts_pg_dist_cleanup]; + heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); + + CleanupRecord *record = palloc0(sizeof(CleanupRecord)); + + record->recordId = + DatumGetUInt64(datumArray[Anum_pg_dist_cleanup_record_id - 1]); + + record->operationId = + DatumGetUInt64(datumArray[Anum_pg_dist_cleanup_operation_id - 1]); + + record->objectType = + DatumGetInt32(datumArray[Anum_pg_dist_cleanup_object_type - 1]); + + record->objectName = + TextDatumGetCString(datumArray[Anum_pg_dist_cleanup_object_name - 1]); + + record->nodeGroupId = + DatumGetInt32(datumArray[Anum_pg_dist_cleanup_node_group_id - 1]); + + record->policy = + DatumGetInt32(datumArray[Anum_pg_dist_cleanup_policy_type - 1]); + + return record; +} + + +/* + * CleanupRecordExists returns whether a cleanup record with the given + * record ID exists in pg_dist_cleanup_record. + */ +static bool +CleanupRecordExists(uint64 recordId) +{ + Relation pgDistCleanup = table_open(DistCleanupRelationId(), + AccessShareLock); + + const int scanKeyCount = 1; + ScanKeyData scanKey[1]; + bool indexOK = true; + + ScanKeyInit(&scanKey[0], Anum_pg_dist_cleanup_record_id, + BTEqualStrategyNumber, F_INT8EQ, UInt64GetDatum(recordId)); + + SysScanDesc scanDescriptor = systable_beginscan(pgDistCleanup, + DistCleanupPrimaryKeyIndexId(), + indexOK, + NULL, scanKeyCount, scanKey); + + HeapTuple heapTuple = systable_getnext(scanDescriptor); + bool recordExists = HeapTupleIsValid(heapTuple); + + systable_endscan(scanDescriptor); + + CommandCounterIncrement(); + table_close(pgDistCleanup, NoLock); + + return recordExists; +} + + +/* + * DeleteCleanupRecordByRecordId deletes a single pg_dist_cleanup_record entry. 
+ */ +static void +DeleteCleanupRecordByRecordId(uint64 recordId) +{ + Relation pgDistCleanup = table_open(DistCleanupRelationId(), + RowExclusiveLock); + + const int scanKeyCount = 1; + ScanKeyData scanKey[1]; + bool indexOK = true; + + ScanKeyInit(&scanKey[0], Anum_pg_dist_cleanup_record_id, + BTEqualStrategyNumber, F_INT8EQ, UInt64GetDatum(recordId)); + + SysScanDesc scanDescriptor = systable_beginscan(pgDistCleanup, + DistCleanupPrimaryKeyIndexId(), + indexOK, + NULL, scanKeyCount, scanKey); + + HeapTuple heapTuple = systable_getnext(scanDescriptor); + if (heapTuple == NULL) + { + ereport(ERROR, (errmsg("could not find cleanup record " UINT64_FORMAT, + recordId))); + } + + simple_heap_delete(pgDistCleanup, &heapTuple->t_self); + + systable_endscan(scanDescriptor); + + CommandCounterIncrement(); + table_close(pgDistCleanup, NoLock); +} + + +/* + * GetNextCleanupRecordId allocates and returns a unique recordid for a cleanup entry. + * This allocation occurs both in shared memory and + * in write ahead logs; writing to logs avoids the risk of having operationId collisions. + */ +static uint64 +GetNextCleanupRecordId(void) +{ + uint64 recordId = INVALID_CLEANUP_RECORD_ID; + + /* + * In regression tests, we would like to generate record IDs consistently + * even if the tests run in parallel. Instead of the sequence, we can use + * the next_record_id GUC to specify which recordid ID the current session should + * generate next. The GUC is automatically increased by 1 every time a new + * record ID is generated. + */ + if (NextCleanupRecordId > 0) + { + recordId = NextCleanupRecordId; + NextCleanupRecordId += 1; + + return recordId; + } + + RangeVar *sequenceName = makeRangeVar(PG_CATALOG, + CLEANUPRECORDID_SEQUENCE_NAME, + -1); + + bool missingOK = false; + Oid sequenceId = RangeVarGetRelid(sequenceName, NoLock, missingOK); + bool checkPermissions = false; + return nextval_internal(sequenceId, checkPermissions); +} + + +/* + * LockOperationId takes an exclusive lock to ensure that only one process + * can cleanup operationId resources at the same time. + */ +static void +LockOperationId(OperationId operationId) +{ + LOCKTAG tag; + const bool sessionLock = false; + const bool dontWait = false; + SET_LOCKTAG_CLEANUP_OPERATION_ID(tag, operationId); + (void) LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait); +} + + +/* + * TryLockOperationId takes an exclusive lock (with dontWait = true) to ensure that + * only one process can cleanup operationId resources at the same time. 
+ */ +static bool +TryLockOperationId(OperationId operationId) +{ + LOCKTAG tag; + const bool sessionLock = false; + const bool dontWait = true; + SET_LOCKTAG_CLEANUP_OPERATION_ID(tag, operationId); + LockAcquireResult lockResult = LockAcquire(&tag, ExclusiveLock, sessionLock, + dontWait); + return (lockResult != LOCKACQUIRE_NOT_AVAIL); +} diff --git a/src/backend/distributed/operations/shard_split.c b/src/backend/distributed/operations/shard_split.c index f7116d565..4656e798f 100644 --- a/src/backend/distributed/operations/shard_split.c +++ b/src/backend/distributed/operations/shard_split.c @@ -34,6 +34,7 @@ #include "distributed/multi_partitioning_utils.h" #include "distributed/worker_manager.h" #include "distributed/worker_transaction.h" +#include "distributed/shard_cleaner.h" #include "distributed/shared_library_init.h" #include "distributed/pg_dist_shard.h" #include "distributed/metadata_sync.h" @@ -45,6 +46,9 @@ #include "distributed/shard_rebalancer.h" #include "postmaster/postmaster.h" +/* declarations for dynamic loading */ +bool DeferShardDeleteOnSplit = true; + /* * Entry for map that tracks ShardInterval -> Placement Node * created by split workflow. @@ -73,12 +77,13 @@ static void ErrorIfCannotSplitShardExtended(SplitOperation splitOperation, ShardInterval *shardIntervalToSplit, List *shardSplitPointsList, List *nodeIdsForPlacementList); +static bool CheckIfRelationWithSameNameExists(ShardInterval *shardInterval, + WorkerNode *workerNode); static void ErrorIfModificationAndSplitInTheSameTransaction(SplitOperation splitOperation); -static void CreateSplitShardsForShardGroup(HTAB *mapOfShardToPlacementCreatedByWorkflow, - List *shardGroupSplitIntervalListList, +static void CreateSplitShardsForShardGroup(List *shardGroupSplitIntervalListList, List *workersForPlacementList); -static void CreateDummyShardsForShardGroup(HTAB *mapOfDummyShardToPlacement, +static void CreateDummyShardsForShardGroup(HTAB *mapOfPlacementToDummyShardList, List *sourceColocatedShardIntervalList, List *shardGroupSplitIntervalListList, WorkerNode *sourceWorkerNode, @@ -87,7 +92,7 @@ static HTAB * CreateWorkerForPlacementSet(List *workersForPlacementList); static void CreateAuxiliaryStructuresForShardGroup(List *shardGroupSplitIntervalListList, List *workersForPlacementList, bool includeReplicaIdentity); -static void CreateReplicaIdentitiesForDummyShards(HTAB *mapOfDummyShardToPlacement); +static void CreateReplicaIdentitiesForDummyShards(HTAB *mapOfPlacementToDummyShardList); static void CreateObjectOnPlacement(List *objectCreationCommandList, WorkerNode *workerNode); static List * CreateSplitIntervalsForShardGroup(List *sourceColocatedShardList, @@ -131,8 +136,6 @@ static void CreatePartitioningHierarchy(List *shardGroupSplitIntervalListList, List *workersForPlacementList); static void CreateForeignKeyConstraints(List *shardGroupSplitIntervalListList, List *workersForPlacementList); -static void TryDropSplitShardsOnFailure(HTAB *mapOfShardToPlacementCreatedByWorkflow); -static HTAB * CreateEmptyMapForShardsCreatedByWorkflow(); static Task * CreateTaskForDDLCommandList(List *ddlCommandList, WorkerNode *workerNode); static StringInfo CreateSplitShardReplicationSetupUDF( List *sourceColocatedShardIntervalList, List *shardGroupSplitIntervalListList, @@ -148,13 +151,15 @@ static List * ExecuteSplitShardReplicationSetupUDF(WorkerNode *sourceWorkerNode, DistributionColumnMap * distributionColumnOverrides); static void ExecuteSplitShardReleaseSharedMemory(WorkerNode *sourceWorkerNode); -static void 
AddDummyShardEntryInMap(HTAB *mapOfDummyShards, uint32 targetNodeId, +static void AddDummyShardEntryInMap(HTAB *mapOfPlacementToDummyShardList, uint32 + targetNodeId, ShardInterval *shardInterval); -static void DropDummyShards(HTAB *mapOfDummyShardToPlacement); -static void DropDummyShard(MultiConnection *connection, ShardInterval *shardInterval); static uint64 GetNextShardIdForSplitChild(void); static void AcquireNonblockingSplitLock(Oid relationId); static List * GetWorkerNodesFromWorkerIds(List *nodeIdsForPlacementList); +static void DropShardListMetadata(List *shardIntervalList); +static void DropShardList(List *shardIntervalList); +static void InsertDeferredDropCleanupRecordsForShards(List *shardIntervalList); /* Customize error message strings based on operation type */ static const char *const SplitOperationName[] = @@ -517,6 +522,9 @@ SplitShard(SplitMode splitMode, /* use the user-specified shard ID as the split workflow ID */ uint64 splitWorkflowId = shardIntervalToSplit->shardId; + /* Start operation to prepare for generating cleanup records */ + RegisterOperationNeedingCleanup(); + if (splitMode == BLOCKING_SPLIT) { BlockingShardSplit( @@ -540,70 +548,8 @@ SplitShard(SplitMode splitMode, PlacementMovedUsingLogicalReplicationInTX = true; } -} - -/* - * ShardIntervalHashCode computes the hash code for a Shardinterval using - * shardId. - */ -static uint32 -ShardIntervalHashCode(const void *key, Size keySize) -{ - const ShardInterval *shardInterval = (const ShardInterval *) key; - const uint64 *shardId = &(shardInterval->shardId); - - /* standard hash function outlined in Effective Java, Item 8 */ - uint32 result = 17; - result = 37 * result + tag_hash(shardId, sizeof(uint64)); - - return result; -} - - -/* - * ShardIntervalHashCompare compares two shard intervals using shard id. - */ -static int -ShardIntervalHashCompare(const void *lhsKey, const void *rhsKey, Size keySize) -{ - const ShardInterval *intervalLhs = (const ShardInterval *) lhsKey; - const ShardInterval *intervalRhs = (const ShardInterval *) rhsKey; - - int shardIdCompare = 0; - - /* first, compare by shard id */ - if (intervalLhs->shardId < intervalRhs->shardId) - { - shardIdCompare = -1; - } - else if (intervalLhs->shardId > intervalRhs->shardId) - { - shardIdCompare = 1; - } - - return shardIdCompare; -} - - -/* Create an empty map that tracks ShardInterval -> Placement Node as created by workflow */ -static HTAB * -CreateEmptyMapForShardsCreatedByWorkflow() -{ - HASHCTL info = { 0 }; - info.keysize = sizeof(ShardInterval); - info.entrysize = sizeof(ShardCreatedByWorkflowEntry); - info.hash = ShardIntervalHashCode; - info.match = ShardIntervalHashCompare; - info.hcxt = CurrentMemoryContext; - - /* we don't have value field as it's a set */ - info.entrysize = info.keysize; - uint32 hashFlags = (HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); - - HTAB *splitChildrenCreatedByWorkflow = hash_create("Shard id to Node Placement Map", - 32, &info, hashFlags); - return splitChildrenCreatedByWorkflow; + FinalizeOperationNeedingCleanupOnSuccess(); } @@ -636,13 +582,10 @@ BlockingShardSplit(SplitOperation splitOperation, WorkerNode *sourceShardNode = ActiveShardPlacementWorkerNode(firstShard->shardId); - HTAB *mapOfShardToPlacementCreatedByWorkflow = - CreateEmptyMapForShardsCreatedByWorkflow(); PG_TRY(); { /* Physically create split children. 
*/ - CreateSplitShardsForShardGroup(mapOfShardToPlacementCreatedByWorkflow, - shardGroupSplitIntervalListList, + CreateSplitShardsForShardGroup(shardGroupSplitIntervalListList, workersForPlacementList); /* For Blocking split, copy isn't snapshotted */ @@ -651,6 +594,9 @@ BlockingShardSplit(SplitOperation splitOperation, shardGroupSplitIntervalListList, workersForPlacementList, snapshotName, distributionColumnOverrides); + /* Used for testing */ + ConflictOnlyWithIsolationTesting(); + /* Create auxiliary structures (indexes, stats, replicaindentities, triggers) */ CreateAuxiliaryStructuresForShardGroup(shardGroupSplitIntervalListList, workersForPlacementList, @@ -662,12 +608,24 @@ BlockingShardSplit(SplitOperation splitOperation, * going forward are part of the same distributed transaction. */ + /* - * Drop old shards and delete related metadata. Have to do that before - * creating the new shard metadata, because there's cross-checks - * preventing inconsistent metadata (like overlapping shards). + * Delete old shards metadata and either mark the shards as + * to be deferred drop or physically delete them. + * Have to do that before creating the new shard metadata, + * because there's cross-checks preventing inconsistent metadata + * (like overlapping shards). */ - DropShardList(sourceColocatedShardIntervalList); + if (DeferShardDeleteOnSplit) + { + InsertDeferredDropCleanupRecordsForShards(sourceColocatedShardIntervalList); + } + else + { + DropShardList(sourceColocatedShardIntervalList); + } + + DropShardListMetadata(sourceColocatedShardIntervalList); /* Insert new shard and placement metdata */ InsertSplitChildrenShardMetadata(shardGroupSplitIntervalListList, @@ -691,7 +649,7 @@ BlockingShardSplit(SplitOperation splitOperation, ShutdownAllConnections(); /* Do a best effort cleanup of shards created on workers in the above block */ - TryDropSplitShardsOnFailure(mapOfShardToPlacementCreatedByWorkflow); + FinalizeOperationNeedingCleanupOnFailure(); PG_RE_THROW(); } @@ -702,10 +660,48 @@ BlockingShardSplit(SplitOperation splitOperation, } +/* Check if a relation with given name already exists on the worker node */ +static bool +CheckIfRelationWithSameNameExists(ShardInterval *shardInterval, WorkerNode *workerNode) +{ + char *schemaName = get_namespace_name( + get_rel_namespace(shardInterval->relationId)); + char *shardName = get_rel_name(shardInterval->relationId); + AppendShardIdToName(&shardName, shardInterval->shardId); + + StringInfo checkShardExistsQuery = makeStringInfo(); + appendStringInfo(checkShardExistsQuery, + "SELECT EXISTS (SELECT FROM pg_catalog.pg_tables WHERE schemaname = '%s' AND tablename = '%s');", + schemaName, + shardName); + + int connectionFlags = 0; + MultiConnection *connection = GetNodeUserDatabaseConnection(connectionFlags, + workerNode->workerName, + workerNode->workerPort, + CitusExtensionOwnerName(), + get_database_name( + MyDatabaseId)); + + PGresult *result = NULL; + int queryResult = ExecuteOptionalRemoteCommand(connection, + checkShardExistsQuery->data, &result); + if (queryResult != RESPONSE_OKAY || !IsResponseOK(result) || PQntuples(result) != 1) + { + ReportResultError(connection, result, ERROR); + } + + char *checkExists = PQgetvalue(result, 0, 0); + PQclear(result); + ForgetResults(connection); + + return strcmp(checkExists, "t") == 0; +} + + /* Create ShardGroup split children on a list of corresponding workers. 
*/ static void -CreateSplitShardsForShardGroup(HTAB *mapOfShardToPlacementCreatedByWorkflow, - List *shardGroupSplitIntervalListList, +CreateSplitShardsForShardGroup(List *shardGroupSplitIntervalListList, List *workersForPlacementList) { /* @@ -733,16 +729,33 @@ CreateSplitShardsForShardGroup(HTAB *mapOfShardToPlacementCreatedByWorkflow, splitShardCreationCommandList, shardInterval->shardId); - /* Create new split child shard on the specified placement list */ - CreateObjectOnPlacement(splitShardCreationCommandList, workerPlacementNode); + /* Log resource for cleanup in case of failure only. + * Before we log a record, do a best effort check to see if a shard with same name exists. + * This is because, it will cause shard creation to fail and we will end up cleaning the + * old shard. We don't want that. + */ + bool relationExists = CheckIfRelationWithSameNameExists(shardInterval, + workerPlacementNode); - ShardCreatedByWorkflowEntry entry; - entry.shardIntervalKey = shardInterval; - entry.workerNodeValue = workerPlacementNode; - bool found = false; - hash_search(mapOfShardToPlacementCreatedByWorkflow, &entry, HASH_ENTER, - &found); - Assert(!found); + if (relationExists) + { + ereport(ERROR, (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation %s already exists on worker %s:%d", + ConstructQualifiedShardName(shardInterval), + workerPlacementNode->workerName, + workerPlacementNode->workerPort))); + } + + CleanupPolicy policy = CLEANUP_ON_FAILURE; + InsertCleanupRecordInSubtransaction(CLEANUP_OBJECT_SHARD_PLACEMENT, + ConstructQualifiedShardName( + shardInterval), + workerPlacementNode->groupId, + policy); + + /* Create new split child shard on the specified placement list */ + CreateObjectOnPlacement(splitShardCreationCommandList, + workerPlacementNode); } } } @@ -1294,11 +1307,11 @@ CreateForeignKeyConstraints(List *shardGroupSplitIntervalListList, /* - * DropShardList drops shards and their metadata from both the coordinator and + * DropShardListMetadata drops shard metadata from both the coordinator and * mx nodes. */ -void -DropShardList(List *shardIntervalList) +static void +DropShardListMetadata(List *shardIntervalList) { ListCell *shardIntervalCell = NULL; @@ -1323,7 +1336,35 @@ DropShardList(List *shardIntervalList) } } - /* delete shard placements and drop shards */ + /* delete shard placements */ + List *shardPlacementList = ActiveShardPlacementList(oldShardId); + foreach(shardPlacementCell, shardPlacementList) + { + ShardPlacement *placement = (ShardPlacement *) lfirst(shardPlacementCell); + DeleteShardPlacementRow(placement->placementId); + } + + /* delete shard row */ + DeleteShardRow(oldShardId); + } +} + + +/* + * DropShardList drops actual shards from the worker nodes. 
+ */ +static void +DropShardList(List *shardIntervalList) +{ + ListCell *shardIntervalCell = NULL; + + foreach(shardIntervalCell, shardIntervalList) + { + ShardInterval *shardInterval = (ShardInterval *) lfirst(shardIntervalCell); + ListCell *shardPlacementCell = NULL; + uint64 oldShardId = shardInterval->shardId; + + /* delete shard placements */ List *shardPlacementList = ActiveShardPlacementList(oldShardId); foreach(shardPlacementCell, shardPlacementList) { @@ -1332,8 +1373,6 @@ DropShardList(List *shardIntervalList) uint32 workerPort = placement->nodePort; StringInfo dropQuery = makeStringInfo(); - DeleteShardPlacementRow(placement->placementId); - /* get shard name */ char *qualifiedShardName = ConstructQualifiedShardName(shardInterval); @@ -1352,53 +1391,47 @@ DropShardList(List *shardIntervalList) /* drop old shard */ SendCommandToWorker(workerName, workerPort, dropQuery->data); } - - /* delete shard row */ - DeleteShardRow(oldShardId); } } /* - * In case of failure, TryDropSplitShardsOnFailure drops in-progress shard placements from both the - * coordinator and mx nodes. + * If deferred drop is enabled, insert deferred cleanup records instead of + * dropping actual shards from the worker nodes. The shards will be dropped + * by background cleaner later. */ static void -TryDropSplitShardsOnFailure(HTAB *mapOfShardToPlacementCreatedByWorkflow) +InsertDeferredDropCleanupRecordsForShards(List *shardIntervalList) { - HASH_SEQ_STATUS status; - ShardCreatedByWorkflowEntry *entry; + ListCell *shardIntervalCell = NULL; - hash_seq_init(&status, mapOfShardToPlacementCreatedByWorkflow); - while ((entry = (ShardCreatedByWorkflowEntry *) hash_seq_search(&status)) != 0) + foreach(shardIntervalCell, shardIntervalList) { - ShardInterval *shardInterval = entry->shardIntervalKey; - WorkerNode *workerPlacementNode = entry->workerNodeValue; + ShardInterval *shardInterval = (ShardInterval *) lfirst(shardIntervalCell); + ListCell *shardPlacementCell = NULL; + uint64 oldShardId = shardInterval->shardId; - char *qualifiedShardName = ConstructQualifiedShardName(shardInterval); - StringInfo dropShardQuery = makeStringInfo(); + /* mark for deferred drop */ + List *shardPlacementList = ActiveShardPlacementList(oldShardId); + foreach(shardPlacementCell, shardPlacementList) + { + ShardPlacement *placement = (ShardPlacement *) lfirst(shardPlacementCell); - /* Caller enforces that foreign tables cannot be split (use DROP_REGULAR_TABLE_COMMAND) */ - appendStringInfo(dropShardQuery, DROP_REGULAR_TABLE_COMMAND, - qualifiedShardName); + /* get shard name */ + char *qualifiedShardName = ConstructQualifiedShardName(shardInterval); - int connectionFlags = FOR_DDL; - connectionFlags |= OUTSIDE_TRANSACTION; - MultiConnection *connnection = GetNodeUserDatabaseConnection( - connectionFlags, - workerPlacementNode->workerName, - workerPlacementNode->workerPort, - CurrentUserName(), - NULL /* databaseName */); - - /* - * Perform a drop in best effort manner. - * The shard may or may not exist and the connection could have died. - */ - ExecuteOptionalRemoteCommand( - connnection, - dropShardQuery->data, - NULL /* pgResult */); + /* Log shard in pg_dist_cleanup. + * Parent shards are to be dropped only on sucess after split workflow is complete, + * so mark the policy as 'CLEANUP_DEFERRED_ON_SUCCESS'. + * We also log cleanup record in the current transaction. If the current transaction rolls back, + * we do not generate a record at all. 
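As noted in the comment above, the parent shards now linger until cleanup runs rather than being dropped as part of the split. The operator-facing knobs, sketched here with names taken from elsewhere in this patch, look like this:

    -- on by default: old placements are recorded for deferred drop and removed later
    -- by the maintenance daemon or by an explicit cleanup call
    SHOW citus.defer_drop_after_shard_split;
    CALL pg_catalog.citus_cleanup_orphaned_resources();

    -- turn deferral off to have the split drop the old shards immediately
    SET citus.defer_drop_after_shard_split TO off;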
+ */ + CleanupPolicy policy = CLEANUP_DEFERRED_ON_SUCCESS; + InsertCleanupRecordInCurrentTransaction(CLEANUP_OBJECT_SHARD_PLACEMENT, + qualifiedShardName, + placement->groupId, + policy); + } } } @@ -1489,11 +1522,6 @@ NonBlockingShardSplit(SplitOperation splitOperation, databaseName); ClaimConnectionExclusively(sourceConnection); - HTAB *mapOfShardToPlacementCreatedByWorkflow = - CreateEmptyMapForShardsCreatedByWorkflow(); - - HTAB *mapOfDummyShardToPlacement = CreateSimpleHash(NodeAndOwner, - GroupedShardSplitInfos); MultiConnection *sourceReplicationConnection = GetReplicationConnection(sourceShardToCopyNode->workerName, sourceShardToCopyNode->workerPort); @@ -1502,8 +1530,7 @@ NonBlockingShardSplit(SplitOperation splitOperation, PG_TRY(); { /* 1) Physically create split children. */ - CreateSplitShardsForShardGroup(mapOfShardToPlacementCreatedByWorkflow, - shardGroupSplitIntervalListList, + CreateSplitShardsForShardGroup(shardGroupSplitIntervalListList, workersForPlacementList); /* @@ -1511,8 +1538,10 @@ NonBlockingShardSplit(SplitOperation splitOperation, * Refer to the comment section of 'CreateDummyShardsForShardGroup' for indepth * information. */ + HTAB *mapOfPlacementToDummyShardList = CreateSimpleHash(NodeAndOwner, + GroupedShardSplitInfos); CreateDummyShardsForShardGroup( - mapOfDummyShardToPlacement, + mapOfPlacementToDummyShardList, sourceColocatedShardIntervalList, shardGroupSplitIntervalListList, sourceShardToCopyNode, @@ -1527,7 +1556,7 @@ NonBlockingShardSplit(SplitOperation splitOperation, * initial COPY phase, like we do for the replica identities on the * target shards. */ - CreateReplicaIdentitiesForDummyShards(mapOfDummyShardToPlacement); + CreateReplicaIdentitiesForDummyShards(mapOfPlacementToDummyShardList); /* 4) Create Publications. */ CreatePublications(sourceConnection, publicationInfoHash); @@ -1627,11 +1656,22 @@ NonBlockingShardSplit(SplitOperation splitOperation, DropPublications(sourceConnection, publicationInfoHash); /* - * 20) Drop old shards and delete related metadata. Have to do that before - * creating the new shard metadata, because there's cross-checks - * preventing inconsistent metadata (like overlapping shards). + * 20) Delete old shards metadata and either mark the shards as + * to be deferred drop or physically delete them. + * Have to do that before creating the new shard metadata, + * because there's cross-checks preventing inconsistent metadata + * (like overlapping shards). */ - DropShardList(sourceColocatedShardIntervalList); + if (DeferShardDeleteOnSplit) + { + InsertDeferredDropCleanupRecordsForShards(sourceColocatedShardIntervalList); + } + else + { + DropShardList(sourceColocatedShardIntervalList); + } + + DropShardListMetadata(sourceColocatedShardIntervalList); /* * 21) In case of create_distributed_table_concurrently, which converts @@ -1681,11 +1721,6 @@ NonBlockingShardSplit(SplitOperation splitOperation, CreateForeignKeyConstraints(shardGroupSplitIntervalListList, workersForPlacementList); - /* - * 24) Drop dummy shards. - */ - DropDummyShards(mapOfDummyShardToPlacement); - /* * 24) Release shared memory allocated by worker_split_shard_replication_setup udf * at source node. 
@@ -1706,14 +1741,13 @@ NonBlockingShardSplit(SplitOperation splitOperation, /* end ongoing transactions to enable us to clean up */ ShutdownAllConnections(); - /* Do a best effort cleanup of shards created on workers in the above block */ - TryDropSplitShardsOnFailure(mapOfShardToPlacementCreatedByWorkflow); - + /* Do a best effort cleanup of shards created on workers in the above block + * TODO(niupre): We don't need to do this once shard cleaner can clean replication + * artifacts. + */ DropAllLogicalReplicationLeftovers(SHARD_SPLIT); - DropDummyShards(mapOfDummyShardToPlacement); - - ExecuteSplitShardReleaseSharedMemory(sourceShardToCopyNode); + FinalizeOperationNeedingCleanupOnFailure(); PG_RE_THROW(); } @@ -1744,7 +1778,7 @@ NonBlockingShardSplit(SplitOperation splitOperation, * Note 2 : Given there is an overlap of source and destination in Worker0, Shard1_1 and Shard2_1 need not be created. */ static void -CreateDummyShardsForShardGroup(HTAB *mapOfDummyShardToPlacement, +CreateDummyShardsForShardGroup(HTAB *mapOfPlacementToDummyShardList, List *sourceColocatedShardIntervalList, List *shardGroupSplitIntervalListList, WorkerNode *sourceWorkerNode, @@ -1781,11 +1815,39 @@ CreateDummyShardsForShardGroup(HTAB *mapOfDummyShardToPlacement, splitShardCreationCommandList, shardInterval->shardId); + /* Log resource for cleanup in case of failure only. + * Before we log a record, do a best effort check to see if a shard with same name exists. + * This is because, it will cause shard creation to fail and we will end up cleaning the + * old shard. We don't want that. + */ + bool relationExists = CheckIfRelationWithSameNameExists(shardInterval, + workerPlacementNode); + + if (relationExists) + { + ereport(ERROR, (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation %s already exists on worker %s:%d", + ConstructQualifiedShardName(shardInterval), + workerPlacementNode->workerName, + workerPlacementNode->workerPort))); + } + + /* Log shard in pg_dist_cleanup. Given dummy shards are transient resources, + * we want to cleanup irrespective of operation success or failure. + */ + CleanupPolicy policy = CLEANUP_ALWAYS; + InsertCleanupRecordInSubtransaction(CLEANUP_OBJECT_SHARD_PLACEMENT, + ConstructQualifiedShardName( + shardInterval), + workerPlacementNode->groupId, + policy); + /* Create dummy source shard on the specified placement list */ - CreateObjectOnPlacement(splitShardCreationCommandList, workerPlacementNode); + CreateObjectOnPlacement(splitShardCreationCommandList, + workerPlacementNode); /* Add dummy source shard entry created for placement node in map */ - AddDummyShardEntryInMap(mapOfDummyShardToPlacement, + AddDummyShardEntryInMap(mapOfPlacementToDummyShardList, workerPlacementNode->nodeId, shardInterval); } @@ -1816,11 +1878,39 @@ CreateDummyShardsForShardGroup(HTAB *mapOfDummyShardToPlacement, splitShardCreationCommandList, shardInterval->shardId); + /* Log resource for cleanup in case of failure only. + * Before we log a record, do a best effort check to see if a shard with same name exists. + * This is because, it will cause shard creation to fail and we will end up cleaning the + * old shard. We don't want that. 
+ */ + bool relationExists = CheckIfRelationWithSameNameExists(shardInterval, + sourceWorkerNode); + + if (relationExists) + { + ereport(ERROR, (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation %s already exists on worker %s:%d", + ConstructQualifiedShardName(shardInterval), + sourceWorkerNode->workerName, + sourceWorkerNode->workerPort))); + } + + /* Log shard in pg_dist_cleanup. Given dummy shards are transient resources, + * we want to cleanup irrespective of operation success or failure. + */ + CleanupPolicy policy = CLEANUP_ALWAYS; + InsertCleanupRecordInSubtransaction(CLEANUP_OBJECT_SHARD_PLACEMENT, + ConstructQualifiedShardName( + shardInterval), + sourceWorkerNode->groupId, + policy); + /* Create dummy split child shard on source worker node */ CreateObjectOnPlacement(splitShardCreationCommandList, sourceWorkerNode); /* Add dummy split child shard entry created on source node */ - AddDummyShardEntryInMap(mapOfDummyShardToPlacement, sourceWorkerNode->nodeId, + AddDummyShardEntryInMap(mapOfPlacementToDummyShardList, + sourceWorkerNode->nodeId, shardInterval); } } @@ -2077,7 +2167,7 @@ ParseReplicationSlotInfoFromResult(PGresult *result) * of logical replication. We cautiously delete only the dummy shards added in the DummyShardHashMap. */ static void -AddDummyShardEntryInMap(HTAB *mapOfDummyShardToPlacement, uint32 targetNodeId, +AddDummyShardEntryInMap(HTAB *mapOfPlacementToDummyShardList, uint32 targetNodeId, ShardInterval *shardInterval) { NodeAndOwner key; @@ -2086,7 +2176,7 @@ AddDummyShardEntryInMap(HTAB *mapOfDummyShardToPlacement, uint32 targetNodeId, bool found = false; GroupedDummyShards *nodeMappingEntry = - (GroupedDummyShards *) hash_search(mapOfDummyShardToPlacement, &key, + (GroupedDummyShards *) hash_search(mapOfPlacementToDummyShardList, &key, HASH_ENTER, &found); if (!found) @@ -2099,68 +2189,6 @@ AddDummyShardEntryInMap(HTAB *mapOfDummyShardToPlacement, uint32 targetNodeId, } -/* - * DropDummyShards traverses the dummy shard map and drops shard at given node. - * It fails if the shard cannot be dropped. - */ -static void -DropDummyShards(HTAB *mapOfDummyShardToPlacement) -{ - HASH_SEQ_STATUS status; - hash_seq_init(&status, mapOfDummyShardToPlacement); - - GroupedDummyShards *entry = NULL; - while ((entry = (GroupedDummyShards *) hash_seq_search(&status)) != NULL) - { - uint32 nodeId = entry->key.nodeId; - WorkerNode *shardToBeDroppedNode = FindNodeWithNodeId(nodeId, - false /* missingOk */); - - int connectionFlags = FOR_DDL; - connectionFlags |= OUTSIDE_TRANSACTION; - MultiConnection *connection = GetNodeUserDatabaseConnection( - connectionFlags, - shardToBeDroppedNode->workerName, - shardToBeDroppedNode->workerPort, - CurrentUserName(), - NULL /* databaseName */); - - List *dummyShardIntervalList = entry->shardIntervals; - ShardInterval *shardInterval = NULL; - foreach_ptr(shardInterval, dummyShardIntervalList) - { - DropDummyShard(connection, shardInterval); - } - - CloseConnection(connection); - } -} - - -/* - * DropDummyShard drops a given shard on the node connection. - * It fails if the shard cannot be dropped. 
- */ -static void -DropDummyShard(MultiConnection *connection, ShardInterval *shardInterval) -{ - char *qualifiedShardName = ConstructQualifiedShardName(shardInterval); - StringInfo dropShardQuery = makeStringInfo(); - - /* Caller enforces that foreign tables cannot be split (use DROP_REGULAR_TABLE_COMMAND) */ - appendStringInfo(dropShardQuery, DROP_REGULAR_TABLE_COMMAND, - qualifiedShardName); - - /* - * Since the dummy shard is expected to be present on the given node, - * fail if it cannot be dropped during cleanup. - */ - ExecuteCriticalRemoteCommand( - connection, - dropShardQuery->data); -} - - /* * CreateReplicaIdentitiesForDummyShards creates replica indentities for split * dummy shards. diff --git a/src/backend/distributed/replication/multi_logical_replication.c b/src/backend/distributed/replication/multi_logical_replication.c index f91025b2e..7ee70f7f3 100644 --- a/src/backend/distributed/replication/multi_logical_replication.c +++ b/src/backend/distributed/replication/multi_logical_replication.c @@ -579,7 +579,7 @@ DropAllLogicalReplicationLeftovers(LogicalRepType type) /* * We close all connections that we opened for the dropping here. That * way we don't keep these connections open unnecessarily during the - * shard move (which can take a long time). + * 'LogicalRepType' operation (which can take a long time). */ CloseConnection(cleanupConnection); } diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index 343ac2f0b..b70dda310 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -1009,6 +1009,25 @@ RegisterCitusConfigVariables(void) 0, NULL, NULL, NULL); + DefineCustomBoolVariable( + "citus.defer_drop_after_shard_split", + gettext_noop("When enabled a shard split will mark the original shards " + "for deletion after a successful split, instead of deleting " + "them right away."), + gettext_noop("The deletion of a shard can sometimes run into a conflict with a " + "long running transactions on a the shard during the drop phase of " + "the shard split. This causes some splits to be rolled back after " + "resources have been spend on moving the shard. To prevent " + "conflicts this feature lets you skip the actual deletion till a " + "later point in time. When used one should set " + "citus.defer_shard_delete_interval to make sure defered deletions " + "will be executed"), + &DeferShardDeleteOnSplit, + true, + PGC_USERSET, + 0, + NULL, NULL, NULL); + DefineCustomIntVariable( "citus.defer_shard_delete_interval", gettext_noop("Sets the time to wait between background deletion for shards."), @@ -1834,6 +1853,36 @@ RegisterCitusConfigVariables(void) GUC_STANDARD, NULL, NULL, NULL); + DefineCustomIntVariable( + "citus.next_cleanup_record_id", + gettext_noop("Set the next cleanup record ID to use in operation creation."), + gettext_noop("Cleanup record IDs are normally generated using a sequence. If " + "next_cleanup_record_id is set to a non-zero value, cleanup record IDs will " + "instead be generated by incrementing from the value of " + "this GUC and this will be reflected in the GUC. 
@@ -1834,6 +1853,36 @@ RegisterCitusConfigVariables(void)
 		GUC_STANDARD,
 		NULL, NULL, NULL);

+	DefineCustomIntVariable(
+		"citus.next_cleanup_record_id",
+		gettext_noop("Set the next cleanup record ID to use in cleanup record creation."),
+		gettext_noop("Cleanup record IDs are normally generated using a sequence. If "
+					 "next_cleanup_record_id is set to a non-zero value, cleanup record IDs will "
+					 "instead be generated by incrementing from the value of "
+					 "this GUC and this will be reflected in the GUC. This is "
+					 "mainly useful to ensure consistent cleanup record IDs when running "
+					 "tests in parallel."),
+		&NextCleanupRecordId,
+		0, 0, INT_MAX,
+		PGC_USERSET,
+		GUC_NO_SHOW_ALL,
+		NULL, NULL, NULL);
+
+	DefineCustomIntVariable(
+		"citus.next_operation_id",
+		gettext_noop("Set the next operation ID to use in operation creation."),
+		gettext_noop("Operation IDs are normally generated using a sequence. If "
+					 "next_operation_id is set to a non-zero value, operation IDs will "
+					 "instead be generated by incrementing from the value of "
+					 "this GUC and this will be reflected in the GUC. This is "
+					 "mainly useful to ensure consistent operation IDs when running "
+					 "tests in parallel."),
+		&NextOperationId,
+		0, 0, INT_MAX,
+		PGC_USERSET,
+		GUC_NO_SHOW_ALL,
+		NULL, NULL, NULL);
+
 	DefineCustomIntVariable(
 		"citus.next_placement_id",
 		gettext_noop("Set the next placement ID to use in placement creation."),
diff --git a/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql b/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql
index 27c1e69d8..86b60cef1 100644
--- a/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql
+++ b/src/backend/distributed/sql/citus--11.0-4--11.1-1.sql
@@ -63,6 +63,7 @@ BEGIN
 END IF;
 END $check_citus$;

+#include "udfs/citus_prepare_pg_upgrade/11.1-1.sql"
 #include "udfs/citus_finish_pg_upgrade/11.1-1.sql"

 DROP FUNCTION pg_catalog.get_all_active_transactions(OUT datid oid, OUT process_id int, OUT initiator_node_identifier int4,
@@ -79,3 +80,28 @@ DROP FUNCTION pg_catalog.get_all_active_transactions(OUT datid oid, OUT process_

 DROP FUNCTION pg_catalog.isolate_tenant_to_new_shard(table_name regclass, tenant_id "any", cascade_option text);
 #include "udfs/isolate_tenant_to_new_shard/11.1-1.sql"
+
+-- Table of records to:
+-- 1) Clean up leftover resources after a failure
+-- 2) Defer the drop of old shard placements after a split.
+#include "udfs/citus_cleanup_orphaned_resources/11.1-1.sql"
+
+CREATE TABLE citus.pg_dist_cleanup (
+    record_id bigint primary key,
+    operation_id bigint not null,
+    object_type int not null,
+    object_name text not null,
+    node_group_id int not null,
+    policy_type int not null
+);
+ALTER TABLE citus.pg_dist_cleanup SET SCHEMA pg_catalog;
+GRANT SELECT ON pg_catalog.pg_dist_cleanup TO public;
+
+-- Sequences used to generate operation IDs and record IDs in pg_dist_cleanup.
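For orientation, after a deferred-drop split the new catalog table would hold a row shaped roughly like the one below until citus_cleanup_orphaned_resources() (or the background deferred-cleanup pass) removes it. The values are hypothetical; record_id and operation_id come from the sequences created next, object_type 1 corresponds to CLEANUP_OBJECT_SHARD_PLACEMENT and policy_type 2 to CLEANUP_DEFERRED_ON_SUCCESS in shard_cleaner.h:

    SELECT * FROM pg_catalog.pg_dist_cleanup;
     record_id | operation_id | object_type |      object_name      | node_group_id | policy_type
    -----------+--------------+-------------+-----------------------+---------------+-------------
             1 |            1 |           1 | public.sensors_102008 |             1 |           2
    (1 row)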
+CREATE SEQUENCE citus.pg_dist_operationid_seq; +ALTER SEQUENCE citus.pg_dist_operationid_seq SET SCHEMA pg_catalog; +GRANT SELECT ON pg_catalog.pg_dist_operationid_seq TO public; + +CREATE SEQUENCE citus.pg_dist_cleanup_recordid_seq; +ALTER SEQUENCE citus.pg_dist_cleanup_recordid_seq SET SCHEMA pg_catalog; +GRANT SELECT ON pg_catalog.pg_dist_cleanup_recordid_seq TO public; diff --git a/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql b/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql index c2ec9c482..d859ef1d6 100644 --- a/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql +++ b/src/backend/distributed/sql/downgrades/citus--11.1-1--11.0-4.sql @@ -100,3 +100,8 @@ DROP FUNCTION pg_catalog.isolate_tenant_to_new_shard(table_name regclass, tenant #include "../udfs/isolate_tenant_to_new_shard/8.0-1.sql" DROP FUNCTION pg_catalog.create_distributed_table_concurrently; DROP FUNCTION pg_catalog.citus_internal_delete_partition_metadata(regclass); + +DROP TABLE pg_catalog.pg_dist_cleanup; +DROP SEQUENCE pg_catalog.pg_dist_operationid_seq; +DROP SEQUENCE pg_catalog.pg_dist_cleanup_recordid_seq; +DROP PROCEDURE pg_catalog.citus_cleanup_orphaned_resources(); diff --git a/src/backend/distributed/sql/udfs/citus_cleanup_orphaned_resources/11.1-1.sql b/src/backend/distributed/sql/udfs/citus_cleanup_orphaned_resources/11.1-1.sql new file mode 100644 index 000000000..774412992 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_cleanup_orphaned_resources/11.1-1.sql @@ -0,0 +1,5 @@ +CREATE OR REPLACE PROCEDURE pg_catalog.citus_cleanup_orphaned_resources() + LANGUAGE C + AS 'citus', $$citus_cleanup_orphaned_resources$$; +COMMENT ON PROCEDURE pg_catalog.citus_cleanup_orphaned_resources() + IS 'cleanup orphaned resources'; diff --git a/src/backend/distributed/sql/udfs/citus_cleanup_orphaned_resources/latest.sql b/src/backend/distributed/sql/udfs/citus_cleanup_orphaned_resources/latest.sql new file mode 100644 index 000000000..774412992 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_cleanup_orphaned_resources/latest.sql @@ -0,0 +1,5 @@ +CREATE OR REPLACE PROCEDURE pg_catalog.citus_cleanup_orphaned_resources() + LANGUAGE C + AS 'citus', $$citus_cleanup_orphaned_resources$$; +COMMENT ON PROCEDURE pg_catalog.citus_cleanup_orphaned_resources() + IS 'cleanup orphaned resources'; diff --git a/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/11.1-1.sql b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/11.1-1.sql index ee24a37e9..1486f1431 100644 --- a/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/11.1-1.sql +++ b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/11.1-1.sql @@ -62,6 +62,7 @@ BEGIN INSERT INTO pg_catalog.pg_dist_local_group SELECT * FROM public.pg_dist_local_group; INSERT INTO pg_catalog.pg_dist_transaction SELECT * FROM public.pg_dist_transaction; INSERT INTO pg_catalog.pg_dist_colocation SELECT * FROM public.pg_dist_colocation; + INSERT INTO pg_catalog.pg_dist_cleanup SELECT * FROM public.pg_dist_cleanup; -- enterprise catalog tables INSERT INTO pg_catalog.pg_dist_authinfo SELECT * FROM public.pg_dist_authinfo; INSERT INTO pg_catalog.pg_dist_poolinfo SELECT * FROM public.pg_dist_poolinfo; @@ -91,7 +92,7 @@ BEGIN DROP TABLE public.pg_dist_shard; DROP TABLE public.pg_dist_transaction; DROP TABLE public.pg_dist_rebalance_strategy; - + DROP TABLE public.pg_dist_cleanup; -- -- reset sequences -- @@ -100,6 +101,8 @@ BEGIN PERFORM setval('pg_catalog.pg_dist_groupid_seq', (SELECT MAX(groupid)+1 AS max_group_id 
FROM pg_dist_node), false); PERFORM setval('pg_catalog.pg_dist_node_nodeid_seq', (SELECT MAX(nodeid)+1 AS max_node_id FROM pg_dist_node), false); PERFORM setval('pg_catalog.pg_dist_colocationid_seq', (SELECT MAX(colocationid)+1 AS max_colocation_id FROM pg_dist_colocation), false); + PERFORM setval('pg_catalog.pg_dist_operationid_seq', (SELECT MAX(operation_id)+1 AS max_operation_id FROM pg_dist_cleanup), false); + PERFORM setval('pg_catalog.pg_dist_cleanup_recordid_seq', (SELECT MAX(record_id)+1 AS max_record_id FROM pg_dist_cleanup), false); -- -- register triggers diff --git a/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql index ee24a37e9..1486f1431 100644 --- a/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql +++ b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql @@ -62,6 +62,7 @@ BEGIN INSERT INTO pg_catalog.pg_dist_local_group SELECT * FROM public.pg_dist_local_group; INSERT INTO pg_catalog.pg_dist_transaction SELECT * FROM public.pg_dist_transaction; INSERT INTO pg_catalog.pg_dist_colocation SELECT * FROM public.pg_dist_colocation; + INSERT INTO pg_catalog.pg_dist_cleanup SELECT * FROM public.pg_dist_cleanup; -- enterprise catalog tables INSERT INTO pg_catalog.pg_dist_authinfo SELECT * FROM public.pg_dist_authinfo; INSERT INTO pg_catalog.pg_dist_poolinfo SELECT * FROM public.pg_dist_poolinfo; @@ -91,7 +92,7 @@ BEGIN DROP TABLE public.pg_dist_shard; DROP TABLE public.pg_dist_transaction; DROP TABLE public.pg_dist_rebalance_strategy; - + DROP TABLE public.pg_dist_cleanup; -- -- reset sequences -- @@ -100,6 +101,8 @@ BEGIN PERFORM setval('pg_catalog.pg_dist_groupid_seq', (SELECT MAX(groupid)+1 AS max_group_id FROM pg_dist_node), false); PERFORM setval('pg_catalog.pg_dist_node_nodeid_seq', (SELECT MAX(nodeid)+1 AS max_node_id FROM pg_dist_node), false); PERFORM setval('pg_catalog.pg_dist_colocationid_seq', (SELECT MAX(colocationid)+1 AS max_colocation_id FROM pg_dist_colocation), false); + PERFORM setval('pg_catalog.pg_dist_operationid_seq', (SELECT MAX(operation_id)+1 AS max_operation_id FROM pg_dist_cleanup), false); + PERFORM setval('pg_catalog.pg_dist_cleanup_recordid_seq', (SELECT MAX(record_id)+1 AS max_record_id FROM pg_dist_cleanup), false); -- -- register triggers diff --git a/src/backend/distributed/sql/udfs/citus_prepare_pg_upgrade/11.1-1.sql b/src/backend/distributed/sql/udfs/citus_prepare_pg_upgrade/11.1-1.sql new file mode 100644 index 000000000..7f2f2d1a8 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_prepare_pg_upgrade/11.1-1.sql @@ -0,0 +1,76 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_prepare_pg_upgrade() + RETURNS void + LANGUAGE plpgsql + SET search_path = pg_catalog + AS $cppu$ +BEGIN + + DELETE FROM pg_depend WHERE + objid IN (SELECT oid FROM pg_proc WHERE proname = 'array_cat_agg') AND + refobjid IN (select oid from pg_extension where extname = 'citus'); + -- + -- We are dropping the aggregates because postgres 14 changed + -- array_cat type from anyarray to anycompatiblearray. When + -- upgrading to pg14, specifically when running pg_restore on + -- array_cat_agg we would get an error. So we drop the aggregate + -- and create the right one on citus_finish_pg_upgrade. 
+ + DROP AGGREGATE IF EXISTS array_cat_agg(anyarray); + DROP AGGREGATE IF EXISTS array_cat_agg(anycompatiblearray); + -- + -- Drop existing backup tables + -- + DROP TABLE IF EXISTS public.pg_dist_partition; + DROP TABLE IF EXISTS public.pg_dist_shard; + DROP TABLE IF EXISTS public.pg_dist_placement; + DROP TABLE IF EXISTS public.pg_dist_node_metadata; + DROP TABLE IF EXISTS public.pg_dist_node; + DROP TABLE IF EXISTS public.pg_dist_local_group; + DROP TABLE IF EXISTS public.pg_dist_transaction; + DROP TABLE IF EXISTS public.pg_dist_colocation; + DROP TABLE IF EXISTS public.pg_dist_authinfo; + DROP TABLE IF EXISTS public.pg_dist_poolinfo; + DROP TABLE IF EXISTS public.pg_dist_rebalance_strategy; + DROP TABLE IF EXISTS public.pg_dist_object; + DROP TABLE IF EXISTS public.pg_dist_cleanup; + + -- + -- backup citus catalog tables + -- + CREATE TABLE public.pg_dist_partition AS SELECT * FROM pg_catalog.pg_dist_partition; + CREATE TABLE public.pg_dist_shard AS SELECT * FROM pg_catalog.pg_dist_shard; + CREATE TABLE public.pg_dist_placement AS SELECT * FROM pg_catalog.pg_dist_placement; + CREATE TABLE public.pg_dist_node_metadata AS SELECT * FROM pg_catalog.pg_dist_node_metadata; + CREATE TABLE public.pg_dist_node AS SELECT * FROM pg_catalog.pg_dist_node; + CREATE TABLE public.pg_dist_local_group AS SELECT * FROM pg_catalog.pg_dist_local_group; + CREATE TABLE public.pg_dist_transaction AS SELECT * FROM pg_catalog.pg_dist_transaction; + CREATE TABLE public.pg_dist_colocation AS SELECT * FROM pg_catalog.pg_dist_colocation; + CREATE TABLE public.pg_dist_cleanup AS SELECT * FROM pg_catalog.pg_dist_cleanup; + -- enterprise catalog tables + CREATE TABLE public.pg_dist_authinfo AS SELECT * FROM pg_catalog.pg_dist_authinfo; + CREATE TABLE public.pg_dist_poolinfo AS SELECT * FROM pg_catalog.pg_dist_poolinfo; + CREATE TABLE public.pg_dist_rebalance_strategy AS SELECT + name, + default_strategy, + shard_cost_function::regprocedure::text, + node_capacity_function::regprocedure::text, + shard_allowed_on_node_function::regprocedure::text, + default_threshold, + minimum_threshold, + improvement_threshold + FROM pg_catalog.pg_dist_rebalance_strategy; + + -- store upgrade stable identifiers on pg_dist_object catalog + CREATE TABLE public.pg_dist_object AS SELECT + address.type, + address.object_names, + address.object_args, + objects.distribution_argument_index, + objects.colocationid + FROM pg_catalog.pg_dist_object objects, + pg_catalog.pg_identify_object_as_address(objects.classid, objects.objid, objects.objsubid) address; +END; +$cppu$; + +COMMENT ON FUNCTION pg_catalog.citus_prepare_pg_upgrade() + IS 'perform tasks to copy citus settings to a location that could later be restored after pg_upgrade is done'; diff --git a/src/backend/distributed/sql/udfs/citus_prepare_pg_upgrade/latest.sql b/src/backend/distributed/sql/udfs/citus_prepare_pg_upgrade/latest.sql index ff7e5d43e..7f2f2d1a8 100644 --- a/src/backend/distributed/sql/udfs/citus_prepare_pg_upgrade/latest.sql +++ b/src/backend/distributed/sql/udfs/citus_prepare_pg_upgrade/latest.sql @@ -32,6 +32,7 @@ BEGIN DROP TABLE IF EXISTS public.pg_dist_poolinfo; DROP TABLE IF EXISTS public.pg_dist_rebalance_strategy; DROP TABLE IF EXISTS public.pg_dist_object; + DROP TABLE IF EXISTS public.pg_dist_cleanup; -- -- backup citus catalog tables @@ -44,6 +45,7 @@ BEGIN CREATE TABLE public.pg_dist_local_group AS SELECT * FROM pg_catalog.pg_dist_local_group; CREATE TABLE public.pg_dist_transaction AS SELECT * FROM pg_catalog.pg_dist_transaction; CREATE TABLE 
public.pg_dist_colocation AS SELECT * FROM pg_catalog.pg_dist_colocation; + CREATE TABLE public.pg_dist_cleanup AS SELECT * FROM pg_catalog.pg_dist_cleanup; -- enterprise catalog tables CREATE TABLE public.pg_dist_authinfo AS SELECT * FROM pg_catalog.pg_dist_authinfo; CREATE TABLE public.pg_dist_poolinfo AS SELECT * FROM pg_catalog.pg_dist_poolinfo; diff --git a/src/backend/distributed/transaction/transaction_management.c b/src/backend/distributed/transaction/transaction_management.c index d3f41273d..82c3fb5f7 100644 --- a/src/backend/distributed/transaction/transaction_management.c +++ b/src/backend/distributed/transaction/transaction_management.c @@ -38,6 +38,7 @@ #include "distributed/placement_connection.h" #include "distributed/relation_access_tracking.h" #include "distributed/shared_connection_stats.h" +#include "distributed/shard_cleaner.h" #include "distributed/subplan_execution.h" #include "distributed/version_compat.h" #include "distributed/worker_log_messages.h" @@ -600,6 +601,7 @@ ResetGlobalVariables() NodeMetadataSyncOnCommit = false; InTopLevelDelegatedFunctionCall = false; InTableTypeConversionFunctionCall = false; + CurrentOperationId = INVALID_OPERATION_ID; ResetWorkerErrorIndication(); memset(&AllowedDistributionColumnValue, 0, sizeof(AllowedDistributionColumn)); diff --git a/src/backend/distributed/utils/resource_lock.c b/src/backend/distributed/utils/resource_lock.c index a4187aa65..167bd2b12 100644 --- a/src/backend/distributed/utils/resource_lock.c +++ b/src/backend/distributed/utils/resource_lock.c @@ -513,7 +513,11 @@ LockPlacementCleanup(void) LOCKTAG tag; const bool sessionLock = false; const bool dontWait = false; - SET_LOCKTAG_PLACEMENT_CLEANUP(tag); + + /* Moves acquire lock with a constant operation id CITUS_SHARD_MOVE. + * This will change as we add support for parallel moves. + */ + SET_LOCKTAG_CITUS_OPERATION(tag, CITUS_SHARD_MOVE); (void) LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait); } @@ -528,7 +532,11 @@ TryLockPlacementCleanup(void) LOCKTAG tag; const bool sessionLock = false; const bool dontWait = true; - SET_LOCKTAG_PLACEMENT_CLEANUP(tag); + + /* Moves acquire lock with a constant operation id CITUS_SHARD_MOVE. + * This will change as we add support for parallel moves. 
+ */ + SET_LOCKTAG_CITUS_OPERATION(tag, CITUS_SHARD_MOVE); bool lockAcquired = LockAcquire(&tag, ExclusiveLock, sessionLock, dontWait); return lockAcquired; } diff --git a/src/include/distributed/metadata_cache.h b/src/include/distributed/metadata_cache.h index 2b55cf8d9..a6b3732c4 100644 --- a/src/include/distributed/metadata_cache.h +++ b/src/include/distributed/metadata_cache.h @@ -223,6 +223,7 @@ extern WorkerNode * LookupNodeForGroup(int32 groupId); extern Oid CitusCatalogNamespaceId(void); /* relation oids */ +extern Oid DistCleanupRelationId(void); extern Oid DistColocationRelationId(void); extern Oid DistColocationConfigurationIndexId(void); extern Oid DistPartitionRelationId(void); @@ -247,6 +248,7 @@ extern Oid DistTransactionRelationId(void); extern Oid DistTransactionGroupIndexId(void); extern Oid DistPlacementGroupidIndexId(void); extern Oid DistObjectPrimaryKeyIndexId(void); +extern Oid DistCleanupPrimaryKeyIndexId(void); /* type oids */ extern Oid LookupTypeOid(char *schemaNameSting, char *typeNameString); diff --git a/src/include/distributed/pg_dist_cleanup.h b/src/include/distributed/pg_dist_cleanup.h new file mode 100644 index 000000000..5dfcc2074 --- /dev/null +++ b/src/include/distributed/pg_dist_cleanup.h @@ -0,0 +1,33 @@ +/*------------------------------------------------------------------------- + * + * pg_dist_cleanup.h + * definition of the relation that holds the resources to be cleaned up + * in cluster (pg_dist_cleanup). + * + * Copyright (c) Citus Data, Inc. + * + *------------------------------------------------------------------------- + */ + +#ifndef PG_DIST_CLEANUP_H +#define PG_DIST_CLEANUP_H + +/* ---------------- + * compiler constants for pg_dist_cleanup + * ---------------- + */ + +#define Natts_pg_dist_cleanup 6 +#define Anum_pg_dist_cleanup_record_id 1 +#define Anum_pg_dist_cleanup_operation_id 2 +#define Anum_pg_dist_cleanup_object_type 3 +#define Anum_pg_dist_cleanup_object_name 4 +#define Anum_pg_dist_cleanup_node_group_id 5 +#define Anum_pg_dist_cleanup_policy_type 6 + +#define PG_CATALOG "pg_catalog" +#define PG_DIST_CLEANUP "pg_dist_cleanup" +#define OPERATIONID_SEQUENCE_NAME "pg_dist_operationid_seq" +#define CLEANUPRECORDID_SEQUENCE_NAME "pg_dist_cleanup_recordid_seq" + +#endif /* PG_DIST_CLEANUP_H */ diff --git a/src/include/distributed/resource_lock.h b/src/include/distributed/resource_lock.h index 375f6abbf..0a7ea674b 100644 --- a/src/include/distributed/resource_lock.h +++ b/src/include/distributed/resource_lock.h @@ -40,7 +40,7 @@ typedef enum AdvisoryLocktagClass ADV_LOCKTAG_CLASS_CITUS_REBALANCE_COLOCATION = 7, ADV_LOCKTAG_CLASS_CITUS_COLOCATED_SHARDS_METADATA = 8, ADV_LOCKTAG_CLASS_CITUS_OPERATIONS = 9, - ADV_LOCKTAG_CLASS_CITUS_PLACEMENT_CLEANUP = 10, + ADV_LOCKTAG_CLASS_CITUS_CLEANUP_OPERATION_ID = 10, ADV_LOCKTAG_CLASS_CITUS_LOGICAL_REPLICATION = 12, ADV_LOCKTAG_CLASS_CITUS_REBALANCE_PLACEMENT_COLOCATION = 13 } AdvisoryLocktagClass; @@ -51,7 +51,8 @@ typedef enum CitusOperations CITUS_TRANSACTION_RECOVERY = 0, CITUS_NONBLOCKING_SPLIT = 1, CITUS_CREATE_DISTRIBUTED_TABLE_CONCURRENTLY = 2, - CITUS_CREATE_COLOCATION_DEFAULT = 3 + CITUS_CREATE_COLOCATION_DEFAULT = 3, + CITUS_SHARD_MOVE = 4 } CitusOperations; /* reuse advisory lock, but with different, unused field 4 (4)*/ @@ -111,12 +112,12 @@ typedef enum CitusOperations /* reuse advisory lock, but with different, unused field 4 (10) * Also it has the database hardcoded to MyDatabaseId, to ensure the locks * are local to each database */ -#define SET_LOCKTAG_PLACEMENT_CLEANUP(tag) \ 
+#define SET_LOCKTAG_CLEANUP_OPERATION_ID(tag, operationId) \
 	SET_LOCKTAG_ADVISORY(tag, \
 						 MyDatabaseId, \
-						 (uint32) 0, \
-						 (uint32) 0, \
-						 ADV_LOCKTAG_CLASS_CITUS_PLACEMENT_CLEANUP)
+						 (uint32) ((operationId) >> 32), \
+						 (uint32) operationId, \
+						 ADV_LOCKTAG_CLASS_CITUS_CLEANUP_OPERATION_ID)

 /* reuse advisory lock, but with different, unused field 4 (12)
  * Also it has the database hardcoded to MyDatabaseId, to ensure the locks
diff --git a/src/include/distributed/shard_cleaner.h b/src/include/distributed/shard_cleaner.h
index 8a98254f9..df316f493 100644
--- a/src/include/distributed/shard_cleaner.h
+++ b/src/include/distributed/shard_cleaner.h
@@ -17,8 +17,98 @@ extern bool DeferShardDeleteOnMove;
 extern double DesiredPercentFreeAfterMove;
 extern bool CheckAvailableSpaceBeforeMove;
+extern bool DeferShardDeleteOnSplit;
+extern int NextOperationId;
+extern int NextCleanupRecordId;
+
 extern int TryDropOrphanedShards(bool waitForLocks);
-extern int DropOrphanedShards(bool waitForLocks);
 extern void DropOrphanedShardsInSeparateTransaction(void);

+/* Members for cleanup infrastructure */
+typedef uint64 OperationId;
+extern OperationId CurrentOperationId;
+
+/*
+ * CleanupObject represents the type of object in a cleanup record.
+ */
+typedef enum CleanupObject
+{
+	CLEANUP_OBJECT_INVALID = 0,
+	CLEANUP_OBJECT_SHARD_PLACEMENT = 1
+} CleanupObject;
+
+/*
+ * CleanupPolicy represents the policy type for cleanup records.
+ */
+typedef enum CleanupPolicy
+{
+	/*
+	 * Resources that are transient and always need cleanup after the operation is completed.
+	 * (Example: Dummy Shards for Non-Blocking splits)
+	 */
+	CLEANUP_ALWAYS = 0,

+	/*
+	 * Resources that are cleaned up only on failure.
+	 * (Example: Split Children for Blocking/Non-Blocking splits)
+	 */
+	CLEANUP_ON_FAILURE = 1,

+	/*
+	 * Resources that need 'deferred' cleanup, only on success.
+	 * (Example: Parent shard being split for Blocking/Non-Blocking splits)
+	 */
+	CLEANUP_DEFERRED_ON_SUCCESS = 2,
+} CleanupPolicy;

+/* Global Constants */
+#define INVALID_OPERATION_ID 0
+#define INVALID_CLEANUP_RECORD_ID 0

+/* APIs for cleanup infrastructure */

+/*
+ * RegisterOperationNeedingCleanup is called by an operation to register
+ * for cleanup.
+ */
+extern OperationId RegisterOperationNeedingCleanup(void);

+/*
+ * InsertCleanupRecordInCurrentTransaction inserts a new pg_dist_cleanup entry
+ * as part of the current transaction.
+ *
+ * This is primarily useful for deferred cleanup (CLEANUP_DEFERRED_ON_SUCCESS)
+ * scenarios, since the records would roll back in case of failure.
+ */
+extern void InsertCleanupRecordInCurrentTransaction(CleanupObject objectType,
+													char *objectName,
+													int nodeGroupId,
+													CleanupPolicy policy);

+/*
+ * InsertCleanupRecordInSubtransaction inserts a new pg_dist_cleanup entry
+ * in a separate transaction to ensure the record persists after rollback.
+ *
+ * This is used in scenarios where we need to clean up resources on operation
+ * completion (CLEANUP_ALWAYS) or on failure (CLEANUP_ON_FAILURE).
+ */
+extern void InsertCleanupRecordInSubtransaction(CleanupObject objectType,
+												char *objectName,
+												int nodeGroupId,
+												CleanupPolicy policy);

+/*
+ * FinalizeOperationNeedingCleanupOnFailure is called by an operation to signal
+ * completion on failure. This will trigger cleanup of appropriate resources
+ * and cleanup records.
+ */
+extern void FinalizeOperationNeedingCleanupOnFailure(void);

+/*
+ * FinalizeOperationNeedingCleanupOnSuccess is called by an operation to signal
+ * completion on success. This will trigger cleanup of appropriate resources
+ * and cleanup records.
+ */
+extern void FinalizeOperationNeedingCleanupOnSuccess(void);
+
 #endif /*CITUS_SHARD_CLEANER_H */
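To make the intended call pattern concrete, here is a minimal sketch of how a split-style operation could drive the API declared above (illustrative only: SplitOperationSketch and its parameters are made-up placeholders, not code from this patch; only the functions, enums, and globals from shard_cleaner.h are real):

    /* illustrative sketch, not part of this patch */
    #include "postgres.h"

    #include "distributed/shard_cleaner.h"

    static void
    SplitOperationSketch(char *qualifiedShardName, int nodeGroupId)
    {
    	/* allocate an operation ID; it is also remembered in CurrentOperationId */
    	RegisterOperationNeedingCleanup();

    	/*
    	 * Transient resources (such as the dummy shards of a non-blocking split)
    	 * are registered outside the current transaction so the record survives
    	 * a rollback and is therefore always cleaned up (CLEANUP_ALWAYS).
    	 */
    	InsertCleanupRecordInSubtransaction(CLEANUP_OBJECT_SHARD_PLACEMENT,
    										qualifiedShardName,
    										nodeGroupId,
    										CLEANUP_ALWAYS);

    	/*
    	 * The original placement should only be dropped if the operation commits,
    	 * so its record rolls back together with the operation
    	 * (CLEANUP_DEFERRED_ON_SUCCESS).
    	 */
    	InsertCleanupRecordInCurrentTransaction(CLEANUP_OBJECT_SHARD_PLACEMENT,
    											qualifiedShardName,
    											nodeGroupId,
    											CLEANUP_DEFERRED_ON_SUCCESS);

    	/*
    	 * ... perform the actual split work here; an error path would call
    	 * FinalizeOperationNeedingCleanupOnFailure() instead ...
    	 */

    	FinalizeOperationNeedingCleanupOnSuccess();
    }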
diff --git a/src/include/distributed/shard_split.h b/src/include/distributed/shard_split.h
index 2154ff446..5bd0c7686 100644
--- a/src/include/distributed/shard_split.h
+++ b/src/include/distributed/shard_split.h
@@ -47,8 +47,6 @@ extern void SplitShard(SplitMode splitMode,
 					   List *colocatedShardIntervalList,
 					   uint32 targetColocationId);

-extern void DropShardList(List *shardIntervalList);
-
 extern SplitMode LookupSplitMode(Oid shardTransferModeOid);

 extern void ErrorIfMultipleNonblockingMoveSplitInTheSameTransaction(void);
diff --git a/src/test/regress/enterprise_failure_schedule b/src/test/regress/enterprise_failure_schedule
index cc7b4f2d4..faa1a4c66 100644
--- a/src/test/regress/enterprise_failure_schedule
+++ b/src/test/regress/enterprise_failure_schedule
@@ -11,3 +11,4 @@ test: failure_offline_move_shard_placement
 test: failure_tenant_isolation
 test: failure_tenant_isolation_nonblocking
 test: check_mx
+test: failure_split_cleanup
diff --git a/src/test/regress/enterprise_split_schedule b/src/test/regress/enterprise_split_schedule
index 6f216ea44..fd9788a42 100644
--- a/src/test/regress/enterprise_split_schedule
+++ b/src/test/regress/enterprise_split_schedule
@@ -7,3 +7,4 @@ test: tablespace
 # Split tests go here.
 test: citus_split_shard_by_split_points_negative
 test: citus_split_shard_by_split_points
+test: citus_split_shard_by_split_points_deferred_drop
diff --git a/src/test/regress/expected/citus_non_blocking_split_columnar.out b/src/test/regress/expected/citus_non_blocking_split_columnar.out
index e8365ec01..c58fec967 100644
--- a/src/test/regress/expected/citus_non_blocking_split_columnar.out
+++ b/src/test/regress/expected/citus_non_blocking_split_columnar.out
@@ -1,9 +1,17 @@
-CREATE SCHEMA "citus_split_test_schema_columnar_partitioned";
-SET search_path TO "citus_split_test_schema_columnar_partitioned";
+CREATE SCHEMA "citus_split_non_blocking_schema_columnar_partitioned";
+SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned";
 SET citus.next_shard_id TO 8970000;
 SET citus.next_placement_id TO 8770000;
 SET citus.shard_count TO 1;
 SET citus.shard_replication_factor TO 1;
+-- Disable Deferred drop auto cleanup to avoid flaky tests.
+ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1;
+SELECT pg_reload_conf();
+ pg_reload_conf
+---------------------------------------------------------------------
+ t
+(1 row)
+
 -- BEGIN: Create table to split, along with other co-located tables. Add indexes, statistics etc.
CREATE TABLE sensors( measureid integer, @@ -88,7 +96,7 @@ SET citus.shard_replication_factor TO 1; INNER JOIN pg_dist_node node ON placement.groupid = node.groupid INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid INNER JOIN pg_catalog.pg_namespace ns ON cls.relnamespace = ns.oid - WHERE node.noderole = 'primary' AND ns.nspname = 'citus_split_test_schema_columnar_partitioned' + WHERE node.noderole = 'primary' AND ns.nspname = 'citus_split_non_blocking_schema_columnar_partitioned' ORDER BY logicalrelid, shardminvalue::BIGINT, nodeport; shardid | logicalrelid | shardminvalue | shardmaxvalue | nodename | nodeport --------------------------------------------------------------------- @@ -135,7 +143,7 @@ SET citus.shard_replication_factor TO 1; -- END: Load data into tables -- BEGIN: Show the current state on workers \c - - - :worker_1_port - SET search_path TO "citus_split_test_schema_columnar_partitioned"; + SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned"; SET citus.show_shards_for_app_name_prefixes = '*'; SELECT tbl.relname, fk."Constraint", fk."Definition" FROM pg_catalog.pg_class tbl @@ -169,37 +177,37 @@ SET citus.shard_replication_factor TO 1; (22 rows) SELECT tablename, indexdef FROM pg_indexes WHERE tablename like '%_89%' ORDER BY 1,2; - tablename | indexdef + tablename | indexdef --------------------------------------------------------------------- - colocated_dist_table_8970008 | CREATE UNIQUE INDEX colocated_dist_table_pkey_8970008 ON citus_split_test_schema_columnar_partitioned.colocated_dist_table_8970008 USING btree (measureid) - colocated_partitioned_table_2020_01_01_8970010 | CREATE UNIQUE INDEX colocated_partitioned_table_2020_01_01_pkey_8970010 ON citus_split_test_schema_columnar_partitioned.colocated_partitioned_table_2020_01_01_8970010 USING btree (measureid, eventdatetime) - colocated_partitioned_table_8970009 | CREATE UNIQUE INDEX colocated_partitioned_table_pkey_8970009 ON ONLY citus_split_test_schema_columnar_partitioned.colocated_partitioned_table_8970009 USING btree (measureid, eventdatetime) - reference_table_8970011 | CREATE UNIQUE INDEX reference_table_pkey_8970011 ON citus_split_test_schema_columnar_partitioned.reference_table_8970011 USING btree (measureid) - sensors_2020_01_01_8970002 | CREATE INDEX index_on_child_8970002 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8970002 USING btree (lower((measure_data)::text)) - sensors_2020_01_01_8970002 | CREATE INDEX sensors_2020_01_01_lower_idx_8970002 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8970002 USING btree (lower((measureid)::text)) - sensors_2020_01_01_8970002 | CREATE UNIQUE INDEX sensors_2020_01_01_pkey_8970002 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8970002 USING btree (measureid, eventdatetime, measure_data) - sensors_8970000 | CREATE INDEX index_on_parent_8970000 ON ONLY citus_split_test_schema_columnar_partitioned.sensors_8970000 USING btree (lower((measureid)::text)) - sensors_8970000 | CREATE UNIQUE INDEX sensors_pkey_8970000 ON ONLY citus_split_test_schema_columnar_partitioned.sensors_8970000 USING btree (measureid, eventdatetime, measure_data) - sensors_news_8970003 | CREATE INDEX sensors_news_lower_idx_8970003 ON citus_split_test_schema_columnar_partitioned.sensors_news_8970003 USING btree (lower((measureid)::text)) - sensors_news_8970003 | CREATE UNIQUE INDEX sensors_news_pkey_8970003 ON citus_split_test_schema_columnar_partitioned.sensors_news_8970003 USING btree (measureid, 
eventdatetime, measure_data) - sensors_old_8970001 | CREATE INDEX sensors_old_lower_idx_8970001 ON citus_split_test_schema_columnar_partitioned.sensors_old_8970001 USING btree (lower((measureid)::text)) - sensors_old_8970001 | CREATE UNIQUE INDEX sensors_old_pkey_8970001 ON citus_split_test_schema_columnar_partitioned.sensors_old_8970001 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_2020_01_01_8970006 | CREATE INDEX index_on_child_columnar_8970006 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8970006 USING btree (lower((measure_data)::text)) - sensorscolumnar_2020_01_01_8970006 | CREATE INDEX sensorscolumnar_2020_01_01_lower_idx_8970006 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8970006 USING btree (lower((measureid)::text)) - sensorscolumnar_2020_01_01_8970006 | CREATE UNIQUE INDEX sensorscolumnar_2020_01_01_pkey_8970006 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8970006 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_8970004 | CREATE INDEX index_on_parent_columnar_8970004 ON ONLY citus_split_test_schema_columnar_partitioned.sensorscolumnar_8970004 USING btree (lower((measureid)::text)) - sensorscolumnar_8970004 | CREATE UNIQUE INDEX sensorscolumnar_pkey_8970004 ON ONLY citus_split_test_schema_columnar_partitioned.sensorscolumnar_8970004 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_news_8970007 | CREATE INDEX sensorscolumnar_news_lower_idx_8970007 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_news_8970007 USING btree (lower((measureid)::text)) - sensorscolumnar_news_8970007 | CREATE UNIQUE INDEX sensorscolumnar_news_pkey_8970007 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_news_8970007 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_old_8970005 | CREATE INDEX sensorscolumnar_old_lower_idx_8970005 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_old_8970005 USING btree (lower((measureid)::text)) - sensorscolumnar_old_8970005 | CREATE UNIQUE INDEX sensorscolumnar_old_pkey_8970005 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_old_8970005 USING btree (measureid, eventdatetime, measure_data) + colocated_dist_table_8970008 | CREATE UNIQUE INDEX colocated_dist_table_pkey_8970008 ON citus_split_non_blocking_schema_columnar_partitioned.colocated_dist_table_8970008 USING btree (measureid) + colocated_partitioned_table_2020_01_01_8970010 | CREATE UNIQUE INDEX colocated_partitioned_table_2020_01_01_pkey_8970010 ON citus_split_non_blocking_schema_columnar_partitioned.colocated_partitioned_table_2020_01_01_8970010 USING btree (measureid, eventdatetime) + colocated_partitioned_table_8970009 | CREATE UNIQUE INDEX colocated_partitioned_table_pkey_8970009 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.colocated_partitioned_table_8970009 USING btree (measureid, eventdatetime) + reference_table_8970011 | CREATE UNIQUE INDEX reference_table_pkey_8970011 ON citus_split_non_blocking_schema_columnar_partitioned.reference_table_8970011 USING btree (measureid) + sensors_2020_01_01_8970002 | CREATE INDEX index_on_child_8970002 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8970002 USING btree (lower((measure_data)::text)) + sensors_2020_01_01_8970002 | CREATE INDEX sensors_2020_01_01_lower_idx_8970002 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8970002 USING btree 
(lower((measureid)::text)) + sensors_2020_01_01_8970002 | CREATE UNIQUE INDEX sensors_2020_01_01_pkey_8970002 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8970002 USING btree (measureid, eventdatetime, measure_data) + sensors_8970000 | CREATE INDEX index_on_parent_8970000 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensors_8970000 USING btree (lower((measureid)::text)) + sensors_8970000 | CREATE UNIQUE INDEX sensors_pkey_8970000 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensors_8970000 USING btree (measureid, eventdatetime, measure_data) + sensors_news_8970003 | CREATE INDEX sensors_news_lower_idx_8970003 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_news_8970003 USING btree (lower((measureid)::text)) + sensors_news_8970003 | CREATE UNIQUE INDEX sensors_news_pkey_8970003 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_news_8970003 USING btree (measureid, eventdatetime, measure_data) + sensors_old_8970001 | CREATE INDEX sensors_old_lower_idx_8970001 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_old_8970001 USING btree (lower((measureid)::text)) + sensors_old_8970001 | CREATE UNIQUE INDEX sensors_old_pkey_8970001 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_old_8970001 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_2020_01_01_8970006 | CREATE INDEX index_on_child_columnar_8970006 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8970006 USING btree (lower((measure_data)::text)) + sensorscolumnar_2020_01_01_8970006 | CREATE INDEX sensorscolumnar_2020_01_01_lower_idx_8970006 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8970006 USING btree (lower((measureid)::text)) + sensorscolumnar_2020_01_01_8970006 | CREATE UNIQUE INDEX sensorscolumnar_2020_01_01_pkey_8970006 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8970006 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_8970004 | CREATE INDEX index_on_parent_columnar_8970004 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_8970004 USING btree (lower((measureid)::text)) + sensorscolumnar_8970004 | CREATE UNIQUE INDEX sensorscolumnar_pkey_8970004 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_8970004 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_news_8970007 | CREATE INDEX sensorscolumnar_news_lower_idx_8970007 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_news_8970007 USING btree (lower((measureid)::text)) + sensorscolumnar_news_8970007 | CREATE UNIQUE INDEX sensorscolumnar_news_pkey_8970007 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_news_8970007 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_old_8970005 | CREATE INDEX sensorscolumnar_old_lower_idx_8970005 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_old_8970005 USING btree (lower((measureid)::text)) + sensorscolumnar_old_8970005 | CREATE UNIQUE INDEX sensorscolumnar_old_pkey_8970005 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_old_8970005 USING btree (measureid, eventdatetime, measure_data) (22 rows) SELECT stxname FROM pg_statistic_ext WHERE stxnamespace IN ( SELECT oid FROM pg_namespace - WHERE nspname IN ('citus_split_test_schema_columnar_partitioned') + WHERE nspname IN 
('citus_split_non_blocking_schema_columnar_partitioned') ) ORDER BY stxname ASC; stxname @@ -215,7 +223,7 @@ SET citus.shard_replication_factor TO 1; (8 rows) \c - - - :worker_2_port - SET search_path TO "citus_split_test_schema_columnar_partitioned"; + SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned"; SET citus.show_shards_for_app_name_prefixes = '*'; SELECT tbl.relname, fk."Constraint", fk."Definition" FROM pg_catalog.pg_class tbl @@ -227,16 +235,16 @@ SET citus.shard_replication_factor TO 1; (0 rows) SELECT tablename, indexdef FROM pg_indexes WHERE tablename like '%_89%' ORDER BY 1,2; - tablename | indexdef + tablename | indexdef --------------------------------------------------------------------- - reference_table_8970011 | CREATE UNIQUE INDEX reference_table_pkey_8970011 ON citus_split_test_schema_columnar_partitioned.reference_table_8970011 USING btree (measureid) + reference_table_8970011 | CREATE UNIQUE INDEX reference_table_pkey_8970011 ON citus_split_non_blocking_schema_columnar_partitioned.reference_table_8970011 USING btree (measureid) (1 row) SELECT stxname FROM pg_statistic_ext WHERE stxnamespace IN ( SELECT oid FROM pg_namespace - WHERE nspname IN ('citus_split_test_schema_columnar_partitioned') + WHERE nspname IN ('citus_split_non_blocking_schema_columnar_partitioned') ) ORDER BY stxname ASC; stxname @@ -250,7 +258,7 @@ SET citus.shard_replication_factor TO 1; -- END: Show the current state on workers -- BEGIN: Split a shard along its co-located shards \c - - - :master_port - SET search_path TO "citus_split_test_schema_columnar_partitioned"; + SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned"; SET citus.next_shard_id TO 8999000; SELECT nodeid AS worker_1_node FROM pg_dist_node WHERE nodeport=:worker_1_port \gset SELECT nodeid AS worker_2_node FROM pg_dist_node WHERE nodeport=:worker_2_port \gset @@ -265,6 +273,10 @@ SET citus.shard_replication_factor TO 1; (1 row) -- END: Split a shard along its co-located shards +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 13 orphaned resources +-- END: Perform deferred cleanup. 
-- BEGIN: Validate Shard Info and Data SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport FROM pg_dist_shard AS shard @@ -272,7 +284,7 @@ SET citus.shard_replication_factor TO 1; INNER JOIN pg_dist_node node ON placement.groupid = node.groupid INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid INNER JOIN pg_catalog.pg_namespace ns ON cls.relnamespace = ns.oid - WHERE node.noderole = 'primary' AND ns.nspname = 'citus_split_test_schema_columnar_partitioned' + WHERE node.noderole = 'primary' AND ns.nspname = 'citus_split_non_blocking_schema_columnar_partitioned' ORDER BY logicalrelid, shardminvalue::BIGINT, nodeport; shardid | logicalrelid | shardminvalue | shardmaxvalue | nodename | nodeport --------------------------------------------------------------------- @@ -335,7 +347,7 @@ SET citus.shard_replication_factor TO 1; -- END: Validate Shard Info and Data -- BEGIN: Show the updated state on workers \c - - - :worker_1_port - SET search_path TO "citus_split_test_schema_columnar_partitioned"; + SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned"; SET citus.show_shards_for_app_name_prefixes = '*'; SELECT tbl.relname, fk."Constraint", fk."Definition" FROM pg_catalog.pg_class tbl @@ -369,37 +381,37 @@ SET citus.shard_replication_factor TO 1; (22 rows) SELECT tablename, indexdef FROM pg_indexes WHERE tablename like '%_89%' ORDER BY 1,2; - tablename | indexdef + tablename | indexdef --------------------------------------------------------------------- - colocated_dist_table_8999016 | CREATE UNIQUE INDEX colocated_dist_table_pkey_8999016 ON citus_split_test_schema_columnar_partitioned.colocated_dist_table_8999016 USING btree (measureid) - colocated_partitioned_table_2020_01_01_8999020 | CREATE UNIQUE INDEX colocated_partitioned_table_2020_01_01_pkey_8999020 ON citus_split_test_schema_columnar_partitioned.colocated_partitioned_table_2020_01_01_8999020 USING btree (measureid, eventdatetime) - colocated_partitioned_table_8999018 | CREATE UNIQUE INDEX colocated_partitioned_table_pkey_8999018 ON ONLY citus_split_test_schema_columnar_partitioned.colocated_partitioned_table_8999018 USING btree (measureid, eventdatetime) - reference_table_8970011 | CREATE UNIQUE INDEX reference_table_pkey_8970011 ON citus_split_test_schema_columnar_partitioned.reference_table_8970011 USING btree (measureid) - sensors_2020_01_01_8999004 | CREATE INDEX index_on_child_8999004 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8999004 USING btree (lower((measure_data)::text)) - sensors_2020_01_01_8999004 | CREATE INDEX sensors_2020_01_01_lower_idx_8999004 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8999004 USING btree (lower((measureid)::text)) - sensors_2020_01_01_8999004 | CREATE UNIQUE INDEX sensors_2020_01_01_pkey_8999004 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8999004 USING btree (measureid, eventdatetime, measure_data) - sensors_8999000 | CREATE INDEX index_on_parent_8999000 ON ONLY citus_split_test_schema_columnar_partitioned.sensors_8999000 USING btree (lower((measureid)::text)) - sensors_8999000 | CREATE UNIQUE INDEX sensors_pkey_8999000 ON ONLY citus_split_test_schema_columnar_partitioned.sensors_8999000 USING btree (measureid, eventdatetime, measure_data) - sensors_news_8999006 | CREATE INDEX sensors_news_lower_idx_8999006 ON citus_split_test_schema_columnar_partitioned.sensors_news_8999006 USING btree (lower((measureid)::text)) - sensors_news_8999006 | CREATE UNIQUE INDEX 
sensors_news_pkey_8999006 ON citus_split_test_schema_columnar_partitioned.sensors_news_8999006 USING btree (measureid, eventdatetime, measure_data) - sensors_old_8999002 | CREATE INDEX sensors_old_lower_idx_8999002 ON citus_split_test_schema_columnar_partitioned.sensors_old_8999002 USING btree (lower((measureid)::text)) - sensors_old_8999002 | CREATE UNIQUE INDEX sensors_old_pkey_8999002 ON citus_split_test_schema_columnar_partitioned.sensors_old_8999002 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_2020_01_01_8999012 | CREATE INDEX index_on_child_columnar_8999012 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999012 USING btree (lower((measure_data)::text)) - sensorscolumnar_2020_01_01_8999012 | CREATE INDEX sensorscolumnar_2020_01_01_lower_idx_8999012 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999012 USING btree (lower((measureid)::text)) - sensorscolumnar_2020_01_01_8999012 | CREATE UNIQUE INDEX sensorscolumnar_2020_01_01_pkey_8999012 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999012 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_8999008 | CREATE INDEX index_on_parent_columnar_8999008 ON ONLY citus_split_test_schema_columnar_partitioned.sensorscolumnar_8999008 USING btree (lower((measureid)::text)) - sensorscolumnar_8999008 | CREATE UNIQUE INDEX sensorscolumnar_pkey_8999008 ON ONLY citus_split_test_schema_columnar_partitioned.sensorscolumnar_8999008 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_news_8999014 | CREATE INDEX sensorscolumnar_news_lower_idx_8999014 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_news_8999014 USING btree (lower((measureid)::text)) - sensorscolumnar_news_8999014 | CREATE UNIQUE INDEX sensorscolumnar_news_pkey_8999014 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_news_8999014 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_old_8999010 | CREATE INDEX sensorscolumnar_old_lower_idx_8999010 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_old_8999010 USING btree (lower((measureid)::text)) - sensorscolumnar_old_8999010 | CREATE UNIQUE INDEX sensorscolumnar_old_pkey_8999010 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_old_8999010 USING btree (measureid, eventdatetime, measure_data) + colocated_dist_table_8999016 | CREATE UNIQUE INDEX colocated_dist_table_pkey_8999016 ON citus_split_non_blocking_schema_columnar_partitioned.colocated_dist_table_8999016 USING btree (measureid) + colocated_partitioned_table_2020_01_01_8999020 | CREATE UNIQUE INDEX colocated_partitioned_table_2020_01_01_pkey_8999020 ON citus_split_non_blocking_schema_columnar_partitioned.colocated_partitioned_table_2020_01_01_8999020 USING btree (measureid, eventdatetime) + colocated_partitioned_table_8999018 | CREATE UNIQUE INDEX colocated_partitioned_table_pkey_8999018 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.colocated_partitioned_table_8999018 USING btree (measureid, eventdatetime) + reference_table_8970011 | CREATE UNIQUE INDEX reference_table_pkey_8970011 ON citus_split_non_blocking_schema_columnar_partitioned.reference_table_8970011 USING btree (measureid) + sensors_2020_01_01_8999004 | CREATE INDEX index_on_child_8999004 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8999004 USING btree (lower((measure_data)::text)) + sensors_2020_01_01_8999004 | CREATE INDEX sensors_2020_01_01_lower_idx_8999004 
ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8999004 USING btree (lower((measureid)::text)) + sensors_2020_01_01_8999004 | CREATE UNIQUE INDEX sensors_2020_01_01_pkey_8999004 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8999004 USING btree (measureid, eventdatetime, measure_data) + sensors_8999000 | CREATE INDEX index_on_parent_8999000 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensors_8999000 USING btree (lower((measureid)::text)) + sensors_8999000 | CREATE UNIQUE INDEX sensors_pkey_8999000 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensors_8999000 USING btree (measureid, eventdatetime, measure_data) + sensors_news_8999006 | CREATE INDEX sensors_news_lower_idx_8999006 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_news_8999006 USING btree (lower((measureid)::text)) + sensors_news_8999006 | CREATE UNIQUE INDEX sensors_news_pkey_8999006 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_news_8999006 USING btree (measureid, eventdatetime, measure_data) + sensors_old_8999002 | CREATE INDEX sensors_old_lower_idx_8999002 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_old_8999002 USING btree (lower((measureid)::text)) + sensors_old_8999002 | CREATE UNIQUE INDEX sensors_old_pkey_8999002 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_old_8999002 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_2020_01_01_8999012 | CREATE INDEX index_on_child_columnar_8999012 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999012 USING btree (lower((measure_data)::text)) + sensorscolumnar_2020_01_01_8999012 | CREATE INDEX sensorscolumnar_2020_01_01_lower_idx_8999012 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999012 USING btree (lower((measureid)::text)) + sensorscolumnar_2020_01_01_8999012 | CREATE UNIQUE INDEX sensorscolumnar_2020_01_01_pkey_8999012 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999012 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_8999008 | CREATE INDEX index_on_parent_columnar_8999008 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_8999008 USING btree (lower((measureid)::text)) + sensorscolumnar_8999008 | CREATE UNIQUE INDEX sensorscolumnar_pkey_8999008 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_8999008 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_news_8999014 | CREATE INDEX sensorscolumnar_news_lower_idx_8999014 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_news_8999014 USING btree (lower((measureid)::text)) + sensorscolumnar_news_8999014 | CREATE UNIQUE INDEX sensorscolumnar_news_pkey_8999014 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_news_8999014 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_old_8999010 | CREATE INDEX sensorscolumnar_old_lower_idx_8999010 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_old_8999010 USING btree (lower((measureid)::text)) + sensorscolumnar_old_8999010 | CREATE UNIQUE INDEX sensorscolumnar_old_pkey_8999010 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_old_8999010 USING btree (measureid, eventdatetime, measure_data) (22 rows) SELECT stxname FROM pg_statistic_ext WHERE stxnamespace IN ( SELECT oid FROM pg_namespace - WHERE nspname IN 
('citus_split_test_schema_columnar_partitioned') + WHERE nspname IN ('citus_split_non_blocking_schema_columnar_partitioned') ) ORDER BY stxname ASC; stxname @@ -415,7 +427,7 @@ SET citus.shard_replication_factor TO 1; (8 rows) \c - - - :worker_2_port - SET search_path TO "citus_split_test_schema_columnar_partitioned"; + SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned"; SET citus.show_shards_for_app_name_prefixes = '*'; SELECT tbl.relname, fk."Constraint", fk."Definition" FROM pg_catalog.pg_class tbl @@ -449,37 +461,37 @@ SET citus.shard_replication_factor TO 1; (22 rows) SELECT tablename, indexdef FROM pg_indexes WHERE tablename like '%_89%' ORDER BY 1,2; - tablename | indexdef + tablename | indexdef --------------------------------------------------------------------- - colocated_dist_table_8999017 | CREATE UNIQUE INDEX colocated_dist_table_pkey_8999017 ON citus_split_test_schema_columnar_partitioned.colocated_dist_table_8999017 USING btree (measureid) - colocated_partitioned_table_2020_01_01_8999021 | CREATE UNIQUE INDEX colocated_partitioned_table_2020_01_01_pkey_8999021 ON citus_split_test_schema_columnar_partitioned.colocated_partitioned_table_2020_01_01_8999021 USING btree (measureid, eventdatetime) - colocated_partitioned_table_8999019 | CREATE UNIQUE INDEX colocated_partitioned_table_pkey_8999019 ON ONLY citus_split_test_schema_columnar_partitioned.colocated_partitioned_table_8999019 USING btree (measureid, eventdatetime) - reference_table_8970011 | CREATE UNIQUE INDEX reference_table_pkey_8970011 ON citus_split_test_schema_columnar_partitioned.reference_table_8970011 USING btree (measureid) - sensors_2020_01_01_8999005 | CREATE INDEX index_on_child_8999005 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8999005 USING btree (lower((measure_data)::text)) - sensors_2020_01_01_8999005 | CREATE INDEX sensors_2020_01_01_lower_idx_8999005 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8999005 USING btree (lower((measureid)::text)) - sensors_2020_01_01_8999005 | CREATE UNIQUE INDEX sensors_2020_01_01_pkey_8999005 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8999005 USING btree (measureid, eventdatetime, measure_data) - sensors_8999001 | CREATE INDEX index_on_parent_8999001 ON ONLY citus_split_test_schema_columnar_partitioned.sensors_8999001 USING btree (lower((measureid)::text)) - sensors_8999001 | CREATE UNIQUE INDEX sensors_pkey_8999001 ON ONLY citus_split_test_schema_columnar_partitioned.sensors_8999001 USING btree (measureid, eventdatetime, measure_data) - sensors_news_8999007 | CREATE INDEX sensors_news_lower_idx_8999007 ON citus_split_test_schema_columnar_partitioned.sensors_news_8999007 USING btree (lower((measureid)::text)) - sensors_news_8999007 | CREATE UNIQUE INDEX sensors_news_pkey_8999007 ON citus_split_test_schema_columnar_partitioned.sensors_news_8999007 USING btree (measureid, eventdatetime, measure_data) - sensors_old_8999003 | CREATE INDEX sensors_old_lower_idx_8999003 ON citus_split_test_schema_columnar_partitioned.sensors_old_8999003 USING btree (lower((measureid)::text)) - sensors_old_8999003 | CREATE UNIQUE INDEX sensors_old_pkey_8999003 ON citus_split_test_schema_columnar_partitioned.sensors_old_8999003 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_2020_01_01_8999013 | CREATE INDEX index_on_child_columnar_8999013 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999013 USING btree (lower((measure_data)::text)) - 
sensorscolumnar_2020_01_01_8999013 | CREATE INDEX sensorscolumnar_2020_01_01_lower_idx_8999013 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999013 USING btree (lower((measureid)::text)) - sensorscolumnar_2020_01_01_8999013 | CREATE UNIQUE INDEX sensorscolumnar_2020_01_01_pkey_8999013 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999013 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_8999009 | CREATE INDEX index_on_parent_columnar_8999009 ON ONLY citus_split_test_schema_columnar_partitioned.sensorscolumnar_8999009 USING btree (lower((measureid)::text)) - sensorscolumnar_8999009 | CREATE UNIQUE INDEX sensorscolumnar_pkey_8999009 ON ONLY citus_split_test_schema_columnar_partitioned.sensorscolumnar_8999009 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_news_8999015 | CREATE INDEX sensorscolumnar_news_lower_idx_8999015 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_news_8999015 USING btree (lower((measureid)::text)) - sensorscolumnar_news_8999015 | CREATE UNIQUE INDEX sensorscolumnar_news_pkey_8999015 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_news_8999015 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_old_8999011 | CREATE INDEX sensorscolumnar_old_lower_idx_8999011 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_old_8999011 USING btree (lower((measureid)::text)) - sensorscolumnar_old_8999011 | CREATE UNIQUE INDEX sensorscolumnar_old_pkey_8999011 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_old_8999011 USING btree (measureid, eventdatetime, measure_data) + colocated_dist_table_8999017 | CREATE UNIQUE INDEX colocated_dist_table_pkey_8999017 ON citus_split_non_blocking_schema_columnar_partitioned.colocated_dist_table_8999017 USING btree (measureid) + colocated_partitioned_table_2020_01_01_8999021 | CREATE UNIQUE INDEX colocated_partitioned_table_2020_01_01_pkey_8999021 ON citus_split_non_blocking_schema_columnar_partitioned.colocated_partitioned_table_2020_01_01_8999021 USING btree (measureid, eventdatetime) + colocated_partitioned_table_8999019 | CREATE UNIQUE INDEX colocated_partitioned_table_pkey_8999019 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.colocated_partitioned_table_8999019 USING btree (measureid, eventdatetime) + reference_table_8970011 | CREATE UNIQUE INDEX reference_table_pkey_8970011 ON citus_split_non_blocking_schema_columnar_partitioned.reference_table_8970011 USING btree (measureid) + sensors_2020_01_01_8999005 | CREATE INDEX index_on_child_8999005 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8999005 USING btree (lower((measure_data)::text)) + sensors_2020_01_01_8999005 | CREATE INDEX sensors_2020_01_01_lower_idx_8999005 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8999005 USING btree (lower((measureid)::text)) + sensors_2020_01_01_8999005 | CREATE UNIQUE INDEX sensors_2020_01_01_pkey_8999005 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8999005 USING btree (measureid, eventdatetime, measure_data) + sensors_8999001 | CREATE INDEX index_on_parent_8999001 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensors_8999001 USING btree (lower((measureid)::text)) + sensors_8999001 | CREATE UNIQUE INDEX sensors_pkey_8999001 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensors_8999001 USING btree (measureid, eventdatetime, measure_data) + 
sensors_news_8999007 | CREATE INDEX sensors_news_lower_idx_8999007 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_news_8999007 USING btree (lower((measureid)::text)) + sensors_news_8999007 | CREATE UNIQUE INDEX sensors_news_pkey_8999007 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_news_8999007 USING btree (measureid, eventdatetime, measure_data) + sensors_old_8999003 | CREATE INDEX sensors_old_lower_idx_8999003 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_old_8999003 USING btree (lower((measureid)::text)) + sensors_old_8999003 | CREATE UNIQUE INDEX sensors_old_pkey_8999003 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_old_8999003 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_2020_01_01_8999013 | CREATE INDEX index_on_child_columnar_8999013 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999013 USING btree (lower((measure_data)::text)) + sensorscolumnar_2020_01_01_8999013 | CREATE INDEX sensorscolumnar_2020_01_01_lower_idx_8999013 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999013 USING btree (lower((measureid)::text)) + sensorscolumnar_2020_01_01_8999013 | CREATE UNIQUE INDEX sensorscolumnar_2020_01_01_pkey_8999013 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999013 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_8999009 | CREATE INDEX index_on_parent_columnar_8999009 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_8999009 USING btree (lower((measureid)::text)) + sensorscolumnar_8999009 | CREATE UNIQUE INDEX sensorscolumnar_pkey_8999009 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_8999009 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_news_8999015 | CREATE INDEX sensorscolumnar_news_lower_idx_8999015 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_news_8999015 USING btree (lower((measureid)::text)) + sensorscolumnar_news_8999015 | CREATE UNIQUE INDEX sensorscolumnar_news_pkey_8999015 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_news_8999015 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_old_8999011 | CREATE INDEX sensorscolumnar_old_lower_idx_8999011 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_old_8999011 USING btree (lower((measureid)::text)) + sensorscolumnar_old_8999011 | CREATE UNIQUE INDEX sensorscolumnar_old_pkey_8999011 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_old_8999011 USING btree (measureid, eventdatetime, measure_data) (22 rows) SELECT stxname FROM pg_statistic_ext WHERE stxnamespace IN ( SELECT oid FROM pg_namespace - WHERE nspname IN ('citus_split_test_schema_columnar_partitioned') + WHERE nspname IN ('citus_split_non_blocking_schema_columnar_partitioned') ) ORDER BY stxname ASC; stxname @@ -497,7 +509,7 @@ SET citus.shard_replication_factor TO 1; -- END: Show the updated state on workers -- BEGIN: Split a partition table directly \c - - - :master_port - SET search_path TO "citus_split_test_schema_columnar_partitioned"; + SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned"; SET citus.next_shard_id TO 8999100; SELECT nodeid AS worker_1_node FROM pg_dist_node WHERE nodeport=:worker_1_port \gset SELECT nodeid AS worker_2_node FROM pg_dist_node WHERE nodeport=:worker_2_port \gset @@ -512,6 +524,10 @@ SET 
citus.shard_replication_factor TO 1; (1 row) -- END: Split a partition table directly +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 11 orphaned resources +-- END: Perform deferred cleanup. -- BEGIN: Validate Shard Info and Data SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport FROM pg_dist_shard AS shard @@ -519,7 +535,7 @@ SET citus.shard_replication_factor TO 1; INNER JOIN pg_dist_node node ON placement.groupid = node.groupid INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid INNER JOIN pg_catalog.pg_namespace ns ON cls.relnamespace = ns.oid - WHERE node.noderole = 'primary' AND ns.nspname = 'citus_split_test_schema_columnar_partitioned' + WHERE node.noderole = 'primary' AND ns.nspname = 'citus_split_non_blocking_schema_columnar_partitioned' ORDER BY logicalrelid, shardminvalue::BIGINT, nodeport; shardid | logicalrelid | shardminvalue | shardmaxvalue | nodename | nodeport --------------------------------------------------------------------- @@ -593,7 +609,7 @@ SET citus.shard_replication_factor TO 1; -- END: Validate Shard Info and Data -- BEGIN: Show the updated state on workers \c - - - :worker_1_port - SET search_path TO "citus_split_test_schema_columnar_partitioned"; + SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned"; SET citus.show_shards_for_app_name_prefixes = '*'; SELECT tbl.relname, fk."Constraint", fk."Definition" FROM pg_catalog.pg_class tbl @@ -627,37 +643,37 @@ SET citus.shard_replication_factor TO 1; (22 rows) SELECT tablename, indexdef FROM pg_indexes WHERE tablename like '%_89%' ORDER BY 1,2; - tablename | indexdef + tablename | indexdef --------------------------------------------------------------------- - colocated_dist_table_8999116 | CREATE UNIQUE INDEX colocated_dist_table_pkey_8999116 ON citus_split_test_schema_columnar_partitioned.colocated_dist_table_8999116 USING btree (measureid) - colocated_partitioned_table_2020_01_01_8999120 | CREATE UNIQUE INDEX colocated_partitioned_table_2020_01_01_pkey_8999120 ON citus_split_test_schema_columnar_partitioned.colocated_partitioned_table_2020_01_01_8999120 USING btree (measureid, eventdatetime) - colocated_partitioned_table_8999118 | CREATE UNIQUE INDEX colocated_partitioned_table_pkey_8999118 ON ONLY citus_split_test_schema_columnar_partitioned.colocated_partitioned_table_8999118 USING btree (measureid, eventdatetime) - reference_table_8970011 | CREATE UNIQUE INDEX reference_table_pkey_8970011 ON citus_split_test_schema_columnar_partitioned.reference_table_8970011 USING btree (measureid) - sensors_2020_01_01_8999104 | CREATE INDEX index_on_child_8999104 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8999104 USING btree (lower((measure_data)::text)) - sensors_2020_01_01_8999104 | CREATE INDEX sensors_2020_01_01_lower_idx_8999104 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8999104 USING btree (lower((measureid)::text)) - sensors_2020_01_01_8999104 | CREATE UNIQUE INDEX sensors_2020_01_01_pkey_8999104 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8999104 USING btree (measureid, eventdatetime, measure_data) - sensors_8999100 | CREATE INDEX index_on_parent_8999100 ON ONLY citus_split_test_schema_columnar_partitioned.sensors_8999100 USING btree (lower((measureid)::text)) - sensors_8999100 | CREATE UNIQUE INDEX sensors_pkey_8999100 ON ONLY citus_split_test_schema_columnar_partitioned.sensors_8999100 USING btree (measureid, 
eventdatetime, measure_data) - sensors_news_8999106 | CREATE INDEX sensors_news_lower_idx_8999106 ON citus_split_test_schema_columnar_partitioned.sensors_news_8999106 USING btree (lower((measureid)::text)) - sensors_news_8999106 | CREATE UNIQUE INDEX sensors_news_pkey_8999106 ON citus_split_test_schema_columnar_partitioned.sensors_news_8999106 USING btree (measureid, eventdatetime, measure_data) - sensors_old_8999102 | CREATE INDEX sensors_old_lower_idx_8999102 ON citus_split_test_schema_columnar_partitioned.sensors_old_8999102 USING btree (lower((measureid)::text)) - sensors_old_8999102 | CREATE UNIQUE INDEX sensors_old_pkey_8999102 ON citus_split_test_schema_columnar_partitioned.sensors_old_8999102 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_2020_01_01_8999112 | CREATE INDEX index_on_child_columnar_8999112 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999112 USING btree (lower((measure_data)::text)) - sensorscolumnar_2020_01_01_8999112 | CREATE INDEX sensorscolumnar_2020_01_01_lower_idx_8999112 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999112 USING btree (lower((measureid)::text)) - sensorscolumnar_2020_01_01_8999112 | CREATE UNIQUE INDEX sensorscolumnar_2020_01_01_pkey_8999112 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999112 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_8999108 | CREATE INDEX index_on_parent_columnar_8999108 ON ONLY citus_split_test_schema_columnar_partitioned.sensorscolumnar_8999108 USING btree (lower((measureid)::text)) - sensorscolumnar_8999108 | CREATE UNIQUE INDEX sensorscolumnar_pkey_8999108 ON ONLY citus_split_test_schema_columnar_partitioned.sensorscolumnar_8999108 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_news_8999114 | CREATE INDEX sensorscolumnar_news_lower_idx_8999114 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_news_8999114 USING btree (lower((measureid)::text)) - sensorscolumnar_news_8999114 | CREATE UNIQUE INDEX sensorscolumnar_news_pkey_8999114 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_news_8999114 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_old_8999110 | CREATE INDEX sensorscolumnar_old_lower_idx_8999110 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_old_8999110 USING btree (lower((measureid)::text)) - sensorscolumnar_old_8999110 | CREATE UNIQUE INDEX sensorscolumnar_old_pkey_8999110 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_old_8999110 USING btree (measureid, eventdatetime, measure_data) + colocated_dist_table_8999116 | CREATE UNIQUE INDEX colocated_dist_table_pkey_8999116 ON citus_split_non_blocking_schema_columnar_partitioned.colocated_dist_table_8999116 USING btree (measureid) + colocated_partitioned_table_2020_01_01_8999120 | CREATE UNIQUE INDEX colocated_partitioned_table_2020_01_01_pkey_8999120 ON citus_split_non_blocking_schema_columnar_partitioned.colocated_partitioned_table_2020_01_01_8999120 USING btree (measureid, eventdatetime) + colocated_partitioned_table_8999118 | CREATE UNIQUE INDEX colocated_partitioned_table_pkey_8999118 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.colocated_partitioned_table_8999118 USING btree (measureid, eventdatetime) + reference_table_8970011 | CREATE UNIQUE INDEX reference_table_pkey_8970011 ON citus_split_non_blocking_schema_columnar_partitioned.reference_table_8970011 USING btree (measureid) + 
sensors_2020_01_01_8999104 | CREATE INDEX index_on_child_8999104 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8999104 USING btree (lower((measure_data)::text)) + sensors_2020_01_01_8999104 | CREATE INDEX sensors_2020_01_01_lower_idx_8999104 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8999104 USING btree (lower((measureid)::text)) + sensors_2020_01_01_8999104 | CREATE UNIQUE INDEX sensors_2020_01_01_pkey_8999104 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8999104 USING btree (measureid, eventdatetime, measure_data) + sensors_8999100 | CREATE INDEX index_on_parent_8999100 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensors_8999100 USING btree (lower((measureid)::text)) + sensors_8999100 | CREATE UNIQUE INDEX sensors_pkey_8999100 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensors_8999100 USING btree (measureid, eventdatetime, measure_data) + sensors_news_8999106 | CREATE INDEX sensors_news_lower_idx_8999106 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_news_8999106 USING btree (lower((measureid)::text)) + sensors_news_8999106 | CREATE UNIQUE INDEX sensors_news_pkey_8999106 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_news_8999106 USING btree (measureid, eventdatetime, measure_data) + sensors_old_8999102 | CREATE INDEX sensors_old_lower_idx_8999102 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_old_8999102 USING btree (lower((measureid)::text)) + sensors_old_8999102 | CREATE UNIQUE INDEX sensors_old_pkey_8999102 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_old_8999102 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_2020_01_01_8999112 | CREATE INDEX index_on_child_columnar_8999112 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999112 USING btree (lower((measure_data)::text)) + sensorscolumnar_2020_01_01_8999112 | CREATE INDEX sensorscolumnar_2020_01_01_lower_idx_8999112 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999112 USING btree (lower((measureid)::text)) + sensorscolumnar_2020_01_01_8999112 | CREATE UNIQUE INDEX sensorscolumnar_2020_01_01_pkey_8999112 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999112 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_8999108 | CREATE INDEX index_on_parent_columnar_8999108 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_8999108 USING btree (lower((measureid)::text)) + sensorscolumnar_8999108 | CREATE UNIQUE INDEX sensorscolumnar_pkey_8999108 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_8999108 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_news_8999114 | CREATE INDEX sensorscolumnar_news_lower_idx_8999114 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_news_8999114 USING btree (lower((measureid)::text)) + sensorscolumnar_news_8999114 | CREATE UNIQUE INDEX sensorscolumnar_news_pkey_8999114 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_news_8999114 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_old_8999110 | CREATE INDEX sensorscolumnar_old_lower_idx_8999110 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_old_8999110 USING btree (lower((measureid)::text)) + sensorscolumnar_old_8999110 | CREATE UNIQUE INDEX 
sensorscolumnar_old_pkey_8999110 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_old_8999110 USING btree (measureid, eventdatetime, measure_data) (22 rows) SELECT stxname FROM pg_statistic_ext WHERE stxnamespace IN ( SELECT oid FROM pg_namespace - WHERE nspname IN ('citus_split_test_schema_columnar_partitioned') + WHERE nspname IN ('citus_split_non_blocking_schema_columnar_partitioned') ) ORDER BY stxname ASC; stxname @@ -673,7 +689,7 @@ SET citus.shard_replication_factor TO 1; (8 rows) \c - - - :worker_2_port - SET search_path TO "citus_split_test_schema_columnar_partitioned"; + SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned"; SET citus.show_shards_for_app_name_prefixes = '*'; SELECT tbl.relname, fk."Constraint", fk."Definition" FROM pg_catalog.pg_class tbl @@ -729,58 +745,58 @@ SET citus.shard_replication_factor TO 1; (44 rows) SELECT tablename, indexdef FROM pg_indexes WHERE tablename like '%_89%' ORDER BY 1,2; - tablename | indexdef + tablename | indexdef --------------------------------------------------------------------- - colocated_dist_table_8999017 | CREATE UNIQUE INDEX colocated_dist_table_pkey_8999017 ON citus_split_test_schema_columnar_partitioned.colocated_dist_table_8999017 USING btree (measureid) - colocated_dist_table_8999117 | CREATE UNIQUE INDEX colocated_dist_table_pkey_8999117 ON citus_split_test_schema_columnar_partitioned.colocated_dist_table_8999117 USING btree (measureid) - colocated_partitioned_table_2020_01_01_8999021 | CREATE UNIQUE INDEX colocated_partitioned_table_2020_01_01_pkey_8999021 ON citus_split_test_schema_columnar_partitioned.colocated_partitioned_table_2020_01_01_8999021 USING btree (measureid, eventdatetime) - colocated_partitioned_table_2020_01_01_8999121 | CREATE UNIQUE INDEX colocated_partitioned_table_2020_01_01_pkey_8999121 ON citus_split_test_schema_columnar_partitioned.colocated_partitioned_table_2020_01_01_8999121 USING btree (measureid, eventdatetime) - colocated_partitioned_table_8999019 | CREATE UNIQUE INDEX colocated_partitioned_table_pkey_8999019 ON ONLY citus_split_test_schema_columnar_partitioned.colocated_partitioned_table_8999019 USING btree (measureid, eventdatetime) - colocated_partitioned_table_8999119 | CREATE UNIQUE INDEX colocated_partitioned_table_pkey_8999119 ON ONLY citus_split_test_schema_columnar_partitioned.colocated_partitioned_table_8999119 USING btree (measureid, eventdatetime) - reference_table_8970011 | CREATE UNIQUE INDEX reference_table_pkey_8970011 ON citus_split_test_schema_columnar_partitioned.reference_table_8970011 USING btree (measureid) - sensors_2020_01_01_8999005 | CREATE INDEX index_on_child_8999005 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8999005 USING btree (lower((measure_data)::text)) - sensors_2020_01_01_8999005 | CREATE INDEX sensors_2020_01_01_lower_idx_8999005 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8999005 USING btree (lower((measureid)::text)) - sensors_2020_01_01_8999005 | CREATE UNIQUE INDEX sensors_2020_01_01_pkey_8999005 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8999005 USING btree (measureid, eventdatetime, measure_data) - sensors_2020_01_01_8999105 | CREATE INDEX index_on_child_8999105 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8999105 USING btree (lower((measure_data)::text)) - sensors_2020_01_01_8999105 | CREATE INDEX sensors_2020_01_01_lower_idx_8999105 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8999105 USING 
btree (lower((measureid)::text)) - sensors_2020_01_01_8999105 | CREATE UNIQUE INDEX sensors_2020_01_01_pkey_8999105 ON citus_split_test_schema_columnar_partitioned.sensors_2020_01_01_8999105 USING btree (measureid, eventdatetime, measure_data) - sensors_8999001 | CREATE INDEX index_on_parent_8999001 ON ONLY citus_split_test_schema_columnar_partitioned.sensors_8999001 USING btree (lower((measureid)::text)) - sensors_8999001 | CREATE UNIQUE INDEX sensors_pkey_8999001 ON ONLY citus_split_test_schema_columnar_partitioned.sensors_8999001 USING btree (measureid, eventdatetime, measure_data) - sensors_8999101 | CREATE INDEX index_on_parent_8999101 ON ONLY citus_split_test_schema_columnar_partitioned.sensors_8999101 USING btree (lower((measureid)::text)) - sensors_8999101 | CREATE UNIQUE INDEX sensors_pkey_8999101 ON ONLY citus_split_test_schema_columnar_partitioned.sensors_8999101 USING btree (measureid, eventdatetime, measure_data) - sensors_news_8999007 | CREATE INDEX sensors_news_lower_idx_8999007 ON citus_split_test_schema_columnar_partitioned.sensors_news_8999007 USING btree (lower((measureid)::text)) - sensors_news_8999007 | CREATE UNIQUE INDEX sensors_news_pkey_8999007 ON citus_split_test_schema_columnar_partitioned.sensors_news_8999007 USING btree (measureid, eventdatetime, measure_data) - sensors_news_8999107 | CREATE INDEX sensors_news_lower_idx_8999107 ON citus_split_test_schema_columnar_partitioned.sensors_news_8999107 USING btree (lower((measureid)::text)) - sensors_news_8999107 | CREATE UNIQUE INDEX sensors_news_pkey_8999107 ON citus_split_test_schema_columnar_partitioned.sensors_news_8999107 USING btree (measureid, eventdatetime, measure_data) - sensors_old_8999003 | CREATE INDEX sensors_old_lower_idx_8999003 ON citus_split_test_schema_columnar_partitioned.sensors_old_8999003 USING btree (lower((measureid)::text)) - sensors_old_8999003 | CREATE UNIQUE INDEX sensors_old_pkey_8999003 ON citus_split_test_schema_columnar_partitioned.sensors_old_8999003 USING btree (measureid, eventdatetime, measure_data) - sensors_old_8999103 | CREATE INDEX sensors_old_lower_idx_8999103 ON citus_split_test_schema_columnar_partitioned.sensors_old_8999103 USING btree (lower((measureid)::text)) - sensors_old_8999103 | CREATE UNIQUE INDEX sensors_old_pkey_8999103 ON citus_split_test_schema_columnar_partitioned.sensors_old_8999103 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_2020_01_01_8999013 | CREATE INDEX index_on_child_columnar_8999013 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999013 USING btree (lower((measure_data)::text)) - sensorscolumnar_2020_01_01_8999013 | CREATE INDEX sensorscolumnar_2020_01_01_lower_idx_8999013 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999013 USING btree (lower((measureid)::text)) - sensorscolumnar_2020_01_01_8999013 | CREATE UNIQUE INDEX sensorscolumnar_2020_01_01_pkey_8999013 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999013 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_2020_01_01_8999113 | CREATE INDEX index_on_child_columnar_8999113 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999113 USING btree (lower((measure_data)::text)) - sensorscolumnar_2020_01_01_8999113 | CREATE INDEX sensorscolumnar_2020_01_01_lower_idx_8999113 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999113 USING btree (lower((measureid)::text)) - sensorscolumnar_2020_01_01_8999113 | CREATE UNIQUE 
INDEX sensorscolumnar_2020_01_01_pkey_8999113 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999113 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_8999009 | CREATE INDEX index_on_parent_columnar_8999009 ON ONLY citus_split_test_schema_columnar_partitioned.sensorscolumnar_8999009 USING btree (lower((measureid)::text)) - sensorscolumnar_8999009 | CREATE UNIQUE INDEX sensorscolumnar_pkey_8999009 ON ONLY citus_split_test_schema_columnar_partitioned.sensorscolumnar_8999009 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_8999109 | CREATE INDEX index_on_parent_columnar_8999109 ON ONLY citus_split_test_schema_columnar_partitioned.sensorscolumnar_8999109 USING btree (lower((measureid)::text)) - sensorscolumnar_8999109 | CREATE UNIQUE INDEX sensorscolumnar_pkey_8999109 ON ONLY citus_split_test_schema_columnar_partitioned.sensorscolumnar_8999109 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_news_8999015 | CREATE INDEX sensorscolumnar_news_lower_idx_8999015 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_news_8999015 USING btree (lower((measureid)::text)) - sensorscolumnar_news_8999015 | CREATE UNIQUE INDEX sensorscolumnar_news_pkey_8999015 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_news_8999015 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_news_8999115 | CREATE INDEX sensorscolumnar_news_lower_idx_8999115 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_news_8999115 USING btree (lower((measureid)::text)) - sensorscolumnar_news_8999115 | CREATE UNIQUE INDEX sensorscolumnar_news_pkey_8999115 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_news_8999115 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_old_8999011 | CREATE INDEX sensorscolumnar_old_lower_idx_8999011 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_old_8999011 USING btree (lower((measureid)::text)) - sensorscolumnar_old_8999011 | CREATE UNIQUE INDEX sensorscolumnar_old_pkey_8999011 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_old_8999011 USING btree (measureid, eventdatetime, measure_data) - sensorscolumnar_old_8999111 | CREATE INDEX sensorscolumnar_old_lower_idx_8999111 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_old_8999111 USING btree (lower((measureid)::text)) - sensorscolumnar_old_8999111 | CREATE UNIQUE INDEX sensorscolumnar_old_pkey_8999111 ON citus_split_test_schema_columnar_partitioned.sensorscolumnar_old_8999111 USING btree (measureid, eventdatetime, measure_data) + colocated_dist_table_8999017 | CREATE UNIQUE INDEX colocated_dist_table_pkey_8999017 ON citus_split_non_blocking_schema_columnar_partitioned.colocated_dist_table_8999017 USING btree (measureid) + colocated_dist_table_8999117 | CREATE UNIQUE INDEX colocated_dist_table_pkey_8999117 ON citus_split_non_blocking_schema_columnar_partitioned.colocated_dist_table_8999117 USING btree (measureid) + colocated_partitioned_table_2020_01_01_8999021 | CREATE UNIQUE INDEX colocated_partitioned_table_2020_01_01_pkey_8999021 ON citus_split_non_blocking_schema_columnar_partitioned.colocated_partitioned_table_2020_01_01_8999021 USING btree (measureid, eventdatetime) + colocated_partitioned_table_2020_01_01_8999121 | CREATE UNIQUE INDEX colocated_partitioned_table_2020_01_01_pkey_8999121 ON citus_split_non_blocking_schema_columnar_partitioned.colocated_partitioned_table_2020_01_01_8999121 USING btree (measureid, 
eventdatetime) + colocated_partitioned_table_8999019 | CREATE UNIQUE INDEX colocated_partitioned_table_pkey_8999019 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.colocated_partitioned_table_8999019 USING btree (measureid, eventdatetime) + colocated_partitioned_table_8999119 | CREATE UNIQUE INDEX colocated_partitioned_table_pkey_8999119 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.colocated_partitioned_table_8999119 USING btree (measureid, eventdatetime) + reference_table_8970011 | CREATE UNIQUE INDEX reference_table_pkey_8970011 ON citus_split_non_blocking_schema_columnar_partitioned.reference_table_8970011 USING btree (measureid) + sensors_2020_01_01_8999005 | CREATE INDEX index_on_child_8999005 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8999005 USING btree (lower((measure_data)::text)) + sensors_2020_01_01_8999005 | CREATE INDEX sensors_2020_01_01_lower_idx_8999005 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8999005 USING btree (lower((measureid)::text)) + sensors_2020_01_01_8999005 | CREATE UNIQUE INDEX sensors_2020_01_01_pkey_8999005 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8999005 USING btree (measureid, eventdatetime, measure_data) + sensors_2020_01_01_8999105 | CREATE INDEX index_on_child_8999105 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8999105 USING btree (lower((measure_data)::text)) + sensors_2020_01_01_8999105 | CREATE INDEX sensors_2020_01_01_lower_idx_8999105 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8999105 USING btree (lower((measureid)::text)) + sensors_2020_01_01_8999105 | CREATE UNIQUE INDEX sensors_2020_01_01_pkey_8999105 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_2020_01_01_8999105 USING btree (measureid, eventdatetime, measure_data) + sensors_8999001 | CREATE INDEX index_on_parent_8999001 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensors_8999001 USING btree (lower((measureid)::text)) + sensors_8999001 | CREATE UNIQUE INDEX sensors_pkey_8999001 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensors_8999001 USING btree (measureid, eventdatetime, measure_data) + sensors_8999101 | CREATE INDEX index_on_parent_8999101 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensors_8999101 USING btree (lower((measureid)::text)) + sensors_8999101 | CREATE UNIQUE INDEX sensors_pkey_8999101 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensors_8999101 USING btree (measureid, eventdatetime, measure_data) + sensors_news_8999007 | CREATE INDEX sensors_news_lower_idx_8999007 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_news_8999007 USING btree (lower((measureid)::text)) + sensors_news_8999007 | CREATE UNIQUE INDEX sensors_news_pkey_8999007 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_news_8999007 USING btree (measureid, eventdatetime, measure_data) + sensors_news_8999107 | CREATE INDEX sensors_news_lower_idx_8999107 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_news_8999107 USING btree (lower((measureid)::text)) + sensors_news_8999107 | CREATE UNIQUE INDEX sensors_news_pkey_8999107 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_news_8999107 USING btree (measureid, eventdatetime, measure_data) + sensors_old_8999003 | CREATE INDEX sensors_old_lower_idx_8999003 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_old_8999003 USING btree 
(lower((measureid)::text)) + sensors_old_8999003 | CREATE UNIQUE INDEX sensors_old_pkey_8999003 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_old_8999003 USING btree (measureid, eventdatetime, measure_data) + sensors_old_8999103 | CREATE INDEX sensors_old_lower_idx_8999103 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_old_8999103 USING btree (lower((measureid)::text)) + sensors_old_8999103 | CREATE UNIQUE INDEX sensors_old_pkey_8999103 ON citus_split_non_blocking_schema_columnar_partitioned.sensors_old_8999103 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_2020_01_01_8999013 | CREATE INDEX index_on_child_columnar_8999013 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999013 USING btree (lower((measure_data)::text)) + sensorscolumnar_2020_01_01_8999013 | CREATE INDEX sensorscolumnar_2020_01_01_lower_idx_8999013 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999013 USING btree (lower((measureid)::text)) + sensorscolumnar_2020_01_01_8999013 | CREATE UNIQUE INDEX sensorscolumnar_2020_01_01_pkey_8999013 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999013 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_2020_01_01_8999113 | CREATE INDEX index_on_child_columnar_8999113 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999113 USING btree (lower((measure_data)::text)) + sensorscolumnar_2020_01_01_8999113 | CREATE INDEX sensorscolumnar_2020_01_01_lower_idx_8999113 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999113 USING btree (lower((measureid)::text)) + sensorscolumnar_2020_01_01_8999113 | CREATE UNIQUE INDEX sensorscolumnar_2020_01_01_pkey_8999113 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_2020_01_01_8999113 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_8999009 | CREATE INDEX index_on_parent_columnar_8999009 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_8999009 USING btree (lower((measureid)::text)) + sensorscolumnar_8999009 | CREATE UNIQUE INDEX sensorscolumnar_pkey_8999009 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_8999009 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_8999109 | CREATE INDEX index_on_parent_columnar_8999109 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_8999109 USING btree (lower((measureid)::text)) + sensorscolumnar_8999109 | CREATE UNIQUE INDEX sensorscolumnar_pkey_8999109 ON ONLY citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_8999109 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_news_8999015 | CREATE INDEX sensorscolumnar_news_lower_idx_8999015 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_news_8999015 USING btree (lower((measureid)::text)) + sensorscolumnar_news_8999015 | CREATE UNIQUE INDEX sensorscolumnar_news_pkey_8999015 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_news_8999015 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_news_8999115 | CREATE INDEX sensorscolumnar_news_lower_idx_8999115 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_news_8999115 USING btree (lower((measureid)::text)) + sensorscolumnar_news_8999115 | CREATE UNIQUE INDEX sensorscolumnar_news_pkey_8999115 ON 
citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_news_8999115 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_old_8999011 | CREATE INDEX sensorscolumnar_old_lower_idx_8999011 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_old_8999011 USING btree (lower((measureid)::text)) + sensorscolumnar_old_8999011 | CREATE UNIQUE INDEX sensorscolumnar_old_pkey_8999011 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_old_8999011 USING btree (measureid, eventdatetime, measure_data) + sensorscolumnar_old_8999111 | CREATE INDEX sensorscolumnar_old_lower_idx_8999111 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_old_8999111 USING btree (lower((measureid)::text)) + sensorscolumnar_old_8999111 | CREATE UNIQUE INDEX sensorscolumnar_old_pkey_8999111 ON citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar_old_8999111 USING btree (measureid, eventdatetime, measure_data) (43 rows) SELECT stxname FROM pg_statistic_ext WHERE stxnamespace IN ( SELECT oid FROM pg_namespace - WHERE nspname IN ('citus_split_test_schema_columnar_partitioned') + WHERE nspname IN ('citus_split_non_blocking_schema_columnar_partitioned') ) ORDER BY stxname ASC; stxname @@ -802,11 +818,18 @@ SET citus.shard_replication_factor TO 1; -- END: Show the updated state on workers --BEGIN : Cleanup \c - postgres - :master_port - DROP SCHEMA "citus_split_test_schema_columnar_partitioned" CASCADE; + ALTER SYSTEM RESET citus.defer_shard_delete_interval; + SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + + DROP SCHEMA "citus_split_non_blocking_schema_columnar_partitioned" CASCADE; NOTICE: drop cascades to 5 other objects -DETAIL: drop cascades to table citus_split_test_schema_columnar_partitioned.sensors -drop cascades to table citus_split_test_schema_columnar_partitioned.sensorscolumnar -drop cascades to table citus_split_test_schema_columnar_partitioned.colocated_dist_table -drop cascades to table citus_split_test_schema_columnar_partitioned.colocated_partitioned_table -drop cascades to table citus_split_test_schema_columnar_partitioned.reference_table +DETAIL: drop cascades to table citus_split_non_blocking_schema_columnar_partitioned.sensors +drop cascades to table citus_split_non_blocking_schema_columnar_partitioned.sensorscolumnar +drop cascades to table citus_split_non_blocking_schema_columnar_partitioned.colocated_dist_table +drop cascades to table citus_split_non_blocking_schema_columnar_partitioned.colocated_partitioned_table +drop cascades to table citus_split_non_blocking_schema_columnar_partitioned.reference_table --END : Cleanup diff --git a/src/test/regress/expected/citus_non_blocking_split_shards.out b/src/test/regress/expected/citus_non_blocking_split_shards.out index ec4cc629f..59ea77f61 100644 --- a/src/test/regress/expected/citus_non_blocking_split_shards.out +++ b/src/test/regress/expected/citus_non_blocking_split_shards.out @@ -15,6 +15,14 @@ Here is a high level overview of test plan: 12. Show we allow Split with the shard transfer mode 'auto' if all colocated tables has replica identity. */ CREATE SCHEMA "citus_split_test_schema"; +-- Disable Deferred drop auto cleanup to avoid flaky tests. 
+ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + CREATE ROLE test_shard_split_role WITH LOGIN; GRANT USAGE, CREATE ON SCHEMA "citus_split_test_schema" TO test_shard_split_role; SET ROLE test_shard_split_role; @@ -227,6 +235,10 @@ SELECT pg_catalog.citus_split_shard_by_split_points( (1 row) +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 3 orphaned resources +-- END: Perform deferred cleanup. -- Perform 3 way split SELECT pg_catalog.citus_split_shard_by_split_points( 8981001, @@ -239,6 +251,10 @@ SELECT pg_catalog.citus_split_shard_by_split_points( (1 row) -- END : Split two shards : One with move and One without move. +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 3 orphaned resources +-- END: Perform deferred cleanup. -- BEGIN : Move a shard post split. SELECT citus_move_shard_placement(8981007, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='block_writes'); citus_move_shard_placement @@ -401,6 +417,10 @@ SELECT pg_catalog.citus_split_shard_by_split_points( (1 row) +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 3 orphaned resources +-- END: Perform deferred cleanup. SET search_path TO "citus_split_test_schema"; SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport FROM pg_dist_shard AS shard @@ -450,6 +470,9 @@ SELECT pg_catalog.citus_split_shard_by_split_points( ERROR: cannot use logical replication to transfer shards of the relation table_no_rep_id since it doesn't have a REPLICA IDENTITY or PRIMARY KEY DETAIL: UPDATE and DELETE commands on the shard will error out during logical replication unless there is a REPLICA IDENTITY or PRIMARY KEY. HINT: If you wish to continue without a replica identity set the shard_transfer_mode to 'force_logical' or 'block_writes'. +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +-- END: Perform deferred cleanup. SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport FROM pg_dist_shard AS shard INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid @@ -495,6 +518,10 @@ SELECT pg_catalog.citus_split_shard_by_split_points( (1 row) +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 3 orphaned resources +-- END: Perform deferred cleanup. 
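The added expected output above reflects the deferred-cleanup flow these split tests exercise: automatic cleanup by the maintenance daemon is disabled (citus.defer_shard_delete_interval set to -1) so that orphaned resources accumulate deterministically, and they are then drained explicitly with citus_cleanup_orphaned_resources() after each split. The lines below are a minimal illustrative sketch of that drain as one might run it interactively on the coordinator, not part of the regression output; pg_dist_cleanup is the catalog populated by the deferred-cleanup path, and the counts such a query returns depend entirely on the preceding operations.

-- illustrative sketch: inspect pending cleanup records, drain them, re-check
SELECT count(*) AS pending_cleanup_records FROM pg_catalog.pg_dist_cleanup;
CALL pg_catalog.citus_cleanup_orphaned_resources();
SELECT count(*) AS pending_cleanup_records FROM pg_catalog.pg_dist_cleanup;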
SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport FROM pg_dist_shard AS shard INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid @@ -550,6 +577,13 @@ SELECT COUNT(*) FROM colocated_dist_table; -- END: Validate Data Count --BEGIN : Cleanup \c - postgres - :master_port +ALTER SYSTEM RESET citus.defer_shard_delete_interval; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + DROP SCHEMA "citus_split_test_schema" CASCADE; NOTICE: drop cascades to 4 other objects DETAIL: drop cascades to table citus_split_test_schema.sensors diff --git a/src/test/regress/expected/citus_non_blocking_splits_no_deferred_drop.out b/src/test/regress/expected/citus_non_blocking_splits_no_deferred_drop.out new file mode 100644 index 000000000..4a66b5c48 --- /dev/null +++ b/src/test/regress/expected/citus_non_blocking_splits_no_deferred_drop.out @@ -0,0 +1,565 @@ +/* +This suite runs without deferred drop enabled. + +Citus Shard Split Test.The test is model similar to 'shard_move_constraints'. +Here is a high level overview of test plan: + 1. Create a table 'sensors' (ShardCount = 2) to be split. Add indexes and statistics on this table. + 2. Create two other tables: 'reference_table' and 'colocated_dist_table', co-located with sensors. + 3. Create Foreign key constraints between the two co-located distributed tables. + 4. Load data into the three tables. + 5. Move one of the shards for 'sensors' to test ShardMove -> Split. + 6. Trigger Split on both shards of 'sensors'. This will also split co-located tables. + 7. Move one of the split shard to test Split -> ShardMove. + 8. Split an already split shard second time on a different schema. + 9. Create a colocated table with no replica identity. + 10. Show we do not allow Split with the shard transfer mode 'auto' if any colocated table has no replica identity. + 11. Drop the colocated table with no replica identity. + 12. Show we allow Split with the shard transfer mode 'auto' if all colocated tables has replica identity. +*/ +CREATE SCHEMA "citus_split_test_schema_no_deferred_drop"; +SET citus.defer_drop_after_shard_split TO OFF; +CREATE ROLE test_shard_split_role_nodeferred_drop WITH LOGIN; +GRANT USAGE, CREATE ON SCHEMA "citus_split_test_schema_no_deferred_drop" TO test_shard_split_role_nodeferred_drop; +SET ROLE test_shard_split_role_nodeferred_drop; +SET search_path TO "citus_split_test_schema_no_deferred_drop"; +SET citus.next_shard_id TO 8981000; +SET citus.next_placement_id TO 8610000; +SET citus.shard_count TO 2; +SET citus.shard_replication_factor TO 1; +-- BEGIN: Create table to split, along with other co-located tables. Add indexes, statistics etc. 
+CREATE TABLE sensors( + measureid integer, + eventdatetime date, + measure_data jsonb, + meaure_quantity decimal(15, 2), + measure_status char(1), + measure_comment varchar(44), + PRIMARY KEY (measureid, eventdatetime, measure_data)); +CREATE INDEX index_on_sensors ON sensors(lower(measureid::text)); +ALTER INDEX index_on_sensors ALTER COLUMN 1 SET STATISTICS 1000; +CREATE INDEX hash_index_on_sensors ON sensors USING HASH((measure_data->'IsFailed')); +CREATE INDEX index_with_include_on_sensors ON sensors ((measure_data->'IsFailed')) INCLUDE (measure_data, eventdatetime, measure_status); +CREATE STATISTICS stats_on_sensors (dependencies) ON measureid, eventdatetime FROM sensors; +SELECT create_distributed_table('sensors', 'measureid', colocate_with:='none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- END: Create table to split, along with other co-located tables. Add indexes, statistics etc. +-- BEGIN: Create co-located distributed and reference tables. +CREATE TABLE reference_table (measureid integer PRIMARY KEY); +SELECT create_reference_table('reference_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY); +CLUSTER colocated_dist_table USING colocated_dist_table_pkey; +SELECT create_distributed_table('colocated_dist_table', 'measureid', colocate_with:='sensors'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE table_with_index_rep_identity(key int NOT NULL); +CREATE UNIQUE INDEX uqx ON table_with_index_rep_identity(key); +ALTER TABLE table_with_index_rep_identity REPLICA IDENTITY USING INDEX uqx; +CLUSTER table_with_index_rep_identity USING uqx; +SELECT create_distributed_table('table_with_index_rep_identity', 'key', colocate_with:='sensors'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- END: Create co-located distributed and reference tables. +-- BEGIN : Create Foreign key constraints. +ALTER TABLE sensors ADD CONSTRAINT fkey_table_to_dist FOREIGN KEY (measureid) REFERENCES colocated_dist_table(measureid); +-- END : Create Foreign key constraints. +-- BEGIN : Load data into tables. +INSERT INTO reference_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO colocated_dist_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO sensors SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' FROM generate_series(0,1000)i; +SELECT COUNT(*) FROM sensors; + count +--------------------------------------------------------------------- + 1001 +(1 row) + +SELECT COUNT(*) FROM reference_table; + count +--------------------------------------------------------------------- + 1001 +(1 row) + +SELECT COUNT(*) FROM colocated_dist_table; + count +--------------------------------------------------------------------- + 1001 +(1 row) + +-- END: Load data into tables. +-- BEGIN : Display current state. 
+SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport + FROM pg_dist_shard AS shard + INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid + INNER JOIN pg_dist_node node ON placement.groupid = node.groupid + INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid + WHERE node.noderole = 'primary' AND (logicalrelid = 'sensors'::regclass OR logicalrelid = 'colocated_dist_table'::regclass OR logicalrelid = 'table_with_index_rep_identity'::regclass) + ORDER BY logicalrelid, shardminvalue::BIGINT; + shardid | logicalrelid | shardminvalue | shardmaxvalue | nodename | nodeport +--------------------------------------------------------------------- + 8981000 | sensors | -2147483648 | -1 | localhost | 57637 + 8981001 | sensors | 0 | 2147483647 | localhost | 57638 + 8981003 | colocated_dist_table | -2147483648 | -1 | localhost | 57637 + 8981004 | colocated_dist_table | 0 | 2147483647 | localhost | 57638 + 8981005 | table_with_index_rep_identity | -2147483648 | -1 | localhost | 57637 + 8981006 | table_with_index_rep_identity | 0 | 2147483647 | localhost | 57638 +(6 rows) + +\c - - - :worker_1_port + SET search_path TO "citus_split_test_schema_no_deferred_drop", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + SELECT tbl.relname, fk."Constraint", fk."Definition" + FROM pg_catalog.pg_class tbl + JOIN public.table_fkeys fk on tbl.oid = fk.relid + WHERE tbl.relname like 'sensors_%' + ORDER BY 1, 2; + relname | Constraint | Definition +--------------------------------------------------------------------- + sensors_8981000 | fkey_table_to_dist_8981000 | FOREIGN KEY (measureid) REFERENCES colocated_dist_table_8981003(measureid) +(1 row) + + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'sensors_%' ORDER BY 1,2; + tablename | indexdef +--------------------------------------------------------------------- + sensors_8981000 | CREATE INDEX hash_index_on_sensors_8981000 ON citus_split_test_schema_no_deferred_drop.sensors_8981000 USING hash (((measure_data -> 'IsFailed'::text))) + sensors_8981000 | CREATE INDEX index_on_sensors_8981000 ON citus_split_test_schema_no_deferred_drop.sensors_8981000 USING btree (lower((measureid)::text)) + sensors_8981000 | CREATE INDEX index_with_include_on_sensors_8981000 ON citus_split_test_schema_no_deferred_drop.sensors_8981000 USING btree (((measure_data -> 'IsFailed'::text))) INCLUDE (measure_data, eventdatetime, measure_status) + sensors_8981000 | CREATE UNIQUE INDEX sensors_pkey_8981000 ON citus_split_test_schema_no_deferred_drop.sensors_8981000 USING btree (measureid, eventdatetime, measure_data) +(4 rows) + + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'table_with_index_rep_identity_%' ORDER BY 1,2; + tablename | indexdef +--------------------------------------------------------------------- + table_with_index_rep_identity_8981005 | CREATE UNIQUE INDEX uqx_8981005 ON citus_split_test_schema_no_deferred_drop.table_with_index_rep_identity_8981005 USING btree (key) +(1 row) + + SELECT stxname FROM pg_statistic_ext + WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('citus_split_test_schema_no_deferred_drop') + ) + ORDER BY stxname ASC; + stxname +--------------------------------------------------------------------- + stats_on_sensors + stats_on_sensors_8981000 +(2 rows) + +\c - - - :worker_2_port + SET search_path TO "citus_split_test_schema_no_deferred_drop", public, pg_catalog; + SET 
citus.show_shards_for_app_name_prefixes = '*'; + SELECT tbl.relname, fk."Constraint", fk."Definition" + FROM pg_catalog.pg_class tbl + JOIN public.table_fkeys fk on tbl.oid = fk.relid + WHERE tbl.relname like 'sensors_%' + ORDER BY 1, 2; + relname | Constraint | Definition +--------------------------------------------------------------------- + sensors_8981001 | fkey_table_to_dist_8981001 | FOREIGN KEY (measureid) REFERENCES colocated_dist_table_8981004(measureid) +(1 row) + + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'sensors_%' ORDER BY 1,2; + tablename | indexdef +--------------------------------------------------------------------- + sensors_8981001 | CREATE INDEX hash_index_on_sensors_8981001 ON citus_split_test_schema_no_deferred_drop.sensors_8981001 USING hash (((measure_data -> 'IsFailed'::text))) + sensors_8981001 | CREATE INDEX index_on_sensors_8981001 ON citus_split_test_schema_no_deferred_drop.sensors_8981001 USING btree (lower((measureid)::text)) + sensors_8981001 | CREATE INDEX index_with_include_on_sensors_8981001 ON citus_split_test_schema_no_deferred_drop.sensors_8981001 USING btree (((measure_data -> 'IsFailed'::text))) INCLUDE (measure_data, eventdatetime, measure_status) + sensors_8981001 | CREATE UNIQUE INDEX sensors_pkey_8981001 ON citus_split_test_schema_no_deferred_drop.sensors_8981001 USING btree (measureid, eventdatetime, measure_data) +(4 rows) + + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'table_with_index_rep_identity_%' ORDER BY 1,2; + tablename | indexdef +--------------------------------------------------------------------- + table_with_index_rep_identity_8981006 | CREATE UNIQUE INDEX uqx_8981006 ON citus_split_test_schema_no_deferred_drop.table_with_index_rep_identity_8981006 USING btree (key) +(1 row) + + SELECT stxname FROM pg_statistic_ext + WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('citus_split_test_schema_no_deferred_drop') + ) + ORDER BY stxname ASC; + stxname +--------------------------------------------------------------------- + stats_on_sensors + stats_on_sensors_8981001 +(2 rows) + +-- END : Display current state +-- BEGIN : Move one shard before we split it. +\c - postgres - :master_port +SET citus.defer_drop_after_shard_split TO OFF; +SET ROLE test_shard_split_role_nodeferred_drop; +SET search_path TO "citus_split_test_schema_no_deferred_drop"; +SET citus.next_shard_id TO 8981007; +SET citus.defer_drop_after_shard_move TO OFF; +SELECT citus_move_shard_placement(8981000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='force_logical'); + citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +-- END : Move one shard before we split it. +-- BEGIN : Set node id variables +SELECT nodeid AS worker_1_node FROM pg_dist_node WHERE nodeport=:worker_1_port \gset +SELECT nodeid AS worker_2_node FROM pg_dist_node WHERE nodeport=:worker_2_port \gset +-- END : Set node id variables +-- BEGIN : Split two shards : One with move and One without move. 
+-- Perform 2 way split +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-1073741824'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); + citus_split_shard_by_split_points +--------------------------------------------------------------------- + +(1 row) + +-- Perform 3 way split +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981001, + ARRAY['536870911', '1610612735'], + ARRAY[:worker_1_node, :worker_1_node, :worker_2_node], + 'force_logical'); + citus_split_shard_by_split_points +--------------------------------------------------------------------- + +(1 row) + +-- END : Split two shards : One with move and One without move. +-- BEGIN : Move a shard post split. +SELECT citus_move_shard_placement(8981007, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='block_writes'); + citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +-- END : Move a shard post split. +-- BEGIN : Display current state. +SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport + FROM pg_dist_shard AS shard + INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid + INNER JOIN pg_dist_node node ON placement.groupid = node.groupid + INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid + WHERE node.noderole = 'primary' AND (logicalrelid = 'sensors'::regclass OR logicalrelid = 'colocated_dist_table'::regclass OR logicalrelid = 'table_with_index_rep_identity'::regclass) + ORDER BY logicalrelid, shardminvalue::BIGINT; + shardid | logicalrelid | shardminvalue | shardmaxvalue | nodename | nodeport +--------------------------------------------------------------------- + 8981007 | sensors | -2147483648 | -1073741824 | localhost | 57638 + 8981008 | sensors | -1073741823 | -1 | localhost | 57638 + 8981013 | sensors | 0 | 536870911 | localhost | 57637 + 8981014 | sensors | 536870912 | 1610612735 | localhost | 57637 + 8981015 | sensors | 1610612736 | 2147483647 | localhost | 57638 + 8981009 | colocated_dist_table | -2147483648 | -1073741824 | localhost | 57638 + 8981010 | colocated_dist_table | -1073741823 | -1 | localhost | 57638 + 8981016 | colocated_dist_table | 0 | 536870911 | localhost | 57637 + 8981017 | colocated_dist_table | 536870912 | 1610612735 | localhost | 57637 + 8981018 | colocated_dist_table | 1610612736 | 2147483647 | localhost | 57638 + 8981011 | table_with_index_rep_identity | -2147483648 | -1073741824 | localhost | 57638 + 8981012 | table_with_index_rep_identity | -1073741823 | -1 | localhost | 57638 + 8981019 | table_with_index_rep_identity | 0 | 536870911 | localhost | 57637 + 8981020 | table_with_index_rep_identity | 536870912 | 1610612735 | localhost | 57637 + 8981021 | table_with_index_rep_identity | 1610612736 | 2147483647 | localhost | 57638 +(15 rows) + +\c - - - :worker_1_port + SET search_path TO "citus_split_test_schema_no_deferred_drop", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + SELECT tbl.relname, fk."Constraint", fk."Definition" + FROM pg_catalog.pg_class tbl + JOIN public.table_fkeys fk on tbl.oid = fk.relid + WHERE tbl.relname like 'sensors_%' + ORDER BY 1, 2; + relname | Constraint | Definition +--------------------------------------------------------------------- + sensors_8981013 | fkey_table_to_dist_8981013 | FOREIGN KEY (measureid) REFERENCES colocated_dist_table_8981016(measureid) + sensors_8981014 | fkey_table_to_dist_8981014 | FOREIGN KEY (measureid) REFERENCES 
colocated_dist_table_8981017(measureid) +(2 rows) + + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'sensors_%' ORDER BY 1,2; + tablename | indexdef +--------------------------------------------------------------------- + sensors_8981013 | CREATE INDEX hash_index_on_sensors_8981013 ON citus_split_test_schema_no_deferred_drop.sensors_8981013 USING hash (((measure_data -> 'IsFailed'::text))) + sensors_8981013 | CREATE INDEX index_on_sensors_8981013 ON citus_split_test_schema_no_deferred_drop.sensors_8981013 USING btree (lower((measureid)::text)) + sensors_8981013 | CREATE INDEX index_with_include_on_sensors_8981013 ON citus_split_test_schema_no_deferred_drop.sensors_8981013 USING btree (((measure_data -> 'IsFailed'::text))) INCLUDE (measure_data, eventdatetime, measure_status) + sensors_8981013 | CREATE UNIQUE INDEX sensors_pkey_8981013 ON citus_split_test_schema_no_deferred_drop.sensors_8981013 USING btree (measureid, eventdatetime, measure_data) + sensors_8981014 | CREATE INDEX hash_index_on_sensors_8981014 ON citus_split_test_schema_no_deferred_drop.sensors_8981014 USING hash (((measure_data -> 'IsFailed'::text))) + sensors_8981014 | CREATE INDEX index_on_sensors_8981014 ON citus_split_test_schema_no_deferred_drop.sensors_8981014 USING btree (lower((measureid)::text)) + sensors_8981014 | CREATE INDEX index_with_include_on_sensors_8981014 ON citus_split_test_schema_no_deferred_drop.sensors_8981014 USING btree (((measure_data -> 'IsFailed'::text))) INCLUDE (measure_data, eventdatetime, measure_status) + sensors_8981014 | CREATE UNIQUE INDEX sensors_pkey_8981014 ON citus_split_test_schema_no_deferred_drop.sensors_8981014 USING btree (measureid, eventdatetime, measure_data) +(8 rows) + + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'table_with_index_rep_identity_%' ORDER BY 1,2; + tablename | indexdef +--------------------------------------------------------------------- + table_with_index_rep_identity_8981019 | CREATE UNIQUE INDEX uqx_8981019 ON citus_split_test_schema_no_deferred_drop.table_with_index_rep_identity_8981019 USING btree (key) + table_with_index_rep_identity_8981020 | CREATE UNIQUE INDEX uqx_8981020 ON citus_split_test_schema_no_deferred_drop.table_with_index_rep_identity_8981020 USING btree (key) +(2 rows) + + SELECT stxname FROM pg_statistic_ext + WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('citus_split_test_schema_no_deferred_drop') + ) + ORDER BY stxname ASC; + stxname +--------------------------------------------------------------------- + stats_on_sensors + stats_on_sensors_8981013 + stats_on_sensors_8981014 +(3 rows) + +\c - - - :worker_2_port + SET search_path TO "citus_split_test_schema_no_deferred_drop", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + SELECT tbl.relname, fk."Constraint", fk."Definition" + FROM pg_catalog.pg_class tbl + JOIN public.table_fkeys fk on tbl.oid = fk.relid + WHERE tbl.relname like 'sensors_%' + ORDER BY 1, 2; + relname | Constraint | Definition +--------------------------------------------------------------------- + sensors_8981007 | fkey_table_to_dist_8981007 | FOREIGN KEY (measureid) REFERENCES colocated_dist_table_8981009(measureid) + sensors_8981008 | fkey_table_to_dist_8981008 | FOREIGN KEY (measureid) REFERENCES colocated_dist_table_8981010(measureid) + sensors_8981015 | fkey_table_to_dist_8981015 | FOREIGN KEY (measureid) REFERENCES colocated_dist_table_8981018(measureid) +(3 rows) + + SELECT tablename, indexdef FROM pg_indexes WHERE 
tablename like 'sensors_%' ORDER BY 1,2; + tablename | indexdef +--------------------------------------------------------------------- + sensors_8981007 | CREATE INDEX hash_index_on_sensors_8981007 ON citus_split_test_schema_no_deferred_drop.sensors_8981007 USING hash (((measure_data -> 'IsFailed'::text))) + sensors_8981007 | CREATE INDEX index_on_sensors_8981007 ON citus_split_test_schema_no_deferred_drop.sensors_8981007 USING btree (lower((measureid)::text)) + sensors_8981007 | CREATE INDEX index_with_include_on_sensors_8981007 ON citus_split_test_schema_no_deferred_drop.sensors_8981007 USING btree (((measure_data -> 'IsFailed'::text))) INCLUDE (measure_data, eventdatetime, measure_status) + sensors_8981007 | CREATE UNIQUE INDEX sensors_pkey_8981007 ON citus_split_test_schema_no_deferred_drop.sensors_8981007 USING btree (measureid, eventdatetime, measure_data) + sensors_8981008 | CREATE INDEX hash_index_on_sensors_8981008 ON citus_split_test_schema_no_deferred_drop.sensors_8981008 USING hash (((measure_data -> 'IsFailed'::text))) + sensors_8981008 | CREATE INDEX index_on_sensors_8981008 ON citus_split_test_schema_no_deferred_drop.sensors_8981008 USING btree (lower((measureid)::text)) + sensors_8981008 | CREATE INDEX index_with_include_on_sensors_8981008 ON citus_split_test_schema_no_deferred_drop.sensors_8981008 USING btree (((measure_data -> 'IsFailed'::text))) INCLUDE (measure_data, eventdatetime, measure_status) + sensors_8981008 | CREATE UNIQUE INDEX sensors_pkey_8981008 ON citus_split_test_schema_no_deferred_drop.sensors_8981008 USING btree (measureid, eventdatetime, measure_data) + sensors_8981015 | CREATE INDEX hash_index_on_sensors_8981015 ON citus_split_test_schema_no_deferred_drop.sensors_8981015 USING hash (((measure_data -> 'IsFailed'::text))) + sensors_8981015 | CREATE INDEX index_on_sensors_8981015 ON citus_split_test_schema_no_deferred_drop.sensors_8981015 USING btree (lower((measureid)::text)) + sensors_8981015 | CREATE INDEX index_with_include_on_sensors_8981015 ON citus_split_test_schema_no_deferred_drop.sensors_8981015 USING btree (((measure_data -> 'IsFailed'::text))) INCLUDE (measure_data, eventdatetime, measure_status) + sensors_8981015 | CREATE UNIQUE INDEX sensors_pkey_8981015 ON citus_split_test_schema_no_deferred_drop.sensors_8981015 USING btree (measureid, eventdatetime, measure_data) +(12 rows) + + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'table_with_index_rep_identity_%' ORDER BY 1,2; + tablename | indexdef +--------------------------------------------------------------------- + table_with_index_rep_identity_8981011 | CREATE UNIQUE INDEX uqx_8981011 ON citus_split_test_schema_no_deferred_drop.table_with_index_rep_identity_8981011 USING btree (key) + table_with_index_rep_identity_8981012 | CREATE UNIQUE INDEX uqx_8981012 ON citus_split_test_schema_no_deferred_drop.table_with_index_rep_identity_8981012 USING btree (key) + table_with_index_rep_identity_8981021 | CREATE UNIQUE INDEX uqx_8981021 ON citus_split_test_schema_no_deferred_drop.table_with_index_rep_identity_8981021 USING btree (key) +(3 rows) + + SELECT stxname FROM pg_statistic_ext + WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('citus_split_test_schema_no_deferred_drop') + ) + ORDER BY stxname ASC; + stxname +--------------------------------------------------------------------- + stats_on_sensors + stats_on_sensors_8981007 + stats_on_sensors_8981008 + stats_on_sensors_8981015 +(4 rows) + +-- END : Display current state +-- BEGIN: Should be able to 
change/drop constraints +\c - postgres - :master_port +SET citus.defer_drop_after_shard_split TO OFF; +SET ROLE test_shard_split_role_nodeferred_drop; +SET search_path TO "citus_split_test_schema_no_deferred_drop"; +ALTER INDEX index_on_sensors RENAME TO index_on_sensors_renamed; +ALTER INDEX index_on_sensors_renamed ALTER COLUMN 1 SET STATISTICS 200; +DROP STATISTICS stats_on_sensors; +DROP INDEX index_on_sensors_renamed; +ALTER TABLE sensors DROP CONSTRAINT fkey_table_to_dist; +-- END: Should be able to change/drop constraints +-- BEGIN: Split second time on another schema +SET search_path TO public; +SET citus.next_shard_id TO 8981031; +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981007, + ARRAY['-2100000000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); + citus_split_shard_by_split_points +--------------------------------------------------------------------- + +(1 row) + +SET search_path TO "citus_split_test_schema_no_deferred_drop"; +SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport + FROM pg_dist_shard AS shard + INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid + INNER JOIN pg_dist_node node ON placement.groupid = node.groupid + INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid + WHERE node.noderole = 'primary' AND (logicalrelid = 'sensors'::regclass OR logicalrelid = 'colocated_dist_table'::regclass OR logicalrelid = 'table_with_index_rep_identity'::regclass) + ORDER BY logicalrelid, shardminvalue::BIGINT; + shardid | logicalrelid | shardminvalue | shardmaxvalue | nodename | nodeport +--------------------------------------------------------------------- + 8981031 | sensors | -2147483648 | -2100000000 | localhost | 57637 + 8981032 | sensors | -2099999999 | -1073741824 | localhost | 57638 + 8981008 | sensors | -1073741823 | -1 | localhost | 57638 + 8981013 | sensors | 0 | 536870911 | localhost | 57637 + 8981014 | sensors | 536870912 | 1610612735 | localhost | 57637 + 8981015 | sensors | 1610612736 | 2147483647 | localhost | 57638 + 8981033 | colocated_dist_table | -2147483648 | -2100000000 | localhost | 57637 + 8981034 | colocated_dist_table | -2099999999 | -1073741824 | localhost | 57638 + 8981010 | colocated_dist_table | -1073741823 | -1 | localhost | 57638 + 8981016 | colocated_dist_table | 0 | 536870911 | localhost | 57637 + 8981017 | colocated_dist_table | 536870912 | 1610612735 | localhost | 57637 + 8981018 | colocated_dist_table | 1610612736 | 2147483647 | localhost | 57638 + 8981035 | table_with_index_rep_identity | -2147483648 | -2100000000 | localhost | 57637 + 8981036 | table_with_index_rep_identity | -2099999999 | -1073741824 | localhost | 57638 + 8981012 | table_with_index_rep_identity | -1073741823 | -1 | localhost | 57638 + 8981019 | table_with_index_rep_identity | 0 | 536870911 | localhost | 57637 + 8981020 | table_with_index_rep_identity | 536870912 | 1610612735 | localhost | 57637 + 8981021 | table_with_index_rep_identity | 1610612736 | 2147483647 | localhost | 57638 +(18 rows) + +-- END: Split second time on another schema +-- BEGIN: Create a co-located table with no replica identity. +CREATE TABLE table_no_rep_id (measureid integer); +SELECT create_distributed_table('table_no_rep_id', 'measureid', colocate_with:='sensors'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- END: Create a co-located table with no replica identity. 
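The 'auto' transfer mode exercised next falls back to logical replication, which requires every co-located table to have a primary key or a replica identity. As an illustrative diagnostic only (not part of this test), assuming the standard pg_class/pg_index catalogs and Citus' pg_dist_partition metadata, a query along these lines lists distributed tables that would trip that requirement:

SELECT p.logicalrelid::regclass AS table_name
FROM pg_dist_partition p
JOIN pg_class c ON c.oid = p.logicalrelid
WHERE c.relreplident NOT IN ('f', 'i')      -- no REPLICA IDENTITY FULL / USING INDEX
  AND NOT EXISTS (
        SELECT 1
        FROM pg_index i
        WHERE i.indrelid = c.oid
          AND i.indisprimary);               -- and no primary key either

A table such as table_no_rep_id would show up in this list, which is the condition the next split rejects under 'auto'.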
+-- BEGIN: Split a shard with shard_transfer_mode='auto' and with a colocated table with no replica identity +SET citus.next_shard_id TO 8981041; +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981031, + ARRAY['-2120000000'], + ARRAY[:worker_1_node, :worker_2_node]); +ERROR: cannot use logical replication to transfer shards of the relation table_no_rep_id since it doesn't have a REPLICA IDENTITY or PRIMARY KEY +DETAIL: UPDATE and DELETE commands on the shard will error out during logical replication unless there is a REPLICA IDENTITY or PRIMARY KEY. +HINT: If you wish to continue without a replica identity set the shard_transfer_mode to 'force_logical' or 'block_writes'. +SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport + FROM pg_dist_shard AS shard + INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid + INNER JOIN pg_dist_node node ON placement.groupid = node.groupid + INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid + WHERE node.noderole = 'primary' AND (logicalrelid = 'sensors'::regclass OR logicalrelid = 'colocated_dist_table'::regclass OR logicalrelid = 'table_with_index_rep_identity'::regclass) + ORDER BY logicalrelid, shardminvalue::BIGINT; + shardid | logicalrelid | shardminvalue | shardmaxvalue | nodename | nodeport +--------------------------------------------------------------------- + 8981031 | sensors | -2147483648 | -2100000000 | localhost | 57637 + 8981032 | sensors | -2099999999 | -1073741824 | localhost | 57638 + 8981008 | sensors | -1073741823 | -1 | localhost | 57638 + 8981013 | sensors | 0 | 536870911 | localhost | 57637 + 8981014 | sensors | 536870912 | 1610612735 | localhost | 57637 + 8981015 | sensors | 1610612736 | 2147483647 | localhost | 57638 + 8981033 | colocated_dist_table | -2147483648 | -2100000000 | localhost | 57637 + 8981034 | colocated_dist_table | -2099999999 | -1073741824 | localhost | 57638 + 8981010 | colocated_dist_table | -1073741823 | -1 | localhost | 57638 + 8981016 | colocated_dist_table | 0 | 536870911 | localhost | 57637 + 8981017 | colocated_dist_table | 536870912 | 1610612735 | localhost | 57637 + 8981018 | colocated_dist_table | 1610612736 | 2147483647 | localhost | 57638 + 8981035 | table_with_index_rep_identity | -2147483648 | -2100000000 | localhost | 57637 + 8981036 | table_with_index_rep_identity | -2099999999 | -1073741824 | localhost | 57638 + 8981012 | table_with_index_rep_identity | -1073741823 | -1 | localhost | 57638 + 8981019 | table_with_index_rep_identity | 0 | 536870911 | localhost | 57637 + 8981020 | table_with_index_rep_identity | 536870912 | 1610612735 | localhost | 57637 + 8981021 | table_with_index_rep_identity | 1610612736 | 2147483647 | localhost | 57638 +(18 rows) + +-- END: Split a shard with shard_transfer_mode='auto' and with a colocated table with no replica identity +-- BEGIN: Drop the co-located table with no replica identity. +DROP TABLE table_no_rep_id; +-- END: Drop the co-located table with no replica identity. 
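The failure above comes from the default 'auto' mode choosing logical replication. As the HINT notes, the same split can go through with an explicit transfer mode that does not depend on a replica identity. This is only an illustrative sketch reusing the shard id and split point from the failed attempt, not an additional step of the test:

SELECT pg_catalog.citus_split_shard_by_split_points(
    8981031,
    ARRAY['-2120000000'],
    ARRAY[:worker_1_node, :worker_2_node],
    'force_logical');    -- 'block_writes' also avoids the replica identity requirement

Alternatively, adding a primary key or a REPLICA IDENTITY USING INDEX to the offending co-located table (as done for table_with_index_rep_identity earlier in this file) lets 'auto' proceed; here the test simply drops table_no_rep_id instead.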
+-- BEGIN: Split a shard with shard_transfer_mode='auto' and with all colocated tables has replica identity +SET citus.next_shard_id TO 8981041; +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981031, + ARRAY['-2120000000'], + ARRAY[:worker_1_node, :worker_2_node], + 'auto'); + citus_split_shard_by_split_points +--------------------------------------------------------------------- + +(1 row) + +SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport + FROM pg_dist_shard AS shard + INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid + INNER JOIN pg_dist_node node ON placement.groupid = node.groupid + INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid + WHERE node.noderole = 'primary' AND (logicalrelid = 'sensors'::regclass OR logicalrelid = 'colocated_dist_table'::regclass OR logicalrelid = 'table_with_index_rep_identity'::regclass) + ORDER BY logicalrelid, shardminvalue::BIGINT; + shardid | logicalrelid | shardminvalue | shardmaxvalue | nodename | nodeport +--------------------------------------------------------------------- + 8981041 | sensors | -2147483648 | -2120000000 | localhost | 57637 + 8981042 | sensors | -2119999999 | -2100000000 | localhost | 57638 + 8981032 | sensors | -2099999999 | -1073741824 | localhost | 57638 + 8981008 | sensors | -1073741823 | -1 | localhost | 57638 + 8981013 | sensors | 0 | 536870911 | localhost | 57637 + 8981014 | sensors | 536870912 | 1610612735 | localhost | 57637 + 8981015 | sensors | 1610612736 | 2147483647 | localhost | 57638 + 8981043 | colocated_dist_table | -2147483648 | -2120000000 | localhost | 57637 + 8981044 | colocated_dist_table | -2119999999 | -2100000000 | localhost | 57638 + 8981034 | colocated_dist_table | -2099999999 | -1073741824 | localhost | 57638 + 8981010 | colocated_dist_table | -1073741823 | -1 | localhost | 57638 + 8981016 | colocated_dist_table | 0 | 536870911 | localhost | 57637 + 8981017 | colocated_dist_table | 536870912 | 1610612735 | localhost | 57637 + 8981018 | colocated_dist_table | 1610612736 | 2147483647 | localhost | 57638 + 8981045 | table_with_index_rep_identity | -2147483648 | -2120000000 | localhost | 57637 + 8981046 | table_with_index_rep_identity | -2119999999 | -2100000000 | localhost | 57638 + 8981036 | table_with_index_rep_identity | -2099999999 | -1073741824 | localhost | 57638 + 8981012 | table_with_index_rep_identity | -1073741823 | -1 | localhost | 57638 + 8981019 | table_with_index_rep_identity | 0 | 536870911 | localhost | 57637 + 8981020 | table_with_index_rep_identity | 536870912 | 1610612735 | localhost | 57637 + 8981021 | table_with_index_rep_identity | 1610612736 | 2147483647 | localhost | 57638 +(21 rows) + +-- END: Split a shard with shard_transfer_mode='auto' and with all colocated tables has replica identity +-- BEGIN: Validate Data Count +SELECT COUNT(*) FROM sensors; + count +--------------------------------------------------------------------- + 1001 +(1 row) + +SELECT COUNT(*) FROM reference_table; + count +--------------------------------------------------------------------- + 1001 +(1 row) + +SELECT COUNT(*) FROM colocated_dist_table; + count +--------------------------------------------------------------------- + 1001 +(1 row) + +-- END: Validate Data Count +--BEGIN : Cleanup +\c - postgres - :master_port +DROP SCHEMA "citus_split_test_schema_no_deferred_drop" CASCADE; +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table citus_split_test_schema_no_deferred_drop.sensors +drop cascades to 
table citus_split_test_schema_no_deferred_drop.reference_table +drop cascades to table citus_split_test_schema_no_deferred_drop.colocated_dist_table +drop cascades to table citus_split_test_schema_no_deferred_drop.table_with_index_rep_identity +SET citus.defer_drop_after_shard_split TO ON; +--END : Cleanup diff --git a/src/test/regress/expected/citus_split_shard_by_split_points.out b/src/test/regress/expected/citus_split_shard_by_split_points.out index 743996160..599864aac 100644 --- a/src/test/regress/expected/citus_split_shard_by_split_points.out +++ b/src/test/regress/expected/citus_split_shard_by_split_points.out @@ -11,6 +11,14 @@ Here is a high level overview of test plan: 8. Split an already split shard second time on a different schema. */ CREATE SCHEMA "citus_split_test_schema"; +-- Disable Deferred drop auto cleanup to avoid flaky tests. +ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + CREATE ROLE test_split_role WITH LOGIN; GRANT USAGE, CREATE ON SCHEMA "citus_split_test_schema" TO test_split_role; SET ROLE test_split_role; @@ -223,6 +231,10 @@ SELECT pg_catalog.citus_split_shard_by_split_points( (1 row) +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 4 orphaned resources +-- END: Perform deferred cleanup. -- Perform 3 way split SELECT pg_catalog.citus_split_shard_by_split_points( 8981001, @@ -235,6 +247,10 @@ SELECT pg_catalog.citus_split_shard_by_split_points( (1 row) -- END : Split two shards : One with move and One without move. +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 3 orphaned resources +-- END: Perform deferred cleanup. -- BEGIN : Move a shard post split. SELECT citus_move_shard_placement(8981007, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='block_writes'); citus_move_shard_placement @@ -397,6 +413,10 @@ SELECT pg_catalog.citus_split_shard_by_split_points( (1 row) +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 3 orphaned resources +-- END: Perform deferred cleanup. 
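The hunks above establish the pattern this patch uses throughout the split tests: background cleanup is switched off so that automatic deferred-drop cannot race with the expected output, and the deferred resources are then dropped explicitly. A condensed sketch of that pattern, assembled from commands appearing in this patch rather than an additional test step:

ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1;   -- disable background cleanup
SELECT pg_reload_conf();
-- ... run splits / moves that defer dropping the old shards ...
CALL pg_catalog.citus_cleanup_orphaned_resources();          -- drop deferred resources on demand
ALTER SYSTEM RESET citus.defer_shard_delete_interval;        -- restore the default behaviour
SELECT pg_reload_conf();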
SET search_path TO "citus_split_test_schema"; SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport FROM pg_dist_shard AS shard @@ -450,6 +470,13 @@ SELECT COUNT(*) FROM colocated_dist_table; -- END: Validate Data Count --BEGIN : Cleanup \c - postgres - :master_port +ALTER SYSTEM RESET citus.defer_shard_delete_interval; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + DROP SCHEMA "citus_split_test_schema" CASCADE; NOTICE: drop cascades to 4 other objects DETAIL: drop cascades to table citus_split_test_schema.sensors diff --git a/src/test/regress/expected/citus_split_shard_by_split_points_deferred_drop.out b/src/test/regress/expected/citus_split_shard_by_split_points_deferred_drop.out new file mode 100644 index 000000000..f0cd227b9 --- /dev/null +++ b/src/test/regress/expected/citus_split_shard_by_split_points_deferred_drop.out @@ -0,0 +1,132 @@ +CREATE SCHEMA "citus_split_shard_by_split_points_deferred_schema"; +CREATE ROLE test_split_role WITH LOGIN; +GRANT USAGE, CREATE ON SCHEMA "citus_split_shard_by_split_points_deferred_schema" TO test_split_role; +SET ROLE test_split_role; +SET search_path TO "citus_split_shard_by_split_points_deferred_schema"; +-- Validate that the user cannot insert directly into the pg_dist_cleanup table but can select from it. +CREATE TABLE temp_table (id INT); +INSERT INTO pg_catalog.pg_dist_cleanup (operation_id, object_type, object_name, node_group_id, policy_type) + VALUES (3134, 1, 'citus_split_shard_by_split_points_deferred_schema.temp_table', 1, 1); +ERROR: permission denied for table pg_dist_cleanup +SELECT * from pg_dist_cleanup; + record_id | operation_id | object_type | object_name | node_group_id | policy_type +--------------------------------------------------------------------- +(0 rows) + +-- Set a very long (10 min) time interval to stop auto cleanup in case of deferred drop. +\c - postgres - :master_port +ALTER SYSTEM SET citus.defer_shard_delete_interval TO 600000; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +-- Perform a split and validate the shard is marked for deferred drop.
+SET citus.next_shard_id TO 8981000; +SET citus.next_placement_id TO 8610000; +SET citus.shard_count TO 2; +SET citus.shard_replication_factor TO 1; +SET citus.next_operation_id TO 777; +SET citus.next_cleanup_record_id TO 11; +SET ROLE test_split_role; +CREATE TABLE table_to_split(id int PRIMARY KEY, int_data int, data text); +SELECT create_distributed_table('table_to_split', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT nodeid AS worker_1_node FROM pg_dist_node WHERE nodeport=:worker_1_port \gset +SELECT nodeid AS worker_2_node FROM pg_dist_node WHERE nodeport=:worker_2_port \gset +SET citus.next_shard_id TO 9999000; +SET citus.next_placement_id TO 5555000; +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'block_writes'); + citus_split_shard_by_split_points +--------------------------------------------------------------------- + +(1 row) + +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981001, + ARRAY['100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); + citus_split_shard_by_split_points +--------------------------------------------------------------------- + +(1 row) + +-- The original shards are marked for deferred drop with policy_type = 2. +SELECT * from pg_dist_cleanup; + record_id | operation_id | object_type | object_name | node_group_id | policy_type +--------------------------------------------------------------------- + 11 | 777 | 1 | public.table_to_split_8981000 | 14 | 2 + 12 | 778 | 1 | public.table_to_split_8981001 | 16 | 2 +(2 rows) + +-- The physical shards should not be deleted. +\c - - - :worker_1_port +SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r'; + relname +--------------------------------------------------------------------- + table_to_split_8981000 + table_to_split_9999000 + table_to_split_9999002 +(3 rows) + +\c - - - :worker_2_port +SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r'; + relname +--------------------------------------------------------------------- + table_to_split_8981001 + table_to_split_9999001 + table_to_split_9999003 +(3 rows) + +-- Set a very short (1 ms) time interval to force deferred drop cleanup. +\c - postgres - :master_port +ALTER SYSTEM SET citus.defer_shard_delete_interval TO 1; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +-- Give enough time for the deferred drop cleanup to run. +SELECT pg_sleep(2); + pg_sleep +--------------------------------------------------------------------- + +(1 row) + +-- Cleanup has been done.
+SELECT * from pg_dist_cleanup; + record_id | operation_id | object_type | object_name | node_group_id | policy_type +--------------------------------------------------------------------- +(0 rows) + +\c - - - :worker_1_port +SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r'; + relname +--------------------------------------------------------------------- + table_to_split_9999000 + table_to_split_9999002 +(2 rows) + +\c - - - :worker_2_port +SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r'; + relname +--------------------------------------------------------------------- + table_to_split_9999001 + table_to_split_9999003 +(2 rows) + +-- Test Cleanup +\c - postgres - :master_port +DROP SCHEMA "citus_split_shard_by_split_points_deferred_schema" CASCADE; +NOTICE: drop cascades to table citus_split_shard_by_split_points_deferred_schema.temp_table diff --git a/src/test/regress/expected/citus_split_shard_by_split_points_failure.out b/src/test/regress/expected/citus_split_shard_by_split_points_failure.out index 4ea61e03c..f54c24204 100644 --- a/src/test/regress/expected/citus_split_shard_by_split_points_failure.out +++ b/src/test/regress/expected/citus_split_shard_by_split_points_failure.out @@ -72,8 +72,7 @@ SELECT pg_catalog.citus_split_shard_by_split_points( ARRAY['-1073741824'], ARRAY[:worker_1_node, :worker_1_node], 'block_writes'); -ERROR: relation "sensors_8981002" already exists -CONTEXT: while executing command on localhost:xxxxx +ERROR: relation citus_split_failure_test_schema.sensors_8981002 already exists on worker localhost:xxxxx -- BEGIN : Split Shard, which is expected to fail. -- BEGIN : Ensure tables were cleaned from worker \c - - - :worker_1_port diff --git a/src/test/regress/expected/citus_split_shard_by_split_points_negative.out b/src/test/regress/expected/citus_split_shard_by_split_points_negative.out index 6fa2559a1..85b1fc3ee 100644 --- a/src/test/regress/expected/citus_split_shard_by_split_points_negative.out +++ b/src/test/regress/expected/citus_split_shard_by_split_points_negative.out @@ -128,3 +128,11 @@ SELECT citus_split_shard_by_split_points( ARRAY['-1073741826'], ARRAY[:worker_1_node, :worker_2_node]); ERROR: Operation split not supported for shard as replication factor '2' is greater than 1. +--BEGIN : Cleanup +\c - postgres - :master_port +DROP SCHEMA "citus_split_shard_by_split_points_negative" CASCADE; +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table citus_split_shard_by_split_points_negative.range_paritioned_table_to_split +drop cascades to table citus_split_shard_by_split_points_negative.table_to_split +drop cascades to table citus_split_shard_by_split_points_negative.table_to_split_replication_factor_2 +--END : Cleanup diff --git a/src/test/regress/expected/citus_split_shard_columnar_partitioned.out b/src/test/regress/expected/citus_split_shard_columnar_partitioned.out index f81768cbc..09fb12fd9 100644 --- a/src/test/regress/expected/citus_split_shard_columnar_partitioned.out +++ b/src/test/regress/expected/citus_split_shard_columnar_partitioned.out @@ -4,6 +4,14 @@ SET citus.next_shard_id TO 8970000; SET citus.next_placement_id TO 8770000; SET citus.shard_count TO 1; SET citus.shard_replication_factor TO 1; +-- Disable Deferred drop auto cleanup to avoid flaky tests. 
+ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + -- BEGIN: Create table to split, along with other co-located tables. Add indexes, statistics etc. CREATE TABLE sensors( measureid integer, eventdatetime date, measure_data jsonb, meaure_quantity decimal(15, 2), measure_status char(1), measure_comment varchar(44), PRIMARY KEY (measureid, eventdatetime, measure_data)); @@ -265,6 +273,10 @@ SET citus.shard_replication_factor TO 1; (1 row) -- END: Split a shard along its co-located shards +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 11 orphaned resources +-- END: Perform deferred cleanup. -- BEGIN: Validate Shard Info and Data SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport FROM pg_dist_shard AS shard @@ -512,6 +524,10 @@ SET citus.shard_replication_factor TO 1; (1 row) -- END: Split a partition table directly +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 11 orphaned resources +-- END: Perform deferred cleanup. -- BEGIN: Validate Shard Info and Data SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport FROM pg_dist_shard AS shard @@ -802,6 +818,13 @@ SET citus.shard_replication_factor TO 1; -- END: Show the updated state on workers --BEGIN : Cleanup \c - postgres - :master_port + ALTER SYSTEM RESET citus.defer_shard_delete_interval; + SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + DROP SCHEMA "citus_split_test_schema_columnar_partitioned" CASCADE; NOTICE: drop cascades to 5 other objects DETAIL: drop cascades to table citus_split_test_schema_columnar_partitioned.sensors diff --git a/src/test/regress/expected/citus_split_shard_no_deferred_drop.out b/src/test/regress/expected/citus_split_shard_no_deferred_drop.out new file mode 100644 index 000000000..9bcae7cb0 --- /dev/null +++ b/src/test/regress/expected/citus_split_shard_no_deferred_drop.out @@ -0,0 +1,464 @@ +/* +This suite runs without deferred drop enabled. +Citus Shard Split Test. The test is modeled on 'shard_move_constraints'. +Here is a high-level overview of the test plan: + 1. Create a table 'sensors' (ShardCount = 2) to be split. Add indexes and statistics on this table. + 2. Create two other tables: 'reference_table' and 'colocated_dist_table', co-located with sensors. + 3. Create Foreign key constraints between the two co-located distributed tables. + 4. Load data into the three tables. + 5. Move one of the shards for 'sensors' to test ShardMove -> Split. + 6. Trigger Split on both shards of 'sensors'. This will also split co-located tables. + 7. Move one of the split shards to test Split -> ShardMove. + 8. Split an already split shard a second time on a different schema. +*/ +CREATE SCHEMA "citus_split_test_schema_no_deferred_drop"; +SET citus.defer_drop_after_shard_split TO OFF; +CREATE ROLE test_split_deferred_role WITH LOGIN; +GRANT USAGE, CREATE ON SCHEMA "citus_split_test_schema_no_deferred_drop" TO test_split_deferred_role; +SET ROLE test_split_deferred_role; +SET search_path TO "citus_split_test_schema_no_deferred_drop"; +SET citus.next_shard_id TO 8981000; +SET citus.next_placement_id TO 8610000; +SET citus.shard_count TO 2; +SET citus.shard_replication_factor TO 1; +-- BEGIN: Create table to split, along with other co-located tables. Add indexes, statistics etc.
+CREATE TABLE sensors( + measureid integer, + eventdatetime date, + measure_data jsonb, + meaure_quantity decimal(15, 2), + measure_status char(1), + measure_comment varchar(44), + PRIMARY KEY (measureid, eventdatetime, measure_data)); +CREATE INDEX index_on_sensors ON sensors(lower(measureid::text)); +ALTER INDEX index_on_sensors ALTER COLUMN 1 SET STATISTICS 1000; +CREATE INDEX hash_index_on_sensors ON sensors USING HASH((measure_data->'IsFailed')); +CREATE INDEX index_with_include_on_sensors ON sensors ((measure_data->'IsFailed')) INCLUDE (measure_data, eventdatetime, measure_status); +CREATE STATISTICS stats_on_sensors (dependencies) ON measureid, eventdatetime FROM sensors; +SELECT create_distributed_table('sensors', 'measureid', colocate_with:='none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- END: Create table to split, along with other co-located tables. Add indexes, statistics etc. +-- BEGIN: Create co-located distributed and reference tables. +CREATE TABLE reference_table (measureid integer PRIMARY KEY); +SELECT create_reference_table('reference_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY); +CLUSTER colocated_dist_table USING colocated_dist_table_pkey; +SELECT create_distributed_table('colocated_dist_table', 'measureid', colocate_with:='sensors'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE table_with_index_rep_identity(key int NOT NULL); +CREATE UNIQUE INDEX uqx ON table_with_index_rep_identity(key); +ALTER TABLE table_with_index_rep_identity REPLICA IDENTITY USING INDEX uqx; +CLUSTER table_with_index_rep_identity USING uqx; +SELECT create_distributed_table('table_with_index_rep_identity', 'key', colocate_with:='sensors'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- END: Create co-located distributed and reference tables. +-- BEGIN : Create Foreign key constraints. +ALTER TABLE sensors ADD CONSTRAINT fkey_table_to_dist FOREIGN KEY (measureid) REFERENCES colocated_dist_table(measureid); +-- END : Create Foreign key constraints. +-- BEGIN : Load data into tables. +INSERT INTO reference_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO colocated_dist_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO sensors SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' FROM generate_series(0,1000)i; +SELECT COUNT(*) FROM sensors; + count +--------------------------------------------------------------------- + 1001 +(1 row) + +SELECT COUNT(*) FROM reference_table; + count +--------------------------------------------------------------------- + 1001 +(1 row) + +SELECT COUNT(*) FROM colocated_dist_table; + count +--------------------------------------------------------------------- + 1001 +(1 row) + +-- END: Load data into tables. +-- BEGIN : Display current state. 
+SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport + FROM pg_dist_shard AS shard + INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid + INNER JOIN pg_dist_node node ON placement.groupid = node.groupid + INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid + WHERE node.noderole = 'primary' AND (logicalrelid = 'sensors'::regclass OR logicalrelid = 'colocated_dist_table'::regclass OR logicalrelid = 'table_with_index_rep_identity'::regclass) + ORDER BY logicalrelid, shardminvalue::BIGINT; + shardid | logicalrelid | shardminvalue | shardmaxvalue | nodename | nodeport +--------------------------------------------------------------------- + 8981000 | sensors | -2147483648 | -1 | localhost | 57637 + 8981001 | sensors | 0 | 2147483647 | localhost | 57638 + 8981003 | colocated_dist_table | -2147483648 | -1 | localhost | 57637 + 8981004 | colocated_dist_table | 0 | 2147483647 | localhost | 57638 + 8981005 | table_with_index_rep_identity | -2147483648 | -1 | localhost | 57637 + 8981006 | table_with_index_rep_identity | 0 | 2147483647 | localhost | 57638 +(6 rows) + +\c - - - :worker_1_port + SET search_path TO "citus_split_test_schema_no_deferred_drop", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + SELECT tbl.relname, fk."Constraint", fk."Definition" + FROM pg_catalog.pg_class tbl + JOIN public.table_fkeys fk on tbl.oid = fk.relid + WHERE tbl.relname like 'sensors_%' + ORDER BY 1, 2; + relname | Constraint | Definition +--------------------------------------------------------------------- + sensors_8981000 | fkey_table_to_dist_8981000 | FOREIGN KEY (measureid) REFERENCES colocated_dist_table_8981003(measureid) +(1 row) + + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'sensors_%' ORDER BY 1,2; + tablename | indexdef +--------------------------------------------------------------------- + sensors_8981000 | CREATE INDEX hash_index_on_sensors_8981000 ON citus_split_test_schema_no_deferred_drop.sensors_8981000 USING hash (((measure_data -> 'IsFailed'::text))) + sensors_8981000 | CREATE INDEX index_on_sensors_8981000 ON citus_split_test_schema_no_deferred_drop.sensors_8981000 USING btree (lower((measureid)::text)) + sensors_8981000 | CREATE INDEX index_with_include_on_sensors_8981000 ON citus_split_test_schema_no_deferred_drop.sensors_8981000 USING btree (((measure_data -> 'IsFailed'::text))) INCLUDE (measure_data, eventdatetime, measure_status) + sensors_8981000 | CREATE UNIQUE INDEX sensors_pkey_8981000 ON citus_split_test_schema_no_deferred_drop.sensors_8981000 USING btree (measureid, eventdatetime, measure_data) +(4 rows) + + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'table_with_index_rep_identity_%' ORDER BY 1,2; + tablename | indexdef +--------------------------------------------------------------------- + table_with_index_rep_identity_8981005 | CREATE UNIQUE INDEX uqx_8981005 ON citus_split_test_schema_no_deferred_drop.table_with_index_rep_identity_8981005 USING btree (key) +(1 row) + + SELECT stxname FROM pg_statistic_ext + WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('citus_split_test_schema_no_deferred_drop') + ) + ORDER BY stxname ASC; + stxname +--------------------------------------------------------------------- + stats_on_sensors + stats_on_sensors_8981000 +(2 rows) + +\c - - - :worker_2_port + SET search_path TO "citus_split_test_schema_no_deferred_drop", public, pg_catalog; + SET 
citus.show_shards_for_app_name_prefixes = '*'; + SELECT tbl.relname, fk."Constraint", fk."Definition" + FROM pg_catalog.pg_class tbl + JOIN public.table_fkeys fk on tbl.oid = fk.relid + WHERE tbl.relname like 'sensors_%' + ORDER BY 1, 2; + relname | Constraint | Definition +--------------------------------------------------------------------- + sensors_8981001 | fkey_table_to_dist_8981001 | FOREIGN KEY (measureid) REFERENCES colocated_dist_table_8981004(measureid) +(1 row) + + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'sensors_%' ORDER BY 1,2; + tablename | indexdef +--------------------------------------------------------------------- + sensors_8981001 | CREATE INDEX hash_index_on_sensors_8981001 ON citus_split_test_schema_no_deferred_drop.sensors_8981001 USING hash (((measure_data -> 'IsFailed'::text))) + sensors_8981001 | CREATE INDEX index_on_sensors_8981001 ON citus_split_test_schema_no_deferred_drop.sensors_8981001 USING btree (lower((measureid)::text)) + sensors_8981001 | CREATE INDEX index_with_include_on_sensors_8981001 ON citus_split_test_schema_no_deferred_drop.sensors_8981001 USING btree (((measure_data -> 'IsFailed'::text))) INCLUDE (measure_data, eventdatetime, measure_status) + sensors_8981001 | CREATE UNIQUE INDEX sensors_pkey_8981001 ON citus_split_test_schema_no_deferred_drop.sensors_8981001 USING btree (measureid, eventdatetime, measure_data) +(4 rows) + + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'table_with_index_rep_identity_%' ORDER BY 1,2; + tablename | indexdef +--------------------------------------------------------------------- + table_with_index_rep_identity_8981006 | CREATE UNIQUE INDEX uqx_8981006 ON citus_split_test_schema_no_deferred_drop.table_with_index_rep_identity_8981006 USING btree (key) +(1 row) + + SELECT stxname FROM pg_statistic_ext + WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('citus_split_test_schema_no_deferred_drop') + ) + ORDER BY stxname ASC; + stxname +--------------------------------------------------------------------- + stats_on_sensors + stats_on_sensors_8981001 +(2 rows) + +-- END : Display current state +-- BEGIN : Move one shard before we split it. +\c - postgres - :master_port +SET citus.defer_drop_after_shard_split TO OFF; +SET ROLE test_split_deferred_role; +SET search_path TO "citus_split_test_schema_no_deferred_drop"; +SET citus.next_shard_id TO 8981007; +SET citus.defer_drop_after_shard_move TO OFF; +SELECT citus_move_shard_placement(8981000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='force_logical'); + citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +-- END : Move one shard before we split it. +-- BEGIN : Set node id variables +SELECT nodeid AS worker_1_node FROM pg_dist_node WHERE nodeport=:worker_1_port \gset +SELECT nodeid AS worker_2_node FROM pg_dist_node WHERE nodeport=:worker_2_port \gset +-- END : Set node id variables +-- BEGIN : Split two shards : One with move and One without move. 
+-- Perform 2 way split +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-1073741824'], + ARRAY[:worker_1_node, :worker_2_node], + 'block_writes'); + citus_split_shard_by_split_points +--------------------------------------------------------------------- + +(1 row) + +-- Perform 3 way split +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981001, + ARRAY['536870911', '1610612735'], + ARRAY[:worker_1_node, :worker_1_node, :worker_2_node], + 'block_writes'); + citus_split_shard_by_split_points +--------------------------------------------------------------------- + +(1 row) + +-- END : Split two shards : One with move and One without move. +-- BEGIN : Move a shard post split. +SELECT citus_move_shard_placement(8981007, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='block_writes'); + citus_move_shard_placement +--------------------------------------------------------------------- + +(1 row) + +-- END : Move a shard post split. +-- BEGIN : Display current state. +SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport + FROM pg_dist_shard AS shard + INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid + INNER JOIN pg_dist_node node ON placement.groupid = node.groupid + INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid + WHERE node.noderole = 'primary' AND (logicalrelid = 'sensors'::regclass OR logicalrelid = 'colocated_dist_table'::regclass OR logicalrelid = 'table_with_index_rep_identity'::regclass) + ORDER BY logicalrelid, shardminvalue::BIGINT; + shardid | logicalrelid | shardminvalue | shardmaxvalue | nodename | nodeport +--------------------------------------------------------------------- + 8981007 | sensors | -2147483648 | -1073741824 | localhost | 57638 + 8981008 | sensors | -1073741823 | -1 | localhost | 57638 + 8981013 | sensors | 0 | 536870911 | localhost | 57637 + 8981014 | sensors | 536870912 | 1610612735 | localhost | 57637 + 8981015 | sensors | 1610612736 | 2147483647 | localhost | 57638 + 8981009 | colocated_dist_table | -2147483648 | -1073741824 | localhost | 57638 + 8981010 | colocated_dist_table | -1073741823 | -1 | localhost | 57638 + 8981016 | colocated_dist_table | 0 | 536870911 | localhost | 57637 + 8981017 | colocated_dist_table | 536870912 | 1610612735 | localhost | 57637 + 8981018 | colocated_dist_table | 1610612736 | 2147483647 | localhost | 57638 + 8981011 | table_with_index_rep_identity | -2147483648 | -1073741824 | localhost | 57638 + 8981012 | table_with_index_rep_identity | -1073741823 | -1 | localhost | 57638 + 8981019 | table_with_index_rep_identity | 0 | 536870911 | localhost | 57637 + 8981020 | table_with_index_rep_identity | 536870912 | 1610612735 | localhost | 57637 + 8981021 | table_with_index_rep_identity | 1610612736 | 2147483647 | localhost | 57638 +(15 rows) + +\c - - - :worker_1_port + SET search_path TO "citus_split_test_schema_no_deferred_drop", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + SELECT tbl.relname, fk."Constraint", fk."Definition" + FROM pg_catalog.pg_class tbl + JOIN public.table_fkeys fk on tbl.oid = fk.relid + WHERE tbl.relname like 'sensors_%' + ORDER BY 1, 2; + relname | Constraint | Definition +--------------------------------------------------------------------- + sensors_8981013 | fkey_table_to_dist_8981013 | FOREIGN KEY (measureid) REFERENCES colocated_dist_table_8981016(measureid) + sensors_8981014 | fkey_table_to_dist_8981014 | FOREIGN KEY (measureid) REFERENCES 
colocated_dist_table_8981017(measureid) +(2 rows) + + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'sensors_%' ORDER BY 1,2; + tablename | indexdef +--------------------------------------------------------------------- + sensors_8981013 | CREATE INDEX hash_index_on_sensors_8981013 ON citus_split_test_schema_no_deferred_drop.sensors_8981013 USING hash (((measure_data -> 'IsFailed'::text))) + sensors_8981013 | CREATE INDEX index_on_sensors_8981013 ON citus_split_test_schema_no_deferred_drop.sensors_8981013 USING btree (lower((measureid)::text)) + sensors_8981013 | CREATE INDEX index_with_include_on_sensors_8981013 ON citus_split_test_schema_no_deferred_drop.sensors_8981013 USING btree (((measure_data -> 'IsFailed'::text))) INCLUDE (measure_data, eventdatetime, measure_status) + sensors_8981013 | CREATE UNIQUE INDEX sensors_pkey_8981013 ON citus_split_test_schema_no_deferred_drop.sensors_8981013 USING btree (measureid, eventdatetime, measure_data) + sensors_8981014 | CREATE INDEX hash_index_on_sensors_8981014 ON citus_split_test_schema_no_deferred_drop.sensors_8981014 USING hash (((measure_data -> 'IsFailed'::text))) + sensors_8981014 | CREATE INDEX index_on_sensors_8981014 ON citus_split_test_schema_no_deferred_drop.sensors_8981014 USING btree (lower((measureid)::text)) + sensors_8981014 | CREATE INDEX index_with_include_on_sensors_8981014 ON citus_split_test_schema_no_deferred_drop.sensors_8981014 USING btree (((measure_data -> 'IsFailed'::text))) INCLUDE (measure_data, eventdatetime, measure_status) + sensors_8981014 | CREATE UNIQUE INDEX sensors_pkey_8981014 ON citus_split_test_schema_no_deferred_drop.sensors_8981014 USING btree (measureid, eventdatetime, measure_data) +(8 rows) + + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'table_with_index_rep_identity_%' ORDER BY 1,2; + tablename | indexdef +--------------------------------------------------------------------- + table_with_index_rep_identity_8981019 | CREATE UNIQUE INDEX uqx_8981019 ON citus_split_test_schema_no_deferred_drop.table_with_index_rep_identity_8981019 USING btree (key) + table_with_index_rep_identity_8981020 | CREATE UNIQUE INDEX uqx_8981020 ON citus_split_test_schema_no_deferred_drop.table_with_index_rep_identity_8981020 USING btree (key) +(2 rows) + + SELECT stxname FROM pg_statistic_ext + WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('citus_split_test_schema_no_deferred_drop') + ) + ORDER BY stxname ASC; + stxname +--------------------------------------------------------------------- + stats_on_sensors + stats_on_sensors_8981013 + stats_on_sensors_8981014 +(3 rows) + +\c - - - :worker_2_port + SET search_path TO "citus_split_test_schema_no_deferred_drop", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + SELECT tbl.relname, fk."Constraint", fk."Definition" + FROM pg_catalog.pg_class tbl + JOIN public.table_fkeys fk on tbl.oid = fk.relid + WHERE tbl.relname like 'sensors_%' + ORDER BY 1, 2; + relname | Constraint | Definition +--------------------------------------------------------------------- + sensors_8981007 | fkey_table_to_dist_8981007 | FOREIGN KEY (measureid) REFERENCES colocated_dist_table_8981009(measureid) + sensors_8981008 | fkey_table_to_dist_8981008 | FOREIGN KEY (measureid) REFERENCES colocated_dist_table_8981010(measureid) + sensors_8981015 | fkey_table_to_dist_8981015 | FOREIGN KEY (measureid) REFERENCES colocated_dist_table_8981018(measureid) +(3 rows) + + SELECT tablename, indexdef FROM pg_indexes WHERE 
tablename like 'sensors_%' ORDER BY 1,2; + tablename | indexdef +--------------------------------------------------------------------- + sensors_8981007 | CREATE INDEX hash_index_on_sensors_8981007 ON citus_split_test_schema_no_deferred_drop.sensors_8981007 USING hash (((measure_data -> 'IsFailed'::text))) + sensors_8981007 | CREATE INDEX index_on_sensors_8981007 ON citus_split_test_schema_no_deferred_drop.sensors_8981007 USING btree (lower((measureid)::text)) + sensors_8981007 | CREATE INDEX index_with_include_on_sensors_8981007 ON citus_split_test_schema_no_deferred_drop.sensors_8981007 USING btree (((measure_data -> 'IsFailed'::text))) INCLUDE (measure_data, eventdatetime, measure_status) + sensors_8981007 | CREATE UNIQUE INDEX sensors_pkey_8981007 ON citus_split_test_schema_no_deferred_drop.sensors_8981007 USING btree (measureid, eventdatetime, measure_data) + sensors_8981008 | CREATE INDEX hash_index_on_sensors_8981008 ON citus_split_test_schema_no_deferred_drop.sensors_8981008 USING hash (((measure_data -> 'IsFailed'::text))) + sensors_8981008 | CREATE INDEX index_on_sensors_8981008 ON citus_split_test_schema_no_deferred_drop.sensors_8981008 USING btree (lower((measureid)::text)) + sensors_8981008 | CREATE INDEX index_with_include_on_sensors_8981008 ON citus_split_test_schema_no_deferred_drop.sensors_8981008 USING btree (((measure_data -> 'IsFailed'::text))) INCLUDE (measure_data, eventdatetime, measure_status) + sensors_8981008 | CREATE UNIQUE INDEX sensors_pkey_8981008 ON citus_split_test_schema_no_deferred_drop.sensors_8981008 USING btree (measureid, eventdatetime, measure_data) + sensors_8981015 | CREATE INDEX hash_index_on_sensors_8981015 ON citus_split_test_schema_no_deferred_drop.sensors_8981015 USING hash (((measure_data -> 'IsFailed'::text))) + sensors_8981015 | CREATE INDEX index_on_sensors_8981015 ON citus_split_test_schema_no_deferred_drop.sensors_8981015 USING btree (lower((measureid)::text)) + sensors_8981015 | CREATE INDEX index_with_include_on_sensors_8981015 ON citus_split_test_schema_no_deferred_drop.sensors_8981015 USING btree (((measure_data -> 'IsFailed'::text))) INCLUDE (measure_data, eventdatetime, measure_status) + sensors_8981015 | CREATE UNIQUE INDEX sensors_pkey_8981015 ON citus_split_test_schema_no_deferred_drop.sensors_8981015 USING btree (measureid, eventdatetime, measure_data) +(12 rows) + + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'table_with_index_rep_identity_%' ORDER BY 1,2; + tablename | indexdef +--------------------------------------------------------------------- + table_with_index_rep_identity_8981011 | CREATE UNIQUE INDEX uqx_8981011 ON citus_split_test_schema_no_deferred_drop.table_with_index_rep_identity_8981011 USING btree (key) + table_with_index_rep_identity_8981012 | CREATE UNIQUE INDEX uqx_8981012 ON citus_split_test_schema_no_deferred_drop.table_with_index_rep_identity_8981012 USING btree (key) + table_with_index_rep_identity_8981021 | CREATE UNIQUE INDEX uqx_8981021 ON citus_split_test_schema_no_deferred_drop.table_with_index_rep_identity_8981021 USING btree (key) +(3 rows) + + SELECT stxname FROM pg_statistic_ext + WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('citus_split_test_schema_no_deferred_drop') + ) + ORDER BY stxname ASC; + stxname +--------------------------------------------------------------------- + stats_on_sensors + stats_on_sensors_8981007 + stats_on_sensors_8981008 + stats_on_sensors_8981015 +(4 rows) + +-- END : Display current state +-- BEGIN: Should be able to 
change/drop constraints +\c - postgres - :master_port +SET citus.defer_drop_after_shard_split TO OFF; +SET ROLE test_split_deferred_role; +SET search_path TO "citus_split_test_schema_no_deferred_drop"; +ALTER INDEX index_on_sensors RENAME TO index_on_sensors_renamed; +ALTER INDEX index_on_sensors_renamed ALTER COLUMN 1 SET STATISTICS 200; +DROP STATISTICS stats_on_sensors; +DROP INDEX index_on_sensors_renamed; +ALTER TABLE sensors DROP CONSTRAINT fkey_table_to_dist; +-- END: Should be able to change/drop constraints +-- BEGIN: Split second time on another schema +SET search_path TO public; +SET citus.next_shard_id TO 8981031; +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981007, + ARRAY['-2100000000'], + ARRAY[:worker_1_node, :worker_2_node], + 'block_writes'); + citus_split_shard_by_split_points +--------------------------------------------------------------------- + +(1 row) + +SET search_path TO "citus_split_test_schema_no_deferred_drop"; +SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport + FROM pg_dist_shard AS shard + INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid + INNER JOIN pg_dist_node node ON placement.groupid = node.groupid + INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid + WHERE node.noderole = 'primary' AND (logicalrelid = 'sensors'::regclass OR logicalrelid = 'colocated_dist_table'::regclass OR logicalrelid = 'table_with_index_rep_identity'::regclass) + ORDER BY logicalrelid, shardminvalue::BIGINT; + shardid | logicalrelid | shardminvalue | shardmaxvalue | nodename | nodeport +--------------------------------------------------------------------- + 8981031 | sensors | -2147483648 | -2100000000 | localhost | 57637 + 8981032 | sensors | -2099999999 | -1073741824 | localhost | 57638 + 8981008 | sensors | -1073741823 | -1 | localhost | 57638 + 8981013 | sensors | 0 | 536870911 | localhost | 57637 + 8981014 | sensors | 536870912 | 1610612735 | localhost | 57637 + 8981015 | sensors | 1610612736 | 2147483647 | localhost | 57638 + 8981033 | colocated_dist_table | -2147483648 | -2100000000 | localhost | 57637 + 8981034 | colocated_dist_table | -2099999999 | -1073741824 | localhost | 57638 + 8981010 | colocated_dist_table | -1073741823 | -1 | localhost | 57638 + 8981016 | colocated_dist_table | 0 | 536870911 | localhost | 57637 + 8981017 | colocated_dist_table | 536870912 | 1610612735 | localhost | 57637 + 8981018 | colocated_dist_table | 1610612736 | 2147483647 | localhost | 57638 + 8981035 | table_with_index_rep_identity | -2147483648 | -2100000000 | localhost | 57637 + 8981036 | table_with_index_rep_identity | -2099999999 | -1073741824 | localhost | 57638 + 8981012 | table_with_index_rep_identity | -1073741823 | -1 | localhost | 57638 + 8981019 | table_with_index_rep_identity | 0 | 536870911 | localhost | 57637 + 8981020 | table_with_index_rep_identity | 536870912 | 1610612735 | localhost | 57637 + 8981021 | table_with_index_rep_identity | 1610612736 | 2147483647 | localhost | 57638 +(18 rows) + +-- END: Split second time on another schema +-- BEGIN: Validate Data Count +SELECT COUNT(*) FROM sensors; + count +--------------------------------------------------------------------- + 1001 +(1 row) + +SELECT COUNT(*) FROM reference_table; + count +--------------------------------------------------------------------- + 1001 +(1 row) + +SELECT COUNT(*) FROM colocated_dist_table; + count +--------------------------------------------------------------------- + 1001 +(1 row) + +-- END: Validate Data 
Count +--BEGIN : Cleanup +\c - postgres - :master_port +DROP SCHEMA "citus_split_test_schema_no_deferred_drop" CASCADE; +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table citus_split_test_schema_no_deferred_drop.sensors +drop cascades to table citus_split_test_schema_no_deferred_drop.reference_table +drop cascades to table citus_split_test_schema_no_deferred_drop.colocated_dist_table +drop cascades to table citus_split_test_schema_no_deferred_drop.table_with_index_rep_identity +SET citus.defer_drop_after_shard_split TO ON; +--END : Cleanup diff --git a/src/test/regress/expected/create_distributed_table_concurrently.out b/src/test/regress/expected/create_distributed_table_concurrently.out index 19298a39d..e9de5af1c 100644 --- a/src/test/regress/expected/create_distributed_table_concurrently.out +++ b/src/test/regress/expected/create_distributed_table_concurrently.out @@ -117,6 +117,10 @@ NOTICE: renaming the new table to create_distributed_table_concurrently.test NOTICE: creating a new table for create_distributed_table_concurrently.ref NOTICE: moving the data of create_distributed_table_concurrently.ref NOTICE: dropping the old create_distributed_table_concurrently.ref +NOTICE: drop cascades to constraint test_id_fkey_1190041 on table create_distributed_table_concurrently.test_1190041 +CONTEXT: SQL statement "SELECT citus_drop_all_shards(v_obj.objid, v_obj.schema_name, v_obj.object_name, drop_shards_metadata_only := false)" +PL/pgSQL function citus_drop_trigger() line XX at PERFORM +SQL statement "DROP TABLE create_distributed_table_concurrently.ref CASCADE" NOTICE: renaming the new table to create_distributed_table_concurrently.ref NOTICE: creating a new table for create_distributed_table_concurrently.test_1 NOTICE: moving the data of create_distributed_table_concurrently.test_1 diff --git a/src/test/regress/expected/failure_split_cleanup.out b/src/test/regress/expected/failure_split_cleanup.out new file mode 100644 index 000000000..ec4810a5c --- /dev/null +++ b/src/test/regress/expected/failure_split_cleanup.out @@ -0,0 +1,800 @@ +-- The test exercises the failure scenarios below +--1. Failure while creating publications +--2. Failure while creating shared memory segment +--3. Failure while creating replication slots +--4. Failure while enabling subscription +--5. Failure on polling subscription state +--6. Failure on polling last write-ahead log location reported to origin WAL sender +--7. Failure on dropping subscription +CREATE SCHEMA "citus_failure_split_cleanup_schema"; +SET search_path TO "citus_failure_split_cleanup_schema"; +SET citus.next_shard_id TO 8981000; +SET citus.next_placement_id TO 8610000; +SET citus.next_operation_id TO 777; +SET citus.next_cleanup_record_id TO 11; +SET citus.shard_count TO 2; +SET citus.shard_replication_factor TO 1; +SELECT pg_backend_pid() as pid \gset +-- Disable defer shard delete to stop auto cleanup.
+ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1; +SELECT pg_reload_conf(); + pg_reload_conf +--------------------------------------------------------------------- + t +(1 row) + +-- Connections on the proxy port(worker_2) are monitored +SELECT nodeid AS worker_1_node FROM pg_dist_node WHERE nodeport=:worker_1_port \gset +SELECT nodeid AS worker_2_node FROM pg_dist_node WHERE nodeport=:worker_2_proxy_port \gset +CREATE TABLE table_to_split(id int PRIMARY KEY, int_data int, data text); +SELECT create_distributed_table('table_to_split', 'id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +--1. Failure while creating publications + SELECT citus.mitmproxy('conn.onQuery(query="CREATE PUBLICATION .* FOR TABLE").killall()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + + SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + operation_id | object_type | object_name | node_group_id | policy_type +--------------------------------------------------------------------- + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0 +(4 rows) + + -- we need to allow connection so that we can connect to proxy + SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Left over child shards + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + relname +--------------------------------------------------------------------- + table_to_split_8981000 + table_to_split_8981002 + table_to_split_8981003 +(3 rows) + + -- Left over publications + SELECT pubname FROM pg_publication; + pubname +--------------------------------------------------------------------- +(0 rows) + + -- Left over replication slots + SELECT slot_name FROM pg_replication_slots; + slot_name +--------------------------------------------------------------------- +(0 rows) + + -- Left over subscriptions + SELECT subname FROM pg_subscription; + subname +--------------------------------------------------------------------- +(0 rows) + + \c - postgres - :master_port + CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 79 orphaned resources + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + operation_id | object_type | object_name | node_group_id | policy_type +--------------------------------------------------------------------- +(0 rows) + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Empty child shards after cleanup + SELECT relname FROM pg_class 
where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + relname +--------------------------------------------------------------------- + table_to_split_8981000 +(1 row) + + -- Empty publications + SELECT pubname FROM pg_publication; + pubname +--------------------------------------------------------------------- +(0 rows) + + -- Empty replication slot table + SELECT slot_name FROM pg_replication_slots; + slot_name +--------------------------------------------------------------------- +(0 rows) + + -- Empty subscriptions + SELECT subname FROM pg_subscription; + subname +--------------------------------------------------------------------- +(0 rows) + +--2. Failure while creating shared memory segment + \c - postgres - :master_port + SET citus.next_shard_id TO 8981002; + SET citus.next_operation_id TO 777; + SET citus.next_cleanup_record_id TO 11; + SELECT citus.mitmproxy('conn.onQuery(query="SELECT \* FROM pg_catalog.worker_split_shard_replication_setup\(.*").killall()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + + SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + operation_id | object_type | object_name | node_group_id | policy_type +--------------------------------------------------------------------- + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0 +(4 rows) + + -- we need to allow connection so that we can connect to proxy + SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Left over child shards + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + relname +--------------------------------------------------------------------- + table_to_split_8981000 + table_to_split_8981002 + table_to_split_8981003 +(3 rows) + + -- Left over publications + SELECT pubname FROM pg_publication; + pubname +--------------------------------------------------------------------- + citus_shard_split_publication_xxxxxxx_xxxxxxx + citus_shard_split_publication_xxxxxxx_xxxxxxx +(2 rows) + + -- Left over replication slots + SELECT slot_name FROM pg_replication_slots; + slot_name +--------------------------------------------------------------------- +(0 rows) + + -- Left over subscriptions + SELECT subname FROM pg_subscription; + subname +--------------------------------------------------------------------- +(0 rows) + + \c - postgres - :master_port + CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 4 orphaned resources + SELECT operation_id, object_type, object_name, 
node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + operation_id | object_type | object_name | node_group_id | policy_type +--------------------------------------------------------------------- +(0 rows) + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Empty child shards after cleanup + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + relname +--------------------------------------------------------------------- + table_to_split_8981000 +(1 row) + + -- Empty publications + SELECT pubname FROM pg_publication; + pubname +--------------------------------------------------------------------- + citus_shard_split_publication_xxxxxxx_xxxxxxx + citus_shard_split_publication_xxxxxxx_xxxxxxx +(2 rows) + + -- Empty replication slot table + SELECT slot_name FROM pg_replication_slots; + slot_name +--------------------------------------------------------------------- +(0 rows) + + -- Empty subscriptions + SELECT subname FROM pg_subscription; + subname +--------------------------------------------------------------------- +(0 rows) + +--3. Failure while executing 'CREATE_REPLICATION_SLOT' for Snapshot. + \c - postgres - :master_port + SET citus.next_shard_id TO 8981002; + SET citus.next_operation_id TO 777; + SET citus.next_cleanup_record_id TO 11; + SELECT citus.mitmproxy('conn.onQuery(query="CREATE_REPLICATION_SLOT .* LOGICAL .* EXPORT_SNAPSHOT.*").killall()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + + SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +WARNING: connection not open +CONTEXT: while executing command on localhost:xxxxx +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + operation_id | object_type | object_name | node_group_id | policy_type +--------------------------------------------------------------------- + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0 +(4 rows) + + -- we need to allow connection so that we can connect to proxy + SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Left over child shards + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + relname +--------------------------------------------------------------------- + table_to_split_8981000 + table_to_split_8981002 + table_to_split_8981003 +(3 rows) + + -- Left over publications + SELECT pubname FROM pg_publication; + pubname +--------------------------------------------------------------------- + citus_shard_split_publication_xxxxxxx_xxxxxxx + 
citus_shard_split_publication_xxxxxxx_xxxxxxx +(2 rows) + + -- Left over replication slots + SELECT slot_name FROM pg_replication_slots; + slot_name +--------------------------------------------------------------------- +(0 rows) + + -- Left over subscriptions + SELECT subname FROM pg_subscription; + subname +--------------------------------------------------------------------- +(0 rows) + + \c - postgres - :master_port + CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 4 orphaned resources + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + operation_id | object_type | object_name | node_group_id | policy_type +--------------------------------------------------------------------- +(0 rows) + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Empty child shards after cleanup + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + relname +--------------------------------------------------------------------- + table_to_split_8981000 +(1 row) + + -- Empty publications + SELECT pubname FROM pg_publication; + pubname +--------------------------------------------------------------------- + citus_shard_split_publication_xxxxxxx_xxxxxxx + citus_shard_split_publication_xxxxxxx_xxxxxxx +(2 rows) + + -- Empty replication slot table + SELECT slot_name FROM pg_replication_slots; + slot_name +--------------------------------------------------------------------- +(0 rows) + + -- Empty subscriptions + SELECT subname FROM pg_subscription; + subname +--------------------------------------------------------------------- +(0 rows) + +--4. 
Failure while enabling subscription + \c - postgres - :master_port + SET citus.next_shard_id TO 8981002; + SET citus.next_operation_id TO 777; + SET citus.next_cleanup_record_id TO 11; + SELECT citus.mitmproxy('conn.onQuery(query="ALTER SUBSCRIPTION .* ENABLE").killall()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + + SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + operation_id | object_type | object_name | node_group_id | policy_type +--------------------------------------------------------------------- + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0 +(4 rows) + + -- we need to allow connection so that we can connect to proxy + SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Left over child shards + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + relname +--------------------------------------------------------------------- + table_to_split_8981000 + table_to_split_8981002 + table_to_split_8981003 +(3 rows) + + -- Left over publications + SELECT pubname FROM pg_publication; + pubname +--------------------------------------------------------------------- + citus_shard_split_publication_xxxxxxx_xxxxxxx + citus_shard_split_publication_xxxxxxx_xxxxxxx +(2 rows) + + -- Left over replication slots + SELECT slot_name FROM pg_replication_slots; + slot_name +--------------------------------------------------------------------- + citus_shard_split_slot_xxxxxxx_xxxxxxx + citus_shard_split_slot_xxxxxxx_xxxxxxx +(2 rows) + + -- Left over subscriptions + SELECT subname FROM pg_subscription; + subname +--------------------------------------------------------------------- + citus_shard_split_subscription_xxxxxxx +(1 row) + + \c - postgres - :master_port + CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 4 orphaned resources + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + operation_id | object_type | object_name | node_group_id | policy_type +--------------------------------------------------------------------- +(0 rows) + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Empty child shards after cleanup + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + relname +--------------------------------------------------------------------- + table_to_split_8981000 +(1 row) + + -- Empty publications + SELECT pubname FROM pg_publication; + pubname 
+--------------------------------------------------------------------- + citus_shard_split_publication_xxxxxxx_xxxxxxx + citus_shard_split_publication_xxxxxxx_xxxxxxx +(2 rows) + + -- Empty replication slot table + SELECT slot_name FROM pg_replication_slots; + slot_name +--------------------------------------------------------------------- + citus_shard_split_slot_xxxxxxx_xxxxxxx + citus_shard_split_slot_xxxxxxx_xxxxxxx +(2 rows) + + -- Empty subscriptions + SELECT subname FROM pg_subscription; + subname +--------------------------------------------------------------------- + citus_shard_split_subscription_xxxxxxx +(1 row) + +--5. Failure on polling subscription state + \c - postgres - :master_port + SET citus.next_shard_id TO 8981002; + SET citus.next_operation_id TO 777; + SET citus.next_cleanup_record_id TO 11; + SELECT citus.mitmproxy('conn.onQuery(query="^SELECT count\(\*\) FROM pg_subscription_rel").killall()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + + SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + operation_id | object_type | object_name | node_group_id | policy_type +--------------------------------------------------------------------- + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0 +(4 rows) + + -- we need to allow connection so that we can connect to proxy + SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Left over child shards + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + relname +--------------------------------------------------------------------- + table_to_split_8981000 + table_to_split_8981002 + table_to_split_8981003 +(3 rows) + + -- Left over publications + SELECT pubname FROM pg_publication; + pubname +--------------------------------------------------------------------- + citus_shard_split_publication_xxxxxxx_xxxxxxx + citus_shard_split_publication_xxxxxxx_xxxxxxx +(2 rows) + + -- Left over replication slots + SELECT slot_name FROM pg_replication_slots; + slot_name +--------------------------------------------------------------------- + citus_shard_split_slot_xxxxxxx_xxxxxxx + citus_shard_split_slot_xxxxxxx_xxxxxxx +(2 rows) + + -- Left over subscriptions + SELECT subname FROM pg_subscription; + subname +--------------------------------------------------------------------- + citus_shard_split_subscription_xxxxxxx +(1 row) + + \c - postgres - :master_port + CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 4 orphaned resources + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + operation_id | object_type | object_name | 
node_group_id | policy_type +--------------------------------------------------------------------- +(0 rows) + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Empty child shards after cleanup + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + relname +--------------------------------------------------------------------- + table_to_split_8981000 +(1 row) + + -- Empty publications + SELECT pubname FROM pg_publication; + pubname +--------------------------------------------------------------------- + citus_shard_split_publication_xxxxxxx_xxxxxxx + citus_shard_split_publication_xxxxxxx_xxxxxxx +(2 rows) + + -- Empty replication slot table + SELECT slot_name FROM pg_replication_slots; + slot_name +--------------------------------------------------------------------- + citus_shard_split_slot_xxxxxxx_xxxxxxx + citus_shard_split_slot_xxxxxxx_xxxxxxx +(2 rows) + + -- Empty subscriptions + SELECT subname FROM pg_subscription; + subname +--------------------------------------------------------------------- + citus_shard_split_subscription_xxxxxxx +(1 row) + +--6. Failure on polling last write-ahead log location reported to origin WAL sender + \c - postgres - :master_port + SET citus.next_shard_id TO 8981002; + SET citus.next_operation_id TO 777; + SET citus.next_cleanup_record_id TO 11; + SELECT citus.mitmproxy('conn.onQuery(query="^SELECT min\(latest_end_lsn").killall()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + + SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); +ERROR: connection to the remote node localhost:xxxxx failed with the following error: connection not open + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + operation_id | object_type | object_name | node_group_id | policy_type +--------------------------------------------------------------------- + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0 +(4 rows) + + -- we need to allow connection so that we can connect to proxy + SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Left over child shards + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + relname +--------------------------------------------------------------------- + table_to_split_8981000 + table_to_split_8981002 + table_to_split_8981003 +(3 rows) + + -- Left over publications + SELECT pubname FROM pg_publication; + pubname +--------------------------------------------------------------------- + citus_shard_split_publication_xxxxxxx_xxxxxxx + citus_shard_split_publication_xxxxxxx_xxxxxxx +(2 rows) + + -- Left over replication slots + SELECT slot_name FROM pg_replication_slots; + slot_name 
+--------------------------------------------------------------------- + citus_shard_split_slot_xxxxxxx_xxxxxxx + citus_shard_split_slot_xxxxxxx_xxxxxxx +(2 rows) + + -- Left over subscriptions + SELECT subname FROM pg_subscription; + subname +--------------------------------------------------------------------- + citus_shard_split_subscription_xxxxxxx +(1 row) + + \c - postgres - :master_port + CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 4 orphaned resources + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + operation_id | object_type | object_name | node_group_id | policy_type +--------------------------------------------------------------------- +(0 rows) + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Empty child shards after cleanup + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + relname +--------------------------------------------------------------------- + table_to_split_8981000 +(1 row) + + -- Empty publications + SELECT pubname FROM pg_publication; + pubname +--------------------------------------------------------------------- + citus_shard_split_publication_xxxxxxx_xxxxxxx + citus_shard_split_publication_xxxxxxx_xxxxxxx +(2 rows) + + -- Empty replication slot table + SELECT slot_name FROM pg_replication_slots; + slot_name +--------------------------------------------------------------------- + citus_shard_split_slot_xxxxxxx_xxxxxxx + citus_shard_split_slot_xxxxxxx_xxxxxxx +(2 rows) + + -- Empty subscriptions + SELECT subname FROM pg_subscription; + subname +--------------------------------------------------------------------- + citus_shard_split_subscription_xxxxxxx +(1 row) + +--7. Failure on dropping subscription + \c - postgres - :master_port + SET citus.next_shard_id TO 8981002; + SET citus.next_operation_id TO 777; + SET citus.next_cleanup_record_id TO 11; + SELECT citus.mitmproxy('conn.onQuery(query="^DROP SUBSCRIPTION").killall()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + + SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); +ERROR: connection not open +CONTEXT: while executing command on localhost:xxxxx + -- NO records expected as we fail at 'DropAllLogicalReplicationLeftovers' before creating + -- any resources. 
+ SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + operation_id | object_type | object_name | node_group_id | policy_type +--------------------------------------------------------------------- +(0 rows) + + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + relname +--------------------------------------------------------------------- +(0 rows) + + -- we need to allow connection so that we can connect to proxy + SELECT citus.mitmproxy('conn.allow()'); + mitmproxy +--------------------------------------------------------------------- + +(1 row) + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Left over child shards + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + relname +--------------------------------------------------------------------- + table_to_split_8981000 +(1 row) + + -- Left over publications + SELECT pubname FROM pg_publication; + pubname +--------------------------------------------------------------------- + citus_shard_split_publication_xxxxxxx_xxxxxxx + citus_shard_split_publication_xxxxxxx_xxxxxxx +(2 rows) + + -- Left over replication slots + SELECT slot_name FROM pg_replication_slots; + slot_name +--------------------------------------------------------------------- + citus_shard_split_slot_xxxxxxx_xxxxxxx + citus_shard_split_slot_xxxxxxx_xxxxxxx +(2 rows) + + -- Left over subscriptions + SELECT subname FROM pg_subscription; + subname +--------------------------------------------------------------------- + citus_shard_split_subscription_xxxxxxx +(1 row) + + \c - postgres - :master_port + CALL pg_catalog.citus_cleanup_orphaned_resources(); + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + operation_id | object_type | object_name | node_group_id | policy_type +--------------------------------------------------------------------- +(0 rows) + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Empty child shards after cleanup + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + relname +--------------------------------------------------------------------- + table_to_split_8981000 +(1 row) + + -- Empty publications + SELECT pubname FROM pg_publication; + pubname +--------------------------------------------------------------------- + citus_shard_split_publication_xxxxxxx_xxxxxxx + citus_shard_split_publication_xxxxxxx_xxxxxxx +(2 rows) + + -- Empty replication slot table + SELECT slot_name FROM pg_replication_slots; + slot_name +--------------------------------------------------------------------- + citus_shard_split_slot_xxxxxxx_xxxxxxx + citus_shard_split_slot_xxxxxxx_xxxxxxx +(2 rows) + + -- Empty subscriptions + SELECT subname FROM pg_subscription; + subname +--------------------------------------------------------------------- + citus_shard_split_subscription_xxxxxxx +(1 row) + +-- Cleanup +\c - postgres - :master_port +DROP SCHEMA "citus_failure_split_cleanup_schema" CASCADE; +NOTICE: drop cascades to table citus_failure_split_cleanup_schema.table_to_split +-- Cleanup diff --git 
a/src/test/regress/expected/failure_tenant_isolation.out b/src/test/regress/expected/failure_tenant_isolation.out index 6b7fde064..7c604bd31 100644 --- a/src/test/regress/expected/failure_tenant_isolation.out +++ b/src/test/regress/expected/failure_tenant_isolation.out @@ -160,6 +160,8 @@ SELECT citus.mitmproxy('conn.onQuery(query="ALTER TABLE tenant_isolation.table_1 SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode => 'block_writes'); ERROR: canceling statement due to user request -- failure on dropping old colocated shard +-- Disable deferred drop otherwise we will skip the drop and operation will succeed instead of failing. +SET citus.defer_drop_after_shard_split TO OFF; SELECT citus.mitmproxy('conn.onQuery(query="DROP TABLE IF EXISTS tenant_isolation.table_2").kill()'); mitmproxy --------------------------------------------------------------------- @@ -197,6 +199,8 @@ SELECT citus.mitmproxy('conn.onQuery(query="DROP TABLE IF EXISTS tenant_isolatio SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode => 'block_writes'); ERROR: canceling statement due to user request +-- Re-enable deferred drop for rest of the tests. +SET citus.defer_drop_after_shard_split TO ON; -- failure on foreign key creation SELECT citus.mitmproxy('conn.onQuery(query="ADD CONSTRAINT table_2_ref_id_fkey FOREIGN KEY").kill()'); mitmproxy diff --git a/src/test/regress/expected/failure_tenant_isolation_nonblocking.out b/src/test/regress/expected/failure_tenant_isolation_nonblocking.out index 5af9df776..63719c4ec 100644 --- a/src/test/regress/expected/failure_tenant_isolation_nonblocking.out +++ b/src/test/regress/expected/failure_tenant_isolation_nonblocking.out @@ -330,6 +330,9 @@ SELECT citus.mitmproxy('conn.onQuery(query="select pg_drop_replication_slot").ca SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); ERROR: canceling statement due to user request -- failure on dropping old shard +-- failure on dropping old colocated shard +-- Disable deferred drop otherwise we will skip the drop and operation will succeed instead of failing. +SET citus.defer_drop_after_shard_split TO OFF; SELECT citus.mitmproxy('conn.onQuery(query="DROP TABLE IF EXISTS tenant_isolation.table_1").kill()'); mitmproxy --------------------------------------------------------------------- @@ -367,6 +370,8 @@ SELECT citus.mitmproxy('conn.onQuery(query="DROP TABLE IF EXISTS tenant_isolatio SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); ERROR: canceling statement due to user request +-- Re-enable deferred drop for rest of the tests. 
+SET citus.defer_drop_after_shard_split TO ON; -- failure on foreign key creation SELECT citus.mitmproxy('conn.onQuery(query="ADD CONSTRAINT table_2_ref_id_fkey FOREIGN KEY").kill()'); mitmproxy diff --git a/src/test/regress/expected/isolation_blocking_shard_split.out b/src/test/regress/expected/isolation_blocking_shard_split.out index b5b7dc71b..86d50e3e3 100644 --- a/src/test/regress/expected/isolation_blocking_shard_split.out +++ b/src/test/regress/expected/isolation_blocking_shard_split.out @@ -931,3 +931,316 @@ nodeport|success|result 57638|t | 1 (3 rows) + +starting permutation: s1-load-cache s1-start-connection s1-lock-to-split-shard s2-print-locks s2-blocking-shard-split s2-print-locks s2-show-pg_dist_cleanup s1-stop-connection +create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +step s1-load-cache: + -- Indirect way to load cache. + TRUNCATE to_split_table; + +step s1-start-connection: + SELECT start_session_level_connection_to_node('localhost', 57638); + +start_session_level_connection_to_node +--------------------------------------------------------------------- + +(1 row) + +step s1-lock-to-split-shard: + SELECT run_commands_on_session_level_connection_to_node('BEGIN; LOCK TABLE to_split_table_1500002 IN ACCESS SHARE MODE;'); + +run_commands_on_session_level_connection_to_node +--------------------------------------------------------------------- + +(1 row) + +step s2-print-locks: + SELECT * FROM master_run_on_worker( + ARRAY['localhost']::text[], + ARRAY[57638]::int[], + ARRAY[ + 'SELECT CONCAT(relation::regclass, ''-'', locktype, ''-'', mode) AS LockInfo FROM pg_locks + WHERE relation::regclass::text = ''to_split_table_1500002'';' + ]::text[], + false); + +node_name|node_port|success|result +--------------------------------------------------------------------- +localhost| 57638|t |to_split_table_1500002-relation-AccessShareLock +(1 row) + +step s2-blocking-shard-split: + SELECT pg_catalog.citus_split_shard_by_split_points( + 1500002, + ARRAY['1073741824'], + ARRAY[1, 2], + 'block_writes'); + +citus_split_shard_by_split_points +--------------------------------------------------------------------- + +(1 row) + +step s2-print-locks: + SELECT * FROM master_run_on_worker( + ARRAY['localhost']::text[], + ARRAY[57638]::int[], + ARRAY[ + 'SELECT CONCAT(relation::regclass, ''-'', locktype, ''-'', mode) AS LockInfo FROM pg_locks + WHERE relation::regclass::text = ''to_split_table_1500002'';' + ]::text[], + false); + +node_name|node_port|success|result +--------------------------------------------------------------------- +localhost| 57638|t |to_split_table_1500002-relation-AccessShareLock +(1 row) + +step s2-show-pg_dist_cleanup: + SELECT object_name, object_type, policy_type FROM pg_dist_cleanup; + +object_name |object_type|policy_type +--------------------------------------------------------------------- +public.to_split_table_1500002| 1| 2 +(1 row) + +step s1-stop-connection: + SELECT stop_session_level_connection_to_node(); + +stop_session_level_connection_to_node +--------------------------------------------------------------------- + +(1 row) + + +starting permutation: s1-start-connection s1-lock-to-split-shard s2-print-locks s2-blocking-shard-split s2-print-cluster s2-show-pg_dist_cleanup s1-stop-connection +create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +step s1-start-connection: + SELECT start_session_level_connection_to_node('localhost', 57638); + 
+start_session_level_connection_to_node +--------------------------------------------------------------------- + +(1 row) + +step s1-lock-to-split-shard: + SELECT run_commands_on_session_level_connection_to_node('BEGIN; LOCK TABLE to_split_table_1500002 IN ACCESS SHARE MODE;'); + +run_commands_on_session_level_connection_to_node +--------------------------------------------------------------------- + +(1 row) + +step s2-print-locks: + SELECT * FROM master_run_on_worker( + ARRAY['localhost']::text[], + ARRAY[57638]::int[], + ARRAY[ + 'SELECT CONCAT(relation::regclass, ''-'', locktype, ''-'', mode) AS LockInfo FROM pg_locks + WHERE relation::regclass::text = ''to_split_table_1500002'';' + ]::text[], + false); + +node_name|node_port|success|result +--------------------------------------------------------------------- +localhost| 57638|t |to_split_table_1500002-relation-AccessShareLock +(1 row) + +step s2-blocking-shard-split: + SELECT pg_catalog.citus_split_shard_by_split_points( + 1500002, + ARRAY['1073741824'], + ARRAY[1, 2], + 'block_writes'); + +citus_split_shard_by_split_points +--------------------------------------------------------------------- + +(1 row) + +step s2-print-cluster: + -- row count per shard + SELECT + nodeport, shardid, success, result + FROM + run_command_on_placements('to_split_table', 'select count(*) from %s') + ORDER BY + nodeport, shardid; + -- rows + SELECT id, value FROM to_split_table ORDER BY id, value; + +nodeport|shardid|success|result +--------------------------------------------------------------------- + 57637|1500001|t | 0 + 57637|1500003|t | 0 + 57638|1500004|t | 0 +(3 rows) + +id|value +--------------------------------------------------------------------- +(0 rows) + +step s2-show-pg_dist_cleanup: + SELECT object_name, object_type, policy_type FROM pg_dist_cleanup; + +object_name |object_type|policy_type +--------------------------------------------------------------------- +public.to_split_table_1500002| 1| 2 +(1 row) + +step s1-stop-connection: + SELECT stop_session_level_connection_to_node(); + +stop_session_level_connection_to_node +--------------------------------------------------------------------- + +(1 row) + + +starting permutation: s1-load-cache s1-acquire-split-advisory-lock s2-blocking-shard-split s1-run-cleaner s1-show-pg_dist_cleanup s1-release-split-advisory-lock s1-run-cleaner s2-show-pg_dist_cleanup +create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +step s1-load-cache: + -- Indirect way to load cache. 
+ TRUNCATE to_split_table; + +step s1-acquire-split-advisory-lock: + SELECT pg_advisory_lock(44000, 55152); + +pg_advisory_lock +--------------------------------------------------------------------- + +(1 row) + +step s2-blocking-shard-split: + SELECT pg_catalog.citus_split_shard_by_split_points( + 1500002, + ARRAY['1073741824'], + ARRAY[1, 2], + 'block_writes'); + +step s1-run-cleaner: + SELECT run_try_drop_marked_shards(); + +run_try_drop_marked_shards +--------------------------------------------------------------------- + +(1 row) + +step s1-show-pg_dist_cleanup: + SELECT object_name, object_type, policy_type FROM pg_dist_cleanup; + +object_name |object_type|policy_type +--------------------------------------------------------------------- +public.to_split_table_1500003| 1| 1 +public.to_split_table_1500004| 1| 1 +(2 rows) + +step s1-release-split-advisory-lock: + SELECT pg_advisory_unlock(44000, 55152); + +pg_advisory_unlock +--------------------------------------------------------------------- +t +(1 row) + +step s2-blocking-shard-split: <... completed> +citus_split_shard_by_split_points +--------------------------------------------------------------------- + +(1 row) + +step s1-run-cleaner: + SELECT run_try_drop_marked_shards(); + +run_try_drop_marked_shards +--------------------------------------------------------------------- + +(1 row) + +step s2-show-pg_dist_cleanup: + SELECT object_name, object_type, policy_type FROM pg_dist_cleanup; + +object_name|object_type|policy_type +--------------------------------------------------------------------- +(0 rows) + + +starting permutation: s1-acquire-split-advisory-lock s2-blocking-shard-split s1-run-cleaner s1-show-pg_dist_cleanup s1-release-split-advisory-lock s1-run-cleaner s2-show-pg_dist_cleanup +create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +step s1-acquire-split-advisory-lock: + SELECT pg_advisory_lock(44000, 55152); + +pg_advisory_lock +--------------------------------------------------------------------- + +(1 row) + +step s2-blocking-shard-split: + SELECT pg_catalog.citus_split_shard_by_split_points( + 1500002, + ARRAY['1073741824'], + ARRAY[1, 2], + 'block_writes'); + +step s1-run-cleaner: + SELECT run_try_drop_marked_shards(); + +run_try_drop_marked_shards +--------------------------------------------------------------------- + +(1 row) + +step s1-show-pg_dist_cleanup: + SELECT object_name, object_type, policy_type FROM pg_dist_cleanup; + +object_name |object_type|policy_type +--------------------------------------------------------------------- +public.to_split_table_1500003| 1| 1 +public.to_split_table_1500004| 1| 1 +(2 rows) + +step s1-release-split-advisory-lock: + SELECT pg_advisory_unlock(44000, 55152); + +pg_advisory_unlock +--------------------------------------------------------------------- +t +(1 row) + +step s2-blocking-shard-split: <... 
completed> +citus_split_shard_by_split_points +--------------------------------------------------------------------- + +(1 row) + +step s1-run-cleaner: + SELECT run_try_drop_marked_shards(); + +run_try_drop_marked_shards +--------------------------------------------------------------------- + +(1 row) + +step s2-show-pg_dist_cleanup: + SELECT object_name, object_type, policy_type FROM pg_dist_cleanup; + +object_name|object_type|policy_type +--------------------------------------------------------------------- +(0 rows) + diff --git a/src/test/regress/expected/isolation_non_blocking_shard_split.out b/src/test/regress/expected/isolation_non_blocking_shard_split.out index e52dff295..65356f2d6 100644 --- a/src/test/regress/expected/isolation_non_blocking_shard_split.out +++ b/src/test/regress/expected/isolation_non_blocking_shard_split.out @@ -773,3 +773,319 @@ id|value --------------------------------------------------------------------- (0 rows) + +starting permutation: s1-load-cache s1-start-connection s1-lock-to-split-shard s2-print-locks s2-non-blocking-shard-split s2-print-locks s2-show-pg_dist_cleanup s1-stop-connection +create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +step s1-load-cache: + -- Indirect way to load cache. + TRUNCATE to_split_table; + +step s1-start-connection: + SELECT start_session_level_connection_to_node('localhost', 57637); + +start_session_level_connection_to_node +--------------------------------------------------------------------- + +(1 row) + +step s1-lock-to-split-shard: + SELECT run_commands_on_session_level_connection_to_node('BEGIN; LOCK TABLE to_split_table_1500001 IN ACCESS SHARE MODE;'); + +run_commands_on_session_level_connection_to_node +--------------------------------------------------------------------- + +(1 row) + +step s2-print-locks: + SELECT * FROM master_run_on_worker( + ARRAY['localhost']::text[], + ARRAY[57637]::int[], + ARRAY[ + 'SELECT CONCAT(relation::regclass, ''-'', locktype, ''-'', mode) AS LockInfo FROM pg_locks + WHERE relation::regclass::text = ''to_split_table_1500001'';' + ]::text[], + false); + +node_name|node_port|success|result +--------------------------------------------------------------------- +localhost| 57637|t |to_split_table_1500001-relation-AccessShareLock +(1 row) + +step s2-non-blocking-shard-split: + SELECT pg_catalog.citus_split_shard_by_split_points( + 1500001, + ARRAY['-1073741824'], + ARRAY[1, 2], + 'force_logical'); + +citus_split_shard_by_split_points +--------------------------------------------------------------------- + +(1 row) + +step s2-print-locks: + SELECT * FROM master_run_on_worker( + ARRAY['localhost']::text[], + ARRAY[57637]::int[], + ARRAY[ + 'SELECT CONCAT(relation::regclass, ''-'', locktype, ''-'', mode) AS LockInfo FROM pg_locks + WHERE relation::regclass::text = ''to_split_table_1500001'';' + ]::text[], + false); + +node_name|node_port|success|result +--------------------------------------------------------------------- +localhost| 57637|t |to_split_table_1500001-relation-AccessShareLock +(1 row) + +step s2-show-pg_dist_cleanup: + SELECT object_name, object_type, policy_type FROM pg_dist_cleanup; + +object_name |object_type|policy_type +--------------------------------------------------------------------- +public.to_split_table_1500001| 1| 2 +(1 row) + +step s1-stop-connection: + SELECT stop_session_level_connection_to_node(); + +stop_session_level_connection_to_node 
+--------------------------------------------------------------------- + +(1 row) + + +starting permutation: s1-start-connection s1-lock-to-split-shard s2-print-locks s2-non-blocking-shard-split s2-print-cluster s2-show-pg_dist_cleanup s1-stop-connection +create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +step s1-start-connection: + SELECT start_session_level_connection_to_node('localhost', 57637); + +start_session_level_connection_to_node +--------------------------------------------------------------------- + +(1 row) + +step s1-lock-to-split-shard: + SELECT run_commands_on_session_level_connection_to_node('BEGIN; LOCK TABLE to_split_table_1500001 IN ACCESS SHARE MODE;'); + +run_commands_on_session_level_connection_to_node +--------------------------------------------------------------------- + +(1 row) + +step s2-print-locks: + SELECT * FROM master_run_on_worker( + ARRAY['localhost']::text[], + ARRAY[57637]::int[], + ARRAY[ + 'SELECT CONCAT(relation::regclass, ''-'', locktype, ''-'', mode) AS LockInfo FROM pg_locks + WHERE relation::regclass::text = ''to_split_table_1500001'';' + ]::text[], + false); + +node_name|node_port|success|result +--------------------------------------------------------------------- +localhost| 57637|t |to_split_table_1500001-relation-AccessShareLock +(1 row) + +step s2-non-blocking-shard-split: + SELECT pg_catalog.citus_split_shard_by_split_points( + 1500001, + ARRAY['-1073741824'], + ARRAY[1, 2], + 'force_logical'); + +citus_split_shard_by_split_points +--------------------------------------------------------------------- + +(1 row) + +step s2-print-cluster: + -- row count per shard + SELECT + nodeport, shardid, success, result + FROM + run_command_on_placements('to_split_table', 'select count(*) from %s') + ORDER BY + nodeport, shardid; + -- rows + SELECT id, value FROM to_split_table ORDER BY id, value; + +nodeport|shardid|success|result +--------------------------------------------------------------------- + 57637|1500002|t | 0 + 57638|1500003|t | 0 +(2 rows) + +id|value +--------------------------------------------------------------------- +(0 rows) + +step s2-show-pg_dist_cleanup: + SELECT object_name, object_type, policy_type FROM pg_dist_cleanup; + +object_name |object_type|policy_type +--------------------------------------------------------------------- +public.to_split_table_1500001| 1| 2 +(1 row) + +step s1-stop-connection: + SELECT stop_session_level_connection_to_node(); + +stop_session_level_connection_to_node +--------------------------------------------------------------------- + +(1 row) + + +starting permutation: s1-load-cache s1-acquire-split-advisory-lock s2-non-blocking-shard-split s1-run-cleaner s1-show-pg_dist_cleanup s1-release-split-advisory-lock s1-run-cleaner s2-show-pg_dist_cleanup +create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +step s1-load-cache: + -- Indirect way to load cache. 
+ TRUNCATE to_split_table; + +step s1-acquire-split-advisory-lock: + SELECT pg_advisory_lock(44000, 55152); + +pg_advisory_lock +--------------------------------------------------------------------- + +(1 row) + +step s2-non-blocking-shard-split: + SELECT pg_catalog.citus_split_shard_by_split_points( + 1500001, + ARRAY['-1073741824'], + ARRAY[1, 2], + 'force_logical'); + +step s1-run-cleaner: + SELECT run_try_drop_marked_shards(); + +run_try_drop_marked_shards +--------------------------------------------------------------------- + +(1 row) + +step s1-show-pg_dist_cleanup: + SELECT object_name, object_type, policy_type FROM pg_dist_cleanup; + +object_name |object_type|policy_type +--------------------------------------------------------------------- +public.to_split_table_1500002| 1| 1 +public.to_split_table_1500003| 1| 1 +public.to_split_table_1500001| 1| 0 +public.to_split_table_1500003| 1| 0 +(4 rows) + +step s1-release-split-advisory-lock: + SELECT pg_advisory_unlock(44000, 55152); + +pg_advisory_unlock +--------------------------------------------------------------------- +t +(1 row) + +step s2-non-blocking-shard-split: <... completed> +citus_split_shard_by_split_points +--------------------------------------------------------------------- + +(1 row) + +step s1-run-cleaner: + SELECT run_try_drop_marked_shards(); + +run_try_drop_marked_shards +--------------------------------------------------------------------- + +(1 row) + +step s2-show-pg_dist_cleanup: + SELECT object_name, object_type, policy_type FROM pg_dist_cleanup; + +object_name|object_type|policy_type +--------------------------------------------------------------------- +(0 rows) + + +starting permutation: s1-acquire-split-advisory-lock s2-non-blocking-shard-split s1-run-cleaner s1-show-pg_dist_cleanup s1-release-split-advisory-lock s1-run-cleaner s2-show-pg_dist_cleanup +create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +step s1-acquire-split-advisory-lock: + SELECT pg_advisory_lock(44000, 55152); + +pg_advisory_lock +--------------------------------------------------------------------- + +(1 row) + +step s2-non-blocking-shard-split: + SELECT pg_catalog.citus_split_shard_by_split_points( + 1500001, + ARRAY['-1073741824'], + ARRAY[1, 2], + 'force_logical'); + +step s1-run-cleaner: + SELECT run_try_drop_marked_shards(); + +run_try_drop_marked_shards +--------------------------------------------------------------------- + +(1 row) + +step s1-show-pg_dist_cleanup: + SELECT object_name, object_type, policy_type FROM pg_dist_cleanup; + +object_name |object_type|policy_type +--------------------------------------------------------------------- +public.to_split_table_1500002| 1| 1 +public.to_split_table_1500003| 1| 1 +public.to_split_table_1500001| 1| 0 +public.to_split_table_1500003| 1| 0 +(4 rows) + +step s1-release-split-advisory-lock: + SELECT pg_advisory_unlock(44000, 55152); + +pg_advisory_unlock +--------------------------------------------------------------------- +t +(1 row) + +step s2-non-blocking-shard-split: <... 
completed> +citus_split_shard_by_split_points +--------------------------------------------------------------------- + +(1 row) + +step s1-run-cleaner: + SELECT run_try_drop_marked_shards(); + +run_try_drop_marked_shards +--------------------------------------------------------------------- + +(1 row) + +step s2-show-pg_dist_cleanup: + SELECT object_name, object_type, policy_type FROM pg_dist_cleanup; + +object_name|object_type|policy_type +--------------------------------------------------------------------- +(0 rows) + diff --git a/src/test/regress/expected/isolation_tenant_isolation_nonblocking.out b/src/test/regress/expected/isolation_tenant_isolation_nonblocking.out index 17766f85e..280e3183e 100644 --- a/src/test/regress/expected/isolation_tenant_isolation_nonblocking.out +++ b/src/test/regress/expected/isolation_tenant_isolation_nonblocking.out @@ -1181,13 +1181,8 @@ step s2-isolate-tenant: step s1-isolate-tenant-no-same-coloc-blocking: SELECT isolate_tenant_to_new_shard('isolation_table2', 2, shard_transfer_mode => 'block_writes'); - -isolate_tenant_to_new_shard ---------------------------------------------------------------------- - 1500211 -(1 row) - -step s3-release-advisory-lock: + +step s3-release-advisory-lock: SELECT pg_advisory_unlock(44000, 55152); pg_advisory_unlock @@ -1201,6 +1196,12 @@ isolate_tenant_to_new_shard 1500208 (1 row) +step s1-isolate-tenant-no-same-coloc-blocking: <... completed> +isolate_tenant_to_new_shard +--------------------------------------------------------------------- + 1500211 +(1 row) + step s2-print-cluster: -- row count per shard SELECT @@ -1253,13 +1254,8 @@ step s2-isolate-tenant: step s1-isolate-tenant-no-same-coloc-blocking: SELECT isolate_tenant_to_new_shard('isolation_table2', 2, shard_transfer_mode => 'block_writes'); - -isolate_tenant_to_new_shard ---------------------------------------------------------------------- - 1500222 -(1 row) - -step s3-release-advisory-lock: + +step s3-release-advisory-lock: SELECT pg_advisory_unlock(44000, 55152); pg_advisory_unlock @@ -1273,6 +1269,12 @@ isolate_tenant_to_new_shard 1500219 (1 row) +step s1-isolate-tenant-no-same-coloc-blocking: <... 
completed> +isolate_tenant_to_new_shard +--------------------------------------------------------------------- + 1500222 +(1 row) + step s2-print-cluster: -- row count per shard SELECT diff --git a/src/test/regress/expected/multi_colocated_shard_rebalance.out b/src/test/regress/expected/multi_colocated_shard_rebalance.out index 68b8925ed..1a5c3b71b 100644 --- a/src/test/regress/expected/multi_colocated_shard_rebalance.out +++ b/src/test/regress/expected/multi_colocated_shard_rebalance.out @@ -724,7 +724,7 @@ DETAIL: from localhost:xxxxx (1 row) CALL citus_cleanup_orphaned_shards(); -LOG: dropping shard placement xxxxx of shard xxxxx on localhost:xxxxx after it was moved away +LOG: cleaning up public.test_with_pkey_13000042 on localhost:xxxxx which was left after a move NOTICE: cleaned up 1 orphaned shards SET client_min_messages TO DEFAULT; -- we don't support multiple shard moves in a single transaction diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index c8f6c20b0..c79771817 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -1136,6 +1136,7 @@ SELECT * FROM multi_extension.print_extension_changes(); table columnar.chunk_group | table columnar.options | table columnar.stripe | + | function citus_cleanup_orphaned_resources() | function citus_internal_delete_partition_metadata(regclass) void | function citus_locks() SETOF record | function citus_split_shard_by_split_points(bigint,text[],integer[],citus.shard_transfer_mode) void @@ -1146,11 +1147,14 @@ SELECT * FROM multi_extension.print_extension_changes(); | function worker_split_copy(bigint,text,split_copy_info[]) void | function worker_split_shard_release_dsm() void | function worker_split_shard_replication_setup(split_shard_info[]) SETOF replication_slot_info + | sequence pg_dist_cleanup_recordid_seq + | sequence pg_dist_operationid_seq + | table pg_dist_cleanup | type replication_slot_info | type split_copy_info | type split_shard_info | view citus_locks -(37 rows) +(41 rows) DROP TABLE multi_extension.prev_objects, multi_extension.extension_diff; -- show running version diff --git a/src/test/regress/expected/multi_tenant_isolation.out b/src/test/regress/expected/multi_tenant_isolation.out index 046d814b0..9c196b184 100644 --- a/src/test/regress/expected/multi_tenant_isolation.out +++ b/src/test/regress/expected/multi_tenant_isolation.out @@ -448,6 +448,9 @@ SELECT * FROM pg_dist_shard_placement WHERE shardid >= 1230000 ORDER BY nodeport \COPY lineitem_streaming FROM STDIN WITH DELIMITER '|' ERROR: insert or update on table "lineitem_streaming_1230044" violates foreign key constraint "test_constraint_1230044" DETAIL: Key (l_orderkey)=(128) is not present in table "orders_streaming_1230046". 
+\c - postgres - :master_port +CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 24 orphaned resources -- connect to the worker node with metadata \c - mx_isolation_role_ent - :worker_1_port SET search_path to "Tenant Isolation"; @@ -713,6 +716,8 @@ SELECT * FROM pg_dist_shard orders_streaming | 1230047 | t | 2147483647 | 2147483647 (24 rows) +\c - postgres - :master_port +CALL pg_catalog.citus_cleanup_orphaned_resources(); -- test failure scenarios with triggers on workers \c - postgres - :worker_1_port SET search_path to "Tenant Isolation"; @@ -753,6 +758,8 @@ SET citus.override_table_visibility TO false; SET search_path to "Tenant Isolation"; \set VERBOSITY terse SELECT isolate_tenant_to_new_shard('orders_streaming', 104, 'CASCADE', shard_transfer_mode => 'block_writes'); +WARNING: command DROP TABLE is disabled +WARNING: Failed to cleanup 1 shards out of 1 ERROR: command CREATE TABLE is disabled \set VERBOSITY default \c - postgres - :worker_1_port @@ -792,7 +799,10 @@ $$; RESET citus.enable_metadata_sync; CREATE EVENT TRIGGER abort_drop ON sql_drop EXECUTE PROCEDURE abort_drop_command(); -\c - mx_isolation_role_ent - :master_port +\c - postgres - :master_port +-- Disable deferred drop otherwise we will skip the drop and operation will succeed instead of failing. +SET citus.defer_drop_after_shard_split TO OFF; +SET ROLE mx_isolation_role_ent; SET search_path to "Tenant Isolation"; \set VERBOSITY terse SELECT isolate_tenant_to_new_shard('orders_streaming', 104, 'CASCADE', shard_transfer_mode => 'block_writes'); @@ -802,6 +812,7 @@ WARNING: command DROP TABLE is disabled WARNING: command DROP TABLE is disabled WARNING: command DROP TABLE is disabled WARNING: command DROP TABLE is disabled +WARNING: Failed to cleanup 6 shards out of 6 ERROR: command DROP TABLE is disabled \set VERBOSITY default -- check if metadata is changed @@ -1084,6 +1095,9 @@ SELECT count(*) FROM test_colocated_table_2; 101 (1 row) +\c - postgres - :master_port +CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 10 orphaned resources \c - postgres - :worker_1_port -- show the foreign keys of the main table & its colocated shard on other tables SELECT tbl.relname, fk."Constraint", fk."Definition" diff --git a/src/test/regress/expected/multi_tenant_isolation_nonblocking.out b/src/test/regress/expected/multi_tenant_isolation_nonblocking.out index 1c87236a0..4dc4a6809 100644 --- a/src/test/regress/expected/multi_tenant_isolation_nonblocking.out +++ b/src/test/regress/expected/multi_tenant_isolation_nonblocking.out @@ -448,6 +448,9 @@ SELECT * FROM pg_dist_shard_placement WHERE shardid >= 1230000 ORDER BY nodeport \COPY lineitem_streaming FROM STDIN WITH DELIMITER '|' ERROR: insert or update on table "lineitem_streaming_1230044" violates foreign key constraint "test_constraint_1230044" DETAIL: Key (l_orderkey)=(128) is not present in table "orders_streaming_1230046". 
+\c - postgres - :master_port +CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 30 orphaned resources -- connect to the worker node with metadata \c - mx_isolation_role_ent - :worker_1_port SET search_path to "Tenant Isolation"; @@ -713,6 +716,8 @@ SELECT * FROM pg_dist_shard orders_streaming | 1230047 | t | 2147483647 | 2147483647 (24 rows) +\c - postgres - :master_port +CALL pg_catalog.citus_cleanup_orphaned_resources(); -- test failure scenarios with triggers on workers \c - postgres - :worker_1_port SET search_path to "Tenant Isolation"; @@ -753,6 +758,8 @@ SET citus.override_table_visibility TO false; SET search_path to "Tenant Isolation"; \set VERBOSITY terse SELECT isolate_tenant_to_new_shard('orders_streaming', 104, 'CASCADE', shard_transfer_mode => 'force_logical'); +WARNING: command DROP TABLE is disabled +WARNING: Failed to cleanup 1 shards out of 1 ERROR: command CREATE TABLE is disabled \set VERBOSITY default \c - postgres - :worker_1_port @@ -796,12 +803,6 @@ CREATE EVENT TRIGGER abort_drop ON sql_drop SET search_path to "Tenant Isolation"; \set VERBOSITY terse SELECT isolate_tenant_to_new_shard('orders_streaming', 104, 'CASCADE', shard_transfer_mode => 'force_logical'); -WARNING: command DROP TABLE is disabled -WARNING: command DROP TABLE is disabled -WARNING: command DROP TABLE is disabled -WARNING: command DROP TABLE is disabled -WARNING: command DROP TABLE is disabled -WARNING: command DROP TABLE is disabled ERROR: command DROP SUBSCRIPTION is disabled \set VERBOSITY default -- check if metadata is changed @@ -1099,6 +1100,9 @@ SELECT count(*) FROM test_colocated_table_2; 101 (1 row) +\c - postgres - :master_port +CALL pg_catalog.citus_cleanup_orphaned_resources(); +NOTICE: cleaned up 10 orphaned resources \c - postgres - :worker_1_port -- show the foreign keys of the main table & its colocated shard on other tables SELECT tbl.relname, fk."Constraint", fk."Definition" diff --git a/src/test/regress/expected/upgrade_basic_after.out b/src/test/regress/expected/upgrade_basic_after.out index ee0e93999..ad77b2157 100644 --- a/src/test/regress/expected/upgrade_basic_after.out +++ b/src/test/regress/expected/upgrade_basic_after.out @@ -39,6 +39,18 @@ SELECT nextval('pg_dist_colocationid_seq') = MAX(colocationid)+1 FROM pg_dist_co t (1 row) +SELECT nextval('pg_dist_operationid_seq') = MAX(operation_id)+1 FROM pg_dist_cleanup; + ?column? +--------------------------------------------------------------------- + +(1 row) + +SELECT nextval('pg_dist_cleanup_recordid_seq') = MAX(record_id)+1 FROM pg_dist_cleanup; + ?column? +--------------------------------------------------------------------- + +(1 row) + -- If this query gives output it means we've added a new sequence that should -- possibly be restored after upgrades. 
SELECT sequence_name FROM information_schema.sequences @@ -49,7 +61,9 @@ SELECT sequence_name FROM information_schema.sequences 'pg_dist_placement_placementid_seq', 'pg_dist_groupid_seq', 'pg_dist_node_nodeid_seq', - 'pg_dist_colocationid_seq' + 'pg_dist_colocationid_seq', + 'pg_dist_operationid_seq', + 'pg_dist_cleanup_recordid_seq' ); sequence_name --------------------------------------------------------------------- diff --git a/src/test/regress/expected/upgrade_list_citus_objects.out b/src/test/regress/expected/upgrade_list_citus_objects.out index 98bc47c6d..bf15d2fbf 100644 --- a/src/test/regress/expected/upgrade_list_citus_objects.out +++ b/src/test/regress/expected/upgrade_list_citus_objects.out @@ -31,6 +31,7 @@ ORDER BY 1; function citus_calculate_gpid(integer,integer) function citus_check_cluster_node_health() function citus_check_connection_to_node(text,integer) + function citus_cleanup_orphaned_resources() function citus_cleanup_orphaned_shards() function citus_conninfo_cache_invalidate() function citus_coordinator_nodeid() @@ -235,12 +236,15 @@ ORDER BY 1; function worker_split_shard_replication_setup(split_shard_info[]) schema citus schema citus_internal + sequence pg_dist_cleanup_recordid_seq sequence pg_dist_colocationid_seq sequence pg_dist_groupid_seq sequence pg_dist_node_nodeid_seq + sequence pg_dist_operationid_seq sequence pg_dist_placement_placementid_seq sequence pg_dist_shardid_seq table pg_dist_authinfo + table pg_dist_cleanup table pg_dist_colocation table pg_dist_local_group table pg_dist_node @@ -270,5 +274,5 @@ ORDER BY 1; view citus_stat_statements view pg_dist_shard_placement view time_partitions -(262 rows) +(266 rows) diff --git a/src/test/regress/spec/isolation_blocking_shard_split.spec b/src/test/regress/spec/isolation_blocking_shard_split.spec index bb2f93368..86611cc2a 100644 --- a/src/test/regress/spec/isolation_blocking_shard_split.spec +++ b/src/test/regress/spec/isolation_blocking_shard_split.spec @@ -1,15 +1,25 @@ +#include "isolation_mx_common.include.spec" + setup { SET citus.shard_count to 2; SET citus.shard_replication_factor to 1; SELECT setval('pg_dist_shardid_seq', 1500000); + -- Cleanup any orphan shards that might be left over from a previous run. + CREATE OR REPLACE FUNCTION run_try_drop_marked_shards() + RETURNS VOID + AS 'citus' + LANGUAGE C STRICT VOLATILE; + CREATE TABLE to_split_table (id int, value int); SELECT create_distributed_table('to_split_table', 'id'); } teardown { + SELECT run_try_drop_marked_shards(); + DROP TABLE to_split_table; } @@ -64,6 +74,44 @@ step "s1-copy" COPY to_split_table FROM PROGRAM 'echo "1,1\n2,2\n3,3\n4,4\n5,5"' WITH CSV; } +step "s1-lock-to-split-shard" +{ + SELECT run_commands_on_session_level_connection_to_node('BEGIN; LOCK TABLE to_split_table_1500002 IN ACCESS SHARE MODE;'); +} + +step "s1-start-connection" +{ + SELECT start_session_level_connection_to_node('localhost', 57638); +} + +step "s1-stop-connection" +{ + SELECT stop_session_level_connection_to_node(); +} + +// this advisory lock with (almost) random values are only used +// for testing purposes. 
For details, check Citus' logical replication +// source code +step "s1-acquire-split-advisory-lock" +{ + SELECT pg_advisory_lock(44000, 55152); +} + +step "s1-release-split-advisory-lock" +{ + SELECT pg_advisory_unlock(44000, 55152); +} + +step "s1-run-cleaner" +{ + SELECT run_try_drop_marked_shards(); +} + +step "s1-show-pg_dist_cleanup" +{ + SELECT object_name, object_type, policy_type FROM pg_dist_cleanup; +} + step "s1-blocking-shard-split" { SELECT pg_catalog.citus_split_shard_by_split_points( @@ -85,6 +133,23 @@ step "s2-begin" BEGIN; } +step "s2-print-locks" +{ + SELECT * FROM master_run_on_worker( + ARRAY['localhost']::text[], + ARRAY[57638]::int[], + ARRAY[ + 'SELECT CONCAT(relation::regclass, ''-'', locktype, ''-'', mode) AS LockInfo FROM pg_locks + WHERE relation::regclass::text = ''to_split_table_1500002'';' + ]::text[], + false); +} + +step "s2-show-pg_dist_cleanup" +{ + SELECT object_name, object_type, policy_type FROM pg_dist_cleanup; +} + step "s2-blocking-shard-split" { SELECT pg_catalog.citus_split_shard_by_split_points( @@ -144,3 +209,13 @@ permutation "s1-insert" "s1-begin" "s1-blocking-shard-split" "s2-blocking-shard- permutation "s1-load-cache" "s1-begin" "s1-select" "s2-begin" "s2-blocking-shard-split" "s1-ddl" "s2-commit" "s1-commit" "s2-print-cluster" "s2-print-index-count" // The same tests without loading the cache at first permutation "s1-begin" "s1-select" "s2-begin" "s2-blocking-shard-split" "s1-ddl" "s2-commit" "s1-commit" "s2-print-cluster" "s2-print-index-count" + +// With Deferred drop, AccessShareLock (acquired by SELECTS) do not block split from completion. +permutation "s1-load-cache" "s1-start-connection" "s1-lock-to-split-shard" "s2-print-locks" "s2-blocking-shard-split" "s2-print-locks" "s2-show-pg_dist_cleanup" "s1-stop-connection" +// The same test above without loading the cache at first +permutation "s1-start-connection" "s1-lock-to-split-shard" "s2-print-locks" "s2-blocking-shard-split" "s2-print-cluster" "s2-show-pg_dist_cleanup" "s1-stop-connection" + +// When a split operation is running, cleaner cannot clean its resources. +permutation "s1-load-cache" "s1-acquire-split-advisory-lock" "s2-blocking-shard-split" "s1-run-cleaner" "s1-show-pg_dist_cleanup" "s1-release-split-advisory-lock" "s1-run-cleaner" "s2-show-pg_dist_cleanup" +// The same test above without loading the cache at first +permutation "s1-acquire-split-advisory-lock" "s2-blocking-shard-split" "s1-run-cleaner" "s1-show-pg_dist_cleanup" "s1-release-split-advisory-lock" "s1-run-cleaner" "s2-show-pg_dist_cleanup" diff --git a/src/test/regress/spec/isolation_blocking_shard_split_with_fkey_to_reference.spec b/src/test/regress/spec/isolation_blocking_shard_split_with_fkey_to_reference.spec index 49b56c4a5..7b4d75c46 100644 --- a/src/test/regress/spec/isolation_blocking_shard_split_with_fkey_to_reference.spec +++ b/src/test/regress/spec/isolation_blocking_shard_split_with_fkey_to_reference.spec @@ -13,6 +13,13 @@ setup teardown { + -- Cleanup any orphan shards that might be left over from a previous run. 
+ CREATE OR REPLACE FUNCTION run_try_drop_marked_shards() + RETURNS VOID + AS 'citus' + LANGUAGE C STRICT VOLATILE; + SELECT run_try_drop_marked_shards(); + DROP TABLE table_to_split CASCADE; DROP TABLE reference_table CASCADE; } diff --git a/src/test/regress/spec/isolation_non_blocking_shard_split.spec b/src/test/regress/spec/isolation_non_blocking_shard_split.spec index deb1b61a8..d5065d66b 100644 --- a/src/test/regress/spec/isolation_non_blocking_shard_split.spec +++ b/src/test/regress/spec/isolation_non_blocking_shard_split.spec @@ -1,3 +1,5 @@ +#include "isolation_mx_common.include.spec" + // Test scenario for nonblocking split and concurrent INSERT/UPDATE/DELETE // session s1 - Executes non-blocking shard split // session s2 - Does concurrent writes @@ -10,12 +12,20 @@ setup SET citus.shard_replication_factor to 1; SELECT setval('pg_dist_shardid_seq', 1500000); + -- Cleanup any orphan shards that might be left over from a previous run. + CREATE OR REPLACE FUNCTION run_try_drop_marked_shards() + RETURNS VOID + AS 'citus' + LANGUAGE C STRICT VOLATILE; + CREATE TABLE to_split_table (id int PRIMARY KEY, value int); SELECT create_distributed_table('to_split_table', 'id'); } teardown { + SELECT run_try_drop_marked_shards(); + DROP TABLE to_split_table; } @@ -34,6 +44,44 @@ step "s1-load-cache" TRUNCATE to_split_table; } +step "s1-lock-to-split-shard" +{ + SELECT run_commands_on_session_level_connection_to_node('BEGIN; LOCK TABLE to_split_table_1500001 IN ACCESS SHARE MODE;'); +} + +// this advisory lock with (almost) random values are only used +// for testing purposes. For details, check Citus' logical replication +// source code +step "s1-acquire-split-advisory-lock" +{ + SELECT pg_advisory_lock(44000, 55152); +} + +step "s1-release-split-advisory-lock" +{ + SELECT pg_advisory_unlock(44000, 55152); +} + +step "s1-run-cleaner" +{ + SELECT run_try_drop_marked_shards(); +} + +step "s1-start-connection" +{ + SELECT start_session_level_connection_to_node('localhost', 57637); +} + +step "s1-stop-connection" +{ + SELECT stop_session_level_connection_to_node(); +} + +step "s1-show-pg_dist_cleanup" +{ + SELECT object_name, object_type, policy_type FROM pg_dist_cleanup; +} + step "s1-non-blocking-shard-split" { SELECT pg_catalog.citus_split_shard_by_split_points( @@ -81,6 +129,32 @@ step "s2-end" COMMIT; } +step "s2-non-blocking-shard-split" +{ + SELECT pg_catalog.citus_split_shard_by_split_points( + 1500001, + ARRAY['-1073741824'], + ARRAY[1, 2], + 'force_logical'); +} + +step "s2-print-locks" +{ + SELECT * FROM master_run_on_worker( + ARRAY['localhost']::text[], + ARRAY[57637]::int[], + ARRAY[ + 'SELECT CONCAT(relation::regclass, ''-'', locktype, ''-'', mode) AS LockInfo FROM pg_locks + WHERE relation::regclass::text = ''to_split_table_1500001'';' + ]::text[], + false); +} + +step "s2-show-pg_dist_cleanup" +{ + SELECT object_name, object_type, policy_type FROM pg_dist_cleanup; +} + step "s2-print-cluster" { -- row count per shard @@ -156,3 +230,14 @@ permutation "s1-load-cache" "s2-print-cluster" "s3-acquire-advisory-lock" "s1-be permutation "s2-print-cluster" "s3-acquire-advisory-lock" "s1-begin" "s2-begin" "s1-non-blocking-shard-split" "s2-insert" "s2-end" "s2-print-cluster" "s3-release-advisory-lock" "s1-end" "s2-print-cluster" permutation "s2-insert" "s2-print-cluster" "s3-acquire-advisory-lock" "s1-begin" "s1-non-blocking-shard-split" "s2-update" "s3-release-advisory-lock" "s1-end" "s2-print-cluster" permutation "s2-insert" "s2-print-cluster" "s3-acquire-advisory-lock" "s1-begin" 
"s1-non-blocking-shard-split" "s2-delete" "s3-release-advisory-lock" "s1-end" "s2-print-cluster" + + +// With Deferred drop, AccessShareLock (acquired by SELECTS) do not block split from completion. +permutation "s1-load-cache" "s1-start-connection" "s1-lock-to-split-shard" "s2-print-locks" "s2-non-blocking-shard-split" "s2-print-locks" "s2-show-pg_dist_cleanup" "s1-stop-connection" +// The same test above without loading the cache at first +permutation "s1-start-connection" "s1-lock-to-split-shard" "s2-print-locks" "s2-non-blocking-shard-split" "s2-print-cluster" "s2-show-pg_dist_cleanup" "s1-stop-connection" + +// When a split operation is running, cleaner cannot clean its resources. +permutation "s1-load-cache" "s1-acquire-split-advisory-lock" "s2-non-blocking-shard-split" "s1-run-cleaner" "s1-show-pg_dist_cleanup" "s1-release-split-advisory-lock" "s1-run-cleaner" "s2-show-pg_dist_cleanup" +// The same test above without loading the cache at first +permutation "s1-acquire-split-advisory-lock" "s2-non-blocking-shard-split" "s1-run-cleaner" "s1-show-pg_dist_cleanup" "s1-release-split-advisory-lock" "s1-run-cleaner" "s2-show-pg_dist_cleanup" diff --git a/src/test/regress/spec/isolation_non_blocking_shard_split_fkey.spec b/src/test/regress/spec/isolation_non_blocking_shard_split_fkey.spec index 2aee89f53..eba2b6f6a 100644 --- a/src/test/regress/spec/isolation_non_blocking_shard_split_fkey.spec +++ b/src/test/regress/spec/isolation_non_blocking_shard_split_fkey.spec @@ -19,6 +19,13 @@ setup teardown { + -- Cleanup any orphan shards that might be left over from a previous run. + CREATE OR REPLACE FUNCTION run_try_drop_marked_shards() + RETURNS VOID + AS 'citus' + LANGUAGE C STRICT VOLATILE; + SELECT run_try_drop_marked_shards(); + DROP TABLE table_to_split CASCADE; DROP TABLE reference_table CASCADE; } diff --git a/src/test/regress/spec/isolation_non_blocking_shard_split_with_index_as_replicaIdentity.spec b/src/test/regress/spec/isolation_non_blocking_shard_split_with_index_as_replicaIdentity.spec index be4371e26..79f49d6a5 100644 --- a/src/test/regress/spec/isolation_non_blocking_shard_split_with_index_as_replicaIdentity.spec +++ b/src/test/regress/spec/isolation_non_blocking_shard_split_with_index_as_replicaIdentity.spec @@ -18,6 +18,13 @@ setup teardown { + -- Cleanup any orphan shards that might be left over from a previous run. + CREATE OR REPLACE FUNCTION run_try_drop_marked_shards() + RETURNS VOID + AS 'citus' + LANGUAGE C STRICT VOLATILE; + SELECT run_try_drop_marked_shards(); + DROP TABLE to_split_table CASCADE; } diff --git a/src/test/regress/split_schedule b/src/test/regress/split_schedule index 62ba469bf..716e9dd08 100644 --- a/src/test/regress/split_schedule +++ b/src/test/regress/split_schedule @@ -17,10 +17,12 @@ test: worker_split_binary_copy_test test: worker_split_text_copy_test test: citus_split_shard_by_split_points_negative test: citus_split_shard_by_split_points +test: citus_split_shard_no_deferred_drop test: citus_split_shard_by_split_points_failure # Name citus_split_shard_by_split_points_columnar_partitioned was too long and being truncated. # use citus_split_shard_columnar_partitioned instead. 
test: citus_split_shard_columnar_partitioned test: citus_non_blocking_split_shards +test: citus_non_blocking_splits_no_deferred_drop test: citus_non_blocking_split_shard_cleanup test: citus_non_blocking_split_columnar diff --git a/src/test/regress/sql/citus_non_blocking_split_columnar.sql b/src/test/regress/sql/citus_non_blocking_split_columnar.sql index b8f4084cf..33a622968 100644 --- a/src/test/regress/sql/citus_non_blocking_split_columnar.sql +++ b/src/test/regress/sql/citus_non_blocking_split_columnar.sql @@ -1,10 +1,14 @@ -CREATE SCHEMA "citus_split_test_schema_columnar_partitioned"; -SET search_path TO "citus_split_test_schema_columnar_partitioned"; +CREATE SCHEMA "citus_split_non_blocking_schema_columnar_partitioned"; +SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned"; SET citus.next_shard_id TO 8970000; SET citus.next_placement_id TO 8770000; SET citus.shard_count TO 1; SET citus.shard_replication_factor TO 1; +-- Disable Deferred drop auto cleanup to avoid flaky tests. +ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1; +SELECT pg_reload_conf(); + -- BEGIN: Create table to split, along with other co-located tables. Add indexes, statistics etc. CREATE TABLE sensors( measureid integer, @@ -78,7 +82,7 @@ SET citus.shard_replication_factor TO 1; INNER JOIN pg_dist_node node ON placement.groupid = node.groupid INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid INNER JOIN pg_catalog.pg_namespace ns ON cls.relnamespace = ns.oid - WHERE node.noderole = 'primary' AND ns.nspname = 'citus_split_test_schema_columnar_partitioned' + WHERE node.noderole = 'primary' AND ns.nspname = 'citus_split_non_blocking_schema_columnar_partitioned' ORDER BY logicalrelid, shardminvalue::BIGINT, nodeport; -- END: Create table to split, along with other co-located tables. Add indexes, statistics etc. 
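The hunk above moves the columnar split test onto the deferred-drop flow: the background cleanup interval is disabled up front so the test, not the maintenance daemon, decides when orphaned resources are dropped. A minimal sketch of that pattern, built only from statements that already appear in these tests:

-- disable automatic deferred-drop cleanup for the duration of the test
ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1;
SELECT pg_reload_conf();
-- ... run the splits / moves under test ...
-- drop whatever those operations registered for deferred cleanup, at a deterministic point
CALL pg_catalog.citus_cleanup_orphaned_resources();
-- restore the default behaviour before leaving the test
ALTER SYSTEM RESET citus.defer_shard_delete_interval;
SELECT pg_reload_conf();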
@@ -120,7 +124,7 @@ SET citus.shard_replication_factor TO 1; -- BEGIN: Show the current state on workers \c - - - :worker_1_port - SET search_path TO "citus_split_test_schema_columnar_partitioned"; + SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned"; SET citus.show_shards_for_app_name_prefixes = '*'; SELECT tbl.relname, fk."Constraint", fk."Definition" FROM pg_catalog.pg_class tbl @@ -132,12 +136,12 @@ SET citus.shard_replication_factor TO 1; WHERE stxnamespace IN ( SELECT oid FROM pg_namespace - WHERE nspname IN ('citus_split_test_schema_columnar_partitioned') + WHERE nspname IN ('citus_split_non_blocking_schema_columnar_partitioned') ) ORDER BY stxname ASC; \c - - - :worker_2_port - SET search_path TO "citus_split_test_schema_columnar_partitioned"; + SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned"; SET citus.show_shards_for_app_name_prefixes = '*'; SELECT tbl.relname, fk."Constraint", fk."Definition" FROM pg_catalog.pg_class tbl @@ -149,14 +153,14 @@ SET citus.shard_replication_factor TO 1; WHERE stxnamespace IN ( SELECT oid FROM pg_namespace - WHERE nspname IN ('citus_split_test_schema_columnar_partitioned') + WHERE nspname IN ('citus_split_non_blocking_schema_columnar_partitioned') ) ORDER BY stxname ASC; -- END: Show the current state on workers -- BEGIN: Split a shard along its co-located shards \c - - - :master_port - SET search_path TO "citus_split_test_schema_columnar_partitioned"; + SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned"; SET citus.next_shard_id TO 8999000; SELECT nodeid AS worker_1_node FROM pg_dist_node WHERE nodeport=:worker_1_port \gset SELECT nodeid AS worker_2_node FROM pg_dist_node WHERE nodeport=:worker_2_port \gset @@ -168,6 +172,10 @@ SET citus.shard_replication_factor TO 1; 'force_logical'); -- END: Split a shard along its co-located shards +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +-- END: Perform deferred cleanup. 
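With deferred drop, the source shards of a split are no longer removed inline; they are recorded in pg_dist_cleanup and removed by the explicit CALL added above. A hedged sketch of how those records can be inspected before the cleanup call runs (column names as used elsewhere in this patch; the policy_type value 2 for deferred drop is taken from the deferred-drop test later in the diff):

-- list what the split registered for cleanup; deferred-drop records carry policy_type = 2
SELECT object_name, object_type, policy_type FROM pg_dist_cleanup ORDER BY record_id;
-- after CALL pg_catalog.citus_cleanup_orphaned_resources() the table is expected to be empty
SELECT count(*) FROM pg_dist_cleanup;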
+ -- BEGIN: Validate Shard Info and Data SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport FROM pg_dist_shard AS shard @@ -175,7 +183,7 @@ SET citus.shard_replication_factor TO 1; INNER JOIN pg_dist_node node ON placement.groupid = node.groupid INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid INNER JOIN pg_catalog.pg_namespace ns ON cls.relnamespace = ns.oid - WHERE node.noderole = 'primary' AND ns.nspname = 'citus_split_test_schema_columnar_partitioned' + WHERE node.noderole = 'primary' AND ns.nspname = 'citus_split_non_blocking_schema_columnar_partitioned' ORDER BY logicalrelid, shardminvalue::BIGINT, nodeport; SELECT count(*) FROM reference_table; @@ -187,7 +195,7 @@ SET citus.shard_replication_factor TO 1; -- BEGIN: Show the updated state on workers \c - - - :worker_1_port - SET search_path TO "citus_split_test_schema_columnar_partitioned"; + SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned"; SET citus.show_shards_for_app_name_prefixes = '*'; SELECT tbl.relname, fk."Constraint", fk."Definition" FROM pg_catalog.pg_class tbl @@ -199,12 +207,12 @@ SET citus.shard_replication_factor TO 1; WHERE stxnamespace IN ( SELECT oid FROM pg_namespace - WHERE nspname IN ('citus_split_test_schema_columnar_partitioned') + WHERE nspname IN ('citus_split_non_blocking_schema_columnar_partitioned') ) ORDER BY stxname ASC; \c - - - :worker_2_port - SET search_path TO "citus_split_test_schema_columnar_partitioned"; + SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned"; SET citus.show_shards_for_app_name_prefixes = '*'; SELECT tbl.relname, fk."Constraint", fk."Definition" FROM pg_catalog.pg_class tbl @@ -216,14 +224,14 @@ SET citus.shard_replication_factor TO 1; WHERE stxnamespace IN ( SELECT oid FROM pg_namespace - WHERE nspname IN ('citus_split_test_schema_columnar_partitioned') + WHERE nspname IN ('citus_split_non_blocking_schema_columnar_partitioned') ) ORDER BY stxname ASC; -- END: Show the updated state on workers -- BEGIN: Split a partition table directly \c - - - :master_port - SET search_path TO "citus_split_test_schema_columnar_partitioned"; + SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned"; SET citus.next_shard_id TO 8999100; SELECT nodeid AS worker_1_node FROM pg_dist_node WHERE nodeport=:worker_1_port \gset SELECT nodeid AS worker_2_node FROM pg_dist_node WHERE nodeport=:worker_2_port \gset @@ -235,6 +243,10 @@ SET citus.shard_replication_factor TO 1; 'force_logical'); -- END: Split a partition table directly +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +-- END: Perform deferred cleanup. 
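Because the drop is deferred, the pre-split shard stays physically present on the worker until the cleanup call runs. A sketch of how that can be observed from a worker session, reusing the shard-visibility GUC and pg_class query used elsewhere in this patch ('sensors_%' is simply the table prefix of this particular test):

\c - - - :worker_1_port
SET citus.show_shards_for_app_name_prefixes = '*';
-- the old shard relation still shows up here until citus_cleanup_orphaned_resources() is called
SELECT relname FROM pg_class WHERE relname LIKE 'sensors_%' AND relkind = 'r' ORDER BY relname;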
+ -- BEGIN: Validate Shard Info and Data SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport FROM pg_dist_shard AS shard @@ -242,7 +254,7 @@ SET citus.shard_replication_factor TO 1; INNER JOIN pg_dist_node node ON placement.groupid = node.groupid INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid INNER JOIN pg_catalog.pg_namespace ns ON cls.relnamespace = ns.oid - WHERE node.noderole = 'primary' AND ns.nspname = 'citus_split_test_schema_columnar_partitioned' + WHERE node.noderole = 'primary' AND ns.nspname = 'citus_split_non_blocking_schema_columnar_partitioned' ORDER BY logicalrelid, shardminvalue::BIGINT, nodeport; SELECT count(*) FROM reference_table; @@ -254,7 +266,7 @@ SET citus.shard_replication_factor TO 1; -- BEGIN: Show the updated state on workers \c - - - :worker_1_port - SET search_path TO "citus_split_test_schema_columnar_partitioned"; + SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned"; SET citus.show_shards_for_app_name_prefixes = '*'; SELECT tbl.relname, fk."Constraint", fk."Definition" FROM pg_catalog.pg_class tbl @@ -266,12 +278,12 @@ SET citus.shard_replication_factor TO 1; WHERE stxnamespace IN ( SELECT oid FROM pg_namespace - WHERE nspname IN ('citus_split_test_schema_columnar_partitioned') + WHERE nspname IN ('citus_split_non_blocking_schema_columnar_partitioned') ) ORDER BY stxname ASC; \c - - - :worker_2_port - SET search_path TO "citus_split_test_schema_columnar_partitioned"; + SET search_path TO "citus_split_non_blocking_schema_columnar_partitioned"; SET citus.show_shards_for_app_name_prefixes = '*'; SELECT tbl.relname, fk."Constraint", fk."Definition" FROM pg_catalog.pg_class tbl @@ -283,12 +295,14 @@ SET citus.shard_replication_factor TO 1; WHERE stxnamespace IN ( SELECT oid FROM pg_namespace - WHERE nspname IN ('citus_split_test_schema_columnar_partitioned') + WHERE nspname IN ('citus_split_non_blocking_schema_columnar_partitioned') ) ORDER BY stxname ASC; -- END: Show the updated state on workers --BEGIN : Cleanup \c - postgres - :master_port - DROP SCHEMA "citus_split_test_schema_columnar_partitioned" CASCADE; + ALTER SYSTEM RESET citus.defer_shard_delete_interval; + SELECT pg_reload_conf(); + DROP SCHEMA "citus_split_non_blocking_schema_columnar_partitioned" CASCADE; --END : Cleanup diff --git a/src/test/regress/sql/citus_non_blocking_split_shards.sql b/src/test/regress/sql/citus_non_blocking_split_shards.sql index f9e8598af..00b5b71cb 100644 --- a/src/test/regress/sql/citus_non_blocking_split_shards.sql +++ b/src/test/regress/sql/citus_non_blocking_split_shards.sql @@ -17,10 +17,13 @@ Here is a high level overview of test plan: CREATE SCHEMA "citus_split_test_schema"; +-- Disable Deferred drop auto cleanup to avoid flaky tests. +ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1; +SELECT pg_reload_conf(); + CREATE ROLE test_shard_split_role WITH LOGIN; GRANT USAGE, CREATE ON SCHEMA "citus_split_test_schema" TO test_shard_split_role; SET ROLE test_shard_split_role; - SET search_path TO "citus_split_test_schema"; SET citus.next_shard_id TO 8981000; SET citus.next_placement_id TO 8610000; @@ -144,6 +147,10 @@ SELECT pg_catalog.citus_split_shard_by_split_points( ARRAY[:worker_1_node, :worker_2_node], 'force_logical'); +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +-- END: Perform deferred cleanup. 
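When the explicit cleanup call actually drops something, it reports the count in a NOTICE, which is why the expected output earlier in this diff contains lines such as "cleaned up 30 orphaned resources". A sketch of the shape of that interaction; the count shown is illustrative and depends on how many co-located shards the preceding split left behind:

CALL pg_catalog.citus_cleanup_orphaned_resources();
-- NOTICE:  cleaned up 2 orphaned resources   (illustrative count, varies per test)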
+ -- Perform 3 way split SELECT pg_catalog.citus_split_shard_by_split_points( 8981001, @@ -152,6 +159,10 @@ SELECT pg_catalog.citus_split_shard_by_split_points( 'force_logical'); -- END : Split two shards : One with move and One without move. +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +-- END: Perform deferred cleanup. + -- BEGIN : Move a shard post split. SELECT citus_move_shard_placement(8981007, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='block_writes'); -- END : Move a shard post split. @@ -222,6 +233,10 @@ SELECT pg_catalog.citus_split_shard_by_split_points( ARRAY[:worker_1_node, :worker_2_node], 'force_logical'); +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +-- END: Perform deferred cleanup. + SET search_path TO "citus_split_test_schema"; SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport FROM pg_dist_shard AS shard @@ -244,6 +259,10 @@ SELECT pg_catalog.citus_split_shard_by_split_points( ARRAY['-2120000000'], ARRAY[:worker_1_node, :worker_2_node]); +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +-- END: Perform deferred cleanup. + SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport FROM pg_dist_shard AS shard INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid @@ -265,6 +284,10 @@ SELECT pg_catalog.citus_split_shard_by_split_points( ARRAY[:worker_1_node, :worker_2_node], 'auto'); +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +-- END: Perform deferred cleanup. + SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport FROM pg_dist_shard AS shard INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid @@ -282,5 +305,7 @@ SELECT COUNT(*) FROM colocated_dist_table; --BEGIN : Cleanup \c - postgres - :master_port +ALTER SYSTEM RESET citus.defer_shard_delete_interval; +SELECT pg_reload_conf(); DROP SCHEMA "citus_split_test_schema" CASCADE; --END : Cleanup diff --git a/src/test/regress/sql/citus_non_blocking_splits_no_deferred_drop.sql b/src/test/regress/sql/citus_non_blocking_splits_no_deferred_drop.sql new file mode 100644 index 000000000..b409aa9f4 --- /dev/null +++ b/src/test/regress/sql/citus_non_blocking_splits_no_deferred_drop.sql @@ -0,0 +1,294 @@ +/* +This suite runs without deferred drop enabled. + +Citus Shard Split Test.The test is model similar to 'shard_move_constraints'. +Here is a high level overview of test plan: + 1. Create a table 'sensors' (ShardCount = 2) to be split. Add indexes and statistics on this table. + 2. Create two other tables: 'reference_table' and 'colocated_dist_table', co-located with sensors. + 3. Create Foreign key constraints between the two co-located distributed tables. + 4. Load data into the three tables. + 5. Move one of the shards for 'sensors' to test ShardMove -> Split. + 6. Trigger Split on both shards of 'sensors'. This will also split co-located tables. + 7. Move one of the split shard to test Split -> ShardMove. + 8. Split an already split shard second time on a different schema. + 9. Create a colocated table with no replica identity. + 10. Show we do not allow Split with the shard transfer mode 'auto' if any colocated table has no replica identity. + 11. Drop the colocated table with no replica identity. + 12. 
Show we allow Split with the shard transfer mode 'auto' if all colocated tables has replica identity. +*/ + +CREATE SCHEMA "citus_split_test_schema_no_deferred_drop"; + +SET citus.defer_drop_after_shard_split TO OFF; + +CREATE ROLE test_shard_split_role_nodeferred_drop WITH LOGIN; +GRANT USAGE, CREATE ON SCHEMA "citus_split_test_schema_no_deferred_drop" TO test_shard_split_role_nodeferred_drop; +SET ROLE test_shard_split_role_nodeferred_drop; + +SET search_path TO "citus_split_test_schema_no_deferred_drop"; +SET citus.next_shard_id TO 8981000; +SET citus.next_placement_id TO 8610000; +SET citus.shard_count TO 2; +SET citus.shard_replication_factor TO 1; + +-- BEGIN: Create table to split, along with other co-located tables. Add indexes, statistics etc. +CREATE TABLE sensors( + measureid integer, + eventdatetime date, + measure_data jsonb, + meaure_quantity decimal(15, 2), + measure_status char(1), + measure_comment varchar(44), + PRIMARY KEY (measureid, eventdatetime, measure_data)); + +CREATE INDEX index_on_sensors ON sensors(lower(measureid::text)); +ALTER INDEX index_on_sensors ALTER COLUMN 1 SET STATISTICS 1000; +CREATE INDEX hash_index_on_sensors ON sensors USING HASH((measure_data->'IsFailed')); +CREATE INDEX index_with_include_on_sensors ON sensors ((measure_data->'IsFailed')) INCLUDE (measure_data, eventdatetime, measure_status); +CREATE STATISTICS stats_on_sensors (dependencies) ON measureid, eventdatetime FROM sensors; + +SELECT create_distributed_table('sensors', 'measureid', colocate_with:='none'); +-- END: Create table to split, along with other co-located tables. Add indexes, statistics etc. + +-- BEGIN: Create co-located distributed and reference tables. +CREATE TABLE reference_table (measureid integer PRIMARY KEY); +SELECT create_reference_table('reference_table'); + +CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY); +CLUSTER colocated_dist_table USING colocated_dist_table_pkey; +SELECT create_distributed_table('colocated_dist_table', 'measureid', colocate_with:='sensors'); + +CREATE TABLE table_with_index_rep_identity(key int NOT NULL); +CREATE UNIQUE INDEX uqx ON table_with_index_rep_identity(key); +ALTER TABLE table_with_index_rep_identity REPLICA IDENTITY USING INDEX uqx; +CLUSTER table_with_index_rep_identity USING uqx; +SELECT create_distributed_table('table_with_index_rep_identity', 'key', colocate_with:='sensors'); +-- END: Create co-located distributed and reference tables. + +-- BEGIN : Create Foreign key constraints. +ALTER TABLE sensors ADD CONSTRAINT fkey_table_to_dist FOREIGN KEY (measureid) REFERENCES colocated_dist_table(measureid); +-- END : Create Foreign key constraints. + +-- BEGIN : Load data into tables. +INSERT INTO reference_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO colocated_dist_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO sensors SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' FROM generate_series(0,1000)i; + +SELECT COUNT(*) FROM sensors; +SELECT COUNT(*) FROM reference_table; +SELECT COUNT(*) FROM colocated_dist_table; +-- END: Load data into tables. + +-- BEGIN : Display current state. 
+SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport + FROM pg_dist_shard AS shard + INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid + INNER JOIN pg_dist_node node ON placement.groupid = node.groupid + INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid + WHERE node.noderole = 'primary' AND (logicalrelid = 'sensors'::regclass OR logicalrelid = 'colocated_dist_table'::regclass OR logicalrelid = 'table_with_index_rep_identity'::regclass) + ORDER BY logicalrelid, shardminvalue::BIGINT; + +\c - - - :worker_1_port + SET search_path TO "citus_split_test_schema_no_deferred_drop", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + SELECT tbl.relname, fk."Constraint", fk."Definition" + FROM pg_catalog.pg_class tbl + JOIN public.table_fkeys fk on tbl.oid = fk.relid + WHERE tbl.relname like 'sensors_%' + ORDER BY 1, 2; + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'sensors_%' ORDER BY 1,2; + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'table_with_index_rep_identity_%' ORDER BY 1,2; + SELECT stxname FROM pg_statistic_ext + WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('citus_split_test_schema_no_deferred_drop') + ) + ORDER BY stxname ASC; + +\c - - - :worker_2_port + SET search_path TO "citus_split_test_schema_no_deferred_drop", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + SELECT tbl.relname, fk."Constraint", fk."Definition" + FROM pg_catalog.pg_class tbl + JOIN public.table_fkeys fk on tbl.oid = fk.relid + WHERE tbl.relname like 'sensors_%' + ORDER BY 1, 2; + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'sensors_%' ORDER BY 1,2; + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'table_with_index_rep_identity_%' ORDER BY 1,2; + SELECT stxname FROM pg_statistic_ext + WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('citus_split_test_schema_no_deferred_drop') + ) + ORDER BY stxname ASC; +-- END : Display current state + +-- BEGIN : Move one shard before we split it. +\c - postgres - :master_port +SET citus.defer_drop_after_shard_split TO OFF; +SET ROLE test_shard_split_role_nodeferred_drop; +SET search_path TO "citus_split_test_schema_no_deferred_drop"; +SET citus.next_shard_id TO 8981007; +SET citus.defer_drop_after_shard_move TO OFF; + +SELECT citus_move_shard_placement(8981000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='force_logical'); +-- END : Move one shard before we split it. + +-- BEGIN : Set node id variables +SELECT nodeid AS worker_1_node FROM pg_dist_node WHERE nodeport=:worker_1_port \gset +SELECT nodeid AS worker_2_node FROM pg_dist_node WHERE nodeport=:worker_2_port \gset +-- END : Set node id variables + +-- BEGIN : Split two shards : One with move and One without move. +-- Perform 2 way split +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-1073741824'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); + +-- Perform 3 way split +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981001, + ARRAY['536870911', '1610612735'], + ARRAY[:worker_1_node, :worker_1_node, :worker_2_node], + 'force_logical'); +-- END : Split two shards : One with move and One without move. + +-- BEGIN : Move a shard post split. 
+SELECT citus_move_shard_placement(8981007, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='block_writes'); +-- END : Move a shard post split. + +-- BEGIN : Display current state. +SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport + FROM pg_dist_shard AS shard + INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid + INNER JOIN pg_dist_node node ON placement.groupid = node.groupid + INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid + WHERE node.noderole = 'primary' AND (logicalrelid = 'sensors'::regclass OR logicalrelid = 'colocated_dist_table'::regclass OR logicalrelid = 'table_with_index_rep_identity'::regclass) + ORDER BY logicalrelid, shardminvalue::BIGINT; + +\c - - - :worker_1_port + SET search_path TO "citus_split_test_schema_no_deferred_drop", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + SELECT tbl.relname, fk."Constraint", fk."Definition" + FROM pg_catalog.pg_class tbl + JOIN public.table_fkeys fk on tbl.oid = fk.relid + WHERE tbl.relname like 'sensors_%' + ORDER BY 1, 2; + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'sensors_%' ORDER BY 1,2; + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'table_with_index_rep_identity_%' ORDER BY 1,2; + SELECT stxname FROM pg_statistic_ext + WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('citus_split_test_schema_no_deferred_drop') + ) + ORDER BY stxname ASC; + +\c - - - :worker_2_port + SET search_path TO "citus_split_test_schema_no_deferred_drop", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + SELECT tbl.relname, fk."Constraint", fk."Definition" + FROM pg_catalog.pg_class tbl + JOIN public.table_fkeys fk on tbl.oid = fk.relid + WHERE tbl.relname like 'sensors_%' + ORDER BY 1, 2; + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'sensors_%' ORDER BY 1,2; + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'table_with_index_rep_identity_%' ORDER BY 1,2; + SELECT stxname FROM pg_statistic_ext + WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('citus_split_test_schema_no_deferred_drop') + ) + ORDER BY stxname ASC; +-- END : Display current state + +-- BEGIN: Should be able to change/drop constraints +\c - postgres - :master_port +SET citus.defer_drop_after_shard_split TO OFF; +SET ROLE test_shard_split_role_nodeferred_drop; +SET search_path TO "citus_split_test_schema_no_deferred_drop"; +ALTER INDEX index_on_sensors RENAME TO index_on_sensors_renamed; +ALTER INDEX index_on_sensors_renamed ALTER COLUMN 1 SET STATISTICS 200; +DROP STATISTICS stats_on_sensors; +DROP INDEX index_on_sensors_renamed; +ALTER TABLE sensors DROP CONSTRAINT fkey_table_to_dist; +-- END: Should be able to change/drop constraints + +-- BEGIN: Split second time on another schema +SET search_path TO public; +SET citus.next_shard_id TO 8981031; +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981007, + ARRAY['-2100000000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); + +SET search_path TO "citus_split_test_schema_no_deferred_drop"; +SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport + FROM pg_dist_shard AS shard + INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid + INNER JOIN pg_dist_node node ON placement.groupid = node.groupid + INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid + WHERE 
node.noderole = 'primary' AND (logicalrelid = 'sensors'::regclass OR logicalrelid = 'colocated_dist_table'::regclass OR logicalrelid = 'table_with_index_rep_identity'::regclass) + ORDER BY logicalrelid, shardminvalue::BIGINT; +-- END: Split second time on another schema + +-- BEGIN: Create a co-located table with no replica identity. +CREATE TABLE table_no_rep_id (measureid integer); +SELECT create_distributed_table('table_no_rep_id', 'measureid', colocate_with:='sensors'); +-- END: Create a co-located table with no replica identity. + +-- BEGIN: Split a shard with shard_transfer_mode='auto' and with a colocated table with no replica identity +SET citus.next_shard_id TO 8981041; +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981031, + ARRAY['-2120000000'], + ARRAY[:worker_1_node, :worker_2_node]); + +SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport + FROM pg_dist_shard AS shard + INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid + INNER JOIN pg_dist_node node ON placement.groupid = node.groupid + INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid + WHERE node.noderole = 'primary' AND (logicalrelid = 'sensors'::regclass OR logicalrelid = 'colocated_dist_table'::regclass OR logicalrelid = 'table_with_index_rep_identity'::regclass) + ORDER BY logicalrelid, shardminvalue::BIGINT; +-- END: Split a shard with shard_transfer_mode='auto' and with a colocated table with no replica identity + +-- BEGIN: Drop the co-located table with no replica identity. +DROP TABLE table_no_rep_id; +-- END: Drop the co-located table with no replica identity. + +-- BEGIN: Split a shard with shard_transfer_mode='auto' and with all colocated tables has replica identity +SET citus.next_shard_id TO 8981041; +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981031, + ARRAY['-2120000000'], + ARRAY[:worker_1_node, :worker_2_node], + 'auto'); + +SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport + FROM pg_dist_shard AS shard + INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid + INNER JOIN pg_dist_node node ON placement.groupid = node.groupid + INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid + WHERE node.noderole = 'primary' AND (logicalrelid = 'sensors'::regclass OR logicalrelid = 'colocated_dist_table'::regclass OR logicalrelid = 'table_with_index_rep_identity'::regclass) + ORDER BY logicalrelid, shardminvalue::BIGINT; +-- END: Split a shard with shard_transfer_mode='auto' and with all colocated tables has replica identity + +-- BEGIN: Validate Data Count +SELECT COUNT(*) FROM sensors; +SELECT COUNT(*) FROM reference_table; +SELECT COUNT(*) FROM colocated_dist_table; +-- END: Validate Data Count + +--BEGIN : Cleanup +\c - postgres - :master_port +DROP SCHEMA "citus_split_test_schema_no_deferred_drop" CASCADE; + +SET citus.defer_drop_after_shard_split TO ON; +--END : Cleanup diff --git a/src/test/regress/sql/citus_split_shard_by_split_points.sql b/src/test/regress/sql/citus_split_shard_by_split_points.sql index 6c2957953..e9e568de3 100644 --- a/src/test/regress/sql/citus_split_shard_by_split_points.sql +++ b/src/test/regress/sql/citus_split_shard_by_split_points.sql @@ -13,10 +13,13 @@ Here is a high level overview of test plan: CREATE SCHEMA "citus_split_test_schema"; +-- Disable Deferred drop auto cleanup to avoid flaky tests. 
+ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1; +SELECT pg_reload_conf(); + CREATE ROLE test_split_role WITH LOGIN; GRANT USAGE, CREATE ON SCHEMA "citus_split_test_schema" TO test_split_role; SET ROLE test_split_role; - SET search_path TO "citus_split_test_schema"; SET citus.next_shard_id TO 8981000; SET citus.next_placement_id TO 8610000; @@ -140,6 +143,10 @@ SELECT pg_catalog.citus_split_shard_by_split_points( ARRAY[:worker_1_node, :worker_2_node], 'block_writes'); +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +-- END: Perform deferred cleanup. + -- Perform 3 way split SELECT pg_catalog.citus_split_shard_by_split_points( 8981001, @@ -148,6 +155,10 @@ SELECT pg_catalog.citus_split_shard_by_split_points( 'block_writes'); -- END : Split two shards : One with move and One without move. +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +-- END: Perform deferred cleanup. + -- BEGIN : Move a shard post split. SELECT citus_move_shard_placement(8981007, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='block_writes'); -- END : Move a shard post split. @@ -218,6 +229,10 @@ SELECT pg_catalog.citus_split_shard_by_split_points( ARRAY[:worker_1_node, :worker_2_node], 'block_writes'); +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +-- END: Perform deferred cleanup. + SET search_path TO "citus_split_test_schema"; SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport FROM pg_dist_shard AS shard @@ -236,5 +251,7 @@ SELECT COUNT(*) FROM colocated_dist_table; --BEGIN : Cleanup \c - postgres - :master_port +ALTER SYSTEM RESET citus.defer_shard_delete_interval; +SELECT pg_reload_conf(); DROP SCHEMA "citus_split_test_schema" CASCADE; --END : Cleanup diff --git a/src/test/regress/sql/citus_split_shard_by_split_points_deferred_drop.sql b/src/test/regress/sql/citus_split_shard_by_split_points_deferred_drop.sql new file mode 100644 index 000000000..1728355ab --- /dev/null +++ b/src/test/regress/sql/citus_split_shard_by_split_points_deferred_drop.sql @@ -0,0 +1,76 @@ +CREATE SCHEMA "citus_split_shard_by_split_points_deferred_schema"; + +CREATE ROLE test_split_role WITH LOGIN; +GRANT USAGE, CREATE ON SCHEMA "citus_split_shard_by_split_points_deferred_schema" TO test_split_role; +SET ROLE test_split_role; + +SET search_path TO "citus_split_shard_by_split_points_deferred_schema"; + +-- Validate that a user cannot insert directly into the pg_dist_cleanup table but can select from it. +CREATE TABLE temp_table (id INT); +INSERT INTO pg_catalog.pg_dist_cleanup (operation_id, object_type, object_name, node_group_id, policy_type) + VALUES (3134, 1, 'citus_split_shard_by_split_points_deferred_schema.temp_table', 1, 1); + +SELECT * from pg_dist_cleanup; + +-- Disable Deferred drop auto cleanup to avoid flaky tests. +\c - postgres - :master_port +ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1; +SELECT pg_reload_conf(); + +-- Perform a split and validate shard is marked for deferred drop.
+SET citus.next_shard_id TO 8981000; +SET citus.next_placement_id TO 8610000; +SET citus.shard_count TO 2; +SET citus.shard_replication_factor TO 1; +SET citus.next_operation_id TO 777; +SET citus.next_cleanup_record_id TO 11; +SET ROLE test_split_role; + +CREATE TABLE table_to_split(id int PRIMARY KEY, int_data int, data text); +SELECT create_distributed_table('table_to_split', 'id'); + +SELECT nodeid AS worker_1_node FROM pg_dist_node WHERE nodeport=:worker_1_port \gset +SELECT nodeid AS worker_2_node FROM pg_dist_node WHERE nodeport=:worker_2_port \gset + +SET citus.next_shard_id TO 9999000; +SET citus.next_placement_id TO 5555000; + +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'block_writes'); + +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981001, + ARRAY['100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); + +-- The original shards are marked for deferred drop with policy_type = 2. +SELECT * from pg_dist_cleanup; + +-- The physical shards should not be deleted. +\c - - - :worker_1_port +SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r'; + +\c - - - :worker_2_port +SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r'; + +-- Perform deferred drop cleanup. +\c - postgres - :master_port +CALL citus_cleanup_orphaned_shards(); + +-- Cleanup has been done. +SELECT * from pg_dist_cleanup; + +\c - - - :worker_1_port +SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r'; + +\c - - - :worker_2_port +SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r'; + +-- Test Cleanup +\c - postgres - :master_port +DROP SCHEMA "citus_split_shard_by_split_points_deferred_schema" CASCADE; diff --git a/src/test/regress/sql/citus_split_shard_by_split_points_negative.sql b/src/test/regress/sql/citus_split_shard_by_split_points_negative.sql index 1406615f4..fe37777c7 100644 --- a/src/test/regress/sql/citus_split_shard_by_split_points_negative.sql +++ b/src/test/regress/sql/citus_split_shard_by_split_points_negative.sql @@ -109,3 +109,8 @@ SELECT citus_split_shard_by_split_points( 51261400, ARRAY['-1073741826'], ARRAY[:worker_1_node, :worker_2_node]); + +--BEGIN : Cleanup +\c - postgres - :master_port +DROP SCHEMA "citus_split_shard_by_split_points_negative" CASCADE; +--END : Cleanup diff --git a/src/test/regress/sql/citus_split_shard_columnar_partitioned.sql b/src/test/regress/sql/citus_split_shard_columnar_partitioned.sql index 58d577f32..e7578879f 100644 --- a/src/test/regress/sql/citus_split_shard_columnar_partitioned.sql +++ b/src/test/regress/sql/citus_split_shard_columnar_partitioned.sql @@ -5,6 +5,10 @@ SET citus.next_placement_id TO 8770000; SET citus.shard_count TO 1; SET citus.shard_replication_factor TO 1; +-- Disable Deferred drop auto cleanup to avoid flaky tests. +ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1; +SELECT pg_reload_conf(); + -- BEGIN: Create table to split, along with other co-located tables. Add indexes, statistics etc. CREATE TABLE sensors( measureid integer, @@ -168,6 +172,10 @@ SET citus.shard_replication_factor TO 1; 'block_writes'); -- END: Split a shard along its co-located shards +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +-- END: Perform deferred cleanup.
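The deferred-drop test above pins citus.next_operation_id and citus.next_cleanup_record_id so that the operation_id and record_id values in its expected output stay stable; those identifiers are otherwise drawn from the two new sequences checked in upgrade_basic_after.out. A sketch of that sequence check, in the same shape as the upgrade test (each comparison is expected to hold once pg_dist_cleanup has rows):

SELECT nextval('pg_dist_operationid_seq') = MAX(operation_id)+1 FROM pg_dist_cleanup;
SELECT nextval('pg_dist_cleanup_recordid_seq') = MAX(record_id)+1 FROM pg_dist_cleanup;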
+ -- BEGIN: Validate Shard Info and Data SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport FROM pg_dist_shard AS shard @@ -235,6 +243,10 @@ SET citus.shard_replication_factor TO 1; 'block_writes'); -- END: Split a partition table directly +-- BEGIN: Perform deferred cleanup. +CALL pg_catalog.citus_cleanup_orphaned_resources(); +-- END: Perform deferred cleanup. + -- BEGIN: Validate Shard Info and Data SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport FROM pg_dist_shard AS shard @@ -290,5 +302,7 @@ SET citus.shard_replication_factor TO 1; --BEGIN : Cleanup \c - postgres - :master_port + ALTER SYSTEM RESET citus.defer_shard_delete_interval; + SELECT pg_reload_conf(); DROP SCHEMA "citus_split_test_schema_columnar_partitioned" CASCADE; --END : Cleanup diff --git a/src/test/regress/sql/citus_split_shard_no_deferred_drop.sql b/src/test/regress/sql/citus_split_shard_no_deferred_drop.sql new file mode 100644 index 000000000..76d8b6a45 --- /dev/null +++ b/src/test/regress/sql/citus_split_shard_no_deferred_drop.sql @@ -0,0 +1,246 @@ +/* +This suite runs without deferred drop enabled. +Citus Shard Split Test.The test is model similar to 'shard_move_constraints'. +Here is a high level overview of test plan: + 1. Create a table 'sensors' (ShardCount = 2) to be split. Add indexes and statistics on this table. + 2. Create two other tables: 'reference_table' and 'colocated_dist_table', co-located with sensors. + 3. Create Foreign key constraints between the two co-located distributed tables. + 4. Load data into the three tables. + 5. Move one of the shards for 'sensors' to test ShardMove -> Split. + 6. Trigger Split on both shards of 'sensors'. This will also split co-located tables. + 7. Move one of the split shard to test Split -> ShardMove. + 8. Split an already split shard second time on a different schema. +*/ + +CREATE SCHEMA "citus_split_test_schema_no_deferred_drop"; + +SET citus.defer_drop_after_shard_split TO OFF; +CREATE ROLE test_split_deferred_role WITH LOGIN; +GRANT USAGE, CREATE ON SCHEMA "citus_split_test_schema_no_deferred_drop" TO test_split_deferred_role; +SET ROLE test_split_deferred_role; + +SET search_path TO "citus_split_test_schema_no_deferred_drop"; +SET citus.next_shard_id TO 8981000; +SET citus.next_placement_id TO 8610000; +SET citus.shard_count TO 2; +SET citus.shard_replication_factor TO 1; + +-- BEGIN: Create table to split, along with other co-located tables. Add indexes, statistics etc. +CREATE TABLE sensors( + measureid integer, + eventdatetime date, + measure_data jsonb, + meaure_quantity decimal(15, 2), + measure_status char(1), + measure_comment varchar(44), + PRIMARY KEY (measureid, eventdatetime, measure_data)); + +CREATE INDEX index_on_sensors ON sensors(lower(measureid::text)); +ALTER INDEX index_on_sensors ALTER COLUMN 1 SET STATISTICS 1000; +CREATE INDEX hash_index_on_sensors ON sensors USING HASH((measure_data->'IsFailed')); +CREATE INDEX index_with_include_on_sensors ON sensors ((measure_data->'IsFailed')) INCLUDE (measure_data, eventdatetime, measure_status); +CREATE STATISTICS stats_on_sensors (dependencies) ON measureid, eventdatetime FROM sensors; + +SELECT create_distributed_table('sensors', 'measureid', colocate_with:='none'); +-- END: Create table to split, along with other co-located tables. Add indexes, statistics etc. + +-- BEGIN: Create co-located distributed and reference tables. 
+CREATE TABLE reference_table (measureid integer PRIMARY KEY); +SELECT create_reference_table('reference_table'); + +CREATE TABLE colocated_dist_table (measureid integer PRIMARY KEY); +CLUSTER colocated_dist_table USING colocated_dist_table_pkey; +SELECT create_distributed_table('colocated_dist_table', 'measureid', colocate_with:='sensors'); + +CREATE TABLE table_with_index_rep_identity(key int NOT NULL); +CREATE UNIQUE INDEX uqx ON table_with_index_rep_identity(key); +ALTER TABLE table_with_index_rep_identity REPLICA IDENTITY USING INDEX uqx; +CLUSTER table_with_index_rep_identity USING uqx; +SELECT create_distributed_table('table_with_index_rep_identity', 'key', colocate_with:='sensors'); +-- END: Create co-located distributed and reference tables. + +-- BEGIN : Create Foreign key constraints. +ALTER TABLE sensors ADD CONSTRAINT fkey_table_to_dist FOREIGN KEY (measureid) REFERENCES colocated_dist_table(measureid); +-- END : Create Foreign key constraints. + +-- BEGIN : Load data into tables. +INSERT INTO reference_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO colocated_dist_table SELECT i FROM generate_series(0,1000)i; +INSERT INTO sensors SELECT i, '2020-01-05', '{}', 11011.10, 'A', 'I <3 Citus' FROM generate_series(0,1000)i; + +SELECT COUNT(*) FROM sensors; +SELECT COUNT(*) FROM reference_table; +SELECT COUNT(*) FROM colocated_dist_table; +-- END: Load data into tables. + +-- BEGIN : Display current state. +SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport + FROM pg_dist_shard AS shard + INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid + INNER JOIN pg_dist_node node ON placement.groupid = node.groupid + INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid + WHERE node.noderole = 'primary' AND (logicalrelid = 'sensors'::regclass OR logicalrelid = 'colocated_dist_table'::regclass OR logicalrelid = 'table_with_index_rep_identity'::regclass) + ORDER BY logicalrelid, shardminvalue::BIGINT; + +\c - - - :worker_1_port + SET search_path TO "citus_split_test_schema_no_deferred_drop", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + SELECT tbl.relname, fk."Constraint", fk."Definition" + FROM pg_catalog.pg_class tbl + JOIN public.table_fkeys fk on tbl.oid = fk.relid + WHERE tbl.relname like 'sensors_%' + ORDER BY 1, 2; + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'sensors_%' ORDER BY 1,2; + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'table_with_index_rep_identity_%' ORDER BY 1,2; + SELECT stxname FROM pg_statistic_ext + WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('citus_split_test_schema_no_deferred_drop') + ) + ORDER BY stxname ASC; + +\c - - - :worker_2_port + SET search_path TO "citus_split_test_schema_no_deferred_drop", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + SELECT tbl.relname, fk."Constraint", fk."Definition" + FROM pg_catalog.pg_class tbl + JOIN public.table_fkeys fk on tbl.oid = fk.relid + WHERE tbl.relname like 'sensors_%' + ORDER BY 1, 2; + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'sensors_%' ORDER BY 1,2; + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'table_with_index_rep_identity_%' ORDER BY 1,2; + SELECT stxname FROM pg_statistic_ext + WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('citus_split_test_schema_no_deferred_drop') + ) + ORDER BY stxname ASC; +-- END : Display current state + +-- 
BEGIN : Move one shard before we split it. +\c - postgres - :master_port +SET citus.defer_drop_after_shard_split TO OFF; +SET ROLE test_split_deferred_role; +SET search_path TO "citus_split_test_schema_no_deferred_drop"; +SET citus.next_shard_id TO 8981007; +SET citus.defer_drop_after_shard_move TO OFF; + +SELECT citus_move_shard_placement(8981000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='force_logical'); +-- END : Move one shard before we split it. + +-- BEGIN : Set node id variables +SELECT nodeid AS worker_1_node FROM pg_dist_node WHERE nodeport=:worker_1_port \gset +SELECT nodeid AS worker_2_node FROM pg_dist_node WHERE nodeport=:worker_2_port \gset +-- END : Set node id variables + +-- BEGIN : Split two shards : One with move and One without move. +-- Perform 2 way split +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-1073741824'], + ARRAY[:worker_1_node, :worker_2_node], + 'block_writes'); + +-- Perform 3 way split +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981001, + ARRAY['536870911', '1610612735'], + ARRAY[:worker_1_node, :worker_1_node, :worker_2_node], + 'block_writes'); +-- END : Split two shards : One with move and One without move. + +-- BEGIN : Move a shard post split. +SELECT citus_move_shard_placement(8981007, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='block_writes'); +-- END : Move a shard post split. + +-- BEGIN : Display current state. +SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport + FROM pg_dist_shard AS shard + INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid + INNER JOIN pg_dist_node node ON placement.groupid = node.groupid + INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid + WHERE node.noderole = 'primary' AND (logicalrelid = 'sensors'::regclass OR logicalrelid = 'colocated_dist_table'::regclass OR logicalrelid = 'table_with_index_rep_identity'::regclass) + ORDER BY logicalrelid, shardminvalue::BIGINT; + +\c - - - :worker_1_port + SET search_path TO "citus_split_test_schema_no_deferred_drop", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + SELECT tbl.relname, fk."Constraint", fk."Definition" + FROM pg_catalog.pg_class tbl + JOIN public.table_fkeys fk on tbl.oid = fk.relid + WHERE tbl.relname like 'sensors_%' + ORDER BY 1, 2; + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'sensors_%' ORDER BY 1,2; + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'table_with_index_rep_identity_%' ORDER BY 1,2; + SELECT stxname FROM pg_statistic_ext + WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('citus_split_test_schema_no_deferred_drop') + ) + ORDER BY stxname ASC; + +\c - - - :worker_2_port + SET search_path TO "citus_split_test_schema_no_deferred_drop", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + SELECT tbl.relname, fk."Constraint", fk."Definition" + FROM pg_catalog.pg_class tbl + JOIN public.table_fkeys fk on tbl.oid = fk.relid + WHERE tbl.relname like 'sensors_%' + ORDER BY 1, 2; + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'sensors_%' ORDER BY 1,2; + SELECT tablename, indexdef FROM pg_indexes WHERE tablename like 'table_with_index_rep_identity_%' ORDER BY 1,2; + SELECT stxname FROM pg_statistic_ext + WHERE stxnamespace IN ( + SELECT oid + FROM pg_namespace + WHERE nspname IN ('citus_split_test_schema_no_deferred_drop') + ) + ORDER BY 
stxname ASC; +-- END : Display current state + +-- BEGIN: Should be able to change/drop constraints +\c - postgres - :master_port +SET citus.defer_drop_after_shard_split TO OFF; +SET ROLE test_split_deferred_role; +SET search_path TO "citus_split_test_schema_no_deferred_drop"; +ALTER INDEX index_on_sensors RENAME TO index_on_sensors_renamed; +ALTER INDEX index_on_sensors_renamed ALTER COLUMN 1 SET STATISTICS 200; +DROP STATISTICS stats_on_sensors; +DROP INDEX index_on_sensors_renamed; +ALTER TABLE sensors DROP CONSTRAINT fkey_table_to_dist; +-- END: Should be able to change/drop constraints + +-- BEGIN: Split second time on another schema +SET search_path TO public; +SET citus.next_shard_id TO 8981031; +SELECT pg_catalog.citus_split_shard_by_split_points( + 8981007, + ARRAY['-2100000000'], + ARRAY[:worker_1_node, :worker_2_node], + 'block_writes'); + +SET search_path TO "citus_split_test_schema_no_deferred_drop"; +SELECT shard.shardid, logicalrelid, shardminvalue, shardmaxvalue, nodename, nodeport + FROM pg_dist_shard AS shard + INNER JOIN pg_dist_placement placement ON shard.shardid = placement.shardid + INNER JOIN pg_dist_node node ON placement.groupid = node.groupid + INNER JOIN pg_catalog.pg_class cls ON shard.logicalrelid = cls.oid + WHERE node.noderole = 'primary' AND (logicalrelid = 'sensors'::regclass OR logicalrelid = 'colocated_dist_table'::regclass OR logicalrelid = 'table_with_index_rep_identity'::regclass) + ORDER BY logicalrelid, shardminvalue::BIGINT; +-- END: Split second time on another schema + +-- BEGIN: Validate Data Count +SELECT COUNT(*) FROM sensors; +SELECT COUNT(*) FROM reference_table; +SELECT COUNT(*) FROM colocated_dist_table; +-- END: Validate Data Count + +--BEGIN : Cleanup +\c - postgres - :master_port +DROP SCHEMA "citus_split_test_schema_no_deferred_drop" CASCADE; + +SET citus.defer_drop_after_shard_split TO ON; +--END : Cleanup diff --git a/src/test/regress/sql/failure_split_cleanup.sql b/src/test/regress/sql/failure_split_cleanup.sql new file mode 100644 index 000000000..034bb2c1b --- /dev/null +++ b/src/test/regress/sql/failure_split_cleanup.sql @@ -0,0 +1,355 @@ +-- The test exercises the failure scenarios below +--1. Failure while creating publications +--2. Failure while creating shared memory segment +--3. Failure while creating replication slots +--4. Failure while enabling subscription +--5. Failure on polling subscription state +--6. Failure on polling last write-ahead log location reported to origin WAL sender +--7. Failure on dropping subscription +CREATE SCHEMA "citus_failure_split_cleanup_schema"; +SET search_path TO "citus_failure_split_cleanup_schema"; + +SET citus.next_shard_id TO 8981000; +SET citus.next_placement_id TO 8610000; +SET citus.next_operation_id TO 777; +SET citus.next_cleanup_record_id TO 11; +SET citus.shard_count TO 2; +SET citus.shard_replication_factor TO 1; +SELECT pg_backend_pid() as pid \gset + +-- Disable defer shard delete to stop auto cleanup. +ALTER SYSTEM SET citus.defer_shard_delete_interval TO -1; +SELECT pg_reload_conf(); + +-- Connections on the proxy port (worker_2) are monitored +SELECT nodeid AS worker_1_node FROM pg_dist_node WHERE nodeport=:worker_1_port \gset +SELECT nodeid AS worker_2_node FROM pg_dist_node WHERE nodeport=:worker_2_proxy_port \gset + +CREATE TABLE table_to_split(id int PRIMARY KEY, int_data int, data text); +SELECT create_distributed_table('table_to_split', 'id'); + +--1.
Failure while creating publications + SELECT citus.mitmproxy('conn.onQuery(query="CREATE PUBLICATION .* FOR TABLE").killall()'); + SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + + -- we need to allow connection so that we can connect to proxy + SELECT citus.mitmproxy('conn.allow()'); + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Left over child shards + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + -- Left over publications + SELECT pubname FROM pg_publication; + -- Left over replication slots + SELECT slot_name FROM pg_replication_slots; + -- Left over subscriptions + SELECT subname FROM pg_subscription; + + \c - postgres - :master_port + CALL pg_catalog.citus_cleanup_orphaned_resources(); + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Empty child shards after cleanup + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + -- Empty publications + SELECT pubname FROM pg_publication; + -- Empty replication slot table + SELECT slot_name FROM pg_replication_slots; + -- Empty subscriptions + SELECT subname FROM pg_subscription; + +--2. Failure while creating shared memory segment + \c - postgres - :master_port + SET citus.next_shard_id TO 8981002; + SET citus.next_operation_id TO 777; + SET citus.next_cleanup_record_id TO 11; + + SELECT citus.mitmproxy('conn.onQuery(query="SELECT \* FROM pg_catalog.worker_split_shard_replication_setup\(.*").killall()'); + SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + -- we need to allow connection so that we can connect to proxy + SELECT citus.mitmproxy('conn.allow()'); + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Left over child shards + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + -- Left over publications + SELECT pubname FROM pg_publication; + -- Left over replication slots + SELECT slot_name FROM pg_replication_slots; + -- Left over subscriptions + SELECT subname FROM pg_subscription; + + \c - postgres - :master_port + CALL pg_catalog.citus_cleanup_orphaned_resources(); + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Empty child shards after cleanup + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + -- Empty publications + SELECT pubname FROM 
pg_publication; + -- Empty replication slot table + SELECT slot_name FROM pg_replication_slots; + -- Empty subscriptions + SELECT subname FROM pg_subscription; + +--3. Failure while executing 'CREATE_REPLICATION_SLOT' for Snapshot. + \c - postgres - :master_port + SET citus.next_shard_id TO 8981002; + SET citus.next_operation_id TO 777; + SET citus.next_cleanup_record_id TO 11; + + SELECT citus.mitmproxy('conn.onQuery(query="CREATE_REPLICATION_SLOT .* LOGICAL .* EXPORT_SNAPSHOT.*").killall()'); + SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + -- we need to allow connection so that we can connect to proxy + SELECT citus.mitmproxy('conn.allow()'); + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Left over child shards + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + -- Left over publications + SELECT pubname FROM pg_publication; + -- Left over replication slots + SELECT slot_name FROM pg_replication_slots; + -- Left over subscriptions + SELECT subname FROM pg_subscription; + + \c - postgres - :master_port + CALL pg_catalog.citus_cleanup_orphaned_resources(); + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Empty child shards after cleanup + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + -- Empty publications + SELECT pubname FROM pg_publication; + -- Empty replication slot table + SELECT slot_name FROM pg_replication_slots; + -- Empty subscriptions + SELECT subname FROM pg_subscription; + +--4. 
Failure while enabling subscription + \c - postgres - :master_port + SET citus.next_shard_id TO 8981002; + SET citus.next_operation_id TO 777; + SET citus.next_cleanup_record_id TO 11; + + SELECT citus.mitmproxy('conn.onQuery(query="ALTER SUBSCRIPTION .* ENABLE").killall()'); + SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + -- we need to allow connection so that we can connect to proxy + SELECT citus.mitmproxy('conn.allow()'); + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Left over child shards + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + -- Left over publications + SELECT pubname FROM pg_publication; + -- Left over replication slots + SELECT slot_name FROM pg_replication_slots; + -- Left over subscriptions + SELECT subname FROM pg_subscription; + + \c - postgres - :master_port + CALL pg_catalog.citus_cleanup_orphaned_resources(); + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Empty child shards after cleanup + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + -- Empty publications + SELECT pubname FROM pg_publication; + -- Empty replication slot table + SELECT slot_name FROM pg_replication_slots; + -- Empty subscriptions + SELECT subname FROM pg_subscription; + +--5. 
Failure on polling subscription state + \c - postgres - :master_port + SET citus.next_shard_id TO 8981002; + SET citus.next_operation_id TO 777; + SET citus.next_cleanup_record_id TO 11; + + SELECT citus.mitmproxy('conn.onQuery(query="^SELECT count\(\*\) FROM pg_subscription_rel").killall()'); + SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + -- we need to allow connection so that we can connect to proxy + SELECT citus.mitmproxy('conn.allow()'); + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Left over child shards + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + -- Left over publications + SELECT pubname FROM pg_publication; + -- Left over replication slots + SELECT slot_name FROM pg_replication_slots; + -- Left over subscriptions + SELECT subname FROM pg_subscription; + + \c - postgres - :master_port + CALL pg_catalog.citus_cleanup_orphaned_resources(); + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Empty child shards after cleanup + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + -- Empty publications + SELECT pubname FROM pg_publication; + -- Empty replication slot table + SELECT slot_name FROM pg_replication_slots; + -- Empty subscriptions + SELECT subname FROM pg_subscription; + +--6. 
Failure on polling last write-ahead log location reported to origin WAL sender + \c - postgres - :master_port + SET citus.next_shard_id TO 8981002; + SET citus.next_operation_id TO 777; + SET citus.next_cleanup_record_id TO 11; + + SELECT citus.mitmproxy('conn.onQuery(query="^SELECT min\(latest_end_lsn").killall()'); + SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + -- we need to allow connection so that we can connect to proxy + SELECT citus.mitmproxy('conn.allow()'); + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Left over child shards + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + -- Left over publications + SELECT pubname FROM pg_publication; + -- Left over replication slots + SELECT slot_name FROM pg_replication_slots; + -- Left over subscriptions + SELECT subname FROM pg_subscription; + + \c - postgres - :master_port + CALL pg_catalog.citus_cleanup_orphaned_resources(); + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Empty child shards after cleanup + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + -- Empty publications + SELECT pubname FROM pg_publication; + -- Empty replication slot table + SELECT slot_name FROM pg_replication_slots; + -- Empty subscriptions + SELECT subname FROM pg_subscription; + +--7. Failure on dropping subscription + \c - postgres - :master_port + SET citus.next_shard_id TO 8981002; + SET citus.next_operation_id TO 777; + SET citus.next_cleanup_record_id TO 11; + + SELECT citus.mitmproxy('conn.onQuery(query="^DROP SUBSCRIPTION").killall()'); + SELECT pg_catalog.citus_split_shard_by_split_points( + 8981000, + ARRAY['-100000'], + ARRAY[:worker_1_node, :worker_2_node], + 'force_logical'); + -- NO records expected as we fail at 'DropAllLogicalReplicationLeftovers' before creating + -- any resources. 
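-- [Editorial aside, not part of the patch] Because this failure is injected before any
-- cleanup records are written, the same expectation could also be asserted as a single
-- boolean; a minimal sketch, reusing only the catalog table and operation id this test
-- already uses:
SELECT COUNT(*) = 0 AS no_cleanup_records
FROM pg_dist_cleanup
WHERE operation_id = 777;
-- The test below lists the (expected-empty) cleanup records and shard relations directly: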
+ SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + -- we need to allow connection so that we can connect to proxy + SELECT citus.mitmproxy('conn.allow()'); + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Left over child shards + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + -- Left over publications + SELECT pubname FROM pg_publication; + -- Left over replication slots + SELECT slot_name FROM pg_replication_slots; + -- Left over subscriptions + SELECT subname FROM pg_subscription; + + \c - postgres - :master_port + CALL pg_catalog.citus_cleanup_orphaned_resources(); + SELECT operation_id, object_type, object_name, node_group_id, policy_type + FROM pg_dist_cleanup where operation_id = 777; + + \c - - - :worker_2_proxy_port + SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; + SET citus.show_shards_for_app_name_prefixes = '*'; + -- Empty child shards after cleanup + SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; + -- Empty publications + SELECT pubname FROM pg_publication; + -- Empty replication slot table + SELECT slot_name FROM pg_replication_slots; + -- Empty subscriptions + SELECT subname FROM pg_subscription; + +-- Cleanup +\c - postgres - :master_port +DROP SCHEMA "citus_failure_split_cleanup_schema" CASCADE; +-- Cleanup diff --git a/src/test/regress/sql/failure_tenant_isolation.sql b/src/test/regress/sql/failure_tenant_isolation.sql index 00a75bc58..874b3a59e 100644 --- a/src/test/regress/sql/failure_tenant_isolation.sql +++ b/src/test/regress/sql/failure_tenant_isolation.sql @@ -88,6 +88,9 @@ SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode -- failure on dropping old colocated shard +-- Disable deferred drop otherwise we will skip the drop and operation will succeed instead of failing. +SET citus.defer_drop_after_shard_split TO OFF; + SELECT citus.mitmproxy('conn.onQuery(query="DROP TABLE IF EXISTS tenant_isolation.table_2").kill()'); SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode => 'block_writes'); @@ -103,6 +106,8 @@ SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode SELECT citus.mitmproxy('conn.onQuery(query="DROP TABLE IF EXISTS tenant_isolation.table_1").cancel(' || :pid || ')'); SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode => 'block_writes'); +-- Re-enable deferred drop for rest of the tests. 
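-- [Editorial aside, not part of the patch] With deferred drop enabled, the old shard is
-- not dropped inline by the isolation command but is left for deferred cleanup, so a
-- mitmproxy rule on the DROP statement never fires; that is why the GUC is turned OFF
-- around the drop-failure cases above. An illustrative way to inspect the relevant
-- settings in a session (both GUCs already appear elsewhere in this patch):
SHOW citus.defer_drop_after_shard_split;
SHOW citus.defer_shard_delete_interval;
-- The test now turns deferred drop back on for the remaining scenarios: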
+SET citus.defer_drop_after_shard_split TO ON; -- failure on foreign key creation SELECT citus.mitmproxy('conn.onQuery(query="ADD CONSTRAINT table_2_ref_id_fkey FOREIGN KEY").kill()'); diff --git a/src/test/regress/sql/failure_tenant_isolation_nonblocking.sql b/src/test/regress/sql/failure_tenant_isolation_nonblocking.sql index 834c25176..60bfff417 100644 --- a/src/test/regress/sql/failure_tenant_isolation_nonblocking.sql +++ b/src/test/regress/sql/failure_tenant_isolation_nonblocking.sql @@ -158,6 +158,10 @@ SELECT citus.mitmproxy('conn.onQuery(query="select pg_drop_replication_slot").ca SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); -- failure on dropping old shard +-- failure on dropping old colocated shard +-- Disable deferred drop otherwise we will skip the drop and operation will succeed instead of failing. +SET citus.defer_drop_after_shard_split TO OFF; + SELECT citus.mitmproxy('conn.onQuery(query="DROP TABLE IF EXISTS tenant_isolation.table_1").kill()'); SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); @@ -173,6 +177,9 @@ SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode SELECT citus.mitmproxy('conn.onQuery(query="DROP TABLE IF EXISTS tenant_isolation.table_2").cancel(' || :pid || ')'); SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); +-- Re-enable deferred drop for rest of the tests. +SET citus.defer_drop_after_shard_split TO ON; + -- failure on foreign key creation SELECT citus.mitmproxy('conn.onQuery(query="ADD CONSTRAINT table_2_ref_id_fkey FOREIGN KEY").kill()'); SELECT isolate_tenant_to_new_shard('table_1', 5, 'CASCADE', shard_transfer_mode := 'force_logical'); diff --git a/src/test/regress/sql/multi_tenant_isolation.sql b/src/test/regress/sql/multi_tenant_isolation.sql index 3b4dd6b75..07a88cf98 100644 --- a/src/test/regress/sql/multi_tenant_isolation.sql +++ b/src/test/regress/sql/multi_tenant_isolation.sql @@ -224,6 +224,9 @@ SELECT * FROM pg_dist_shard_placement WHERE shardid >= 1230000 ORDER BY nodeport 128|106828|9339|1|38|69723.16|0.06|0.01|A|F|1992-09-01|1992-08-27|1992-10-01|TAKE BACK RETURN|FOB| cajole careful \. +\c - postgres - :master_port +CALL pg_catalog.citus_cleanup_orphaned_resources(); + -- connect to the worker node with metadata \c - mx_isolation_role_ent - :worker_1_port SET search_path to "Tenant Isolation"; @@ -341,6 +344,9 @@ SELECT * FROM pg_dist_shard WHERE logicalrelid = 'lineitem_streaming'::regclass OR logicalrelid = 'orders_streaming'::regclass ORDER BY shardminvalue::BIGINT, logicalrelid; +\c - postgres - :master_port +CALL pg_catalog.citus_cleanup_orphaned_resources(); + -- test failure scenarios with triggers on workers \c - postgres - :worker_1_port SET search_path to "Tenant Isolation"; @@ -393,7 +399,10 @@ RESET citus.enable_metadata_sync; CREATE EVENT TRIGGER abort_drop ON sql_drop EXECUTE PROCEDURE abort_drop_command(); -\c - mx_isolation_role_ent - :master_port +\c - postgres - :master_port +-- Disable deferred drop otherwise we will skip the drop and operation will succeed instead of failing. 
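-- [Editorial aside, not part of the patch] Reconnecting as postgres before toggling the
-- GUC is presumably done so a superuser session changes the setting before the test role
-- is resumed with SET ROLE. After such failure cases, leftover records could be inspected
-- and removed with the calls this patch already adds elsewhere; an illustrative sketch:
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup
ORDER BY record_id;
CALL pg_catalog.citus_cleanup_orphaned_resources();
-- The patch itself only disables deferred drop and switches back to the test role: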
+SET citus.defer_drop_after_shard_split TO OFF; +SET ROLE mx_isolation_role_ent; SET search_path to "Tenant Isolation"; \set VERBOSITY terse @@ -536,6 +545,9 @@ SELECT isolate_tenant_to_new_shard('test_colocated_table_2', 1, 'CASCADE', shard SELECT count(*) FROM test_colocated_table_2; +\c - postgres - :master_port +CALL pg_catalog.citus_cleanup_orphaned_resources(); + \c - postgres - :worker_1_port -- show the foreign keys of the main table & its colocated shard on other tables diff --git a/src/test/regress/sql/multi_tenant_isolation_nonblocking.sql b/src/test/regress/sql/multi_tenant_isolation_nonblocking.sql index b35c114eb..eca828d04 100644 --- a/src/test/regress/sql/multi_tenant_isolation_nonblocking.sql +++ b/src/test/regress/sql/multi_tenant_isolation_nonblocking.sql @@ -224,6 +224,9 @@ SELECT * FROM pg_dist_shard_placement WHERE shardid >= 1230000 ORDER BY nodeport 128|106828|9339|1|38|69723.16|0.06|0.01|A|F|1992-09-01|1992-08-27|1992-10-01|TAKE BACK RETURN|FOB| cajole careful \. +\c - postgres - :master_port +CALL pg_catalog.citus_cleanup_orphaned_resources(); + -- connect to the worker node with metadata \c - mx_isolation_role_ent - :worker_1_port SET search_path to "Tenant Isolation"; @@ -341,6 +344,9 @@ SELECT * FROM pg_dist_shard WHERE logicalrelid = 'lineitem_streaming'::regclass OR logicalrelid = 'orders_streaming'::regclass ORDER BY shardminvalue::BIGINT, logicalrelid; +\c - postgres - :master_port +CALL pg_catalog.citus_cleanup_orphaned_resources(); + -- test failure scenarios with triggers on workers \c - postgres - :worker_1_port SET search_path to "Tenant Isolation"; @@ -545,6 +551,9 @@ SELECT isolate_tenant_to_new_shard('test_colocated_table_2', 1, 'CASCADE', shard SELECT count(*) FROM test_colocated_table_2; +\c - postgres - :master_port +CALL pg_catalog.citus_cleanup_orphaned_resources(); + \c - postgres - :worker_1_port -- show the foreign keys of the main table & its colocated shard on other tables diff --git a/src/test/regress/sql/upgrade_basic_after.sql b/src/test/regress/sql/upgrade_basic_after.sql index 48209fa62..0ee0ce0f5 100644 --- a/src/test/regress/sql/upgrade_basic_after.sql +++ b/src/test/regress/sql/upgrade_basic_after.sql @@ -8,6 +8,8 @@ SELECT nextval('pg_dist_placement_placementid_seq') = MAX(placementid)+1 FROM pg SELECT nextval('pg_dist_groupid_seq') = MAX(groupid)+1 FROM pg_dist_node; SELECT nextval('pg_dist_node_nodeid_seq') = MAX(nodeid)+1 FROM pg_dist_node; SELECT nextval('pg_dist_colocationid_seq') = MAX(colocationid)+1 FROM pg_dist_colocation; +SELECT nextval('pg_dist_operationid_seq') = MAX(operation_id)+1 FROM pg_dist_cleanup; +SELECT nextval('pg_dist_cleanup_recordid_seq') = MAX(record_id)+1 FROM pg_dist_cleanup; -- If this query gives output it means we've added a new sequence that should -- possibly be restored after upgrades. @@ -19,7 +21,9 @@ SELECT sequence_name FROM information_schema.sequences 'pg_dist_placement_placementid_seq', 'pg_dist_groupid_seq', 'pg_dist_node_nodeid_seq', - 'pg_dist_colocationid_seq' + 'pg_dist_colocationid_seq', + 'pg_dist_operationid_seq', + 'pg_dist_cleanup_recordid_seq' ); SELECT logicalrelid FROM pg_dist_partition