diff --git a/src/backend/distributed/operations/shard_split.c b/src/backend/distributed/operations/shard_split.c
index d8dcb4cbc..bcde6b0ae 100644
--- a/src/backend/distributed/operations/shard_split.c
+++ b/src/backend/distributed/operations/shard_split.c
@@ -1656,6 +1656,26 @@ NonBlockingShardSplit(SplitOperation splitOperation,
 											databaseName, logicalRepTargetList);
 
+	/*
+	 * We have to create the primary key (or any other replica identity)
+	 * before the queued update/delete operations are replicated, because
+	 * if the replica identity does not exist on the target, the
+	 * replication would fail.
+	 *
+	 * So the latest possible moment we could do this is right after the
+	 * initial data COPY, but before enabling the subscriptions. It might
+	 * seem like a good idea to do it after the initial data COPY, since
+	 * it's generally the rule that it's cheaper to build an index at once
+	 * than to create it incrementally. This general rule is why we create
+	 * all the regular indexes as late during the move as possible.
+	 *
+	 * But as it turns out in practice it's not as clear cut, and we saw a
+	 * speed degradation in the time it takes to move shards when doing the
+	 * replica identity creation after the initial COPY. So instead we
+	 * keep it before the COPY.
+	 */
+	CreateReplicaIdentities(logicalRepTargetList);
+
 	ereport(LOG, (errmsg("performing copy for %s", operationName)));
 
 	/* 8) Do snapshotted Copy */
diff --git a/src/backend/distributed/replication/multi_logical_replication.c b/src/backend/distributed/replication/multi_logical_replication.c
index 3d82afdc9..598db1988 100644
--- a/src/backend/distributed/replication/multi_logical_replication.c
+++ b/src/backend/distributed/replication/multi_logical_replication.c
@@ -234,6 +234,26 @@ LogicallyReplicateShards(List *shardList, char *sourceNodeName, int sourceNodePo
 	/* only useful for isolation testing, see the function comment for the details */
 	ConflictOnlyWithIsolationTesting();
 
+	/*
+	 * We have to create the primary key (or any other replica identity)
+	 * before the queued update/delete operations are replicated, because
+	 * if the replica identity does not exist on the target, the
+	 * replication would fail.
+	 *
+	 * So the latest possible moment we could do this is right after the
+	 * initial data COPY, but before enabling the subscriptions. It might
+	 * seem like a good idea to do it after the initial data COPY, since
+	 * it's generally the rule that it's cheaper to build an index at once
+	 * than to create it incrementally. This general rule is why we create
+	 * all the regular indexes as late during the move as possible.
+	 *
+	 * But as it turns out in practice it's not as clear cut, and we saw a
+	 * speed degradation in the time it takes to move shards when doing the
+	 * replica identity creation after the initial COPY. So instead we
+	 * keep it before the COPY.
+	 */
+	CreateReplicaIdentities(logicalRepTargetList);
+
 	CopyShardsToNode(sourceNode, targetNode, shardList, snapshot);
 
 	/*
@@ -347,20 +367,6 @@ CompleteNonBlockingShardTransfer(List *shardList,
 								 HTAB *groupedLogicalRepTargetsHash,
 								 LogicalRepType type)
 {
-	/*
-	 * We have to create the primary key (or any other replica identity)
-	 * before the update/delete operations that are queued will be
-	 * replicated. Because if the replica identity does not exist on the
-	 * target, the replication would fail.
-	 *
-	 * So we it right after the initial data COPY, but before enabling the
-	 * susbcriptions. We do it at this latest possible moment, because its
-	 * much cheaper to build an index at once than to create it
-	 * incrementally. So this way we create the primary key index in one go
-	 * for all data from the initial COPY.
-	 */
-	CreateReplicaIdentities(logicalRepTargetList);
-
 	/* Start applying the changes from the replication slots to catch up. */
 	EnableSubscriptions(logicalRepTargetList);
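
For readers who want the ordering at a glance: below is a minimal, self-contained C sketch of the sequence this patch settles on for a logical-replication-based shard transfer. It is not Citus code; every function is a hypothetical stand-in named after the step it represents, and only the call order mirrors the comments in the diff above.

/*
 * Hypothetical stand-in functions; only the call order below reflects the
 * transfer sequence described in the patch.
 */
#include <stdio.h>

static void CreateReplicaIdentitiesStep(void)
{
	/* replica identities must exist before queued UPDATE/DELETEs are applied */
	printf("1. create primary keys / replica identities on the target\n");
}

static void CopyShardDataStep(void)
{
	printf("2. snapshotted COPY of the existing shard data\n");
}

static void EnableSubscriptionsStep(void)
{
	/* the queued changes identify rows via the replica identity, hence step 1 */
	printf("3. enable subscriptions and apply the queued changes\n");
}

static void CreateRemainingIndexesStep(void)
{
	/* regular indexes are still created as late as possible, in one go */
	printf("4. build the remaining (non replica identity) indexes\n");
}

int main(void)
{
	CreateReplicaIdentitiesStep();
	CopyShardDataStep();
	EnableSubscriptionsStep();
	CreateRemainingIndexesStep();
	return 0;
}

The only change the patch makes is moving step 1 from between steps 2 and 3 to before step 2; the stated motivation is that, despite the general rule about building indexes in bulk, creating the replica identity after the initial COPY was measured to slow shard moves down.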