Address review comments

users/saawasek/non_blocking_split_integrated
Sameer Awasekar 2022-08-01 23:58:20 +05:30
parent c56b79b6f7
commit a430b0bffc
9 changed files with 90 additions and 97 deletions

View File

@ -63,7 +63,8 @@ static void CreateAndCopySplitShardsForShardGroup(
static void CreateSplitShardsForShardGroup(HTAB *mapOfShardToPlacementCreatedByWorkflow,
List *shardGroupSplitIntervalListList,
List *workersForPlacementList);
static void CreateDummyShardsForShardGroup(List *sourceColocatedShardIntervalList,
static void CreateDummyShardsForShardGroup(HTAB *mapOfDummyShardToPlacement,
List *sourceColocatedShardIntervalList,
List *shardGroupSplitIntervalListList,
WorkerNode *sourceWorkerNode,
List *workersForPlacementList);
@ -71,7 +72,8 @@ static HTAB * CreateWorkerForPlacementSet(List *workersForPlacementList);
static void CreateAuxiliaryStructuresForShardGroup(List *shardGroupSplitIntervalListList,
List *workersForPlacementList,
bool includeReplicaIdentity);
static void CreateReplicaIdentities(List *shardGroupSplitIntervalListList,
static void CreateReplicaIdentities(HTAB *mapOfDummyShardToPlacement,
List *shardGroupSplitIntervalListList,
List *workersForPlacementList);
static void CreateObjectOnPlacement(List *objectCreationCommandList,
WorkerNode *workerNode);
@ -120,8 +122,9 @@ static List * ExecuteSplitShardReplicationSetupUDF(WorkerNode *sourceWorkerNode,
List *sourceColocatedShardIntervalList,
List *shardGroupSplitIntervalListList,
List *destinationWorkerNodesList);
static void AddDummyShardEntryInMap(uint32 targetNodeId, ShardInterval *shardInterval);
static void DropDummyShards(void);
static void AddDummyShardEntryInMap(HTAB *mapOfDummyShards, uint32 targetNodeId,
ShardInterval *shardInterval);
static void DropDummyShards(HTAB *mapOfDummyShardToPlacement);
static void DropDummyShard(MultiConnection *connection, ShardInterval *shardInterval);
@ -137,13 +140,6 @@ static const char *const SplitTargetName[] =
[ISOLATE_TENANT_TO_NEW_SHARD] = "tenant",
};
/*
* Map containing list of dummy shards created on target nodes.
* Key - <nodeId, tableOwnerId>
* Value - ShardInterval
*/
static HTAB *DummyShardInfoHashMap = NULL;
/* Function definitions */
/*
@ -1258,7 +1254,6 @@ NonBlockingShardSplit(SplitOperation splitOperation,
{
char *superUser = CitusExtensionOwnerName();
char *databaseName = get_database_name(MyDatabaseId);
int connectionFlags = FORCE_NEW_CONNECTION;
List *sourceColocatedShardIntervalList = ColocatedShardIntervalList(
shardIntervalToSplit);
@ -1279,6 +1274,7 @@ NonBlockingShardSplit(SplitOperation splitOperation,
DropAllShardSplitLeftOvers(sourceShardToCopyNode, shardSplitHashMapForPublication);
int connectionFlags = FORCE_NEW_CONNECTION;
MultiConnection *sourceConnection = GetNodeUserDatabaseConnection(connectionFlags,
sourceShardToCopyNode
->
@ -1293,6 +1289,8 @@ NonBlockingShardSplit(SplitOperation splitOperation,
HTAB *mapOfShardToPlacementCreatedByWorkflow =
CreateEmptyMapForShardsCreatedByWorkflow();
HTAB *mapOfDummyShardToPlacement = SetupHashMapForShardInfo();
/* Non-Blocking shard split workflow starts here */
PG_TRY();
{
@ -1307,12 +1305,14 @@ NonBlockingShardSplit(SplitOperation splitOperation,
* information.
*/
CreateDummyShardsForShardGroup(
mapOfDummyShardToPlacement,
sourceColocatedShardIntervalList,
shardGroupSplitIntervalListList,
sourceShardToCopyNode,
workersForPlacementList);
CreateReplicaIdentities(shardGroupSplitIntervalListList, workersForPlacementList);
CreateReplicaIdentities(mapOfDummyShardToPlacement,
shardGroupSplitIntervalListList, workersForPlacementList);
/* 3) Create Publications. */
@ -1426,7 +1426,7 @@ NonBlockingShardSplit(SplitOperation splitOperation,
/* 21) Drop dummy shards.
* TODO(saawasek):Refactor and pass hashmap.Currently map is global variable */
DropDummyShards();
DropDummyShards(mapOfDummyShardToPlacement);
/* 22) Close source connection */
@ -1447,7 +1447,7 @@ NonBlockingShardSplit(SplitOperation splitOperation,
DropAllShardSplitLeftOvers(sourceShardToCopyNode,
shardSplitHashMapForPublication);
DropDummyShards();
DropDummyShards(mapOfDummyShardToPlacement);
PG_RE_THROW();
}
@ -1478,17 +1478,12 @@ NonBlockingShardSplit(SplitOperation splitOperation,
* Note 2 : Given there is an overlap of source and destination in Worker0, Shard1_1 and Shard2_1 need not be created.
*/
static void
CreateDummyShardsForShardGroup(List *sourceColocatedShardIntervalList,
CreateDummyShardsForShardGroup(HTAB *mapOfDummyShardToPlacement,
List *sourceColocatedShardIntervalList,
List *shardGroupSplitIntervalListList,
WorkerNode *sourceWorkerNode,
List *workersForPlacementList)
{
/*
* Setup a hash map to store list of dummy shards created on nodes.
* This will facilitate easy cleanup.
*/
DummyShardInfoHashMap = SetupHashMapForShardInfo();
/*
* Satisfy Constraint 1: Create dummy source shard(s) on all destination nodes.
* If source node is also in destination, skip dummy shard creation (see Note 1 from function description).
@ -1524,7 +1519,9 @@ CreateDummyShardsForShardGroup(List *sourceColocatedShardIntervalList,
CreateObjectOnPlacement(splitShardCreationCommandList, workerPlacementNode);
/* Add dummy source shard entry created for placement node in map */
AddDummyShardEntryInMap(workerPlacementNode->nodeId, shardInterval);
AddDummyShardEntryInMap(mapOfDummyShardToPlacement,
workerPlacementNode->nodeId,
shardInterval);
}
}
@ -1557,12 +1554,16 @@ CreateDummyShardsForShardGroup(List *sourceColocatedShardIntervalList,
CreateObjectOnPlacement(splitShardCreationCommandList, sourceWorkerNode);
/* Add dummy split child shard entry created on source node */
AddDummyShardEntryInMap(sourceWorkerNode->nodeId, shardInterval);
AddDummyShardEntryInMap(mapOfDummyShardToPlacement, sourceWorkerNode->nodeId,
shardInterval);
}
}
}
/*
* CreateWorkerForPlacementSet returns a set with unique worker nodes.
*/
static HTAB *
CreateWorkerForPlacementSet(List *workersForPlacementList)
{
@ -1813,7 +1814,8 @@ ParseReplicationSlotInfoFromResult(PGresult *result)
* of logical replication. We cautiously delete only the dummy shards added in the DummyShardHashMap.
*/
static void
AddDummyShardEntryInMap(uint32 targetNodeId, ShardInterval *shardInterval)
AddDummyShardEntryInMap(HTAB *mapOfDummyShardToPlacement, uint32 targetNodeId,
ShardInterval *shardInterval)
{
NodeShardMappingKey key;
key.nodeId = targetNodeId;
@ -1821,7 +1823,8 @@ AddDummyShardEntryInMap(uint32 targetNodeId, ShardInterval *shardInterval)
bool found = false;
NodeShardMappingEntry *nodeMappingEntry =
(NodeShardMappingEntry *) hash_search(DummyShardInfoHashMap, &key, HASH_ENTER,
(NodeShardMappingEntry *) hash_search(mapOfDummyShardToPlacement, &key,
HASH_ENTER,
&found);
if (!found)
{
@ -1834,16 +1837,10 @@ AddDummyShardEntryInMap(uint32 targetNodeId, ShardInterval *shardInterval)
static void
DropDummyShards()
DropDummyShards(HTAB *mapOfDummyShardToPlacement)
{
/* Return if no dummy shards are created */
if (DummyShardInfoHashMap == NULL)
{
return;
}
HASH_SEQ_STATUS status;
hash_seq_init(&status, DummyShardInfoHashMap);
hash_seq_init(&status, mapOfDummyShardToPlacement);
NodeShardMappingEntry *entry = NULL;
while ((entry = (NodeShardMappingEntry *) hash_seq_search(&status)) != NULL)
@ -1899,7 +1896,8 @@ DropDummyShard(MultiConnection *connection, ShardInterval *shardInterval)
/*todo(saawasek): Add comments */
static void
CreateReplicaIdentities(List *shardGroupSplitIntervalListList,
CreateReplicaIdentities(HTAB *mapOfDummyShardToPlacement,
List *shardGroupSplitIntervalListList,
List *workersForPlacementList)
{
/*
@ -1928,7 +1926,7 @@ CreateReplicaIdentities(List *shardGroupSplitIntervalListList,
/*todo: remove the global variable dummy map*/
HASH_SEQ_STATUS status;
hash_seq_init(&status, DummyShardInfoHashMap);
hash_seq_init(&status, mapOfDummyShardToPlacement);
NodeShardMappingEntry *entry = NULL;
while ((entry = (NodeShardMappingEntry *) hash_seq_search(&status)) != NULL)
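
The thrust of this file's changes is that the dummy-shard map is now created locally in NonBlockingShardSplit() via SetupHashMapForShardInfo() and passed explicitly through CreateDummyShardsForShardGroup(), CreateReplicaIdentities() and DropDummyShards(), instead of living in the DummyShardInfoHashMap global. Below is a minimal C sketch of the dynahash insert/iterate pattern those new signatures rely on; names not visible in the diff (the shardIntervalList member and the AddEntry/IterateEntries helpers) are placeholders for illustration only.

/* Sketch only: mirrors the AddDummyShardEntryInMap()/DropDummyShards() pattern. */
#include "postgres.h"
#include "nodes/pg_list.h"
#include "utils/hsearch.h"

typedef struct NodeShardMappingKey
{
    uint32 nodeId;          /* target node */
    Oid tableOwnerId;       /* owner of the shard's table */
} NodeShardMappingKey;

typedef struct NodeShardMappingEntry
{
    NodeShardMappingKey key;
    List *shardIntervalList;    /* assumed member name */
} NodeShardMappingEntry;

static void
AddEntry(HTAB *mapOfDummyShardToPlacement, uint32 targetNodeId, Oid tableOwnerId,
         void *shardInterval)
{
    NodeShardMappingKey key;
    key.nodeId = targetNodeId;
    key.tableOwnerId = tableOwnerId;

    bool found = false;

    /* HASH_ENTER creates the entry when the key is not present yet */
    NodeShardMappingEntry *entry =
        (NodeShardMappingEntry *) hash_search(mapOfDummyShardToPlacement, &key,
                                              HASH_ENTER, &found);
    if (!found)
    {
        entry->shardIntervalList = NIL;
    }

    entry->shardIntervalList = lappend(entry->shardIntervalList, shardInterval);
}

static void
IterateEntries(HTAB *mapOfDummyShardToPlacement)
{
    HASH_SEQ_STATUS status;
    hash_seq_init(&status, mapOfDummyShardToPlacement);

    NodeShardMappingEntry *entry = NULL;
    while ((entry = (NodeShardMappingEntry *) hash_seq_search(&status)) != NULL)
    {
        /* per (node, owner) work: drop dummy shards or create replica identities */
    }
}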

View File

@ -48,10 +48,9 @@ static ShardSplitInfo * CreateShardSplitInfo(uint64 sourceShardIdToSplit,
int32 maxValue,
int32 nodeId);
static void AddShardSplitInfoEntryForNodeInMap(ShardSplitInfo *shardSplitInfo);
static void PopulateShardSplitInfoInSM(ShardSplitInfoSMHeader *shardSplitInfoSMHeader,
HTAB *shardInfoHashMap);
static void PopulateShardSplitInfoInSM(ShardSplitInfoSMHeader *shardSplitInfoSMHeader);
static void ReturnReplicationSlotInfo(HTAB *shardInfoHashMap, Tuplestorestate *tupleStore,
static void ReturnReplicationSlotInfo(Tuplestorestate *tupleStore,
TupleDesc tupleDescriptor);
/*
@ -62,18 +61,20 @@ static void ReturnReplicationSlotInfo(HTAB *shardInfoHashMap, Tuplestorestate *t
* This meta information is stored in a shared memory segment and accessed
* by logical decoding plugin.
*
* Split information is given by user as an Array of custom data type 'citus.split_shard_info'.
* (worker_split_shard_replication_setup(citus.split_shard_info[]))
* Split information is given by user as an Array of custom data type 'pg_catalog.split_shard_info'.
* (worker_split_shard_replication_setup(pg_catalog.split_shard_info[]))
*
* Fields of custom data type 'citus.split_shard_info':
* Fields of custom data type 'pg_catalog.split_shard_info':
* source_shard_id - id of the shard that is undergoing a split
*
* distribution_column - Distribution column name
*
* child_shard_id - id of shard that stores a specific range of values
* belonging to sourceShardId(parent)
*
* shard_min_value - lower bound(inclusive) of hash value which childShard stores.
* shard_min_value - Lower bound(inclusive) of hash value which childShard stores
*
* shard_max_value - upper bound(inclusive) of hash value which childShard stores
* shard_max_value - Upper bound(inclusive) of hash value which childShard stores
*
* node_id - Node where the childShardId is located
*
@ -81,7 +82,7 @@ static void ReturnReplicationSlotInfo(HTAB *shardInfoHashMap, Tuplestorestate *t
* <nodeId, tableOwner> pair. Multiple shards can be placed on the same destination node.
* Source and destination nodes can be same too.
*
* There is a 1-1 mapping between a table owner and a replication slot. One replication
* There is a 1-1 mapping between a (table owner, node) and replication slot. One replication
* slot takes care of replicating changes for all shards belonging to the same owner on a particular node.
*
* During the replication phase, WAL senders will attach to the shared memory
@ -140,15 +141,14 @@ worker_split_shard_replication_setup(PG_FUNCTION_ARGS)
ShardSplitInfoSMHeader *splitShardInfoSMHeader =
CreateSharedMemoryForShardSplitInfo(shardSplitInfoCount, &dsmHandle);
PopulateShardSplitInfoInSM(splitShardInfoSMHeader,
ShardInfoHashMap);
PopulateShardSplitInfoInSM(splitShardInfoSMHeader);
/* store handle in statically allocated shared memory*/
StoreShardSplitSharedMemoryHandle(dsmHandle);
TupleDesc tupleDescriptor = NULL;
Tuplestorestate *tupleStore = SetupTuplestore(fcinfo, &tupleDescriptor);
ReturnReplicationSlotInfo(ShardInfoHashMap, tupleStore, tupleDescriptor);
ReturnReplicationSlotInfo(tupleStore, tupleDescriptor);
PG_RETURN_VOID();
}
@ -184,7 +184,7 @@ SetupHashMapForShardInfo()
*
* sourceShardIdToSplit - Existing shardId which has a valid entry in cache and catalogue
* partitionColumnName - Name of column to use for partitioning
* desSplitChildShardId - New split child shard which doesn't have an entry in metacache yet.
* desSplitChildShardId - New split child shard which doesn't have an entry in metacache yet
* minValue - Minimum hash value for desSplitChildShardId
* maxValue - Maximum hash value for desSplitChildShardId
* nodeId - NodeId where
@ -291,16 +291,12 @@ AddShardSplitInfoEntryForNodeInMap(ShardSplitInfo *shardSplitInfo)
* process during logical replication.
*
* shardSplitInfoSMHeader - Shared memory header
*
* shardInfoHashMap - Hashmap containing parsed split information
* per nodeId wise
*/
static void
PopulateShardSplitInfoInSM(ShardSplitInfoSMHeader *shardSplitInfoSMHeader,
HTAB *shardInfoHashMap)
PopulateShardSplitInfoInSM(ShardSplitInfoSMHeader *shardSplitInfoSMHeader)
{
HASH_SEQ_STATUS status;
hash_seq_init(&status, shardInfoHashMap);
hash_seq_init(&status, ShardInfoHashMap);
NodeShardMappingEntry *entry = NULL;
int splitInfoIndex = 0;
@ -360,6 +356,10 @@ NodeShardMappingHashCompare(const void *left, const void *right, Size keysize)
}
/*
* ParseShardSplitInfoFromDatum deserializes individual fields of 'pg_catalog.split_shard_info'
* datatype.
*/
static void
ParseShardSplitInfoFromDatum(Datum shardSplitInfoDatum,
uint64 *sourceShardId,
@ -421,12 +421,18 @@ ParseShardSplitInfoFromDatum(Datum shardSplitInfoDatum,
}
/*
* ReturnReplicationSlotInfo writes 'pg_catalog.replication_slot_info'
* records to tuplestore.
* This information is used by the coordinator to create replication slots as a
* part of non-blocking split workflow.
*/
static void
ReturnReplicationSlotInfo(HTAB *shardInfoHashMap, Tuplestorestate *tupleStore, TupleDesc
ReturnReplicationSlotInfo(Tuplestorestate *tupleStore, TupleDesc
tupleDescriptor)
{
HASH_SEQ_STATUS status;
hash_seq_init(&status, shardInfoHashMap);
hash_seq_init(&status, ShardInfoHashMap);
NodeShardMappingEntry *entry = NULL;
while ((entry = (NodeShardMappingEntry *) hash_seq_search(&status)) != NULL)
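
With this change PopulateShardSplitInfoInSM() and ReturnReplicationSlotInfo() read the file-level ShardInfoHashMap directly instead of taking it as a parameter. A hedged sketch of the tuplestore pattern behind ReturnReplicationSlotInfo() follows; the three-column layout of pg_catalog.replication_slot_info (node id, table owner name, slot name) and the helper name are assumptions for illustration, and the entry shape reuses the NodeShardMappingEntry sketched earlier.

/* Sketch: emit one replication_slot_info row per (nodeId, tableOwner) entry.
 * Column layout is an assumption for illustration only. */
#include "postgres.h"
#include "miscadmin.h"          /* GetUserNameFromId */
#include "access/tupdesc.h"
#include "utils/builtins.h"     /* CStringGetTextDatum */
#include "utils/hsearch.h"
#include "utils/tuplestore.h"

extern char * encode_replication_slot(uint32_t nodeId, uint32_t tableOwnerId);

static void
ReturnReplicationSlotInfoSketch(HTAB *shardInfoHashMap,
                                Tuplestorestate *tupleStore,
                                TupleDesc tupleDescriptor)
{
    HASH_SEQ_STATUS status;
    hash_seq_init(&status, shardInfoHashMap);

    NodeShardMappingEntry *entry = NULL;
    while ((entry = (NodeShardMappingEntry *) hash_seq_search(&status)) != NULL)
    {
        Datum values[3];
        bool nulls[3] = { false, false, false };

        /* assumed columns: node_id, table_owner, slot_name */
        values[0] = UInt32GetDatum(entry->key.nodeId);
        values[1] = CStringGetTextDatum(GetUserNameFromId(entry->key.tableOwnerId,
                                                          false));
        values[2] = CStringGetTextDatum(
            encode_replication_slot(entry->key.nodeId, entry->key.tableOwnerId));

        tuplestore_putvalues(tupleStore, tupleDescriptor, values, nulls);
    }
}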

View File

@ -14,13 +14,6 @@
#include "replication/logical.h"
#include "utils/typcache.h"
/*
* Dynamically-loaded modules are required to include this macro call to check for
* incompatibility (such as being compiled for a different major PostgreSQL version etc).
* In a multiple source-file module, the macro call should only appear once.
*/
PG_MODULE_MAGIC;
extern void _PG_output_plugin_init(OutputPluginCallbacks *cb);
static LogicalDecodeChangeCB pgoutputChangeCB;

View File

@ -2,7 +2,7 @@
*
* shardsplit_logical_replication.c
*
* Function definitions for logically replicating split children.
* Function definitions for logically replicating shard to split children.
*
* Copyright (c) Citus Data, Inc.
*
@ -433,11 +433,7 @@ DropExistingIfAnyAndCreateTemplateReplicationSlot(ShardInterval *shardIntervalTo
PQclear(result);
ForgetResults(sourceConnection);
/*
* Note: Temporary slot are only live during the session's lifetime causing them to be dropped when the session ends.
*/
StringInfo createReplicationSlotCommand = makeStringInfo();
appendStringInfo(createReplicationSlotCommand,
"CREATE_REPLICATION_SLOT %s LOGICAL citus EXPORT_SNAPSHOT;",
ShardSplitTemplateReplicationSlotName(
@ -587,7 +583,7 @@ void
DropAllShardSplitSubscriptions(MultiConnection *cleanupConnection)
{
char *query = psprintf(
"SELECT subname FROM pg_subscription "
"SELECT subname FROM pg_catalog.pg_subscription "
"WHERE subname LIKE %s || '%%'",
quote_literal_cstr(SHARD_SPLIT_SUBSCRIPTION_PREFIX));
List *subscriptionNameList = GetQueryResultStringList(cleanupConnection, query);
@ -608,7 +604,7 @@ static void
DropAllShardSplitPublications(MultiConnection *connection)
{
char *query = psprintf(
"SELECT pubname FROM pg_publication "
"SELECT pubname FROM pg_catalog.pg_publication "
"WHERE pubname LIKE %s || '%%'",
quote_literal_cstr(SHARD_SPLIT_PUBLICATION_PREFIX));
List *publicationNameList = GetQueryResultStringList(connection, query);
@ -628,7 +624,7 @@ static void
DropAllShardSplitUsers(MultiConnection *connection)
{
char *query = psprintf(
"SELECT rolname FROM pg_roles "
"SELECT rolname FROM pg_catalog.pg_roles "
"WHERE rolname LIKE %s || '%%'",
quote_literal_cstr(SHARD_SPLIT_SUBSCRIPTION_ROLE_PREFIX));
List *usernameList = GetQueryResultStringList(connection, query);
@ -649,7 +645,7 @@ static void
DropAllShardSplitReplicationSlots(MultiConnection *cleanupConnection)
{
char *query = psprintf(
"SELECT slot_name FROM pg_replication_slots "
"SELECT slot_name FROM pg_catalog.pg_replication_slots "
"WHERE slot_name LIKE %s || '%%'",
quote_literal_cstr(SHARD_SPLIT_REPLICATION_SLOT_PREFIX));
List *slotNameList = GetQueryResultStringList(cleanupConnection, query);

View File

@ -25,9 +25,6 @@ const char *SharedMemoryNameForHandleManagement =
static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
static
void ShardSplitShmemInit(void);
/* Function declarations */
static ShardSplitInfoSMHeader * AllocateSharedMemoryForShardSplitInfo(int
shardSplitInfoCount,
@ -35,9 +32,10 @@ static ShardSplitInfoSMHeader * AllocateSharedMemoryForShardSplitInfo(int
shardSplitInfoSize,
dsm_handle *
dsmHandle);
static ShardSplitInfoSMHeader * GetShardSplitInfoSMHeaderFromDSMHandle(dsm_handle
dsmHandle);
static dsm_handle GetShardSplitSharedMemoryHandle(void);
static void ShardSplitShmemInit(void);
/*
* GetShardSplitInfoSMHeaderFromDSMHandle returns the header of the shared memory

View File

@ -106,6 +106,9 @@
#include "columnar/columnar.h"
/* marks shared object as one loadable by the postgres version compiled against */
PG_MODULE_MAGIC;
ColumnarSupportsIndexAM_type extern_ColumnarSupportsIndexAM = NULL;
CompressionTypeStr_type extern_CompressionTypeStr = NULL;
IsColumnarTableAmTable_type extern_IsColumnarTableAmTable = NULL;

View File

@ -29,22 +29,6 @@ typedef enum SplitOperation
ISOLATE_TENANT_TO_NEW_SHARD
} SplitOperation;
/*
* In-memory mapping of a split child shard.
*/
typedef struct ShardSplitInfo
{
Oid distributedTableOid; /* citus distributed table Oid */
int partitionColumnIndex; /* partition column index */
Oid sourceShardOid; /* parent shard Oid */
Oid splitChildShardOid; /* child shard Oid */
int32 shardMinValue; /* min hash value */
int32 shardMaxValue; /* max hash value */
uint32_t nodeId; /* node where child shard is to be placed */
uint64 sourceShardId; /* parent shardId */
uint64 splitChildShardId; /* child shardId*/
char slotName[NAMEDATALEN]; /* replication slot name belonging to this node */
} ShardSplitInfo;
/*
* SplitShard API to split a given shard (or shard group) using split mode and

View File

@ -2,7 +2,7 @@
*
* shardsplit_logical_replication.h
*
* Function declarations for logically replicating split children.
* Function declarations for logically replicating shard to split children.
*
* Copyright (c) Citus Data, Inc.
*

View File

@ -14,7 +14,23 @@
#ifndef SHARDSPLIT_SHARED_MEMORY_H
#define SHARDSPLIT_SHARED_MEMORY_H
#include "distributed/shard_split.h"
/*
* In-memory mapping of a split child shard.
*/
typedef struct ShardSplitInfo
{
Oid distributedTableOid; /* citus distributed table Oid */
int partitionColumnIndex; /* partition column index */
Oid sourceShardOid; /* parent shard Oid */
Oid splitChildShardOid; /* child shard Oid */
int32 shardMinValue; /* min hash value */
int32 shardMaxValue; /* max hash value */
uint32_t nodeId; /* node where child shard is to be placed */
uint64 sourceShardId; /* parent shardId */
uint64 splitChildShardId; /* child shardId*/
char slotName[NAMEDATALEN]; /* replication slot name belonging to this node */
} ShardSplitInfo;
/*
* Header of the shared memory segment where shard split information is stored.
@ -52,7 +68,6 @@ typedef struct ShardSplitShmemData
void InitializeShardSplitSMHandleManagement(void);
void StoreShardSplitSharedMemoryHandle(dsm_handle dsmHandle);
dsm_handle GetShardSplitSharedMemoryHandle(void);
/* Functions for creating and accessing shared memory segments consisting shard split information */
extern ShardSplitInfoSMHeader * CreateSharedMemoryForShardSplitInfo(int
@ -65,5 +80,5 @@ extern ShardSplitInfoSMHeader * GetShardSplitInfoSMHeader(void);
extern HTAB * PopulateSourceToDestinationShardMapForSlot(char *slotName, MemoryContext
cxt);
char * encode_replication_slot(uint32_t nodeId, uint32_t tableOwnerId);
extern char * encode_replication_slot(uint32_t nodeId, uint32_t tableOwnerId);
#endif /* SHARDSPLIT_SHARED_MEMORY_H */
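
The ShardSplitInfo records that now live in this shared-memory header carry the inclusive hash range each split child owns, which is what lets the decoding plugin route a decoded change to the right child shard. A small sketch of that routing is below; the ShardSplitInfoSMHeader member names (splitInfoCount, splitInfoArray) are assumptions for illustration, only the ShardSplitInfo fields come from the struct above.

/* Sketch: pick the split child whose [shardMinValue, shardMaxValue] range
 * covers the hashed distribution-column value of a decoded tuple. */
static Oid
FindDestinationShardOid(ShardSplitInfoSMHeader *header,
                        uint64 sourceShardId, int32 hashedValue)
{
    for (int i = 0; i < header->splitInfoCount; i++)   /* assumed member names */
    {
        ShardSplitInfo *info = &header->splitInfoArray[i];

        if (info->sourceShardId == sourceShardId &&
            hashedValue >= info->shardMinValue &&
            hashedValue <= info->shardMaxValue)
        {
            return info->splitChildShardOid;
        }
    }

    return InvalidOid;
}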