mirror of https://github.com/citusdata/citus.git
PR #6728 / commit - 2
- Create MetadataSyncContext api to encapsulate both transactional and nontransactional modes, - Add a GUC to switch between metadata sync transaction modes.pull/6728/head
parent
98abd68178
commit
85d50203d1
|
@ -90,6 +90,7 @@
|
||||||
|
|
||||||
/* managed via a GUC */
|
/* managed via a GUC */
|
||||||
char *EnableManualMetadataChangesForUser = "";
|
char *EnableManualMetadataChangesForUser = "";
|
||||||
|
int MetadataSyncTransMode = METADATA_SYNC_TRANSACTIONAL;
|
||||||
|
|
||||||
|
|
||||||
static void EnsureObjectMetadataIsSane(int distributionArgumentIndex,
|
static void EnsureObjectMetadataIsSane(int distributionArgumentIndex,
|
||||||
|
@ -4157,3 +4158,330 @@ ColocationGroupCreateCommandList(void)
|
||||||
|
|
||||||
return list_make1(colocationGroupCreateCommand->data);
|
return list_make1(colocationGroupCreateCommand->data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SetMetadataSyncNodesFromNodeList sets list of nodes that needs to be metadata
|
||||||
|
* synced among given node list into metadataSyncContext.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
SetMetadataSyncNodesFromNodeList(MetadataSyncContext *context, List *nodeList)
|
||||||
|
{
|
||||||
|
List *activatedWorkerNodeList = NIL;
|
||||||
|
|
||||||
|
WorkerNode *node = NULL;
|
||||||
|
foreach_ptr(node, nodeList)
|
||||||
|
{
|
||||||
|
if (EnableMetadataSync && NodeIsPrimary(node))
|
||||||
|
{
|
||||||
|
/* warn if we have coordinator in nodelist */
|
||||||
|
if (NodeIsCoordinator(node))
|
||||||
|
{
|
||||||
|
ereport(NOTICE, (errmsg("%s:%d is the coordinator and already contains "
|
||||||
|
"metadata, skipping syncing the metadata",
|
||||||
|
node->workerName, node->workerPort)));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
activatedWorkerNodeList = lappend(activatedWorkerNodeList, node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
context->activatedWorkerNodeList = activatedWorkerNodeList;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* EstablishAndSetMetadataSyncBareConnections establishes and sets
|
||||||
|
* connections used throughout nontransactional metadata sync.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
EstablishAndSetMetadataSyncBareConnections(MetadataSyncContext *context)
|
||||||
|
{
|
||||||
|
Assert(MetadataSyncTransMode == METADATA_SYNC_NON_TRANSACTIONAL);
|
||||||
|
|
||||||
|
int connectionFlags = REQUIRE_METADATA_CONNECTION;
|
||||||
|
|
||||||
|
/* establish bare connections to activated worker nodes */
|
||||||
|
List *bareConnectionList = NIL;
|
||||||
|
WorkerNode *node = NULL;
|
||||||
|
foreach_ptr(node, context->activatedWorkerNodeList)
|
||||||
|
{
|
||||||
|
MultiConnection *connection = GetNodeUserDatabaseConnection(connectionFlags,
|
||||||
|
node->workerName,
|
||||||
|
node->workerPort,
|
||||||
|
CurrentUserName(),
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
Assert(connection != NULL);
|
||||||
|
bareConnectionList = lappend(bareConnectionList, connection);
|
||||||
|
}
|
||||||
|
|
||||||
|
context->activatedWorkerConnections = bareConnectionList;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* EstablishAndSetMetadataSyncCoordinatedConnections establishes and sets
|
||||||
|
* connections used throughout transactional metadata sync.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
EstablishAndSetMetadataSyncCoordinatedConnections(MetadataSyncContext *context)
|
||||||
|
{
|
||||||
|
Assert(MetadataSyncTransMode == METADATA_SYNC_TRANSACTIONAL);
|
||||||
|
|
||||||
|
int connectionFlags = REQUIRE_METADATA_CONNECTION;
|
||||||
|
|
||||||
|
/* establish coordinated connections to activated worker nodes */
|
||||||
|
List *coordinatedConnectionList = NIL;
|
||||||
|
WorkerNode *node = NULL;
|
||||||
|
foreach_ptr(node, context->activatedWorkerNodeList)
|
||||||
|
{
|
||||||
|
MultiConnection *connection =
|
||||||
|
StartNodeConnection(connectionFlags, node->workerName, node->workerPort);
|
||||||
|
|
||||||
|
MarkRemoteTransactionCritical(connection);
|
||||||
|
|
||||||
|
Assert(connection != NULL);
|
||||||
|
coordinatedConnectionList = lappend(coordinatedConnectionList, connection);
|
||||||
|
}
|
||||||
|
|
||||||
|
context->activatedWorkerConnections = coordinatedConnectionList;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* CreateMetadataSyncContext creates a context which contains worker connections
|
||||||
|
* and a MemoryContext to be used throughout the metadata sync.
|
||||||
|
*
|
||||||
|
* If we collect commands, connections will not be established as caller's intent
|
||||||
|
* is to collcet sync commands.
|
||||||
|
*/
|
||||||
|
MetadataSyncContext *
|
||||||
|
CreateMetadataSyncContext(List *nodeList, bool collectCommands)
|
||||||
|
{
|
||||||
|
/* should be alive during local transaction during the sync */
|
||||||
|
MemoryContext context = AllocSetContextCreate(TopTransactionContext,
|
||||||
|
"metadata_sync_context",
|
||||||
|
ALLOCSET_DEFAULT_SIZES);
|
||||||
|
|
||||||
|
MetadataSyncContext *metadataSyncContext = (MetadataSyncContext *) palloc0(
|
||||||
|
sizeof(MetadataSyncContext));
|
||||||
|
|
||||||
|
metadataSyncContext->context = context;
|
||||||
|
metadataSyncContext->transactionMode = MetadataSyncTransMode;
|
||||||
|
metadataSyncContext->collectCommands = collectCommands;
|
||||||
|
metadataSyncContext->collectedCommands = NIL;
|
||||||
|
|
||||||
|
/* filter the nodes that needs to be activated from given node list */
|
||||||
|
SetMetadataSyncNodesFromNodeList(metadataSyncContext, nodeList);
|
||||||
|
|
||||||
|
/* establish connections */
|
||||||
|
if (!collectCommands && MetadataSyncTransMode == METADATA_SYNC_TRANSACTIONAL)
|
||||||
|
{
|
||||||
|
EstablishAndSetMetadataSyncCoordinatedConnections(metadataSyncContext);
|
||||||
|
}
|
||||||
|
else if (!collectCommands && MetadataSyncTransMode == METADATA_SYNC_NON_TRANSACTIONAL)
|
||||||
|
{
|
||||||
|
EstablishAndSetMetadataSyncBareConnections(metadataSyncContext);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* use 2PC coordinated transactions if we operate in transactional mode */
|
||||||
|
if (MetadataSyncTransMode == METADATA_SYNC_TRANSACTIONAL)
|
||||||
|
{
|
||||||
|
Use2PCForCoordinatedTransaction();
|
||||||
|
}
|
||||||
|
|
||||||
|
return metadataSyncContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* DestroyMetadataSyncContext destroys the memory context inside metadataSyncContext
|
||||||
|
* and also closes open connections if any.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
DestroyMetadataSyncContext(MetadataSyncContext *context)
|
||||||
|
{
|
||||||
|
/* close connections */
|
||||||
|
MultiConnection *connection = NULL;
|
||||||
|
foreach_ptr(connection, context->activatedWorkerBareConnections)
|
||||||
|
{
|
||||||
|
CloseConnection(connection);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* delete memory context */
|
||||||
|
MemoryContextDelete(context->context);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ResetMetadataSyncMemoryContext resets memory context inside metadataSyncContext, if
|
||||||
|
* we are not collecting commands.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
ResetMetadataSyncMemoryContext(MetadataSyncContext *context)
|
||||||
|
{
|
||||||
|
if (!MetadataSyncCollectsCommands(context))
|
||||||
|
{
|
||||||
|
MemoryContextReset(context->context);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* MetadataSyncCollectsCommands returns whether context is used for collecting
|
||||||
|
* commands instead of sending them to workers.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
MetadataSyncCollectsCommands(MetadataSyncContext *context)
|
||||||
|
{
|
||||||
|
return context->collectCommands;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SendOrCollectCommandListToActivatedNodes sends the commands to the activated nodes with
|
||||||
|
* bare connections inside metadatacontext or via coordinated connections.
|
||||||
|
* Note that when context only collects commands, we add commands into the context
|
||||||
|
* without sending the commands.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
SendOrCollectCommandListToActivatedNodes(MetadataSyncContext *context, List *commands)
|
||||||
|
{
|
||||||
|
/* do nothing if no commands */
|
||||||
|
if (commands == NIL)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* do not send any command to workers if we collect commands.
|
||||||
|
* Collect commands into metadataSyncContext's collected command
|
||||||
|
* list.
|
||||||
|
*/
|
||||||
|
if (MetadataSyncCollectsCommands(context))
|
||||||
|
{
|
||||||
|
context->collectedCommands = list_concat(context->collectedCommands,
|
||||||
|
commands);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* send commands to new workers, the current user should be a superuser */
|
||||||
|
Assert(superuser());
|
||||||
|
|
||||||
|
if (context->transactionMode == METADATA_SYNC_TRANSACTIONAL)
|
||||||
|
{
|
||||||
|
List *workerNodes = context->activatedWorkerNodeList;
|
||||||
|
SendMetadataCommandListToWorkerListInCoordinatedTransaction(workerNodes,
|
||||||
|
CurrentUserName(),
|
||||||
|
commands);
|
||||||
|
}
|
||||||
|
else if (context->transactionMode == METADATA_SYNC_NON_TRANSACTIONAL)
|
||||||
|
{
|
||||||
|
List *workerConnections = context->activatedWorkerBareConnections;
|
||||||
|
SendCommandListToWorkerListWithBareConnections(workerConnections, commands);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pg_unreachable();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SendOrCollectCommandListToMetadataNodes sends the commands to the metadata nodes with
|
||||||
|
* bare connections inside metadatacontext or via coordinated connections.
|
||||||
|
* Note that when context only collects commands, we add commands into the context
|
||||||
|
* without sending the commands.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
SendOrCollectCommandListToMetadataNodes(MetadataSyncContext *context, List *commands)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* do not send any command to workers if we collcet commands.
|
||||||
|
* Collect commands into metadataSyncContext's collected command
|
||||||
|
* list.
|
||||||
|
*/
|
||||||
|
if (MetadataSyncCollectsCommands(context))
|
||||||
|
{
|
||||||
|
context->collectedCommands = list_concat(context->collectedCommands,
|
||||||
|
commands);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* send commands to new workers, the current user should be a superuser */
|
||||||
|
Assert(superuser());
|
||||||
|
|
||||||
|
if (context->transactionMode == METADATA_SYNC_TRANSACTIONAL)
|
||||||
|
{
|
||||||
|
List *metadataNodes = TargetWorkerSetNodeList(NON_COORDINATOR_METADATA_NODES,
|
||||||
|
RowShareLock);
|
||||||
|
SendMetadataCommandListToWorkerListInCoordinatedTransaction(metadataNodes,
|
||||||
|
CurrentUserName(),
|
||||||
|
commands);
|
||||||
|
}
|
||||||
|
else if (context->transactionMode == METADATA_SYNC_NON_TRANSACTIONAL)
|
||||||
|
{
|
||||||
|
SendBareCommandListToMetadataWorkers(commands);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pg_unreachable();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SendOrCollectCommandListToSingleNode sends the commands to the specific worker
|
||||||
|
* indexed by nodeIdx with bare connection inside metadatacontext or via coordinated
|
||||||
|
* connection. Note that when context only collects commands, we add commands into
|
||||||
|
* the context without sending the commands.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
SendOrCollectCommandListToSingleNode(MetadataSyncContext *context, List *commands,
|
||||||
|
int nodeIdx)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Do not send any command to workers if we collect commands.
|
||||||
|
* Collect commands into metadataSyncContext's collected command
|
||||||
|
* list.
|
||||||
|
*/
|
||||||
|
if (MetadataSyncCollectsCommands(context))
|
||||||
|
{
|
||||||
|
context->collectedCommands = list_concat(context->collectedCommands,
|
||||||
|
commands);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* send commands to new workers, the current user should be a superuser */
|
||||||
|
Assert(superuser());
|
||||||
|
|
||||||
|
List *workerConnections = context->activatedWorkerConnections;
|
||||||
|
Assert(nodeIdx < list_length(workerConnections));
|
||||||
|
MultiConnection *workerConnection = list_nth(workerConnections, nodeIdx);
|
||||||
|
|
||||||
|
if (context->transactionMode == METADATA_SYNC_TRANSACTIONAL)
|
||||||
|
{
|
||||||
|
List *workerNodes = context->activatedWorkerNodeList;
|
||||||
|
Assert(nodeIdx < list_length(workerNodes));
|
||||||
|
|
||||||
|
WorkerNode *node = list_nth(workerNodes, nodeIdx);
|
||||||
|
SendMetadataCommandListToWorkerListInCoordinatedTransaction(list_make1(node),
|
||||||
|
CurrentUserName(),
|
||||||
|
commands);
|
||||||
|
}
|
||||||
|
else if (context->transactionMode == METADATA_SYNC_NON_TRANSACTIONAL)
|
||||||
|
{
|
||||||
|
List *workerConnections = context->activatedWorkerBareConnections;
|
||||||
|
Assert(nodeIdx < list_length(workerConnections));
|
||||||
|
|
||||||
|
MultiConnection *workerConnection = list_nth(workerConnections, nodeIdx);
|
||||||
|
List *connectionList = list_make1(workerConnection);
|
||||||
|
SendCommandListToWorkerListWithBareConnections(connectionList, commands);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pg_unreachable();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -360,6 +360,11 @@ static const struct config_enum_entry cpu_priority_options[] = {
|
||||||
{ NULL, 0, false}
|
{ NULL, 0, false}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const struct config_enum_entry metadata_sync_mode_options[] = {
|
||||||
|
{ "transactional", METADATA_SYNC_TRANSACTIONAL, false },
|
||||||
|
{ "nontransactional", METADATA_SYNC_NON_TRANSACTIONAL, false },
|
||||||
|
{ NULL, 0, false }
|
||||||
|
};
|
||||||
|
|
||||||
/* *INDENT-ON* */
|
/* *INDENT-ON* */
|
||||||
|
|
||||||
|
@ -1880,6 +1885,21 @@ RegisterCitusConfigVariables(void)
|
||||||
GUC_UNIT_MS | GUC_NO_SHOW_ALL,
|
GUC_UNIT_MS | GUC_NO_SHOW_ALL,
|
||||||
NULL, NULL, NULL);
|
NULL, NULL, NULL);
|
||||||
|
|
||||||
|
DefineCustomEnumVariable(
|
||||||
|
"citus.metadata_sync_mode",
|
||||||
|
gettext_noop("Sets transaction mode for metadata syncs."),
|
||||||
|
gettext_noop("metadata sync can be run inside a single coordinated "
|
||||||
|
"transaction or with multiple small transactions in "
|
||||||
|
"idempotent way. By default we sync metadata in single "
|
||||||
|
"coordinated transaction. When we hit memory problems "
|
||||||
|
"at workers, we have alternative nontransactional mode "
|
||||||
|
"where we send each command with separate transaction."),
|
||||||
|
&MetadataSyncTransMode,
|
||||||
|
METADATA_SYNC_TRANSACTIONAL, metadata_sync_mode_options,
|
||||||
|
PGC_SUSET,
|
||||||
|
GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL,
|
||||||
|
NULL, NULL, NULL);
|
||||||
|
|
||||||
DefineCustomIntVariable(
|
DefineCustomIntVariable(
|
||||||
"citus.metadata_sync_retry_interval",
|
"citus.metadata_sync_retry_interval",
|
||||||
gettext_noop("Sets the interval to retry failed metadata syncs."),
|
gettext_noop("Sets the interval to retry failed metadata syncs."),
|
||||||
|
|
|
@ -18,9 +18,30 @@
|
||||||
#include "distributed/metadata_cache.h"
|
#include "distributed/metadata_cache.h"
|
||||||
#include "nodes/pg_list.h"
|
#include "nodes/pg_list.h"
|
||||||
|
|
||||||
|
/* managed via guc.c */
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
METADATA_SYNC_TRANSACTIONAL = 0,
|
||||||
|
METADATA_SYNC_NON_TRANSACTIONAL = 1
|
||||||
|
} MetadataSyncTransactionMode;
|
||||||
|
|
||||||
/* config variables */
|
/* config variables */
|
||||||
extern int MetadataSyncInterval;
|
extern int MetadataSyncInterval;
|
||||||
extern int MetadataSyncRetryInterval;
|
extern int MetadataSyncRetryInterval;
|
||||||
|
extern int MetadataSyncTransMode;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* MetadataSyncContext is used throughout metadata sync.
|
||||||
|
*/
|
||||||
|
typedef struct MetadataSyncContext
|
||||||
|
{
|
||||||
|
List *activatedWorkerNodeList; /* activated worker nodes */
|
||||||
|
List *activatedWorkerConnections; /* connections to activated worker nodes */
|
||||||
|
MemoryContext context; /* memory context for all allocations */
|
||||||
|
MetadataSyncTransactionMode transactionMode; /* transaction mode for the sync */
|
||||||
|
bool collectCommands; /* flag to collect commands instead of sending and resetting */
|
||||||
|
List *collectedCommands; /* collected commands. (NIL if collectCommands == false) */
|
||||||
|
} MetadataSyncContext;
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
|
@ -116,6 +137,22 @@ extern void SyncNewColocationGroupToNodes(uint32 colocationId, int shardCount,
|
||||||
Oid distributionColumnCollation);
|
Oid distributionColumnCollation);
|
||||||
extern void SyncDeleteColocationGroupToNodes(uint32 colocationId);
|
extern void SyncDeleteColocationGroupToNodes(uint32 colocationId);
|
||||||
|
|
||||||
|
extern MetadataSyncContext * CreateMetadataSyncContext(List *nodeList, bool testMode);
|
||||||
|
extern void DestroyMetadataSyncContext(MetadataSyncContext *context);
|
||||||
|
extern void EstablishAndSetMetadataSyncBareConnections(MetadataSyncContext *context);
|
||||||
|
extern void EstablishAndSetMetadataSyncCoordinatedConnections(
|
||||||
|
MetadataSyncContext *context);
|
||||||
|
extern void SetMetadataSyncNodesFromNodeList(MetadataSyncContext *context,
|
||||||
|
List *nodeList);
|
||||||
|
extern void ResetMetadataSyncMemoryContext(MetadataSyncContext *context);
|
||||||
|
extern bool MetadataSyncCollectsCommands(MetadataSyncContext *context);
|
||||||
|
extern void SendOrCollectCommandListToActivatedNodes(MetadataSyncContext *context,
|
||||||
|
List *commands);
|
||||||
|
extern void SendOrCollectCommandListToMetadataNodes(MetadataSyncContext *context,
|
||||||
|
List *commands);
|
||||||
|
extern void SendOrCollectCommandListToSingleNode(MetadataSyncContext *context,
|
||||||
|
List *commands, int nodeIdx);
|
||||||
|
|
||||||
#define DELETE_ALL_NODES "DELETE FROM pg_dist_node"
|
#define DELETE_ALL_NODES "DELETE FROM pg_dist_node"
|
||||||
#define DELETE_ALL_PLACEMENTS "DELETE FROM pg_dist_placement"
|
#define DELETE_ALL_PLACEMENTS "DELETE FROM pg_dist_placement"
|
||||||
#define DELETE_ALL_SHARDS "DELETE FROM pg_dist_shard"
|
#define DELETE_ALL_SHARDS "DELETE FROM pg_dist_shard"
|
||||||
|
|
Loading…
Reference in New Issue