mirror of https://github.com/citusdata/citus.git
286 lines
7.8 KiB
C
286 lines
7.8 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* multi_shard_transaction.c
|
|
* This file contains functions for managing 1PC or 2PC transactions
|
|
* across many shard placements.
|
|
*
|
|
* Copyright (c) Citus Data, Inc.
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
|
|
#include "libpq-fe.h"
|
|
#include "postgres.h"
|
|
|
|
#include "distributed/colocation_utils.h"
|
|
#include "distributed/connection_management.h"
|
|
#include "distributed/master_metadata_utility.h"
|
|
#include "distributed/metadata_cache.h"
|
|
#include "distributed/multi_router_executor.h"
|
|
#include "distributed/multi_shard_transaction.h"
|
|
#include "distributed/placement_connection.h"
|
|
#include "distributed/shardinterval_utils.h"
|
|
#include "distributed/worker_manager.h"
|
|
#include "nodes/pg_list.h"
|
|
#include "storage/ipc.h"
|
|
#include "utils/memutils.h"
|
|
|
|
|
|
#define INITIAL_SHARD_CONNECTION_HASH_SIZE 128
|
|
|
|
|
|
/*
|
|
* OpenTransactionsForAllTasks opens a connection for each task,
|
|
* taking into account which shards are read and modified by the task
|
|
* to select the appopriate connection, or error out if no appropriate
|
|
* connection can be found. The set of connections is returned as an
|
|
* anchor shard ID -> ShardConnections hash.
|
|
*/
|
|
HTAB *
|
|
OpenTransactionsForAllTasks(List *taskList, int connectionFlags)
|
|
{
|
|
HTAB *shardConnectionHash = NULL;
|
|
ListCell *taskCell = NULL;
|
|
List *newConnectionList = NIL;
|
|
|
|
shardConnectionHash = CreateShardConnectionHash(CurrentMemoryContext);
|
|
|
|
connectionFlags |= CONNECTION_PER_PLACEMENT;
|
|
|
|
/* open connections to shards which don't have connections yet */
|
|
foreach(taskCell, taskList)
|
|
{
|
|
Task *task = (Task *) lfirst(taskCell);
|
|
ShardPlacementAccessType accessType = PLACEMENT_ACCESS_SELECT;
|
|
uint64 shardId = task->anchorShardId;
|
|
ShardConnections *shardConnections = NULL;
|
|
bool shardConnectionsFound = false;
|
|
List *shardPlacementList = NIL;
|
|
ListCell *placementCell = NULL;
|
|
|
|
shardConnections = GetShardHashConnections(shardConnectionHash, shardId,
|
|
&shardConnectionsFound);
|
|
if (shardConnectionsFound)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
shardPlacementList = FinalizedShardPlacementList(shardId);
|
|
if (shardPlacementList == NIL)
|
|
{
|
|
/* going to have to have some placements to do any work */
|
|
ereport(ERROR, (errmsg("could not find any shard placements for the shard "
|
|
UINT64_FORMAT, shardId)));
|
|
}
|
|
|
|
if (task->taskType == MODIFY_TASK)
|
|
{
|
|
accessType = PLACEMENT_ACCESS_DML;
|
|
}
|
|
else
|
|
{
|
|
/* can only open connections for DDL and DML commands */
|
|
Assert(task->taskType == DDL_TASK || VACUUM_ANALYZE_TASK);
|
|
|
|
accessType = PLACEMENT_ACCESS_DDL;
|
|
}
|
|
|
|
foreach(placementCell, shardPlacementList)
|
|
{
|
|
ShardPlacement *shardPlacement = (ShardPlacement *) lfirst(placementCell);
|
|
ShardPlacementAccess placementModification;
|
|
List *placementAccessList = NIL;
|
|
MultiConnection *connection = NULL;
|
|
|
|
WorkerNode *workerNode = FindWorkerNode(shardPlacement->nodeName,
|
|
shardPlacement->nodePort);
|
|
if (workerNode == NULL)
|
|
{
|
|
ereport(ERROR, (errmsg("could not find worker node %s:%d",
|
|
shardPlacement->nodeName,
|
|
shardPlacement->nodePort)));
|
|
}
|
|
|
|
/* add placement access for modification */
|
|
placementModification.placement = shardPlacement;
|
|
placementModification.accessType = accessType;
|
|
|
|
placementAccessList = lappend(placementAccessList, &placementModification);
|
|
|
|
if (accessType == PLACEMENT_ACCESS_DDL)
|
|
{
|
|
List *placementDDLList = BuildPlacementDDLList(shardPlacement->groupId,
|
|
task->relationShardList);
|
|
|
|
/*
|
|
* All relations appearing inter-shard DDL commands should be marked
|
|
* with DDL access.
|
|
*/
|
|
placementAccessList = list_concat(placementAccessList, placementDDLList);
|
|
}
|
|
else
|
|
{
|
|
List *placementSelectList =
|
|
BuildPlacementSelectList(shardPlacement->groupId,
|
|
task->relationShardList);
|
|
|
|
/* add additional placement accesses for subselects (e.g. INSERT .. SELECT) */
|
|
placementAccessList =
|
|
list_concat(placementAccessList, placementSelectList);
|
|
}
|
|
|
|
/*
|
|
* Find a connection that sees preceding writes and cannot self-deadlock,
|
|
* or error out if no such connection exists.
|
|
*/
|
|
connection = StartPlacementListConnection(connectionFlags,
|
|
placementAccessList, NULL);
|
|
|
|
ClaimConnectionExclusively(connection);
|
|
|
|
shardConnections->connectionList = lappend(shardConnections->connectionList,
|
|
connection);
|
|
|
|
newConnectionList = lappend(newConnectionList, connection);
|
|
|
|
/*
|
|
* Every individual failure should cause entire distributed
|
|
* transaction to fail.
|
|
*/
|
|
MarkRemoteTransactionCritical(connection);
|
|
}
|
|
}
|
|
|
|
/* finish connection establishment newly opened connections */
|
|
FinishConnectionListEstablishment(newConnectionList);
|
|
|
|
/* the special BARE mode (for e.g. VACUUM/ANALYZE) skips BEGIN */
|
|
if (MultiShardCommitProtocol > COMMIT_PROTOCOL_BARE)
|
|
{
|
|
RemoteTransactionsBeginIfNecessary(newConnectionList);
|
|
}
|
|
|
|
return shardConnectionHash;
|
|
}
|
|
|
|
|
|
/*
|
|
* CreateShardConnectionHash constructs a hash table which maps from shard
|
|
* identifier to connection lists, passing the provided MemoryContext to
|
|
* hash_create for hash allocations.
|
|
*/
|
|
HTAB *
|
|
CreateShardConnectionHash(MemoryContext memoryContext)
|
|
{
|
|
HTAB *shardConnectionsHash = NULL;
|
|
int hashFlags = 0;
|
|
HASHCTL info;
|
|
|
|
memset(&info, 0, sizeof(info));
|
|
info.keysize = sizeof(int64);
|
|
info.entrysize = sizeof(ShardConnections);
|
|
info.hcxt = memoryContext;
|
|
hashFlags = (HASH_ELEM | HASH_CONTEXT | HASH_BLOBS);
|
|
|
|
shardConnectionsHash = hash_create("Shard Connections Hash",
|
|
INITIAL_SHARD_CONNECTION_HASH_SIZE, &info,
|
|
hashFlags);
|
|
|
|
return shardConnectionsHash;
|
|
}
|
|
|
|
|
|
/*
|
|
* GetShardHashConnections finds existing connections for a shard in the
|
|
* provided hash. If not found, then a ShardConnections structure with empty
|
|
* connectionList is returned.
|
|
*/
|
|
ShardConnections *
|
|
GetShardHashConnections(HTAB *connectionHash, int64 shardId, bool *connectionsFound)
|
|
{
|
|
ShardConnections *shardConnections = NULL;
|
|
|
|
shardConnections = (ShardConnections *) hash_search(connectionHash, &shardId,
|
|
HASH_ENTER, connectionsFound);
|
|
if (!*connectionsFound)
|
|
{
|
|
shardConnections->shardId = shardId;
|
|
shardConnections->connectionList = NIL;
|
|
}
|
|
|
|
return shardConnections;
|
|
}
|
|
|
|
|
|
/*
|
|
* ShardConnectionList returns the list of ShardConnections in connectionHash.
|
|
*/
|
|
List *
|
|
ShardConnectionList(HTAB *connectionHash)
|
|
{
|
|
List *shardConnectionsList = NIL;
|
|
HASH_SEQ_STATUS status;
|
|
ShardConnections *shardConnections = NULL;
|
|
|
|
if (connectionHash == NULL)
|
|
{
|
|
return NIL;
|
|
}
|
|
|
|
hash_seq_init(&status, connectionHash);
|
|
|
|
shardConnections = (ShardConnections *) hash_seq_search(&status);
|
|
while (shardConnections != NULL)
|
|
{
|
|
shardConnectionsList = lappend(shardConnectionsList, shardConnections);
|
|
|
|
shardConnections = (ShardConnections *) hash_seq_search(&status);
|
|
}
|
|
|
|
return shardConnectionsList;
|
|
}
|
|
|
|
|
|
/*
|
|
* ResetShardPlacementTransactionState performs cleanup after the end of a
|
|
* transaction.
|
|
*/
|
|
void
|
|
ResetShardPlacementTransactionState(void)
|
|
{
|
|
if (MultiShardCommitProtocol == COMMIT_PROTOCOL_BARE)
|
|
{
|
|
MultiShardCommitProtocol = SavedMultiShardCommitProtocol;
|
|
SavedMultiShardCommitProtocol = COMMIT_PROTOCOL_BARE;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* UnclaimAllShardConnections unclaims all connections in the given
|
|
* shard connections hash after previously claiming them exclusively
|
|
* in OpenTransactionsToAllShardPlacements.
|
|
*/
|
|
void
|
|
UnclaimAllShardConnections(HTAB *shardConnectionHash)
|
|
{
|
|
HASH_SEQ_STATUS status;
|
|
ShardConnections *shardConnections = NULL;
|
|
|
|
hash_seq_init(&status, shardConnectionHash);
|
|
|
|
while ((shardConnections = hash_seq_search(&status)) != 0)
|
|
{
|
|
List *connectionList = shardConnections->connectionList;
|
|
ListCell *connectionCell = NULL;
|
|
|
|
foreach(connectionCell, connectionList)
|
|
{
|
|
MultiConnection *connection = (MultiConnection *) lfirst(connectionCell);
|
|
|
|
UnclaimConnection(connection);
|
|
}
|
|
}
|
|
}
|