citus/src/backend/distributed/transaction/multi_shard_transaction.c

286 lines
7.8 KiB
C

/*-------------------------------------------------------------------------
*
* multi_shard_transaction.c
* This file contains functions for managing 1PC or 2PC transactions
* across many shard placements.
*
* Copyright (c) Citus Data, Inc.
*
*-------------------------------------------------------------------------
*/
#include "libpq-fe.h"
#include "postgres.h"
#include "distributed/colocation_utils.h"
#include "distributed/connection_management.h"
#include "distributed/master_metadata_utility.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_router_executor.h"
#include "distributed/multi_shard_transaction.h"
#include "distributed/placement_connection.h"
#include "distributed/shardinterval_utils.h"
#include "distributed/worker_manager.h"
#include "nodes/pg_list.h"
#include "storage/ipc.h"
#include "utils/memutils.h"
#define INITIAL_SHARD_CONNECTION_HASH_SIZE 128
/*
 * OpenTransactionsForAllTasks opens a connection for every finalized
 * placement of each task's anchor shard, taking into account which shards
 * are read and modified by the task to select the appropriate connection,
 * or errors out if no appropriate connection can be found.
 *
 * taskList is a list of Task pointers. A task whose anchor shard already has
 * an entry in the hash being built is skipped. MODIFY_TASK tasks are opened
 * with DML access; every other task must be a DDL_TASK or a
 * VACUUM_ANALYZE_TASK and is opened with DDL access.
 *
 * connectionFlags is passed on to StartPlacementListConnection after
 * CONNECTION_PER_PLACEMENT has been OR-ed into it.
 *
 * Each connection is claimed exclusively and its remote transaction is marked
 * critical. The set of connections is returned as an
 * anchor shard ID -> ShardConnections hash created in CurrentMemoryContext.
 */
HTAB *
OpenTransactionsForAllTasks(List *taskList, int connectionFlags)
{
	HTAB *shardConnectionHash = NULL;
	ListCell *taskCell = NULL;
	List *newConnectionList = NIL;

	shardConnectionHash = CreateShardConnectionHash(CurrentMemoryContext);

	connectionFlags |= CONNECTION_PER_PLACEMENT;

	/* open connections to shards which don't have connections yet */
	foreach(taskCell, taskList)
	{
		Task *task = (Task *) lfirst(taskCell);
		ShardPlacementAccessType accessType = PLACEMENT_ACCESS_SELECT;
		uint64 shardId = task->anchorShardId;
		ShardConnections *shardConnections = NULL;
		bool shardConnectionsFound = false;
		List *shardPlacementList = NIL;
		ListCell *placementCell = NULL;

		/* this lookup also creates an empty entry when none exists yet */
		shardConnections = GetShardHashConnections(shardConnectionHash, shardId,
												   &shardConnectionsFound);
		if (shardConnectionsFound)
		{
			continue;
		}

		shardPlacementList = FinalizedShardPlacementList(shardId);
		if (shardPlacementList == NIL)
		{
			/* going to have to have some placements to do any work */
			ereport(ERROR, (errmsg("could not find any shard placements for the shard "
								   UINT64_FORMAT, shardId)));
		}

		if (task->taskType == MODIFY_TASK)
		{
			accessType = PLACEMENT_ACCESS_DML;
		}
		else
		{
			/*
			 * Can only open connections for DDL and DML commands. Both
			 * operands must compare against taskType; a bare
			 * VACUUM_ANALYZE_TASK operand would make the check always true.
			 */
			Assert(task->taskType == DDL_TASK ||
				   task->taskType == VACUUM_ANALYZE_TASK);

			accessType = PLACEMENT_ACCESS_DDL;
		}

		foreach(placementCell, shardPlacementList)
		{
			ShardPlacement *shardPlacement = (ShardPlacement *) lfirst(placementCell);
			ShardPlacementAccess placementModification;
			List *placementAccessList = NIL;
			MultiConnection *connection = NULL;

			WorkerNode *workerNode = FindWorkerNode(shardPlacement->nodeName,
													shardPlacement->nodePort);
			if (workerNode == NULL)
			{
				ereport(ERROR, (errmsg("could not find worker node %s:%d",
									   shardPlacement->nodeName,
									   shardPlacement->nodePort)));
			}

			/*
			 * Add placement access for modification. The list stores a pointer
			 * to this stack variable, so placementAccessList must not be used
			 * beyond the current loop iteration.
			 */
			placementModification.placement = shardPlacement;
			placementModification.accessType = accessType;

			placementAccessList = lappend(placementAccessList, &placementModification);

			if (accessType == PLACEMENT_ACCESS_DDL)
			{
				List *placementDDLList = BuildPlacementDDLList(shardPlacement->groupId,
															   task->relationShardList);

				/*
				 * All relations appearing in inter-shard DDL commands should
				 * be marked with DDL access.
				 */
				placementAccessList = list_concat(placementAccessList, placementDDLList);
			}
			else
			{
				List *placementSelectList =
					BuildPlacementSelectList(shardPlacement->groupId,
											 task->relationShardList);

				/* add additional placement accesses for subselects (e.g. INSERT .. SELECT) */
				placementAccessList =
					list_concat(placementAccessList, placementSelectList);
			}

			/*
			 * Find a connection that sees preceding writes and cannot self-deadlock,
			 * or error out if no such connection exists.
			 */
			connection = StartPlacementListConnection(connectionFlags,
													  placementAccessList, NULL);

			ClaimConnectionExclusively(connection);

			shardConnections->connectionList = lappend(shardConnections->connectionList,
													   connection);

			newConnectionList = lappend(newConnectionList, connection);

			/*
			 * Every individual failure should cause entire distributed
			 * transaction to fail.
			 */
			MarkRemoteTransactionCritical(connection);
		}
	}

	/* finish connection establishment for newly opened connections */
	FinishConnectionListEstablishment(newConnectionList);

	/* the special BARE mode (for e.g. VACUUM/ANALYZE) skips BEGIN */
	if (MultiShardCommitProtocol > COMMIT_PROTOCOL_BARE)
	{
		RemoteTransactionsBeginIfNecessary(newConnectionList);
	}

	return shardConnectionHash;
}
/*
 * CreateShardConnectionHash builds an empty hash table keyed by a shard
 * identifier (an int64 blob key) whose entries are ShardConnections structs.
 * The given MemoryContext is handed to hash_create, so the table's
 * allocations are made in that context.
 */
HTAB *
CreateShardConnectionHash(MemoryContext memoryContext)
{
	const int hashFlags = (HASH_ELEM | HASH_CONTEXT | HASH_BLOBS);
	HASHCTL hashInfo;

	/* start from an all-zero control struct, then fill in what the flags need */
	memset(&hashInfo, 0, sizeof(hashInfo));
	hashInfo.hcxt = memoryContext;
	hashInfo.keysize = sizeof(int64);
	hashInfo.entrysize = sizeof(ShardConnections);

	return hash_create("Shard Connections Hash",
					   INITIAL_SHARD_CONNECTION_HASH_SIZE, &hashInfo,
					   hashFlags);
}
/*
 * GetShardHashConnections looks up the entry for shardId in connectionHash,
 * creating it when it is missing. *connectionsFound is set to whether the
 * entry already existed. A newly created entry is returned with its shardId
 * filled in and an empty connectionList.
 */
ShardConnections *
GetShardHashConnections(HTAB *connectionHash, int64 shardId, bool *connectionsFound)
{
	ShardConnections *entry =
		(ShardConnections *) hash_search(connectionHash, &shardId,
										 HASH_ENTER, connectionsFound);

	if (*connectionsFound)
	{
		/* existing entry: hand it back untouched */
		return entry;
	}

	/* fresh entry: initialize the fields before anyone reads them */
	entry->shardId = shardId;
	entry->connectionList = NIL;

	return entry;
}
/*
 * ShardConnectionList collects every ShardConnections entry stored in
 * connectionHash into a List and returns it. A NULL hash yields NIL.
 */
List *
ShardConnectionList(HTAB *connectionHash)
{
	HASH_SEQ_STATUS scan;
	ShardConnections *entry = NULL;
	List *entryList = NIL;

	if (connectionHash == NULL)
	{
		return NIL;
	}

	/* walk the whole table; the scan ends when hash_seq_search returns NULL */
	hash_seq_init(&scan, connectionHash);
	for (entry = (ShardConnections *) hash_seq_search(&scan);
		 entry != NULL;
		 entry = (ShardConnections *) hash_seq_search(&scan))
	{
		entryList = lappend(entryList, entry);
	}

	return entryList;
}
/*
 * ResetShardPlacementTransactionState performs cleanup after the end of a
 * transaction. When the commit protocol is currently COMMIT_PROTOCOL_BARE,
 * it is replaced by the saved protocol and the saved value is reset to
 * COMMIT_PROTOCOL_BARE; otherwise nothing changes.
 */
void
ResetShardPlacementTransactionState(void)
{
	if (MultiShardCommitProtocol != COMMIT_PROTOCOL_BARE)
	{
		/* nothing was overridden, so there is nothing to restore */
		return;
	}

	MultiShardCommitProtocol = SavedMultiShardCommitProtocol;
	SavedMultiShardCommitProtocol = COMMIT_PROTOCOL_BARE;
}
/*
 * UnclaimAllShardConnections calls UnclaimConnection on every connection
 * stored in the given shard connections hash. It is meant to undo the
 * exclusive claims taken when the connections were opened, as
 * OpenTransactionsForAllTasks does.
 */
void
UnclaimAllShardConnections(HTAB *shardConnectionHash)
{
	HASH_SEQ_STATUS scan;
	ShardConnections *entry = NULL;

	hash_seq_init(&scan, shardConnectionHash);

	/* visit each shard's entry, then each connection held for that shard */
	while ((entry = (ShardConnections *) hash_seq_search(&scan)) != NULL)
	{
		ListCell *cell = NULL;

		foreach(cell, entry->connectionList)
		{
			UnclaimConnection((MultiConnection *) lfirst(cell));
		}
	}
}