mirror of https://github.com/citusdata/citus.git
1438 lines
41 KiB
C
1438 lines
41 KiB
C
/*-------------------------------------------------------------------------
 *
 * remote_transaction.c
 *   Management of transactions spanning more than one node.
 *
 * Copyright (c) 2016, Citus Data, Inc.
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "libpq-fe.h"
|
|
|
|
#include "miscadmin.h"
|
|
|
|
#include "access/xact.h"
|
|
#include "distributed/backend_data.h"
|
|
#include "distributed/connection_management.h"
|
|
#include "distributed/metadata_cache.h"
|
|
#include "distributed/remote_commands.h"
|
|
#include "distributed/remote_transaction.h"
|
|
#include "distributed/transaction_identifier.h"
|
|
#include "distributed/transaction_management.h"
|
|
#include "distributed/transaction_recovery.h"
|
|
#include "distributed/worker_manager.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/hsearch.h"
|
|
|
|
|
|
#define PREPARED_TRANSACTION_NAME_FORMAT "citus_%u_%u_"UINT64_FORMAT "_%u"
|
|
|
|
|
|
static void StartRemoteTransactionSavepointBegin(MultiConnection *connection,
|
|
SubTransactionId subId);
|
|
static void FinishRemoteTransactionSavepointBegin(MultiConnection *connection,
|
|
SubTransactionId subId);
|
|
static void StartRemoteTransactionSavepointRelease(MultiConnection *connection,
|
|
SubTransactionId subId);
|
|
static void FinishRemoteTransactionSavepointRelease(MultiConnection *connection,
|
|
SubTransactionId subId);
|
|
static void StartRemoteTransactionSavepointRollback(MultiConnection *connection,
|
|
SubTransactionId subId);
|
|
static void FinishRemoteTransactionSavepointRollback(MultiConnection *connection,
|
|
SubTransactionId subId);
|
|
|
|
static void Assign2PCIdentifier(MultiConnection *connection);
|
|
static void WarnAboutLeakedPreparedTransaction(MultiConnection *connection, bool commit);
|
|
|
|
|
|
/*
|
|
* StartRemoteTransactionBeging initiates beginning the remote transaction in
|
|
* a non-blocking manner. The function sends "BEGIN" followed by
|
|
* assign_distributed_transaction_id() to assign the distributed transaction
|
|
* id on the remote node.
|
|
*/
|
|
void
|
|
StartRemoteTransactionBegin(struct MultiConnection *connection)
|
|
{
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
StringInfo beginAndSetDistributedTransactionId = makeStringInfo();
|
|
DistributedTransactionId *distributedTransactionId = NULL;
|
|
ListCell *subIdCell = NULL;
|
|
List *activeSubXacts = NIL;
|
|
const char *timestamp = NULL;
|
|
|
|
Assert(transaction->transactionState == REMOTE_TRANS_INVALID);
|
|
|
|
/* remember transaction as being in-progress */
|
|
dlist_push_tail(&InProgressTransactions, &connection->transactionNode);
|
|
|
|
transaction->transactionState = REMOTE_TRANS_STARTING;
|
|
|
|
/*
|
|
* Explicitly specify READ COMMITTED, the default on the remote
|
|
* side might have been changed, and that would cause problematic
|
|
* behaviour.
|
|
*/
|
|
appendStringInfoString(beginAndSetDistributedTransactionId,
|
|
"BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;");
|
|
|
|
/*
|
|
* Append BEGIN and assign_distributed_transaction_id() statements into a single command
|
|
* and send both in one step. The reason is purely performance, we don't want
|
|
* seperate roundtrips for these two statements.
|
|
*/
|
|
distributedTransactionId = GetCurrentDistributedTransactionId();
|
|
timestamp = timestamptz_to_str(distributedTransactionId->timestamp);
|
|
appendStringInfo(beginAndSetDistributedTransactionId,
|
|
"SELECT assign_distributed_transaction_id(%d, " UINT64_FORMAT
|
|
", '%s');",
|
|
distributedTransactionId->initiatorNodeIdentifier,
|
|
distributedTransactionId->transactionNumber,
|
|
timestamp);
|
|
|
|
/* append in-progress savepoints for this transaction */
|
|
activeSubXacts = ActiveSubXacts();
|
|
transaction->lastSuccessfulSubXact = TopSubTransactionId;
|
|
transaction->lastQueuedSubXact = TopSubTransactionId;
|
|
foreach(subIdCell, activeSubXacts)
|
|
{
|
|
SubTransactionId subId = lfirst_int(subIdCell);
|
|
appendStringInfo(beginAndSetDistributedTransactionId,
|
|
"SAVEPOINT savepoint_%u;", subId);
|
|
transaction->lastQueuedSubXact = subId;
|
|
}
|
|
|
|
if (!SendRemoteCommand(connection, beginAndSetDistributedTransactionId->data))
|
|
{
|
|
const bool raiseErrors = true;
|
|
|
|
HandleRemoteTransactionConnectionError(connection, raiseErrors);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* FinishRemoteTransactionBegin finishes the work StartRemoteTransactionBegin
|
|
* initiated. It blocks if necessary (i.e. if PQisBusy() would return true).
|
|
*/
|
|
void
|
|
FinishRemoteTransactionBegin(struct MultiConnection *connection)
|
|
{
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
bool clearSuccessful = true;
|
|
bool raiseErrors = true;
|
|
|
|
Assert(transaction->transactionState == REMOTE_TRANS_STARTING);
|
|
|
|
clearSuccessful = ClearResults(connection, raiseErrors);
|
|
if (clearSuccessful)
|
|
{
|
|
transaction->transactionState = REMOTE_TRANS_STARTED;
|
|
transaction->lastSuccessfulSubXact = transaction->lastQueuedSubXact;
|
|
}
|
|
|
|
if (!transaction->transactionFailed)
|
|
{
|
|
Assert(PQtransactionStatus(connection->pgConn) == PQTRANS_INTRANS);
|
|
}
|
|
}
|
|
|
|
|
|
/*
 * RemoteTransactionBegin begins a remote transaction in a blocking manner,
 * by running the start step immediately followed by the finish step.
 */
void
RemoteTransactionBegin(struct MultiConnection *connection)
{
	/* send the BEGIN command ... */
	StartRemoteTransactionBegin(connection);

	/* ... and wait for its outcome */
	FinishRemoteTransactionBegin(connection);
}
|
|
|
|
|
|
/*
|
|
* RemoteTransactionListBegin sends BEGIN over all connections in the
|
|
* given connection list and waits for all of them to finish.
|
|
*/
|
|
void
|
|
RemoteTransactionListBegin(List *connectionList)
|
|
{
|
|
ListCell *connectionCell = NULL;
|
|
|
|
/* send BEGIN to all nodes */
|
|
foreach(connectionCell, connectionList)
|
|
{
|
|
MultiConnection *connection = (MultiConnection *) lfirst(connectionCell);
|
|
|
|
StartRemoteTransactionBegin(connection);
|
|
}
|
|
|
|
/* wait for BEGIN to finish on all nodes */
|
|
foreach(connectionCell, connectionList)
|
|
{
|
|
MultiConnection *connection = (MultiConnection *) lfirst(connectionCell);
|
|
|
|
FinishRemoteTransactionBegin(connection);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* StartRemoteTransactionCommit initiates transaction commit in a non-blocking
|
|
* manner. If the transaction is in a failed state, it'll instead get rolled
|
|
* back.
|
|
*/
|
|
void
|
|
StartRemoteTransactionCommit(MultiConnection *connection)
|
|
{
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
const bool raiseErrors = false;
|
|
const bool isCommit = true;
|
|
|
|
/* can only commit if transaction is in progress */
|
|
Assert(transaction->transactionState != REMOTE_TRANS_INVALID);
|
|
|
|
/* can't commit if we already started to commit or abort */
|
|
Assert(transaction->transactionState < REMOTE_TRANS_1PC_ABORTING);
|
|
|
|
if (transaction->transactionFailed)
|
|
{
|
|
/* abort the transaction if it failed */
|
|
transaction->transactionState = REMOTE_TRANS_1PC_ABORTING;
|
|
|
|
/*
|
|
* Try sending an ROLLBACK; Depending on the state that won't
|
|
* succeed, but let's try. Have to clear previous results
|
|
* first.
|
|
*/
|
|
ForgetResults(connection); /* try to clear pending stuff */
|
|
if (!SendRemoteCommand(connection, "ROLLBACK"))
|
|
{
|
|
/* no point in reporting a likely redundant message */
|
|
}
|
|
}
|
|
else if (transaction->transactionState == REMOTE_TRANS_PREPARED)
|
|
{
|
|
/* commit the prepared transaction */
|
|
StringInfoData command;
|
|
|
|
initStringInfo(&command);
|
|
appendStringInfo(&command, "COMMIT PREPARED '%s'",
|
|
transaction->preparedName);
|
|
|
|
transaction->transactionState = REMOTE_TRANS_2PC_COMMITTING;
|
|
|
|
if (!SendRemoteCommand(connection, command.data))
|
|
{
|
|
HandleRemoteTransactionConnectionError(connection, raiseErrors);
|
|
|
|
WarnAboutLeakedPreparedTransaction(connection, isCommit);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* initiate remote transaction commit */
|
|
transaction->transactionState = REMOTE_TRANS_1PC_COMMITTING;
|
|
|
|
if (!SendRemoteCommand(connection, "COMMIT"))
|
|
{
|
|
/*
|
|
* For a moment there I thought we were in trouble.
|
|
*
|
|
* Failing in this state means that we don't know whether the the
|
|
* commit has succeeded.
|
|
*/
|
|
HandleRemoteTransactionConnectionError(connection, raiseErrors);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* FinishRemoteTransactionCommit finishes the work
|
|
* StartRemoteTransactionCommit initiated. It blocks if necessary (i.e. if
|
|
* PQisBusy() would return true).
|
|
*/
|
|
void
|
|
FinishRemoteTransactionCommit(MultiConnection *connection)
|
|
{
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
PGresult *result = NULL;
|
|
const bool raiseErrors = false;
|
|
const bool isCommit = true;
|
|
|
|
Assert(transaction->transactionState == REMOTE_TRANS_1PC_ABORTING ||
|
|
transaction->transactionState == REMOTE_TRANS_1PC_COMMITTING ||
|
|
transaction->transactionState == REMOTE_TRANS_2PC_COMMITTING);
|
|
|
|
result = GetRemoteCommandResult(connection, raiseErrors);
|
|
|
|
if (!IsResponseOK(result))
|
|
{
|
|
HandleRemoteTransactionResultError(connection, result, raiseErrors);
|
|
|
|
/*
|
|
* Failing in this state means that we will often not know whether
|
|
* the the commit has succeeded (particularly in case of network
|
|
* troubles).
|
|
*
|
|
* XXX: It might be worthwhile to discern cases where we got a
|
|
* proper error back from postgres (i.e. COMMIT was received but
|
|
* produced an error) from cases where the connection failed
|
|
* before getting a reply.
|
|
*/
|
|
|
|
if (transaction->transactionState == REMOTE_TRANS_1PC_COMMITTING)
|
|
{
|
|
if (transaction->transactionCritical)
|
|
{
|
|
ereport(WARNING, (errmsg("failed to commit critical transaction "
|
|
"on %s:%d, metadata is likely out of sync",
|
|
connection->hostname, connection->port)));
|
|
}
|
|
else
|
|
{
|
|
ereport(WARNING, (errmsg("failed to commit transaction on %s:%d",
|
|
connection->hostname, connection->port)));
|
|
}
|
|
}
|
|
else if (transaction->transactionState == REMOTE_TRANS_2PC_COMMITTING)
|
|
{
|
|
ereport(WARNING, (errmsg("failed to commit transaction on %s:%d",
|
|
connection->hostname, connection->port)));
|
|
WarnAboutLeakedPreparedTransaction(connection, isCommit);
|
|
}
|
|
}
|
|
else if (transaction->transactionState == REMOTE_TRANS_1PC_ABORTING ||
|
|
transaction->transactionState == REMOTE_TRANS_2PC_ABORTING)
|
|
{
|
|
transaction->transactionState = REMOTE_TRANS_ABORTED;
|
|
}
|
|
else
|
|
{
|
|
transaction->transactionState = REMOTE_TRANS_COMMITTED;
|
|
}
|
|
|
|
PQclear(result);
|
|
|
|
ForgetResults(connection);
|
|
}
|
|
|
|
|
|
/*
|
|
* RemoteTransactionCommit commits (or aborts, if the transaction failed) a
|
|
* remote transaction in a blocking manner.
|
|
*/
|
|
void
|
|
RemoteTransactionCommit(MultiConnection *connection)
|
|
{
|
|
StartRemoteTransactionCommit(connection);
|
|
FinishRemoteTransactionCommit(connection);
|
|
}
|
|
|
|
|
|
/*
|
|
* StartRemoteTransactionAbort initiates abortin the transaction in a
|
|
* non-blocking manner.
|
|
*/
|
|
void
|
|
StartRemoteTransactionAbort(MultiConnection *connection)
|
|
{
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
const bool raiseErrors = false;
|
|
const bool isNotCommit = false;
|
|
|
|
Assert(transaction->transactionState != REMOTE_TRANS_INVALID);
|
|
|
|
/*
|
|
* Clear previous results, so we have a better chance to send ROLLBACK
|
|
* [PREPARED]. If we've previously sent a PREPARE TRANSACTION, we always
|
|
* want to wait for that result, as that shouldn't take long and will
|
|
* reserve resources. But if there's another query running, we don't want
|
|
* to wait, because a longrunning statement may be running, force it to be
|
|
* killed in that case.
|
|
*/
|
|
if (transaction->transactionState == REMOTE_TRANS_PREPARING ||
|
|
transaction->transactionState == REMOTE_TRANS_PREPARED)
|
|
{
|
|
StringInfoData command;
|
|
|
|
/* await PREPARE TRANSACTION results, closing the connection would leave it dangling */
|
|
ForgetResults(connection);
|
|
|
|
initStringInfo(&command);
|
|
appendStringInfo(&command, "ROLLBACK PREPARED '%s'",
|
|
transaction->preparedName);
|
|
|
|
if (!SendRemoteCommand(connection, command.data))
|
|
{
|
|
HandleRemoteTransactionConnectionError(connection, raiseErrors);
|
|
|
|
WarnAboutLeakedPreparedTransaction(connection, isNotCommit);
|
|
}
|
|
else
|
|
{
|
|
transaction->transactionState = REMOTE_TRANS_2PC_ABORTING;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* In case of a cancellation, the connection might still be working
|
|
* on some commands. Try to consume the results such that the
|
|
* connection can be reused, but do not want to wait for commands
|
|
* to finish. Instead we just close the connection if the command
|
|
* is still busy.
|
|
*/
|
|
if (!ClearResultsIfReady(connection))
|
|
{
|
|
ShutdownConnection(connection);
|
|
|
|
/* FinishRemoteTransactionAbort will emit warning */
|
|
return;
|
|
}
|
|
|
|
if (!SendRemoteCommand(connection, "ROLLBACK"))
|
|
{
|
|
/* no point in reporting a likely redundant message */
|
|
MarkRemoteTransactionFailed(connection, raiseErrors);
|
|
}
|
|
else
|
|
{
|
|
transaction->transactionState = REMOTE_TRANS_1PC_ABORTING;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* FinishRemoteTransactionAbort finishes the work StartRemoteTransactionAbort
|
|
* initiated. It blocks if necessary (i.e. if PQisBusy() would return true).
|
|
*/
|
|
void
|
|
FinishRemoteTransactionAbort(MultiConnection *connection)
|
|
{
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
const bool raiseErrors = false;
|
|
|
|
if (transaction->transactionState == REMOTE_TRANS_2PC_ABORTING)
|
|
{
|
|
PGresult *result = GetRemoteCommandResult(connection, raiseErrors);
|
|
if (!IsResponseOK(result))
|
|
{
|
|
const bool isCommit = false;
|
|
|
|
HandleRemoteTransactionResultError(connection, result, raiseErrors);
|
|
|
|
WarnAboutLeakedPreparedTransaction(connection, isCommit);
|
|
}
|
|
|
|
PQclear(result);
|
|
}
|
|
|
|
/*
|
|
* Try to consume results of any in-progress commands. In the 1PC case
|
|
* this is also where we consume the result of the ROLLBACK.
|
|
*
|
|
* If we don't succeed the connection will be in a bad state, so we close it.
|
|
*/
|
|
if (!ClearResults(connection, raiseErrors))
|
|
{
|
|
ShutdownConnection(connection);
|
|
}
|
|
|
|
transaction->transactionState = REMOTE_TRANS_ABORTED;
|
|
}
|
|
|
|
|
|
/*
|
|
* RemoteTransactionAbort aborts a remote transaction in a blocking manner.
|
|
*/
|
|
void
|
|
RemoteTransactionAbort(MultiConnection *connection)
|
|
{
|
|
StartRemoteTransactionAbort(connection);
|
|
FinishRemoteTransactionAbort(connection);
|
|
}
|
|
|
|
|
|
/*
|
|
* StartRemoteTransactionPrepare initiates preparing the transaction in a
|
|
* non-blocking manner.
|
|
*/
|
|
void
|
|
StartRemoteTransactionPrepare(struct MultiConnection *connection)
|
|
{
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
StringInfoData command;
|
|
const bool raiseErrors = true;
|
|
WorkerNode *workerNode = NULL;
|
|
|
|
/* can't prepare a nonexistant transaction */
|
|
Assert(transaction->transactionState != REMOTE_TRANS_INVALID);
|
|
|
|
/* can't prepare in a failed transaction */
|
|
Assert(!transaction->transactionFailed);
|
|
|
|
/* can't prepare if already started to prepare/abort/commit */
|
|
Assert(transaction->transactionState < REMOTE_TRANS_PREPARING);
|
|
|
|
Assign2PCIdentifier(connection);
|
|
|
|
/* log transactions to workers in pg_dist_transaction */
|
|
workerNode = FindWorkerNode(connection->hostname, connection->port);
|
|
if (workerNode != NULL)
|
|
{
|
|
LogTransactionRecord(workerNode->groupId, transaction->preparedName);
|
|
}
|
|
|
|
initStringInfo(&command);
|
|
appendStringInfo(&command, "PREPARE TRANSACTION '%s'",
|
|
transaction->preparedName);
|
|
|
|
if (!SendRemoteCommand(connection, command.data))
|
|
{
|
|
HandleRemoteTransactionConnectionError(connection, raiseErrors);
|
|
}
|
|
else
|
|
{
|
|
transaction->transactionState = REMOTE_TRANS_PREPARING;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* FinishRemoteTransactionPrepare finishes the work
|
|
* StartRemoteTransactionPrepare initiated. It blocks if necessary (i.e. if
|
|
* PQisBusy() would return true).
|
|
*/
|
|
void
|
|
FinishRemoteTransactionPrepare(struct MultiConnection *connection)
|
|
{
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
PGresult *result = NULL;
|
|
const bool raiseErrors = true;
|
|
|
|
Assert(transaction->transactionState == REMOTE_TRANS_PREPARING);
|
|
|
|
result = GetRemoteCommandResult(connection, raiseErrors);
|
|
|
|
if (!IsResponseOK(result))
|
|
{
|
|
transaction->transactionState = REMOTE_TRANS_ABORTED;
|
|
HandleRemoteTransactionResultError(connection, result, raiseErrors);
|
|
}
|
|
else
|
|
{
|
|
transaction->transactionState = REMOTE_TRANS_PREPARED;
|
|
}
|
|
|
|
PQclear(result);
|
|
|
|
/*
|
|
* Try to consume results of PREPARE TRANSACTION command. If we don't
|
|
* succeed, rollback the transaction. Note that we've not committed on
|
|
* any node yet, and we're not sure about the state of the worker node.
|
|
* So rollbacking seems to be the safest action if the worker is
|
|
* in a state where it can actually rollback.
|
|
*/
|
|
if (!ClearResults(connection, raiseErrors))
|
|
{
|
|
ereport(ERROR, (errmsg("failed to prepare transaction '%s' on host %s:%d",
|
|
transaction->preparedName, connection->hostname,
|
|
connection->port),
|
|
errhint("Try re-running the command.")));
|
|
}
|
|
}
|
|
|
|
|
|
/*
 * RemoteTransactionPrepare prepares a remote transaction in a blocking
 * manner, by running the start step immediately followed by the finish step.
 */
void
RemoteTransactionPrepare(struct MultiConnection *connection)
{
	/* send PREPARE TRANSACTION ... */
	StartRemoteTransactionPrepare(connection);

	/* ... and wait for its outcome */
	FinishRemoteTransactionPrepare(connection);
}
|
|
|
|
|
|
/*
|
|
* RemoteTransactionBeginIfNecessary is a convenience wrapper around
|
|
* RemoteTransactionsBeginIfNecessary(), for a single connection.
|
|
*/
|
|
void
|
|
RemoteTransactionBeginIfNecessary(MultiConnection *connection)
|
|
{
|
|
/* just delegate */
|
|
if (InCoordinatedTransaction())
|
|
{
|
|
List *connectionList = list_make1(connection);
|
|
|
|
RemoteTransactionsBeginIfNecessary(connectionList);
|
|
list_free(connectionList);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* RemoteTransactionsBeginIfNecessary begins, if necessary according to this
|
|
* session's coordinated transaction state, and the remote transaction's
|
|
* state, an explicit transaction on all the connections. This is done in
|
|
* parallel, to lessen latency penalties.
|
|
*/
|
|
void
|
|
RemoteTransactionsBeginIfNecessary(List *connectionList)
|
|
{
|
|
ListCell *connectionCell = NULL;
|
|
bool raiseInterrupts = true;
|
|
|
|
/*
|
|
* Don't do anything if not in a coordinated transaction. That allows the
|
|
* same code to work both in situations that uses transactions, and when
|
|
* not.
|
|
*/
|
|
if (!InCoordinatedTransaction())
|
|
{
|
|
return;
|
|
}
|
|
|
|
/* issue BEGIN to all connections needing it */
|
|
foreach(connectionCell, connectionList)
|
|
{
|
|
MultiConnection *connection = (MultiConnection *) lfirst(connectionCell);
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
/* can't send BEGIN if a command already is in progress */
|
|
Assert(PQtransactionStatus(connection->pgConn) != PQTRANS_ACTIVE);
|
|
|
|
/*
|
|
* If a transaction already is in progress (including having failed),
|
|
* don't start it again. Thats quite normal if a piece of code allows
|
|
* cached connections.
|
|
*/
|
|
if (transaction->transactionState != REMOTE_TRANS_INVALID)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
StartRemoteTransactionBegin(connection);
|
|
}
|
|
|
|
raiseInterrupts = true;
|
|
WaitForAllConnections(connectionList, raiseInterrupts);
|
|
|
|
/* get result of all the BEGINs */
|
|
foreach(connectionCell, connectionList)
|
|
{
|
|
MultiConnection *connection = (MultiConnection *) lfirst(connectionCell);
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
/*
|
|
* Only handle BEGIN results on connections that are in process of
|
|
* starting a transaction, and haven't already failed (e.g. by not
|
|
* being able to send BEGIN due to a network failure).
|
|
*/
|
|
if (transaction->transactionFailed ||
|
|
transaction->transactionState != REMOTE_TRANS_STARTING)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
FinishRemoteTransactionBegin(connection);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* HandleRemoteTransactionConnectionError records a transaction as having failed
|
|
* and throws a connection error if the transaction was critical and raiseErrors
|
|
* is true, or a warning otherwise.
|
|
*/
|
|
void
|
|
HandleRemoteTransactionConnectionError(MultiConnection *connection, bool raiseErrors)
|
|
{
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
transaction->transactionFailed = true;
|
|
|
|
if (transaction->transactionCritical && raiseErrors)
|
|
{
|
|
ReportConnectionError(connection, ERROR);
|
|
}
|
|
else
|
|
{
|
|
ReportConnectionError(connection, WARNING);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* HandleRemoteTransactionResultError records a transaction as having failed
|
|
* and throws a result error if the transaction was critical and raiseErrors
|
|
* is true, or a warning otherwise.
|
|
*/
|
|
void
|
|
HandleRemoteTransactionResultError(MultiConnection *connection, PGresult *result, bool
|
|
raiseErrors)
|
|
{
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
transaction->transactionFailed = true;
|
|
|
|
if (transaction->transactionCritical && raiseErrors)
|
|
{
|
|
ReportResultError(connection, result, ERROR);
|
|
}
|
|
else
|
|
{
|
|
ReportResultError(connection, result, WARNING);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* MarkRemoteTransactionFailed records a transaction as having failed.
|
|
*
|
|
* If the connection is marked as critical, and allowErrorPromotion is true,
|
|
* this routine will ERROR out. The allowErrorPromotion case is primarily
|
|
* required for the transaction management code itself. Usually it is helpful
|
|
* to fail as soon as possible. If !allowErrorPromotion transaction commit
|
|
* will instead issue an error before committing on any node.
|
|
*/
|
|
void
|
|
MarkRemoteTransactionFailed(MultiConnection *connection, bool allowErrorPromotion)
|
|
{
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
transaction->transactionFailed = true;
|
|
|
|
/*
|
|
* If the connection is marked as critical, fail the entire coordinated
|
|
* transaction. If allowed.
|
|
*/
|
|
if (transaction->transactionCritical && allowErrorPromotion)
|
|
{
|
|
ereport(ERROR, (errmsg("failure on connection marked as essential: %s:%d",
|
|
connection->hostname, connection->port)));
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* MarkRemoteTransactionCritical signals that failures on this remote
|
|
* transaction should fail the entire coordinated transaction.
|
|
*/
|
|
void
|
|
MarkRemoteTransactionCritical(struct MultiConnection *connection)
|
|
{
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
transaction->transactionCritical = true;
|
|
}
|
|
|
|
|
|
/*
|
|
* IsRemoteTransactionCritical returns whether the remote transaction on
|
|
* the given connection has been marked as critical.
|
|
*/
|
|
bool
|
|
IsRemoteTransactionCritical(struct MultiConnection *connection)
|
|
{
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
return transaction->transactionCritical;
|
|
}
|
|
|
|
|
|
/*
|
|
* CloseRemoteTransaction handles closing a connection that, potentially, is
|
|
* part of a coordinated transaction. This should only ever be called from
|
|
* connection_management.c, while closing a connection during a transaction.
|
|
*/
|
|
void
|
|
CloseRemoteTransaction(struct MultiConnection *connection)
|
|
{
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
/* unlink from list of open transactions, if necessary */
|
|
if (transaction->transactionState != REMOTE_TRANS_INVALID)
|
|
{
|
|
/* XXX: Should we error out for a critical transaction? */
|
|
|
|
dlist_delete(&connection->transactionNode);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* ResetRemoteTransaction resets the state of the transaction after the end of
|
|
* the main transaction, if the connection is being reused.
|
|
*/
|
|
void
|
|
ResetRemoteTransaction(struct MultiConnection *connection)
|
|
{
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
/* just reset the entire state, relying on 0 being invalid/false */
|
|
memset(transaction, 0, sizeof(*transaction));
|
|
}
|
|
|
|
|
|
/*
|
|
* CoordinatedRemoteTransactionsPrepare PREPAREs a 2PC transaction on all
|
|
* non-failed transactions participating in the coordinated transaction.
|
|
*/
|
|
void
|
|
CoordinatedRemoteTransactionsPrepare(void)
|
|
{
|
|
dlist_iter iter;
|
|
bool raiseInterrupts = false;
|
|
List *connectionList = NIL;
|
|
|
|
/* issue PREPARE TRANSACTION; to all relevant remote nodes */
|
|
|
|
/* asynchronously send PREPARE */
|
|
dlist_foreach(iter, &InProgressTransactions)
|
|
{
|
|
MultiConnection *connection = dlist_container(MultiConnection, transactionNode,
|
|
iter.cur);
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
Assert(transaction->transactionState != REMOTE_TRANS_INVALID);
|
|
|
|
/* can't PREPARE a transaction that failed */
|
|
if (transaction->transactionFailed)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
StartRemoteTransactionPrepare(connection);
|
|
connectionList = lappend(connectionList, connection);
|
|
}
|
|
|
|
raiseInterrupts = true;
|
|
WaitForAllConnections(connectionList, raiseInterrupts);
|
|
|
|
/* Wait for result */
|
|
dlist_foreach(iter, &InProgressTransactions)
|
|
{
|
|
MultiConnection *connection = dlist_container(MultiConnection, transactionNode,
|
|
iter.cur);
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
if (transaction->transactionState != REMOTE_TRANS_PREPARING)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
FinishRemoteTransactionPrepare(connection);
|
|
}
|
|
|
|
CurrentCoordinatedTransactionState = COORD_TRANS_PREPARED;
|
|
}
|
|
|
|
|
|
/*
|
|
* CoordinatedRemoteTransactionsCommit performs distributed transactions
|
|
* handling at commit time. This will be called at XACT_EVENT_PRE_COMMIT if
|
|
* 1PC commits are used - so shards can still be invalidated - and at
|
|
* XACT_EVENT_COMMIT if 2PC is being used.
|
|
*
|
|
* Note that this routine has to issue rollbacks for failed transactions.
|
|
*/
|
|
void
|
|
CoordinatedRemoteTransactionsCommit(void)
|
|
{
|
|
dlist_iter iter;
|
|
List *connectionList = NIL;
|
|
bool raiseInterrupts = false;
|
|
|
|
/*
|
|
* Issue appropriate transaction commands to remote nodes. If everything
|
|
* went well that's going to be COMMIT or COMMIT PREPARED, if individual
|
|
* connections had errors, some or all of them might require a ROLLBACK.
|
|
*
|
|
* First send the command asynchronously over all connections.
|
|
*/
|
|
dlist_foreach(iter, &InProgressTransactions)
|
|
{
|
|
MultiConnection *connection = dlist_container(MultiConnection, transactionNode,
|
|
iter.cur);
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
if (transaction->transactionState == REMOTE_TRANS_INVALID ||
|
|
transaction->transactionState == REMOTE_TRANS_1PC_COMMITTING ||
|
|
transaction->transactionState == REMOTE_TRANS_2PC_COMMITTING ||
|
|
transaction->transactionState == REMOTE_TRANS_COMMITTED ||
|
|
transaction->transactionState == REMOTE_TRANS_ABORTED)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
StartRemoteTransactionCommit(connection);
|
|
connectionList = lappend(connectionList, connection);
|
|
}
|
|
|
|
raiseInterrupts = false;
|
|
WaitForAllConnections(connectionList, raiseInterrupts);
|
|
|
|
/* wait for the replies to the commands to come in */
|
|
dlist_foreach(iter, &InProgressTransactions)
|
|
{
|
|
MultiConnection *connection = dlist_container(MultiConnection, transactionNode,
|
|
iter.cur);
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
/* nothing to do if not committing / aborting */
|
|
if (transaction->transactionState != REMOTE_TRANS_1PC_COMMITTING &&
|
|
transaction->transactionState != REMOTE_TRANS_2PC_COMMITTING &&
|
|
transaction->transactionState != REMOTE_TRANS_1PC_ABORTING &&
|
|
transaction->transactionState != REMOTE_TRANS_2PC_ABORTING)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
FinishRemoteTransactionCommit(connection);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* CoordinatedRemoteTransactionsAbort performs distributed transactions
|
|
* handling at abort time.
|
|
*
|
|
* This issues ROLLBACKS and ROLLBACK PREPARED depending on whether the remote
|
|
* transaction has been prepared or not.
|
|
*/
|
|
void
|
|
CoordinatedRemoteTransactionsAbort(void)
|
|
{
|
|
dlist_iter iter;
|
|
List *connectionList = NIL;
|
|
bool raiseInterrupts = false;
|
|
|
|
/* asynchronously send ROLLBACK [PREPARED] */
|
|
dlist_foreach(iter, &InProgressTransactions)
|
|
{
|
|
MultiConnection *connection = dlist_container(MultiConnection, transactionNode,
|
|
iter.cur);
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
if (transaction->transactionState == REMOTE_TRANS_INVALID ||
|
|
transaction->transactionState == REMOTE_TRANS_1PC_ABORTING ||
|
|
transaction->transactionState == REMOTE_TRANS_2PC_ABORTING ||
|
|
transaction->transactionState == REMOTE_TRANS_ABORTED)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
StartRemoteTransactionAbort(connection);
|
|
connectionList = lappend(connectionList, connection);
|
|
}
|
|
|
|
raiseInterrupts = false;
|
|
WaitForAllConnections(connectionList, raiseInterrupts);
|
|
|
|
/* and wait for the results */
|
|
dlist_foreach(iter, &InProgressTransactions)
|
|
{
|
|
MultiConnection *connection = dlist_container(MultiConnection, transactionNode,
|
|
iter.cur);
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
if (transaction->transactionState != REMOTE_TRANS_1PC_ABORTING &&
|
|
transaction->transactionState != REMOTE_TRANS_2PC_ABORTING)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
FinishRemoteTransactionAbort(connection);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* CoordinatedRemoteTransactionsSavepointBegin sends the SAVEPOINT command for
|
|
* the given sub-transaction id to all connections participating in the current
|
|
* transaction.
|
|
*/
|
|
void
|
|
CoordinatedRemoteTransactionsSavepointBegin(SubTransactionId subId)
|
|
{
|
|
dlist_iter iter;
|
|
const bool raiseInterrupts = true;
|
|
List *connectionList = NIL;
|
|
|
|
/* asynchronously send SAVEPOINT */
|
|
dlist_foreach(iter, &InProgressTransactions)
|
|
{
|
|
MultiConnection *connection = dlist_container(MultiConnection, transactionNode,
|
|
iter.cur);
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
if (transaction->transactionFailed)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
StartRemoteTransactionSavepointBegin(connection, subId);
|
|
connectionList = lappend(connectionList, connection);
|
|
}
|
|
|
|
WaitForAllConnections(connectionList, raiseInterrupts);
|
|
|
|
/* and wait for the results */
|
|
dlist_foreach(iter, &InProgressTransactions)
|
|
{
|
|
MultiConnection *connection = dlist_container(MultiConnection, transactionNode,
|
|
iter.cur);
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
if (transaction->transactionFailed)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
FinishRemoteTransactionSavepointBegin(connection, subId);
|
|
|
|
if (!transaction->transactionFailed)
|
|
{
|
|
transaction->lastSuccessfulSubXact = subId;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* CoordinatedRemoteTransactionsSavepointRelease sends the RELEASE SAVEPOINT
|
|
* command for the given sub-transaction id to all connections participating in
|
|
* the current transaction.
|
|
*/
|
|
void
|
|
CoordinatedRemoteTransactionsSavepointRelease(SubTransactionId subId)
|
|
{
|
|
dlist_iter iter;
|
|
const bool raiseInterrupts = true;
|
|
List *connectionList = NIL;
|
|
|
|
/* asynchronously send RELEASE SAVEPOINT */
|
|
dlist_foreach(iter, &InProgressTransactions)
|
|
{
|
|
MultiConnection *connection = dlist_container(MultiConnection, transactionNode,
|
|
iter.cur);
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
if (transaction->transactionFailed)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
StartRemoteTransactionSavepointRelease(connection, subId);
|
|
connectionList = lappend(connectionList, connection);
|
|
}
|
|
|
|
WaitForAllConnections(connectionList, raiseInterrupts);
|
|
|
|
/* and wait for the results */
|
|
dlist_foreach(iter, &InProgressTransactions)
|
|
{
|
|
MultiConnection *connection = dlist_container(MultiConnection, transactionNode,
|
|
iter.cur);
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
if (transaction->transactionFailed)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
FinishRemoteTransactionSavepointRelease(connection, subId);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* CoordinatedRemoteTransactionsSavepointRollback sends the ROLLBACK TO SAVEPOINT
|
|
* command for the given sub-transaction id to all connections participating in
|
|
* the current transaction.
|
|
*/
|
|
void
|
|
CoordinatedRemoteTransactionsSavepointRollback(SubTransactionId subId)
|
|
{
|
|
dlist_iter iter;
|
|
const bool raiseInterrupts = false;
|
|
List *connectionList = NIL;
|
|
|
|
/* asynchronously send ROLLBACK TO SAVEPOINT */
|
|
dlist_foreach(iter, &InProgressTransactions)
|
|
{
|
|
MultiConnection *connection = dlist_container(MultiConnection, transactionNode,
|
|
iter.cur);
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
/* cancel any ongoing queries before issuing rollback */
|
|
SendCancelationRequest(connection);
|
|
|
|
/* clear results, but don't show cancelation warning messages from workers. */
|
|
ClearResultsDiscardWarnings(connection, raiseInterrupts);
|
|
|
|
if (transaction->transactionFailed)
|
|
{
|
|
if (transaction->lastSuccessfulSubXact <= subId)
|
|
{
|
|
transaction->transactionRecovering = true;
|
|
|
|
/*
|
|
* Clear the results of the failed query so we can send the ROLLBACK
|
|
* TO SAVEPOINT command for a savepoint that can recover the transaction
|
|
* from failure.
|
|
*/
|
|
ForgetResults(connection);
|
|
}
|
|
else
|
|
{
|
|
continue;
|
|
}
|
|
}
|
|
StartRemoteTransactionSavepointRollback(connection, subId);
|
|
connectionList = lappend(connectionList, connection);
|
|
}
|
|
|
|
WaitForAllConnections(connectionList, raiseInterrupts);
|
|
|
|
/* and wait for the results */
|
|
dlist_foreach(iter, &InProgressTransactions)
|
|
{
|
|
MultiConnection *connection = dlist_container(MultiConnection, transactionNode,
|
|
iter.cur);
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
if (transaction->transactionFailed && !transaction->transactionRecovering)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
FinishRemoteTransactionSavepointRollback(connection, subId);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* StartRemoteTransactionSavepointBegin initiates SAVEPOINT command for the given
|
|
* subtransaction id in a non-blocking manner.
|
|
*/
|
|
static void
|
|
StartRemoteTransactionSavepointBegin(MultiConnection *connection, SubTransactionId subId)
|
|
{
|
|
const bool raiseErrors = true;
|
|
StringInfo savepointCommand = makeStringInfo();
|
|
appendStringInfo(savepointCommand, "SAVEPOINT savepoint_%u", subId);
|
|
|
|
if (!SendRemoteCommand(connection, savepointCommand->data))
|
|
{
|
|
HandleRemoteTransactionConnectionError(connection, raiseErrors);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* FinishRemoteTransactionSavepointBegin finishes the work
|
|
* StartRemoteTransactionSavepointBegin initiated. It blocks if necessary (i.e.
|
|
* if PQisBusy() would return true).
|
|
*/
|
|
static void
|
|
FinishRemoteTransactionSavepointBegin(MultiConnection *connection, SubTransactionId subId)
|
|
{
|
|
const bool raiseErrors = true;
|
|
PGresult *result = GetRemoteCommandResult(connection, raiseErrors);
|
|
if (!IsResponseOK(result))
|
|
{
|
|
HandleRemoteTransactionResultError(connection, result, raiseErrors);
|
|
}
|
|
|
|
PQclear(result);
|
|
ForgetResults(connection);
|
|
}
|
|
|
|
|
|
/*
|
|
* StartRemoteTransactionSavepointRelease initiates RELEASE SAVEPOINT command for
|
|
* the given subtransaction id in a non-blocking manner.
|
|
*/
|
|
static void
|
|
StartRemoteTransactionSavepointRelease(MultiConnection *connection,
|
|
SubTransactionId subId)
|
|
{
|
|
const bool raiseErrors = true;
|
|
StringInfo savepointCommand = makeStringInfo();
|
|
appendStringInfo(savepointCommand, "RELEASE SAVEPOINT savepoint_%u", subId);
|
|
|
|
if (!SendRemoteCommand(connection, savepointCommand->data))
|
|
{
|
|
HandleRemoteTransactionConnectionError(connection, raiseErrors);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* FinishRemoteTransactionSavepointRelease finishes the work
|
|
* StartRemoteTransactionSavepointRelease initiated. It blocks if necessary (i.e.
|
|
* if PQisBusy() would return true).
|
|
*/
|
|
static void
|
|
FinishRemoteTransactionSavepointRelease(MultiConnection *connection,
|
|
SubTransactionId subId)
|
|
{
|
|
const bool raiseErrors = true;
|
|
PGresult *result = GetRemoteCommandResult(connection, raiseErrors);
|
|
if (!IsResponseOK(result))
|
|
{
|
|
HandleRemoteTransactionResultError(connection, result, raiseErrors);
|
|
}
|
|
|
|
PQclear(result);
|
|
ForgetResults(connection);
|
|
}
|
|
|
|
|
|
/*
|
|
* StartRemoteTransactionSavepointRollback initiates ROLLBACK TO SAVEPOINT command
|
|
* for the given subtransaction id in a non-blocking manner.
|
|
*/
|
|
static void
|
|
StartRemoteTransactionSavepointRollback(MultiConnection *connection,
|
|
SubTransactionId subId)
|
|
{
|
|
const bool raiseErrors = false;
|
|
StringInfo savepointCommand = makeStringInfo();
|
|
appendStringInfo(savepointCommand, "ROLLBACK TO SAVEPOINT savepoint_%u", subId);
|
|
|
|
if (!SendRemoteCommand(connection, savepointCommand->data))
|
|
{
|
|
HandleRemoteTransactionConnectionError(connection, raiseErrors);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* FinishRemoteTransactionSavepointRollback finishes the work
|
|
* StartRemoteTransactionSavepointRollback initiated. It blocks if necessary (i.e.
|
|
* if PQisBusy() would return true). It also recovers the transaction from failure
|
|
* if transaction is recovering and the rollback command succeeds.
|
|
*/
|
|
static void
|
|
FinishRemoteTransactionSavepointRollback(MultiConnection *connection, SubTransactionId
|
|
subId)
|
|
{
|
|
const bool raiseErrors = false;
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
PGresult *result = GetRemoteCommandResult(connection, raiseErrors);
|
|
if (!IsResponseOK(result))
|
|
{
|
|
HandleRemoteTransactionResultError(connection, result, raiseErrors);
|
|
}
|
|
|
|
/* ROLLBACK TO SAVEPOINT succeeded, check if it recovers the transaction */
|
|
else if (transaction->transactionRecovering)
|
|
{
|
|
transaction->transactionFailed = false;
|
|
transaction->transactionRecovering = false;
|
|
}
|
|
|
|
PQclear(result);
|
|
ForgetResults(connection);
|
|
}
|
|
|
|
|
|
/*
|
|
* CheckRemoteTransactionsHealth checks if any of the participating transactions in a
|
|
* coordinated transaction failed, and what consequence that should have.
|
|
* This needs to be called before the coordinated transaction commits (but
|
|
* after they've been PREPAREd if 2PC is in use).
|
|
*/
|
|
void
|
|
CheckRemoteTransactionsHealth(void)
|
|
{
|
|
dlist_iter iter;
|
|
|
|
dlist_foreach(iter, &InProgressTransactions)
|
|
{
|
|
MultiConnection *connection = dlist_container(MultiConnection, transactionNode,
|
|
iter.cur);
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
PGTransactionStatusType status = PQtransactionStatus(connection->pgConn);
|
|
|
|
/* if the connection is in a bad state, so is the transaction's state */
|
|
if (status == PQTRANS_INERROR || status == PQTRANS_UNKNOWN)
|
|
{
|
|
transaction->transactionFailed = true;
|
|
}
|
|
|
|
/*
|
|
* If a critical connection is marked as failed (and no error has been
|
|
* raised yet) do so now.
|
|
*/
|
|
if (transaction->transactionFailed && transaction->transactionCritical)
|
|
{
|
|
ereport(ERROR, (errmsg("failure on connection marked as essential: %s:%d",
|
|
connection->hostname, connection->port)));
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* Assign2PCIdentifier computes the 2PC transaction name to use for a
|
|
* transaction. Every prepared transaction should get a new name, i.e. this
|
|
* function will need to be called again.
|
|
*
|
|
* The format of the name is:
|
|
*
|
|
* citus_<source group>_<pid>_<distributed transaction number>_<connection number>
|
|
*
|
|
* (at most 5+1+10+1+10+20+1+10 = 58 characters, while limit is 64)
|
|
*
|
|
* The source group is used to distinguish 2PCs started by different
|
|
* coordinators. A coordinator will only attempt to recover its own 2PCs.
|
|
*
|
|
* The pid is used to distinguish different processes on the coordinator, mainly
|
|
* to provide some entropy across restarts.
|
|
*
|
|
* The distributed transaction number is used to distinguish different
|
|
* transactions originating from the same node (since restart).
|
|
*
|
|
* The connection number is used to distinguish connections made to a node
|
|
* within the same transaction.
|
|
*
|
|
* NB: we rely on the fact that we don't need to do full escaping on the names
|
|
* generated here.
|
|
*/
|
|
static void
|
|
Assign2PCIdentifier(MultiConnection *connection)
|
|
{
|
|
/* local sequence number used to distinguish different connections */
|
|
static uint32 connectionNumber = 0;
|
|
|
|
/* transaction identifier that is unique across processes */
|
|
uint64 transactionNumber = CurrentDistributedTransactionNumber();
|
|
|
|
/* print all numbers as unsigned to guarantee no minus symbols appear in the name */
|
|
snprintf(connection->remoteTransaction.preparedName, NAMEDATALEN,
|
|
PREPARED_TRANSACTION_NAME_FORMAT, GetLocalGroupId(), MyProcPid,
|
|
transactionNumber, connectionNumber++);
|
|
}
|
|
|
|
|
|
/*
|
|
* ParsePreparedTransactionName parses a prepared transaction name to extract
|
|
* the initiator group ID, initiator process ID, distributed transaction number,
|
|
* and the connection number. If the transaction name does not match the expected
|
|
* format ParsePreparedTransactionName returns false, and true otherwise.
|
|
*/
|
|
bool
|
|
ParsePreparedTransactionName(char *preparedTransactionName,
|
|
int32 *groupId, int *procId,
|
|
uint64 *transactionNumber,
|
|
uint32 *connectionNumber)
|
|
{
|
|
char *currentCharPointer = preparedTransactionName;
|
|
|
|
currentCharPointer = strchr(currentCharPointer, '_');
|
|
if (currentCharPointer == NULL)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
/* step ahead of the current '_' character */
|
|
++currentCharPointer;
|
|
|
|
*groupId = strtol(currentCharPointer, NULL, 10);
|
|
|
|
if ((*groupId == 0 && errno == EINVAL) ||
|
|
(*groupId == INT_MAX && errno == ERANGE))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
currentCharPointer = strchr(currentCharPointer, '_');
|
|
if (currentCharPointer == NULL)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
/* step ahead of the current '_' character */
|
|
++currentCharPointer;
|
|
|
|
*procId = strtol(currentCharPointer, NULL, 10);
|
|
if ((*procId == 0 && errno == EINVAL) ||
|
|
(*procId == INT_MAX && errno == ERANGE))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
currentCharPointer = strchr(currentCharPointer, '_');
|
|
if (currentCharPointer == NULL)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
/* step ahead of the current '_' character */
|
|
++currentCharPointer;
|
|
|
|
*transactionNumber = pg_strtouint64(currentCharPointer, NULL, 10);
|
|
if ((*transactionNumber == 0 && errno != 0) ||
|
|
(*transactionNumber == ULLONG_MAX && errno == ERANGE))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
currentCharPointer = strchr(currentCharPointer, '_');
|
|
if (currentCharPointer == NULL)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
/* step ahead of the current '_' character */
|
|
++currentCharPointer;
|
|
|
|
*connectionNumber = strtoul(currentCharPointer, NULL, 10);
|
|
if ((*connectionNumber == 0 && errno == EINVAL) ||
|
|
(*connectionNumber == UINT_MAX && errno == ERANGE))
|
|
{
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
/*
|
|
* WarnAboutLeakedPreparedTransaction issues a WARNING explaining that a
|
|
* prepared transaction could not be committed or rolled back, and explains
|
|
* how to perform cleanup.
|
|
*/
|
|
static void
|
|
WarnAboutLeakedPreparedTransaction(MultiConnection *connection, bool commit)
|
|
{
|
|
StringInfoData command;
|
|
RemoteTransaction *transaction = &connection->remoteTransaction;
|
|
|
|
initStringInfo(&command);
|
|
|
|
if (commit)
|
|
{
|
|
appendStringInfo(&command, "COMMIT PREPARED '%s'",
|
|
transaction->preparedName);
|
|
}
|
|
else
|
|
{
|
|
appendStringInfo(&command, "ROLLBACK PREPARED '%s'",
|
|
transaction->preparedName);
|
|
}
|
|
|
|
/* log a warning so the user may abort the transaction later */
|
|
ereport(WARNING, (errmsg("failed to roll back prepared transaction '%s'",
|
|
transaction->preparedName),
|
|
errhint("Run \"%s\" on %s:%u",
|
|
command.data, connection->hostname, connection->port)));
|
|
}
|