mirror of https://github.com/citusdata/citus.git
Do not commit already-committed prepared transactions in recovery
parent
fe798cf0f9
commit
9793218122
|
@ -246,7 +246,8 @@ RecoverWorkerTransactions(WorkerNode *workerNode)
|
||||||
{
|
{
|
||||||
bool isNull = false;
|
bool isNull = false;
|
||||||
bool isTransactionInProgress = false;
|
bool isTransactionInProgress = false;
|
||||||
bool isTransactionPending = false;
|
bool foundPreparedTransactionBeforeCommit = false;
|
||||||
|
bool foundPreparedTransactionAfterCommit = false;
|
||||||
|
|
||||||
Datum transactionNameDatum = heap_getattr(heapTuple,
|
Datum transactionNameDatum = heap_getattr(heapTuple,
|
||||||
Anum_pg_dist_transaction_gid,
|
Anum_pg_dist_transaction_gid,
|
||||||
|
@ -270,13 +271,20 @@ RecoverWorkerTransactions(WorkerNode *workerNode)
|
||||||
* that need to be aborted remain at the end.
|
* that need to be aborted remain at the end.
|
||||||
*/
|
*/
|
||||||
hash_search(pendingTransactionSet, transactionName, HASH_REMOVE,
|
hash_search(pendingTransactionSet, transactionName, HASH_REMOVE,
|
||||||
&isTransactionPending);
|
&foundPreparedTransactionBeforeCommit);
|
||||||
|
|
||||||
if (isTransactionPending)
|
hash_search(recheckTransactionSet, transactionName, HASH_FIND,
|
||||||
|
&foundPreparedTransactionAfterCommit);
|
||||||
|
|
||||||
|
if (foundPreparedTransactionBeforeCommit && foundPreparedTransactionAfterCommit)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* The transaction was committed, but the prepared transaction still exists
|
* The transaction was committed, but the prepared transaction still exists
|
||||||
* on the worker. Try committing it.
|
* on the worker. Try committing it.
|
||||||
|
*
|
||||||
|
* We double check that the recovery record exists both before and after
|
||||||
|
* checking ActiveDistributedTransactionNumbers(), since we may have
|
||||||
|
* observed a prepared transaction that was committed immediately after.
|
||||||
*/
|
*/
|
||||||
bool shouldCommit = true;
|
bool shouldCommit = true;
|
||||||
bool commitSucceeded = RecoverPreparedTransactionOnWorker(connection,
|
bool commitSucceeded = RecoverPreparedTransactionOnWorker(connection,
|
||||||
|
@ -300,53 +308,46 @@ RecoverWorkerTransactions(WorkerNode *workerNode)
|
||||||
* the recovery record.
|
* the recovery record.
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
else
|
else if (foundPreparedTransactionAfterCommit)
|
||||||
{
|
{
|
||||||
bool foundNewPreparedTransaction = false;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We found a committed pg_dist_transaction record without a prepared
|
* We found a committed pg_dist_transaction record that initially did
|
||||||
* transaction. In the typical case, this means that the prepared
|
* not have a prepared transaction, but did when we checked again.
|
||||||
* transaction was successfully committed from the post-commit callback
|
|
||||||
* and we can therefore delete the recovery record. However, there is a
|
|
||||||
* race condition.
|
|
||||||
*
|
*
|
||||||
* If a transaction started and committed just after we observed the
|
* If a transaction started and committed just after we observed the
|
||||||
* set of prepared transactions, and just before we took our
|
* set of prepared transactions, and just before we called
|
||||||
* pg_dist_transaction snapshot, then we would see its records, but it may
|
* ActiveDistributedTransactionNumbers, then we would see a recovery
|
||||||
* have prepared transactions that failed to commit and that we did not
|
* record without a prepared transaction in pendingTransactionSet,
|
||||||
* observe. We should not delete the records, since we would otherwise
|
* but there may be prepared transactions that failed to commit.
|
||||||
* roll back the prepared transaction on the next call to
|
* We should not delete the records for those prepared transactions,
|
||||||
|
* since we would otherwise roll back them on the next call to
|
||||||
* recover_prepared_transactions.
|
* recover_prepared_transactions.
|
||||||
*
|
*
|
||||||
* In addition, if the transaction started after the call it
|
* In addition, if the transaction started after the call to
|
||||||
* ActiveDistributedTransactionNumbers, then it may still be in the process
|
* ActiveDistributedTransactionNumbers and finished just before our
|
||||||
* of comitting the prepared transactions in the post-commit callback.
|
* pg_dist_transaction snapshot, then it may still be in the process
|
||||||
|
* of comitting the prepared transactions in the post-commit callback
|
||||||
|
* and we should not touch the prepared transactions.
|
||||||
*
|
*
|
||||||
* We check the prepared transactions again after taking the
|
* To handle these cases, we just leave the records and prepared
|
||||||
* pg_dist_transaction snapshot. Any prepared transactions that appear
|
* transactions for the next call to recover_prepared_transactions
|
||||||
* in this set, but not in pendingTransactionSet, may have been created
|
* and skip them here.
|
||||||
* before the pg_dist_transaction snapshot. We skip deleting the records
|
|
||||||
* for those transactions since there are prepared transactions that
|
|
||||||
* need to be commit, but we cannot commit them now, since they might
|
|
||||||
* still be getting committed and committing them twice would result in
|
|
||||||
* failures.
|
|
||||||
*
|
|
||||||
* In this case, we just leave the records and prepared transactions
|
|
||||||
* for the next call to recover_prepared_transactions.
|
|
||||||
*/
|
*/
|
||||||
hash_search(recheckTransactionSet, transactionName, HASH_FIND,
|
|
||||||
&foundNewPreparedTransaction);
|
|
||||||
|
|
||||||
if (foundNewPreparedTransaction)
|
|
||||||
{
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
/*
|
/*
|
||||||
* There is a recovery record and definitely no prepared transaction, it
|
* We found a recovery record without any prepared transaction. It
|
||||||
* must have been committed immediately, so it's safe to delete the
|
* must have already been committed, so it's safe to delete the
|
||||||
* recovery record.
|
* recovery record.
|
||||||
|
*
|
||||||
|
* Transactions that started after we observed pendingTransactionSet,
|
||||||
|
* but successfully committed their prepared transactions before
|
||||||
|
* ActiveDistributedTransactionNumbers are indistinguishable from
|
||||||
|
* transactions that committed at an earlier time, in which case it's
|
||||||
|
* safe delete the recovery record as well.
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue