From c70cf963c42627ff4e7b8065dd190c8fc3677aa1 Mon Sep 17 00:00:00 2001 From: Jelte Fennema Date: Thu, 2 Feb 2023 16:05:34 +0100 Subject: [PATCH] Also reset transactions at connection shutdown (#6685) In #6314 I refactored the connection cleanup to be simpler to understand and use. However, by doing so I introduced a use-after-free possibility (that valgrind luckily picked up): In the `ShouldShutdownConnection` path of `AfterXactHostConnectionHandling` we free connections without removing the `transactionNode` from the dlist that it might be part of. Before the refactoring this wasn't a problem, because the dlist would be completely reset quickly after in `ResetGlobalVariables` (without reading or writing the dlist entries). The refactoring changed this by moving the `dlist_delete` call to `ResetRemoteTransaction`, which in turn was called in the `!ShouldShutdownConnection` path of `AfterXactHostConnectionHandling`. Thus this `!ShouldShutdownConnection` path would now delete from the `dlist`, but the `ShouldShutdownConnection` path would not. Thus to remove itself the deleting path would sometimes update nodes in the list that were freed right before. There's two ways of fixing this: 1. Call `dlist_delete` from **both** of paths. 2. Call `dlist_delete` from **neither** of the paths. This commit implements the second approach, and #6684 implements the first. We need to choose which approach we prefer. To make calling `dlist_delete` from both paths actually work, we also need to use a slightly different check to determine if we need to call dlist_delete. Various regression tests showed that there can be cases where the `transactionState` is something else than `REMOTE_TRANS_NOT_STARTED` but the connection was not added to the `InProgressTransactions` list One example of such a case is when running `TransactionStateMachine` without calling `StartRemoteTransactionBegin` beforehand. In those cases the connection won't be added to `InProgressTransactions`, but the `transactionState` is changed to `REMOTE_TRANS_SENT_COMMAND`. Sidenote: This bug already existed in 11.1, but valgrind didn't catch it back then. My guess is that this happened because #6314 was merged after the initial release branch was cut. Fixes #6638 (cherry picked from commit f061dbb253d27bb7f16b0f43665d584a23e25f44) --- src/backend/distributed/connection/connection_management.c | 3 +++ src/backend/distributed/transaction/remote_transaction.c | 5 ++++- src/include/distributed/connection_management.h | 6 +++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/backend/distributed/connection/connection_management.c b/src/backend/distributed/connection/connection_management.c index 12b5da547..0597b5537 100644 --- a/src/backend/distributed/connection/connection_management.c +++ b/src/backend/distributed/connection/connection_management.c @@ -1449,6 +1449,9 @@ AfterXactHostConnectionHandling(ConnectionHashEntry *entry, bool isCommit) { ShutdownConnection(connection); + /* remove from transactionlist before free-ing */ + ResetRemoteTransaction(connection); + /* unlink from list */ dlist_delete(iter.cur); diff --git a/src/backend/distributed/transaction/remote_transaction.c b/src/backend/distributed/transaction/remote_transaction.c index 59a509507..02474c8bc 100644 --- a/src/backend/distributed/transaction/remote_transaction.c +++ b/src/backend/distributed/transaction/remote_transaction.c @@ -66,6 +66,7 @@ StartRemoteTransactionBegin(struct MultiConnection *connection) /* remember transaction as being in-progress */ dlist_push_tail(&InProgressTransactions, &connection->transactionNode); + connection->transactionInProgress = true; transaction->transactionState = REMOTE_TRANS_STARTING; @@ -760,11 +761,13 @@ ResetRemoteTransaction(struct MultiConnection *connection) RemoteTransaction *transaction = &connection->remoteTransaction; /* unlink from list of open transactions, if necessary */ - if (transaction->transactionState != REMOTE_TRANS_NOT_STARTED) + if (connection->transactionInProgress) { /* XXX: Should we error out for a critical transaction? */ dlist_delete(&connection->transactionNode); + connection->transactionInProgress = false; + memset(&connection->transactionNode, 0, sizeof(connection->transactionNode)); } /* just reset the entire state, relying on 0 being invalid/false */ diff --git a/src/include/distributed/connection_management.h b/src/include/distributed/connection_management.h index b64785ec7..02ad3b2dd 100644 --- a/src/include/distributed/connection_management.h +++ b/src/include/distributed/connection_management.h @@ -189,8 +189,12 @@ typedef struct MultiConnection /* information about the associated remote transaction */ RemoteTransaction remoteTransaction; - /* membership in list of in-progress transactions */ + /* + * membership in list of in-progress transactions and a flag to indicate + * that the connection was added to this list + */ dlist_node transactionNode; + bool transactionInProgress; /* list of all placements referenced by this connection */ dlist_head referencedPlacements;