From bb3a96eacb1c89c35aa5b4715e403727b8989244 Mon Sep 17 00:00:00 2001 From: Marco Slot Date: Mon, 20 May 2019 17:30:36 +0200 Subject: [PATCH] Cache a configurable number of connections at xact end --- .../connection/connection_management.c | 26 ++++++++----------- .../executor/multi_router_executor.c | 4 +-- src/backend/distributed/shared_library_init.c | 14 ++++++++++ .../test/run_from_same_connection.c | 3 ++- .../transaction/transaction_recovery.c | 2 +- .../distributed/connection_management.h | 16 ++++++------ 6 files changed, 38 insertions(+), 27 deletions(-) diff --git a/src/backend/distributed/connection/connection_management.c b/src/backend/distributed/connection/connection_management.c index a6e23bb5b..47809da8e 100644 --- a/src/backend/distributed/connection/connection_management.c +++ b/src/backend/distributed/connection/connection_management.c @@ -32,6 +32,8 @@ int NodeConnectionTimeout = 5000; +int MaxCachedConnectionsPerWorker = 1; + HTAB *ConnectionHash = NULL; HTAB *ConnParamsHash = NULL; MemoryContext ConnectionContext = NULL; @@ -253,7 +255,6 @@ GetNodeUserDatabaseConnection(uint32 flags, const char *hostname, int32 port, co * * If user or database are NULL, the current session's defaults are used. The * following flags influence connection establishment behaviour: - * - SESSION_LIFESPAN - the connection should persist after transaction end * - FORCE_NEW_CONNECTION - a new connection is required * * The returned connection has only been initiated, not fully @@ -322,11 +323,6 @@ StartNodeUserDatabaseConnection(uint32 flags, const char *hostname, int32 port, connection = FindAvailableConnection(entry->connections, flags); if (connection) { - if (flags & SESSION_LIFESPAN) - { - connection->sessionLifespan = true; - } - return connection; } } @@ -340,11 +336,6 @@ StartNodeUserDatabaseConnection(uint32 flags, const char *hostname, int32 port, dlist_push_tail(entry->connections, &connection->connectionNode); ResetShardPlacementAssociation(connection); - if (flags & SESSION_LIFESPAN) - { - connection->sessionLifespan = true; - } - return connection; } @@ -374,8 +365,9 @@ FindAvailableConnection(dlist_head *connections, uint32 flags) /* - * CloseNodeConnectionsAfterTransaction sets the sessionLifespan flag of the connections - * to a particular node as false. This is mainly used when a worker leaves the cluster. + * CloseNodeConnectionsAfterTransaction sets the forceClose flag of the connections + * to a particular node as true such that the connections are no longer cached. This + * is mainly used when a worker leaves the cluster. */ void CloseNodeConnectionsAfterTransaction(char *nodeName, int nodePort) @@ -400,7 +392,7 @@ CloseNodeConnectionsAfterTransaction(char *nodeName, int nodePort) MultiConnection *connection = dlist_container(MultiConnection, connectionNode, iter.cur); - connection->sessionLifespan = false; + connection->forceCloseAtTransactionEnd = true; } } } @@ -1002,6 +994,7 @@ static void AfterXactHostConnectionHandling(ConnectionHashEntry *entry, bool isCommit) { dlist_mutable_iter iter; + int cachedConnectionCount = 0; dlist_foreach_modify(iter, entry->connections) { @@ -1023,7 +1016,8 @@ AfterXactHostConnectionHandling(ConnectionHashEntry *entry, bool isCommit) /* * Preserve session lifespan connections if they are still healthy. */ - if (!connection->sessionLifespan || + if (cachedConnectionCount >= MaxCachedConnectionsPerWorker || + connection->forceCloseAtTransactionEnd || PQstatus(connection->pgConn) != CONNECTION_OK || !RemoteTransactionIdle(connection)) { @@ -1044,6 +1038,8 @@ AfterXactHostConnectionHandling(ConnectionHashEntry *entry, bool isCommit) connection->copyBytesWrittenSinceLastFlush = 0; UnclaimConnection(connection); + + cachedConnectionCount++; } } } diff --git a/src/backend/distributed/executor/multi_router_executor.c b/src/backend/distributed/executor/multi_router_executor.c index d3626d53f..dba69fe6f 100644 --- a/src/backend/distributed/executor/multi_router_executor.c +++ b/src/backend/distributed/executor/multi_router_executor.c @@ -924,7 +924,7 @@ ExecuteSingleSelectTask(CitusScanState *scanState, Task *task) bool queryOK = false; bool dontFailOnError = false; int64 currentAffectedTupleCount = 0; - int connectionFlags = SESSION_LIFESPAN; + int connectionFlags = 0; List *placementAccessList = NIL; MultiConnection *connection = NULL; @@ -1262,7 +1262,7 @@ GetModifyConnections(Task *task, bool markCritical) foreach(taskPlacementCell, taskPlacementList) { ShardPlacement *taskPlacement = (ShardPlacement *) lfirst(taskPlacementCell); - int connectionFlags = SESSION_LIFESPAN; + int connectionFlags = 0; MultiConnection *multiConnection = NULL; List *placementAccessList = NIL; ShardPlacementAccess *placementModification = NULL; diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index c453927a5..5239bc7f1 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -745,6 +745,20 @@ RegisterCitusConfigVariables(void) GUC_UNIT_MS, NULL, NULL, NULL); + DefineCustomIntVariable( + "citus.max_cached_conns_per_worker", + gettext_noop("Sets the maximum number of connections to cache per worker."), + gettext_noop("Each backend opens connections to the workers to query the " + "shards. At the end of the transaction, the configurated number " + "of connections is kept open to speed up subsequent commands. " + "Increasing this value will reduce the latency of multi-shard " + "queries, but increases overhead on the workers"), + &MaxCachedConnectionsPerWorker, + 1, 0, INT_MAX, + PGC_USERSET, + GUC_NO_SHOW_ALL, + NULL, NULL, NULL); + DefineCustomIntVariable( "citus.max_assign_task_batch_size", gettext_noop("Sets the maximum number of tasks to assign per round."), diff --git a/src/backend/distributed/test/run_from_same_connection.c b/src/backend/distributed/test/run_from_same_connection.c index d472e1264..92135ab93 100644 --- a/src/backend/distributed/test/run_from_same_connection.c +++ b/src/backend/distributed/test/run_from_same_connection.c @@ -85,6 +85,7 @@ start_session_level_connection_to_node(PG_FUNCTION_ARGS) text *nodeName = PG_GETARG_TEXT_P(0); uint32 nodePort = PG_GETARG_UINT32(1); char *nodeNameString = text_to_cstring(nodeName); + int connectionFlags = 0; CheckCitusVersion(ERROR); @@ -102,7 +103,7 @@ start_session_level_connection_to_node(PG_FUNCTION_ARGS) */ if (singleConnection == NULL) { - singleConnection = GetNodeConnection(SESSION_LIFESPAN, nodeNameString, nodePort); + singleConnection = GetNodeConnection(connectionFlags, nodeNameString, nodePort); allowNonIdleRemoteTransactionOnXactHandling = true; } diff --git a/src/backend/distributed/transaction/transaction_recovery.c b/src/backend/distributed/transaction/transaction_recovery.c index 81746dcd0..8d28dd0b1 100644 --- a/src/backend/distributed/transaction/transaction_recovery.c +++ b/src/backend/distributed/transaction/transaction_recovery.c @@ -167,7 +167,7 @@ RecoverWorkerTransactions(WorkerNode *workerNode) MemoryContext oldContext = NULL; bool recoveryFailed = false; - int connectionFlags = SESSION_LIFESPAN; + int connectionFlags = 0; MultiConnection *connection = GetNodeConnection(connectionFlags, nodeName, nodePort); if (connection->pgConn == NULL || PQstatus(connection->pgConn) != CONNECTION_OK) { diff --git a/src/include/distributed/connection_management.h b/src/include/distributed/connection_management.h index 1cbf8719e..bd5314ca1 100644 --- a/src/include/distributed/connection_management.h +++ b/src/include/distributed/connection_management.h @@ -39,15 +39,12 @@ enum MultiConnectionMode /* force establishment of a new connection */ FORCE_NEW_CONNECTION = 1 << 0, - /* mark returned connection as having session lifespan */ - SESSION_LIFESPAN = 1 << 1, + FOR_DDL = 1 << 1, - FOR_DDL = 1 << 2, - - FOR_DML = 1 << 3, + FOR_DML = 1 << 2, /* open a connection per (co-located set of) placement(s) */ - CONNECTION_PER_PLACEMENT = 1 << 4 + CONNECTION_PER_PLACEMENT = 1 << 3 }; @@ -65,8 +62,8 @@ typedef struct MultiConnection /* underlying libpq connection */ struct pg_conn *pgConn; - /* is the connection intended to be kept after transaction end */ - bool sessionLifespan; + /* force the connection to be closed at the end of the transaction */ + bool forceCloseAtTransactionEnd; /* is the connection currently in use, and shouldn't be used by anything else */ bool claimedExclusively; @@ -130,6 +127,9 @@ typedef struct ConnParamsHashEntry /* maximum duration to wait for connection */ extern int NodeConnectionTimeout; +/* maximum number of connections to cache per worker per session */ +extern int MaxCachedConnectionsPerWorker; + /* parameters used for outbound connections */ extern char *NodeConninfo;