mirror of https://github.com/citusdata/citus.git
Implementation of a dedicated maintenance quota
parent
4d775ab361
commit
76d10cc413
|
@ -61,8 +61,8 @@ static MultiConnection * FindAvailableConnection(dlist_head *connections, uint32
|
|||
static void ErrorIfMultipleMetadataConnectionExists(dlist_head *connections);
|
||||
static void FreeConnParamsHashEntryFields(ConnParamsHashEntry *entry);
|
||||
static void AfterXactHostConnectionHandling(ConnectionHashEntry *entry, bool isCommit);
|
||||
static bool ShouldShutdownConnection(MultiConnection *connection, const int
|
||||
cachedConnectionCount);
|
||||
static bool ShouldShutdownConnection(MultiConnection *connection,
|
||||
const int cachedConnectionCount);
|
||||
static bool RemoteTransactionIdle(MultiConnection *connection);
|
||||
static int EventSetSizeForConnectionList(List *connections);
|
||||
|
||||
|
@ -427,10 +427,14 @@ StartNodeUserDatabaseConnection(uint32 flags, const char *hostname, int32 port,
|
|||
ResetShardPlacementAssociation(connection);
|
||||
|
||||
|
||||
if ((flags & REQUIRE_METADATA_CONNECTION))
|
||||
if (flags & REQUIRE_METADATA_CONNECTION)
|
||||
{
|
||||
connection->useForMetadataOperations = true;
|
||||
}
|
||||
else if (flags & REQUIRE_MAINTENANCE_CONNECTION)
|
||||
{
|
||||
connection->useForMaintenanceOperations = true;
|
||||
}
|
||||
|
||||
/* fully initialized the connection, record it */
|
||||
connection->initializationState = POOL_STATE_INITIALIZED;
|
||||
|
@ -1194,7 +1198,10 @@ CitusPQFinish(MultiConnection *connection)
|
|||
/* behave idempotently, there is no guarantee that CitusPQFinish() is called only once */
|
||||
if (connection->initializationState >= POOL_STATE_COUNTER_INCREMENTED)
|
||||
{
|
||||
DecrementSharedConnectionCounter(connection->hostname, connection->port);
|
||||
int sharedCounterFlags = (connection->useForMaintenanceOperations)
|
||||
? MAINTENANCE_CONNECTION
|
||||
: 0;
|
||||
DecrementSharedConnectionCounter(sharedCounterFlags, connection->hostname, connection->port);
|
||||
connection->initializationState = POOL_STATE_NOT_INITIALIZED;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -240,7 +240,8 @@ DeallocateReservedConnections(void)
|
|||
* We have not used this reservation, make sure to clean-up from
|
||||
* the shared memory as well.
|
||||
*/
|
||||
DecrementSharedConnectionCounter(entry->key.hostname, entry->key.port);
|
||||
int sharedCounterFlags = 0;
|
||||
DecrementSharedConnectionCounter(sharedCounterFlags, entry->key.hostname, entry->key.port);
|
||||
|
||||
/* for completeness, set it to true */
|
||||
entry->usedReservation = true;
|
||||
|
|
|
@ -84,7 +84,8 @@ typedef struct SharedWorkerNodeConnStatsHashEntry
|
|||
{
|
||||
SharedWorkerNodeConnStatsHashKey key;
|
||||
|
||||
int count;
|
||||
int regularConnectionsCount;
|
||||
int maintenanceConnectionsCount;
|
||||
} SharedWorkerNodeConnStatsHashEntry;
|
||||
|
||||
/* hash entry for per database on worker stats */
|
||||
|
@ -141,9 +142,7 @@ static uint32 SharedConnectionHashHash(const void *key, Size keysize);
|
|||
static int SharedConnectionHashCompare(const void *a, const void *b, Size keysize);
|
||||
static uint32 SharedWorkerNodeDatabaseHashHash(const void *key, Size keysize);
|
||||
static int SharedWorkerNodeDatabaseHashCompare(const void *a, const void *b, Size keysize);
|
||||
static bool IsConnectionToLocalNode(SharedWorkerNodeConnStatsHashKey *connKey);
|
||||
static bool isConnectionSlotAvailable(uint32 flags, SharedWorkerNodeConnStatsHashKey *connKey,
|
||||
const SharedWorkerNodeConnStatsHashEntry *connectionEntry);
|
||||
static bool isConnectionThrottlingDisabled();
|
||||
static bool
|
||||
IncrementSharedConnectionCounterInternal(uint32 externalFlags, bool checkLimits, const char *hostname, int port,
|
||||
Oid database);
|
||||
|
@ -152,7 +151,7 @@ static SharedWorkerNodeDatabaseConnStatsHashKey PrepareWorkerNodeDatabaseHashKey
|
|||
int port,
|
||||
Oid database);
|
||||
static void
|
||||
DecrementSharedConnectionCounterInternal(const char *hostname, int port);
|
||||
DecrementSharedConnectionCounterInternal(uint32 externalFlags, const char *hostname, int port);
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(citus_remote_connection_stats);
|
||||
|
@ -316,9 +315,8 @@ WaitLoopForSharedConnection(uint32 flags, const char *hostname, int port)
|
|||
bool
|
||||
TryToIncrementSharedConnectionCounter(uint32 flags, const char *hostname, int port)
|
||||
{
|
||||
if (GetMaxSharedPoolSize() == DISABLE_CONNECTION_THROTTLING)
|
||||
if (isConnectionThrottlingDisabled())
|
||||
{
|
||||
/* connection throttling disabled */
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -334,7 +332,11 @@ TryToIncrementSharedConnectionCounter(uint32 flags, const char *hostname, int po
|
|||
return true;
|
||||
}
|
||||
|
||||
return IncrementSharedConnectionCounterInternal(flags, true, hostname, port, MyDatabaseId);
|
||||
return IncrementSharedConnectionCounterInternal(flags,
|
||||
true,
|
||||
hostname,
|
||||
port,
|
||||
MyDatabaseId);
|
||||
}
|
||||
|
||||
|
||||
|
@ -345,13 +347,16 @@ TryToIncrementSharedConnectionCounter(uint32 flags, const char *hostname, int po
|
|||
void
|
||||
IncrementSharedConnectionCounter(uint32 flags, const char *hostname, int port)
|
||||
{
|
||||
if (MaxSharedPoolSize == DISABLE_CONNECTION_THROTTLING)
|
||||
if (isConnectionThrottlingDisabled())
|
||||
{
|
||||
/* connection throttling disabled */
|
||||
return;
|
||||
}
|
||||
|
||||
IncrementSharedConnectionCounterInternal(flags, false, hostname, port, MyDatabaseId);
|
||||
IncrementSharedConnectionCounterInternal(flags,
|
||||
false,
|
||||
hostname,
|
||||
port,
|
||||
MyDatabaseId);
|
||||
}
|
||||
|
||||
|
||||
|
@ -392,7 +397,8 @@ IncrementSharedConnectionCounterInternal(uint32 externalFlags,
|
|||
if (!workerNodeEntryFound)
|
||||
{
|
||||
/* we successfully allocated the entry for the first time, so initialize it */
|
||||
workerNodeConnectionEntry->count = 0;
|
||||
workerNodeConnectionEntry->regularConnectionsCount = 0;
|
||||
workerNodeConnectionEntry->maintenanceConnectionsCount = 0;
|
||||
}
|
||||
|
||||
/* Initialize SharedWorkerNodeDatabaseConnStatsHash the same way */
|
||||
|
@ -418,47 +424,25 @@ IncrementSharedConnectionCounterInternal(uint32 externalFlags,
|
|||
|
||||
/* Increment counter if a slot available */
|
||||
bool connectionSlotAvailable = true;
|
||||
connectionSlotAvailable =
|
||||
!checkLimits ||
|
||||
isConnectionSlotAvailable(externalFlags,
|
||||
&workerNodeKey,
|
||||
workerNodeConnectionEntry);
|
||||
|
||||
if (connectionSlotAvailable)
|
||||
/* When GetSharedPoolSizeMaintenanceQuota() == 0, treat maintenance connections as regular */
|
||||
bool maintenanceConnection = (GetSharedPoolSizeMaintenanceQuota() > 0 && (externalFlags & MAINTENANCE_CONNECTION));
|
||||
if (checkLimits)
|
||||
{
|
||||
workerNodeConnectionEntry->count += 1;
|
||||
workerNodeDatabaseEntry->count += 1;
|
||||
}
|
||||
|
||||
UnLockConnectionSharedMemory();
|
||||
|
||||
return connectionSlotAvailable;
|
||||
}
|
||||
|
||||
/*
 * IsConnectionToLocalNode looks up the worker node for the hostname and port
 * stored in connKey and returns true when such a node is found and its
 * groupId equals the value returned by GetLocalGroupId().
 */
static bool
IsConnectionToLocalNode(SharedWorkerNodeConnStatsHashKey *connKey)
{
	WorkerNode *node = FindWorkerNode(connKey->hostname, connKey->port);

	if (!node)
	{
		/* no worker node found for this host/port */
		return false;
	}

	return node->groupId == GetLocalGroupId();
}
|
||||
|
||||
|
||||
static bool isConnectionSlotAvailable(uint32 flags,
|
||||
SharedWorkerNodeConnStatsHashKey *connKey,
|
||||
const SharedWorkerNodeConnStatsHashEntry *connectionEntry)
|
||||
{
|
||||
bool connectionSlotAvailable = true;
|
||||
bool connectionToLocalNode = IsConnectionToLocalNode(connKey);
|
||||
/*
|
||||
* Maintenance connections may use the full pool capacity; others must leave the maintenance quota free.
|
||||
*/
|
||||
int maintenanceConnectionsQuota =
|
||||
(flags & MAINTENANCE_CONNECTION)
|
||||
? 0
|
||||
: (int) floor((double) GetMaxSharedPoolSize() * GetSharedPoolSizeMaintenanceQuota());
|
||||
if (connectionToLocalNode)
|
||||
{
|
||||
bool remoteConnectionsForLocalQueriesDisabled =
|
||||
GetLocalSharedPoolSize() == DISABLE_REMOTE_CONNECTIONS_FOR_LOCAL_QUERIES;
|
||||
WorkerNode *workerNode = FindWorkerNode(hostname, port);
|
||||
bool connectionToLocalNode = workerNode && (workerNode->groupId == GetLocalGroupId());
|
||||
int currentConnectionsLimit = connectionToLocalNode
|
||||
? GetLocalSharedPoolSize()
|
||||
: GetMaxSharedPoolSize();
|
||||
int maintenanceQuota = (int) ceil((double) currentConnectionsLimit * GetSharedPoolSizeMaintenanceQuota());
|
||||
/* Connections limit should never go below 1 */
|
||||
currentConnectionsLimit = Max(maintenanceConnection
|
||||
? maintenanceQuota
|
||||
: currentConnectionsLimit - maintenanceQuota, 1);
|
||||
int currentConnectionsCount = maintenanceConnection
|
||||
? workerNodeConnectionEntry->maintenanceConnectionsCount
|
||||
: workerNodeConnectionEntry->regularConnectionsCount;
|
||||
bool remoteNodeLimitExceeded = currentConnectionsCount + 1 > currentConnectionsLimit;
|
||||
/*
|
||||
* For local nodes, solely relying on citus.max_shared_pool_size or
|
||||
* max_connections might not be sufficient. The former gives us
|
||||
|
@ -471,21 +455,33 @@ static bool isConnectionSlotAvailable(uint32 flags,
|
|||
* a reasonable pace. The latter limit typically kicks in when the database
|
||||
* is issued lots of concurrent sessions at the same time, such as benchmarks.
|
||||
*/
|
||||
bool localConnectionLimitExceeded =
|
||||
GetExternalClientBackendCount() + 1 > GetLocalSharedPoolSize() ||
|
||||
connectionEntry->count + 1 > GetLocalSharedPoolSize();
|
||||
if (remoteConnectionsForLocalQueriesDisabled || localConnectionLimitExceeded)
|
||||
bool localNodeLimitExceeded =
|
||||
connectionToLocalNode &&
|
||||
(GetLocalSharedPoolSize() == DISABLE_REMOTE_CONNECTIONS_FOR_LOCAL_QUERIES ||
|
||||
GetExternalClientBackendCount() + 1 > currentConnectionsLimit);
|
||||
if (remoteNodeLimitExceeded || localNodeLimitExceeded)
|
||||
{
|
||||
connectionSlotAvailable = false;
|
||||
}
|
||||
}
|
||||
else if (connectionEntry->count + 1 > (GetMaxSharedPoolSize() - maintenanceConnectionsQuota))
|
||||
{
|
||||
connectionSlotAvailable = false;
|
||||
}
|
||||
return connectionSlotAvailable;
|
||||
}
|
||||
|
||||
if (connectionSlotAvailable)
|
||||
{
|
||||
if (maintenanceConnection)
|
||||
{
|
||||
workerNodeConnectionEntry->maintenanceConnectionsCount += 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
workerNodeConnectionEntry->regularConnectionsCount += 1;
|
||||
}
|
||||
workerNodeDatabaseEntry->count += 1;
|
||||
}
|
||||
|
||||
UnLockConnectionSharedMemory();
|
||||
|
||||
return connectionSlotAvailable;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
|
@ -493,28 +489,23 @@ static bool isConnectionSlotAvailable(uint32 flags,
|
|||
* for the given hostname and port for the given count.
|
||||
*/
|
||||
void
|
||||
DecrementSharedConnectionCounter(const char *hostname, int port)
|
||||
DecrementSharedConnectionCounter(uint32 externalFlags, const char *hostname, int port)
|
||||
{
|
||||
/*
|
||||
* Do not call GetMaxSharedPoolSize() here, since it may read from
|
||||
* the catalog and we may be in the process exit handler.
|
||||
*/
|
||||
if (MaxSharedPoolSize == DISABLE_CONNECTION_THROTTLING)
|
||||
if (isConnectionThrottlingDisabled())
|
||||
{
|
||||
/* connection throttling disabled */
|
||||
return;
|
||||
}
|
||||
|
||||
LockConnectionSharedMemory(LW_EXCLUSIVE);
|
||||
|
||||
DecrementSharedConnectionCounterInternal(hostname, port);
|
||||
DecrementSharedConnectionCounterInternal(externalFlags, hostname, port);
|
||||
|
||||
UnLockConnectionSharedMemory();
|
||||
WakeupWaiterBackendsForSharedConnection();
|
||||
}
|
||||
|
||||
static void
|
||||
DecrementSharedConnectionCounterInternal(const char *hostname, int port)
|
||||
DecrementSharedConnectionCounterInternal(uint32 externalFlags, const char *hostname, int port)
|
||||
{
|
||||
bool workerNodeEntryFound = false;
|
||||
SharedWorkerNodeConnStatsHashKey workerNodeKey = PrepareWorkerNodeHashKey(hostname, port);
|
||||
|
@ -530,10 +521,17 @@ DecrementSharedConnectionCounterInternal(const char *hostname, int port)
|
|||
}
|
||||
|
||||
/* we should never go below 0 */
|
||||
Assert(workerNodeEntry->count > 0);
|
||||
Assert(workerNodeEntry->regularConnectionsCount > 0 || workerNodeEntry->maintenanceConnectionsCount > 0);
|
||||
|
||||
|
||||
workerNodeEntry->count -= 1;
|
||||
/* When GetSharedPoolSizeMaintenanceQuota() == 0, treat maintenance connections as regular */
|
||||
if ((GetSharedPoolSizeMaintenanceQuota() > 0 && (externalFlags & MAINTENANCE_CONNECTION)))
|
||||
{
|
||||
workerNodeEntry->maintenanceConnectionsCount -= 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
workerNodeEntry->regularConnectionsCount -= 1;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
|
@ -543,7 +541,7 @@ DecrementSharedConnectionCounterInternal(const char *hostname, int port)
|
|||
* not busy, and given the default value of MaxCachedConnectionsPerWorker = 1,
|
||||
* we're unlikely to trigger this often.
|
||||
*/
|
||||
if (workerNodeEntry->count == 0)
|
||||
if (workerNodeEntry->regularConnectionsCount == 0 && workerNodeEntry->maintenanceConnectionsCount == 0)
|
||||
{
|
||||
hash_search(SharedWorkerNodeConnStatsHash, &workerNodeKey, HASH_REMOVE, NULL);
|
||||
}
|
||||
|
@ -920,3 +918,12 @@ SharedWorkerNodeDatabaseHashCompare(const void *a, const void *b, Size keysize)
|
|||
ca->workerNodeKey.port != cb->workerNodeKey.port ||
|
||||
ca->database != cb->database;
|
||||
}
|
||||
|
||||
static bool isConnectionThrottlingDisabled()
|
||||
{
|
||||
/*
|
||||
* Do not call Get*PoolSize() functions here, since it may read from
|
||||
* the catalog and we may be in the process exit handler.
|
||||
*/
|
||||
return MaxSharedPoolSize == DISABLE_CONNECTION_THROTTLING;
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
/* Config variables managed via guc.c */
|
||||
char *WorkerListFileName;
|
||||
int MaxWorkerNodesTracked = 2048; /* determines the worker node hash table size */
|
||||
int DatabasesPerWorker = 10; /* determines the per-worker database hash table size */
|
||||
int DatabasesPerWorker = 1; /* determines the per-worker database hash table size */
|
||||
|
||||
|
||||
/* Local functions forward declarations */
|
||||
|
|
|
@ -159,7 +159,7 @@ RecoverWorkerTransactions(WorkerNode *workerNode)
|
|||
|
||||
bool recoveryFailed = false;
|
||||
|
||||
int connectionFlags = 0;
|
||||
int connectionFlags = WAIT_FOR_CONNECTION | REQUIRE_MAINTENANCE_CONNECTION;
|
||||
MultiConnection *connection = GetNodeConnection(connectionFlags, nodeName, nodePort);
|
||||
if (connection->pgConn == NULL || PQstatus(connection->pgConn) != CONNECTION_OK)
|
||||
{
|
||||
|
|
|
@ -127,8 +127,8 @@ enum MultiConnectionMode
|
|||
|
||||
/*
|
||||
* This flag specifies that connection is required for maintenance operations, e.g.
|
||||
* transaction recovery, distributed deadlock detection. Such connections may have
|
||||
* special treatment, like dedicated share of pool, etc.
|
||||
* transaction recovery, distributed deadlock detection. Such connections have
|
||||
* a reserved quota of the MaxSharedPoolSize.
|
||||
*/
|
||||
REQUIRE_MAINTENANCE_CONNECTION = 1 << 9
|
||||
};
|
||||
|
@ -230,6 +230,9 @@ typedef struct MultiConnection
|
|||
/* replication option */
|
||||
bool requiresReplication;
|
||||
|
||||
/* See REQUIRE_MAINTENANCE_CONNECTION */
|
||||
bool useForMaintenanceOperations;
|
||||
|
||||
MultiConnectionStructInitializationState initializationState;
|
||||
} MultiConnection;
|
||||
|
||||
|
|
|
@ -41,7 +41,7 @@ extern double GetSharedPoolSizeMaintenanceQuota(void);
|
|||
extern int GetLocalSharedPoolSize(void);
|
||||
extern bool TryToIncrementSharedConnectionCounter(uint32 flags, const char *hostname, int port);
|
||||
extern void WaitLoopForSharedConnection(uint32 flags, const char *hostname, int port);
|
||||
extern void DecrementSharedConnectionCounter(const char *hostname, int port);
|
||||
extern void DecrementSharedConnectionCounter(uint32 externalFlags, const char *hostname, int port);
|
||||
extern void IncrementSharedConnectionCounter(uint32 flags, const char *hostname, int port);
|
||||
extern int AdaptiveConnectionManagementFlag(bool connectToLocalNode, int
|
||||
activeConnectionCount);
|
||||
|
|
|
@ -224,7 +224,7 @@ BEGIN;
|
|||
COMMIT;
|
||||
-- pg_sleep forces almost 1 connection per placement
|
||||
-- now, some of the optional connections would be skipped,
|
||||
-- and only 5 connections are used per node
|
||||
-- and only 4 connections (5 minus the maintenance quota) are used per node
|
||||
BEGIN;
|
||||
SET LOCAL citus.max_adaptive_executor_pool_size TO 16;
|
||||
with cte_1 as (select pg_sleep(0.1) is null, a from test) SELECT a from cte_1 ORDER By 1 LIMIT 1;
|
||||
|
@ -244,8 +244,8 @@ BEGIN;
|
|||
hostname, port;
|
||||
connection_count_to_node
|
||||
---------------------------------------------------------------------
|
||||
5
|
||||
5
|
||||
4
|
||||
4
|
||||
(2 rows)
|
||||
|
||||
COMMIT;
|
||||
|
@ -382,8 +382,8 @@ COPY test FROM PROGRAM 'seq 32';
|
|||
hostname, port;
|
||||
connection_count_to_node
|
||||
---------------------------------------------------------------------
|
||||
3
|
||||
3
|
||||
2
|
||||
2
|
||||
(2 rows)
|
||||
|
||||
ROLLBACK;
|
||||
|
@ -404,7 +404,7 @@ BEGIN;
|
|||
hostname, port;
|
||||
connection_count_to_node
|
||||
---------------------------------------------------------------------
|
||||
3
|
||||
2
|
||||
1
|
||||
(2 rows)
|
||||
|
||||
|
@ -423,7 +423,7 @@ COPY test FROM STDIN;
|
|||
hostname, port;
|
||||
connection_count_to_node
|
||||
---------------------------------------------------------------------
|
||||
3
|
||||
2
|
||||
1
|
||||
(2 rows)
|
||||
|
||||
|
@ -450,7 +450,7 @@ BEGIN;
|
|||
hostname, port;
|
||||
connection_count_to_node
|
||||
---------------------------------------------------------------------
|
||||
3
|
||||
2
|
||||
(1 row)
|
||||
|
||||
-- in this second COPY, we access the same node but different shards
|
||||
|
@ -468,7 +468,7 @@ COPY test FROM STDIN;
|
|||
hostname, port;
|
||||
connection_count_to_node
|
||||
---------------------------------------------------------------------
|
||||
3
|
||||
2
|
||||
1
|
||||
(2 rows)
|
||||
|
||||
|
|
|
@ -146,7 +146,7 @@ COMMIT;
|
|||
|
||||
-- pg_sleep forces almost 1 connection per placement
|
||||
-- now, some of the optional connections would be skipped,
|
||||
-- and only 5 connections are used per node
|
||||
-- and only 4 connections (5 minus the maintenance quota) are used per node
|
||||
BEGIN;
|
||||
SET LOCAL citus.max_adaptive_executor_pool_size TO 16;
|
||||
with cte_1 as (select pg_sleep(0.1) is null, a from test) SELECT a from cte_1 ORDER By 1 LIMIT 1;
|
||||
|
|
Loading…
Reference in New Issue