mirror of https://github.com/citusdata/citus.git
- Fix limits check for local nodes
- WIP test_multiple_databases_distributed_deadlock_detection
pull/7286/head
parent
bdc7bead09
commit
4312b0656b
|
@ -461,8 +461,8 @@ IncrementSharedConnectionCounterInternal(uint32 externalFlags,
|
||||||
currentConnectionsCount = workerNodeConnectionEntry->regularConnectionsCount;
|
currentConnectionsCount = workerNodeConnectionEntry->regularConnectionsCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool remoteNodeLimitExceeded = currentConnectionsCount + 1 >
|
bool currentConnectionsLimitExceeded = currentConnectionsCount + 1 >
|
||||||
currentConnectionsLimit;
|
currentConnectionsLimit;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For local nodes, solely relying on citus.max_shared_pool_size or
|
* For local nodes, solely relying on citus.max_shared_pool_size or
|
||||||
|
@ -476,11 +476,11 @@ IncrementSharedConnectionCounterInternal(uint32 externalFlags,
|
||||||
* a reasonable pace. The latter limit typically kicks in when the database
|
* a reasonable pace. The latter limit typically kicks in when the database
|
||||||
* is issued lots of concurrent sessions at the same time, such as benchmarks.
|
* is issued lots of concurrent sessions at the same time, such as benchmarks.
|
||||||
*/
|
*/
|
||||||
bool localNodeLimitExceeded =
|
bool localNodeConnectionsLimitExceeded =
|
||||||
connectionToLocalNode &&
|
connectionToLocalNode &&
|
||||||
(GetLocalSharedPoolSize() == DISABLE_REMOTE_CONNECTIONS_FOR_LOCAL_QUERIES ||
|
(GetLocalSharedPoolSize() == DISABLE_REMOTE_CONNECTIONS_FOR_LOCAL_QUERIES ||
|
||||||
GetExternalClientBackendCount() + 1 > currentConnectionsLimit);
|
GetExternalClientBackendCount() + 1 > GetLocalSharedPoolSize());
|
||||||
if (remoteNodeLimitExceeded || localNodeLimitExceeded)
|
if (currentConnectionsLimitExceeded || localNodeConnectionsLimitExceeded)
|
||||||
{
|
{
|
||||||
connectionSlotAvailable = false;
|
connectionSlotAvailable = false;
|
||||||
}
|
}
|
||||||
|
@ -502,9 +502,10 @@ IncrementSharedConnectionCounterInternal(uint32 externalFlags,
|
||||||
if (IsLoggableLevel(DEBUG4))
|
if (IsLoggableLevel(DEBUG4))
|
||||||
{
|
{
|
||||||
ereport(DEBUG4, errmsg(
|
ereport(DEBUG4, errmsg(
|
||||||
"Incrementing connection counter. "
|
"Incrementing %s connection counter. "
|
||||||
"Current regular connections: %i, maintenance connections: %i. "
|
"Current regular connections: %i, maintenance connections: %i. "
|
||||||
"Connection slot to %s:%i database %i is %s",
|
"Connection slot to %s:%i database %i is %s",
|
||||||
|
maintenanceConnection ? "maintenance" : "regular",
|
||||||
workerNodeConnectionEntry->regularConnectionsCount,
|
workerNodeConnectionEntry->regularConnectionsCount,
|
||||||
workerNodeConnectionEntry->maintenanceConnectionsCount,
|
workerNodeConnectionEntry->maintenanceConnectionsCount,
|
||||||
hostname,
|
hostname,
|
||||||
|
@ -568,7 +569,8 @@ DecrementSharedConnectionCounterInternal(uint32 externalFlags,
|
||||||
Assert(workerNodeConnectionEntry->regularConnectionsCount > 0 ||
|
Assert(workerNodeConnectionEntry->regularConnectionsCount > 0 ||
|
||||||
workerNodeConnectionEntry->maintenanceConnectionsCount > 0);
|
workerNodeConnectionEntry->maintenanceConnectionsCount > 0);
|
||||||
|
|
||||||
if (externalFlags & MAINTENANCE_CONNECTION)
|
bool maintenanceConnection = externalFlags & MAINTENANCE_CONNECTION;
|
||||||
|
if (maintenanceConnection)
|
||||||
{
|
{
|
||||||
workerNodeConnectionEntry->maintenanceConnectionsCount -= 1;
|
workerNodeConnectionEntry->maintenanceConnectionsCount -= 1;
|
||||||
}
|
}
|
||||||
|
@ -580,9 +582,10 @@ DecrementSharedConnectionCounterInternal(uint32 externalFlags,
|
||||||
if (IsLoggableLevel(DEBUG4))
|
if (IsLoggableLevel(DEBUG4))
|
||||||
{
|
{
|
||||||
ereport(DEBUG4, errmsg(
|
ereport(DEBUG4, errmsg(
|
||||||
"Decrementing connection counter. "
|
"Decrementing %s connection counter. "
|
||||||
"Current regular connections: %i, maintenance connections: %i. "
|
"Current regular connections: %i, maintenance connections: %i. "
|
||||||
"Connection slot to %s:%i database %i is released",
|
"Connection slot to %s:%i database %i is released",
|
||||||
|
maintenanceConnection ? "maintenance" : "regular",
|
||||||
workerNodeConnectionEntry->regularConnectionsCount,
|
workerNodeConnectionEntry->regularConnectionsCount,
|
||||||
workerNodeConnectionEntry->maintenanceConnectionsCount,
|
workerNodeConnectionEntry->maintenanceConnectionsCount,
|
||||||
hostname,
|
hostname,
|
||||||
|
|
|
@ -977,6 +977,14 @@ class Postgres(QueryRunner):
|
||||||
for config in configs:
|
for config in configs:
|
||||||
self.sql(f"alter system set {config}")
|
self.sql(f"alter system set {config}")
|
||||||
|
|
||||||
|
def reset_configuration(self, *configs):
|
||||||
|
"""Reset specific Postgres settings using ALTER SYSTEM RESET
|
||||||
|
NOTE: after configuring a call to reload or restart is needed for the
|
||||||
|
settings to become effective.
|
||||||
|
"""
|
||||||
|
for config in configs:
|
||||||
|
self.sql(f"alter system reset {config}")
|
||||||
|
|
||||||
def log_handle(self):
|
def log_handle(self):
|
||||||
"""Returns the opened logfile at the current end of the log
|
"""Returns the opened logfile at the current end of the log
|
||||||
|
|
||||||
|
|
|
@ -10,29 +10,38 @@ DATABASES_NUMBER = 40
|
||||||
async def test_multiple_databases_distributed_deadlock_detection(cluster):
|
async def test_multiple_databases_distributed_deadlock_detection(cluster):
|
||||||
# Disable maintenance on all nodes
|
# Disable maintenance on all nodes
|
||||||
for node in cluster.nodes:
|
for node in cluster.nodes:
|
||||||
node.sql("ALTER SYSTEM SET citus.recover_2pc_interval TO '-1';")
|
node.configure(
|
||||||
node.sql("ALTER SYSTEM SET citus.distributed_deadlock_detection_factor = '-1';")
|
"citus.recover_2pc_interval = '-1'",
|
||||||
node.sql("ALTER SYSTEM SET citus.max_maintenance_shared_pool_size = 10;")
|
"citus.distributed_deadlock_detection_factor = '-1'",
|
||||||
node.sql("SELECT pg_reload_conf();")
|
"citus.max_maintenance_shared_pool_size = 5",
|
||||||
|
# "log_min_messages = 'debug4'",
|
||||||
|
# "citus.main_db='postgres'"
|
||||||
|
)
|
||||||
|
node.restart()
|
||||||
|
|
||||||
# Prepare database names for test
|
# Prepare database names for test
|
||||||
db_names = [f'db{db_index}' for db_index in range(1, DATABASES_NUMBER + 1)]
|
db_names = [f"db{db_index}" for db_index in range(1, DATABASES_NUMBER + 1)]
|
||||||
|
|
||||||
# Create and configure databases
|
# Create and configure databases
|
||||||
for db_name in db_names:
|
for db_name in db_names:
|
||||||
nodes = cluster.workers + [cluster.coordinator]
|
nodes = cluster.workers + [cluster.coordinator]
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
node.sql(f'CREATE DATABASE {db_name}')
|
node.sql(f"CREATE DATABASE {db_name}")
|
||||||
with node.cur(dbname=db_name) as node_cursor:
|
with node.cur(dbname=db_name) as node_cursor:
|
||||||
node_cursor.execute("CREATE EXTENSION citus;")
|
node_cursor.execute("CREATE EXTENSION citus;")
|
||||||
if node == cluster.coordinator:
|
if node == cluster.coordinator:
|
||||||
for worker in cluster.workers:
|
for worker in cluster.workers:
|
||||||
node_cursor.execute(f"SELECT citus_add_node('localhost', {worker.port});")
|
node_cursor.execute(
|
||||||
node_cursor.execute("""
|
"SELECT pg_catalog.citus_add_node(%s, %s)",
|
||||||
|
(worker.host, worker.port),
|
||||||
|
)
|
||||||
|
node_cursor.execute(
|
||||||
|
"""
|
||||||
CREATE TABLE public.deadlock_detection_test (user_id int UNIQUE, some_val int);
|
CREATE TABLE public.deadlock_detection_test (user_id int UNIQUE, some_val int);
|
||||||
SELECT create_distributed_table('public.deadlock_detection_test', 'user_id');
|
SELECT create_distributed_table('public.deadlock_detection_test', 'user_id');
|
||||||
INSERT INTO public.deadlock_detection_test SELECT i, i FROM generate_series(1,2) i;
|
INSERT INTO public.deadlock_detection_test SELECT i, i FROM generate_series(1,2) i;
|
||||||
""")
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
print("Setup is done")
|
print("Setup is done")
|
||||||
|
|
||||||
|
@ -40,26 +49,43 @@ async def test_multiple_databases_distributed_deadlock_detection(cluster):
|
||||||
"""Function to prepare a deadlock query in a given database"""
|
"""Function to prepare a deadlock query in a given database"""
|
||||||
# Init connections and store for later commits
|
# Init connections and store for later commits
|
||||||
if run_on_coordinator:
|
if run_on_coordinator:
|
||||||
first_connection = await cluster.coordinator.aconn(dbname=db_name, autocommit=False)
|
first_connection = await cluster.coordinator.aconn(
|
||||||
|
dbname=db_name, autocommit=False
|
||||||
|
)
|
||||||
first_cursor = first_connection.cursor()
|
first_cursor = first_connection.cursor()
|
||||||
second_connection = await cluster.coordinator.aconn(dbname=db_name, autocommit=False)
|
second_connection = await cluster.coordinator.aconn(
|
||||||
|
dbname=db_name, autocommit=False
|
||||||
|
)
|
||||||
second_cursor = second_connection.cursor()
|
second_cursor = second_connection.cursor()
|
||||||
else:
|
else:
|
||||||
first_connection = await cluster.workers[0].aconn(dbname=db_name, autocommit=False)
|
first_connection = await cluster.workers[0].aconn(
|
||||||
|
dbname=db_name, autocommit=False
|
||||||
|
)
|
||||||
first_cursor = first_connection.cursor()
|
first_cursor = first_connection.cursor()
|
||||||
second_connection = await cluster.workers[1].aconn(dbname=db_name, autocommit=False)
|
second_connection = await cluster.workers[1].aconn(
|
||||||
|
dbname=db_name, autocommit=False
|
||||||
|
)
|
||||||
second_cursor = second_connection.cursor()
|
second_cursor = second_connection.cursor()
|
||||||
|
|
||||||
# initiate deadlock
|
# initiate deadlock
|
||||||
await first_cursor.execute("UPDATE public.deadlock_detection_test SET some_val = 1 WHERE user_id = 1;")
|
await first_cursor.execute(
|
||||||
await second_cursor.execute("UPDATE public.deadlock_detection_test SET some_val = 2 WHERE user_id = 2;")
|
"UPDATE public.deadlock_detection_test SET some_val = 1 WHERE user_id = 1;"
|
||||||
|
)
|
||||||
|
await second_cursor.execute(
|
||||||
|
"UPDATE public.deadlock_detection_test SET some_val = 2 WHERE user_id = 2;"
|
||||||
|
)
|
||||||
|
|
||||||
# Test that deadlock is resolved by a maintenance daemon
|
# Test that deadlock is resolved by a maintenance daemon
|
||||||
with pytest.raises(DeadlockDetected):
|
with pytest.raises(DeadlockDetected):
|
||||||
|
|
||||||
async def run_deadlocked_queries():
|
async def run_deadlocked_queries():
|
||||||
await asyncio.gather(
|
await asyncio.gather(
|
||||||
second_cursor.execute("UPDATE public.deadlock_detection_test SET some_val = 2 WHERE user_id = 1;"),
|
second_cursor.execute(
|
||||||
first_cursor.execute("UPDATE public.deadlock_detection_test SET some_val = 1 WHERE user_id = 2;")
|
"UPDATE public.deadlock_detection_test SET some_val = 2 WHERE user_id = 1;"
|
||||||
|
),
|
||||||
|
first_cursor.execute(
|
||||||
|
"UPDATE public.deadlock_detection_test SET some_val = 1 WHERE user_id = 2;"
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
await asyncio.wait_for(run_deadlocked_queries(), 300)
|
await asyncio.wait_for(run_deadlocked_queries(), 300)
|
||||||
|
@ -72,16 +98,18 @@ async def test_multiple_databases_distributed_deadlock_detection(cluster):
|
||||||
# Check that queries are deadlocked
|
# Check that queries are deadlocked
|
||||||
databases_with_deadlock = set()
|
databases_with_deadlock = set()
|
||||||
while len(databases_with_deadlock) < DATABASES_NUMBER:
|
while len(databases_with_deadlock) < DATABASES_NUMBER:
|
||||||
for db_name in (db for db in db_names if
|
for db_name in (db for db in db_names if db not in databases_with_deadlock):
|
||||||
db not in databases_with_deadlock):
|
|
||||||
for node in cluster.nodes:
|
for node in cluster.nodes:
|
||||||
async with node.acur(dbname=db_name) as cursor:
|
async with node.acur(dbname=db_name) as cursor:
|
||||||
expected_lock_count = 4 if node == cluster.coordinator else 2
|
expected_lock_count = 4 if node == cluster.coordinator else 2
|
||||||
await cursor.execute(f"""
|
await cursor.execute(
|
||||||
SELECT count(*) = {expected_lock_count} AS deadlock_created
|
"""
|
||||||
|
SELECT count(*) = %s AS deadlock_created
|
||||||
FROM pg_locks
|
FROM pg_locks
|
||||||
INNER JOIN pg_class pc ON relation = oid
|
INNER JOIN pg_class pc ON relation = oid
|
||||||
WHERE relname LIKE 'deadlock_detection_test%'""")
|
WHERE relname LIKE 'deadlock_detection_test%%'""",
|
||||||
|
(expected_lock_count,),
|
||||||
|
)
|
||||||
queries_deadlocked = await cursor.fetchone()
|
queries_deadlocked = await cursor.fetchone()
|
||||||
if queries_deadlocked[0]:
|
if queries_deadlocked[0]:
|
||||||
print(f"Queries are deadlocked on {db_name}")
|
print(f"Queries are deadlocked on {db_name}")
|
||||||
|
@ -91,14 +119,18 @@ async def test_multiple_databases_distributed_deadlock_detection(cluster):
|
||||||
|
|
||||||
# Enable maintenance back
|
# Enable maintenance back
|
||||||
for node in cluster.nodes:
|
for node in cluster.nodes:
|
||||||
node.sql("ALTER SYSTEM RESET citus.recover_2pc_interval;")
|
node.reset_configuration(
|
||||||
node.sql("ALTER SYSTEM RESET citus.distributed_deadlock_detection_factor;")
|
"citus.recover_2pc_interval",
|
||||||
node.sql("SELECT pg_reload_conf();")
|
"citus.distributed_deadlock_detection_factor",
|
||||||
|
)
|
||||||
|
node.reload()
|
||||||
|
|
||||||
tasks = list()
|
tasks = list()
|
||||||
for idx, db_name in enumerate(db_names):
|
for idx, db_name in enumerate(db_names):
|
||||||
run_on_coordinator = True if idx % 3 == 0 else False
|
run_on_coordinator = True if idx % 3 == 0 else False
|
||||||
tasks.append(test_deadlock(db_name=db_name, run_on_coordinator=run_on_coordinator))
|
tasks.append(
|
||||||
|
test_deadlock(db_name=db_name, run_on_coordinator=run_on_coordinator)
|
||||||
|
)
|
||||||
|
|
||||||
tasks.append(enable_maintenance_when_deadlocks_ready())
|
tasks.append(enable_maintenance_when_deadlocks_ready())
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue