From eafb2e89a8657b58771a8e776868611b8c591e64 Mon Sep 17 00:00:00 2001 From: Diego Fronza Date: Fri, 3 Sep 2021 14:11:58 -0400 Subject: [PATCH] PG-225: Fix deadlock in pgss_store. The deadlock scenario is describe below: 1. pgss_store is called, it acquires the lock pgss->lock. 2. An error ocurr, mostly out of memory when accessing internal hash tables used to store internal data, on functions pgss_store_query_info and pgss_get_entry. 3. Call elog() to report out of memory error. 4. Our pgsm_emit_log_hook is called, it calls pgss_store_error, which in turn calls pgss_store. 5. Try to acquire already acquired lock pgss->lock, deadlock happens. To fix the problem, there are two modifications worth mentioning done by this commit: 1. We are now passing HASH_ENTER_NULL flag to hash_search, instead of HASH_ENTER, as read in postgresql sources, this prevents this function from reporting error when out of memory, but instead it will only return NULL if we pass HASH_ENTER_NULL, so we can handle the error ourselves. 2. In pgss_store, if an error happens after the pgss->lock is acquired, we only set a flag, then, after releasing the lock, we check if the flag is set and report the error accordingly. --- hash_query.c | 4 ++-- pg_stat_monitor.c | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/hash_query.c b/hash_query.c index 265b2a1..b74cada 100644 --- a/hash_query.c +++ b/hash_query.c @@ -147,7 +147,7 @@ hash_entry_alloc(pgssSharedState *pgss, pgssHashKey *key,int encoding) return NULL; } /* Find or create an entry with desired hash code */ - entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found); + entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER_NULL, &found); if (!found) { pgss->bucket_entry[pgss->current_wbucket]++; @@ -285,7 +285,7 @@ hash_create_query_entry(uint64 bucket_id, uint64 queryid, uint64 dbid, uint64 us key.ip = ip; key.appid = appid; - entry = (pgssQueryEntry *) hash_search(pgss_query_hash, &key, HASH_ENTER, &found); + entry = (pgssQueryEntry *) hash_search(pgss_query_hash, &key, HASH_ENTER_NULL, &found); return entry; } diff --git a/pg_stat_monitor.c b/pg_stat_monitor.c index 95a3efe..c9a8fe4 100644 --- a/pg_stat_monitor.c +++ b/pg_stat_monitor.c @@ -1499,6 +1499,7 @@ pgss_store(uint64 queryid, uint64 planid = plan_info ? plan_info->planid: 0; uint64 appid = djb2_hash((unsigned char *)application_name, application_name_len); char comments[512] = ""; + bool out_of_memory = false; /* Monitoring is disabled */ if (!PGSM_ENABLED) return; @@ -1532,7 +1533,7 @@ pgss_store(uint64 queryid, pgssQueryEntry *query_entry; query_entry = pgss_store_query_info(bucketid, queryid, dbid, userid, ip, appid, query, strlen(query), kind); if (query_entry == NULL) - elog(DEBUG1, "pg_stat_monitor: out of memory"); + out_of_memory = true; break; } case PGSS_ERROR: @@ -1543,13 +1544,13 @@ pgss_store(uint64 queryid, query_entry = pgss_store_query_info(bucketid, queryid, dbid, userid, ip, appid, query, strlen(query), kind); if (query_entry == NULL) { - elog(DEBUG1, "pg_stat_monitor: out of memory"); + out_of_memory = true; break; } entry = pgss_get_entry(bucketid, userid, dbid, queryid, ip, planid, appid); if (entry == NULL) { - elog(DEBUG1, "pg_stat_monitor: out of memory"); + out_of_memory = true; break; } @@ -1576,6 +1577,8 @@ pgss_store(uint64 queryid, break; } LWLockRelease(pgss->lock); + if (out_of_memory) + elog(DEBUG1, "pg_stat_monitor: out of memory"); } /* * Reset all statement statistics.