PG-225: Fix deadlock in pgss_store.

The deadlock scenario is describe below:
1. pgss_store is called, it acquires the lock pgss->lock.
2. An error ocurr, mostly out of memory when accessing internal hash
   tables used to store internal data, on functions
   pgss_store_query_info and pgss_get_entry.
3. Call elog() to report out of memory error.
4. Our pgsm_emit_log_hook is called, it calls pgss_store_error, which in
   turn calls pgss_store.
5. Try to acquire already acquired lock pgss->lock, deadlock happens.

To fix the problem, there are two modifications worth mentioning done by
this commit:
1. We are now passing HASH_ENTER_NULL flag to hash_search, instead of
   HASH_ENTER, as read in postgresql sources, this prevents this
   function from reporting error when out of memory, but instead it will
   only return NULL if we pass HASH_ENTER_NULL, so we can handle the
   error ourselves.
2. In pgss_store, if an error happens after the pgss->lock is acquired,
   we only set a flag, then, after releasing the lock, we check if the
   flag is set and report the error accordingly.
pull/101/head
Diego Fronza 2021-09-03 14:11:58 -04:00
parent e541633670
commit eafb2e89a8
2 changed files with 8 additions and 5 deletions

View File

@ -147,7 +147,7 @@ hash_entry_alloc(pgssSharedState *pgss, pgssHashKey *key,int encoding)
return NULL; return NULL;
} }
/* Find or create an entry with desired hash code */ /* Find or create an entry with desired hash code */
entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found); entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER_NULL, &found);
if (!found) if (!found)
{ {
pgss->bucket_entry[pgss->current_wbucket]++; pgss->bucket_entry[pgss->current_wbucket]++;
@ -285,7 +285,7 @@ hash_create_query_entry(uint64 bucket_id, uint64 queryid, uint64 dbid, uint64 us
key.ip = ip; key.ip = ip;
key.appid = appid; key.appid = appid;
entry = (pgssQueryEntry *) hash_search(pgss_query_hash, &key, HASH_ENTER, &found); entry = (pgssQueryEntry *) hash_search(pgss_query_hash, &key, HASH_ENTER_NULL, &found);
return entry; return entry;
} }

View File

@ -1499,6 +1499,7 @@ pgss_store(uint64 queryid,
uint64 planid = plan_info ? plan_info->planid: 0; uint64 planid = plan_info ? plan_info->planid: 0;
uint64 appid = djb2_hash((unsigned char *)application_name, application_name_len); uint64 appid = djb2_hash((unsigned char *)application_name, application_name_len);
char comments[512] = ""; char comments[512] = "";
bool out_of_memory = false;
/* Monitoring is disabled */ /* Monitoring is disabled */
if (!PGSM_ENABLED) if (!PGSM_ENABLED)
return; return;
@ -1532,7 +1533,7 @@ pgss_store(uint64 queryid,
pgssQueryEntry *query_entry; pgssQueryEntry *query_entry;
query_entry = pgss_store_query_info(bucketid, queryid, dbid, userid, ip, appid, query, strlen(query), kind); query_entry = pgss_store_query_info(bucketid, queryid, dbid, userid, ip, appid, query, strlen(query), kind);
if (query_entry == NULL) if (query_entry == NULL)
elog(DEBUG1, "pg_stat_monitor: out of memory"); out_of_memory = true;
break; break;
} }
case PGSS_ERROR: case PGSS_ERROR:
@ -1543,13 +1544,13 @@ pgss_store(uint64 queryid,
query_entry = pgss_store_query_info(bucketid, queryid, dbid, userid, ip, appid, query, strlen(query), kind); query_entry = pgss_store_query_info(bucketid, queryid, dbid, userid, ip, appid, query, strlen(query), kind);
if (query_entry == NULL) if (query_entry == NULL)
{ {
elog(DEBUG1, "pg_stat_monitor: out of memory"); out_of_memory = true;
break; break;
} }
entry = pgss_get_entry(bucketid, userid, dbid, queryid, ip, planid, appid); entry = pgss_get_entry(bucketid, userid, dbid, queryid, ip, planid, appid);
if (entry == NULL) if (entry == NULL)
{ {
elog(DEBUG1, "pg_stat_monitor: out of memory"); out_of_memory = true;
break; break;
} }
@ -1576,6 +1577,8 @@ pgss_store(uint64 queryid,
break; break;
} }
LWLockRelease(pgss->lock); LWLockRelease(pgss->lock);
if (out_of_memory)
elog(DEBUG1, "pg_stat_monitor: out of memory");
} }
/* /*
* Reset all statement statistics. * Reset all statement statistics.