From 1d4b0036e7650fe273cfe32f17b13363dc64062f Mon Sep 17 00:00:00 2001 From: Hamid Akhtar Date: Sun, 19 Feb 2023 05:09:33 +0500 Subject: [PATCH] Fixing issue with server crash because of get_database_name without a transaction. Also, added out of memory warning to log as well as pg_stat_monitor_internal view --- hash_query.c | 10 +++++++++- pg_stat_monitor.c | 41 ++++++++++++++++++++++++++++++----------- pg_stat_monitor.h | 2 ++ 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/hash_query.c b/hash_query.c index d59164a..98670a2 100644 --- a/hash_query.c +++ b/hash_query.c @@ -105,6 +105,7 @@ pgsm_startup(void) dsa_area *dsa; char *p = (char *) pgsm; + pgsm->pgsm_oom = false; pgsm->lock = &(GetNamedLWLockTranche("pg_stat_monitor"))->lock; SpinLockInit(&pgsm->mutex); ResetSharedState(pgsm); @@ -332,7 +333,8 @@ hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_bu if (DsaPointerIsValid(parent_qdsa)) dsa_free(pgsmStateLocal.dsa, parent_qdsa); - continue; + + pgsmStateLocal.shared_pgsmState->pgsm_oom = false; } } pgsm_hash_seq_term(&hstat); @@ -344,6 +346,12 @@ IsHashInitialize(void) return (pgsmStateLocal.shared_pgsmState != NULL); } +bool +IsSystemOOM(void) +{ + return (IsHashInitialize() && pgsmStateLocal.shared_pgsmState->pgsm_oom); +} + /* * pgsm_* functions are just wrapper functions over the hash table standard * API and call the appropriate hash table function based on USE_DYNAMIC_HASH diff --git a/pg_stat_monitor.c b/pg_stat_monitor.c index b346442..91a6f4e 100644 --- a/pg_stat_monitor.c +++ b/pg_stat_monitor.c @@ -228,6 +228,7 @@ static void RecordConstLocation(JumbleState *jstate, int location); * relevant part of the string. */ static const char *CleanQuerytext(const char *query, int *location, int *len); +static uint64 get_query_id(JumbleState *jstate, Query *query); #endif static char *generate_normalized_query(JumbleState *jstate, const char *query, @@ -237,10 +238,6 @@ static int comp_location(const void *a, const void *b); static uint64 get_next_wbucket(pgsmSharedState *pgsm); -#if PG_VERSION_NUM < 140000 -static uint64 get_query_id(JumbleState *jstate, Query *query); -#endif - /* * Module load callback */ @@ -1691,11 +1688,15 @@ pgsm_create_hash_entry(MemoryContext context, uint64 bucket_id, uint64 queryid, entry->key.toplevel = ((exec_nested_level + plan_nested_level) == 0); #endif - datname = get_database_name(entry->key.dbid); + if (IsTransactionState()) + { + datname = get_database_name(entry->key.dbid); + username = GetUserNameFromId(entry->key.userid, true); + } + if (!datname) datname = pnstrdup("", sizeof(entry->datname) - 1); - username = GetUserNameFromId(entry->key.userid, true); if (!username) username = pnstrdup("", sizeof(entry->username) - 1); @@ -1804,6 +1805,18 @@ pgsm_store(pgsmEntry *entry) if (shared_hash_entry == NULL) { + /* Out of memory; report only if the state has changed now. Otherwise we risk filling up the log file with these message. */ + if (!IsSystemOOM()) + { + pgsm->pgsm_oom = true; + + ereport(WARNING, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[pg_stat_monitor] pgsm_store: Hash table is out of memory and can no longer store queries!"), + errdetail("You may reset the view or when the buckets are deallocated, pg_stat_monitor will resume saving " \ + "queries. Alternatively, try increasing the value of pg_stat_monitor.pgsm_max."))); + } + LWLockRelease(pgsm->lock); if (DsaPointerIsValid(dsa_query_pointer)) @@ -1968,6 +1981,14 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("[pg_stat_monitor] pg_stat_monitor_internal: Must be loaded via shared_preload_libraries."))); + /* Out of memory? */ + if (IsSystemOOM()) + ereport(WARNING, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("[pg_stat_monitor] pg_stat_monitor_internal: Hash table is out of memory and can no longer store queries!"), + errdetail("You may reset the view or when the buckets are deallocated, pg_stat_monitor will resume saving " \ + "queries. Alternatively, try increasing the value of pg_stat_monitor.pgsm_max."))); + /* check to see if caller supports us returning a tuplestore */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) ereport(ERROR, @@ -1979,8 +2000,6 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, errmsg("[pg_stat_monitor] pg_stat_monitor_internal: Materialize mode required, but it is not " \ "allowed in this context."))); - pgsm = pgsm_get_ss(); - /* Switch into long-lived context to construct returned data structures */ per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; oldcontext = MemoryContextSwitchTo(per_query_ctx); @@ -1999,8 +2018,8 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, MemoryContextSwitchTo(oldcontext); + pgsm = pgsm_get_ss(); LWLockAcquire(pgsm->lock, LW_SHARED); - pgsm_hash_seq_init(&hstat, get_pgsmHash(), false); while ((entry = pgsm_hash_seq_next(&hstat)) != NULL) @@ -3410,7 +3429,8 @@ pgsm_emit_log_hook(ErrorData *edata) if (MyProc == NULL) goto exit; - if (PGSM_ERROR_CAPTURE_ENABLED && edata->elevel >= WARNING) + /* Do not store */ + if (PGSM_ERROR_CAPTURE_ENABLED && edata->elevel >= WARNING && IsSystemOOM() == false) { pgsm_store_error(debug_query_string ? debug_query_string : "", edata); @@ -3426,7 +3446,6 @@ IsSystemInitialized(void) return (system_init && IsHashInitialize()); } - static double time_diff(struct timeval end, struct timeval start) { diff --git a/pg_stat_monitor.h b/pg_stat_monitor.h index 39919b7..5676dab 100644 --- a/pg_stat_monitor.h +++ b/pg_stat_monitor.h @@ -417,6 +417,7 @@ typedef struct pgsmSharedState * classic shared memory hash or dshash * (if we are using USE_DYNAMIC_HASH) */ + bool pgsm_oom; } pgsmSharedState; typedef struct pgsmLocalState @@ -484,6 +485,7 @@ PGSM_HASH_TABLE *get_pgsmHash(void); void pgsm_attach_shmem(void); bool IsHashInitialize(void); +bool IsSystemOOM(void); void pgsm_shmem_startup(void); void pgsm_shmem_shutdown(int code, Datum arg); int pgsm_get_bucket_size(void);