diff --git a/hash_query.c b/hash_query.c index 25a7b8a..8cc3bf8 100644 --- a/hash_query.c +++ b/hash_query.c @@ -32,6 +32,12 @@ static dshash_parameters dsh_params = { }; #endif +/* + * Returns the shared memory area size for storing the query texts. + * USE_DYNAMIC_HASH also creates the hash table in the same memory space, + * so add the required bucket memory size to the query text area size + */ + static Size pgsm_query_area_size(void) { @@ -42,7 +48,9 @@ pgsm_query_area_size(void) #endif return MAXALIGN(sz); } - +/* + * Total shared memory area required by pgsm + */ Size pgsm_ShmemSize(void) { @@ -56,6 +64,12 @@ pgsm_ShmemSize(void) return MAXALIGN(sz); } +/* + * Returns the shared memory area size for storing the query texts and pgsm + * shared state structure, + * Moreover, for USE_DYNAMIC_HASH, both the hash table and raw query text area + * get allocated as a single shared memory chunk. + */ static Size pgsm_get_shared_area_size(void) { @@ -105,13 +119,16 @@ pgss_startup(void) pgss->hash_handle = pgsm_create_bucket_hash(pgss,dsa); + /* If overflow is enabled, set the DSA size to unlimited, + * and allow the DSA to grow beyond the shared memory space + * into the swap area*/ if (PGSM_OVERFLOW_TARGET == OVERFLOW_TARGET_DISK) dsa_set_size_limit(dsa, -1); pgsmStateLocal.shared_pgssState = pgss; /* - * Postmaster will never access these again, thus free the local - * dsa/dshash references. + * Postmaster will never access the dsa again, thus free it's local + * references. */ dsa_detach(dsa); } @@ -129,6 +146,9 @@ pgss_startup(void) on_shmem_exit(pgss_shmem_shutdown, (Datum) 0); } +/* + * Create the classic or dshahs hash table for storing the query statistics. + */ static PGSM_HASH_TABLE_HANDLE pgsm_create_bucket_hash(pgssSharedState *pgss, dsa_area *dsa) { @@ -151,6 +171,15 @@ pgsm_create_bucket_hash(pgssSharedState *pgss, dsa_area *dsa) return bucket_hash; } +/* + * Attach to a DSA area created by the postmaster, in the case of + * USE_DYNAMIC_HASH, also attach the local dshash handle to + * the dshash created by the postmaster. + * + * Note: The dsa area and dshash for the process may be mapped at a + * different virtual address in this process. + * + */ void pgsm_attach_shmem(void) { @@ -158,10 +187,17 @@ pgsm_attach_shmem(void) if (pgsmStateLocal.dsa) return; + /* + * We want the dsa to remain valid throughout the lifecycle of this process. + * so switch to TopMemoryContext before attaching + */ oldcontext = MemoryContextSwitchTo(TopMemoryContext); pgsmStateLocal.dsa = dsa_attach_in_place(pgsmStateLocal.shared_pgssState->raw_dsa_area, NULL); + /* pin the attached area to keep the area attached until end of + * session or explicit detach. + */ dsa_pin_mapping(pgsmStateLocal.dsa); #if USE_DYNAMIC_HASH @@ -447,7 +483,10 @@ IsHashInitialize(void) return (pgsmStateLocal.shared_pgssState != NULL); } -/* hash function port based on USE_DYNAMIC_HASH */ +/* + * pgsm_* functions are just wrapper functions over the hash table standard + * API and call the appropriate hash table function based on USE_DYNAMIC_HASH + */ void * pgsm_hash_find_or_insert(PGSM_HASH_TABLE *shared_hash, pgssHashKey *key, bool* found) diff --git a/pg_stat_monitor.c b/pg_stat_monitor.c index 134a999..93bf5d6 100644 --- a/pg_stat_monitor.c +++ b/pg_stat_monitor.c @@ -1327,16 +1327,20 @@ pgss_update_entry(pgssEntry *entry, strlen(nested_query_txts[exec_nested_level - 1]): 0; e->counters.info.parentid = nested_queryids[exec_nested_level - 1]; e->counters.info.parent_query = InvalidDsaPointer; + /* If we have a parent query, store it in the raw dsa area */ if (parent_query_len > 0) { char *qry_buff; dsa_area *query_dsa_area = get_dsa_area_for_query_text(); + /* Use dsa_allocate_extended with DSA_ALLOC_NO_OOM flag, as we don't want to get an + * error if memory allocation fails.*/ dsa_pointer qry = dsa_allocate_extended(query_dsa_area, parent_query_len+1, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); if (DsaPointerIsValid(qry)) { qry_buff = dsa_get_address(query_dsa_area, qry); memcpy(qry_buff, nested_query_txts[exec_nested_level - 1], parent_query_len); qry_buff[parent_query_len] = 0; + /* store the dsa pointer for parent query text */ e->counters.info.parent_query = qry; } } @@ -1633,6 +1637,7 @@ pgss_store(uint64 queryid, pfree(norm_query); return; } + /* Get the memory address from DSA pointer and copy the query text in local variable */ query_buff = dsa_get_address(query_dsa_area, dsa_query_pointer); memcpy(query_buff, norm_query ? norm_query : query, query_len); /* OK to create a new hashtable entry */ diff --git a/pg_stat_monitor.h b/pg_stat_monitor.h index ced517c..6bcf089 100644 --- a/pg_stat_monitor.h +++ b/pg_stat_monitor.h @@ -107,6 +107,19 @@ /* Update this if need a enum GUC with more options. */ #define MAX_ENUM_OPTIONS 6 +/* + * API for disabling error capture ereport(ERROR,..) by PGSM's error capture hook + * pgsm_emit_log_hook() + * + * Use these macros as follows: + * PGSM_DISABLE_ERROR_CAPUTRE(); + * { + * ... code that might throw ereport(ERROR) ... + * }PGSM_END_DISABLE_ERROR_CAPTURE(); + * + * These macros can be used to error recursion if the error gets + * thrown from within the function called from pgsm_emit_log_hook() + */ extern volatile bool __pgsm_do_not_capture_error; #define PGSM_DISABLE_ERROR_CAPUTRE() \ do { \ @@ -119,6 +132,27 @@ extern volatile bool __pgsm_do_not_capture_error; #define PGSM_ERROR_CAPTURE_ENABLED \ __pgsm_do_not_capture_error == false +/* + * pg_stat_monitor uses the hash structure to store all query statistics + * except the query text, which gets stored out of line in the raw DSA area. + * Enabling USE_DYNAMIC_HASH uses the dshash for storing the query statistics + * that get created in the DSA area and can grow to any size. + * + * The only issue with using the dshash is that the newly created hash entries + * are explicitly locked by dshash, and its caller is required to release the lock. + * That works well as long as we do not want to swallow the errors thrown from + * dshash function. Since the lightweight locks acquired internally by dshash + * automatically get released by error. + * But throwing an error from pg_stat_monitor would mean erroring out the user query, + * which is not acceptable for any stat collector extension. + * + * Moreover, some of the pg_stat_monitor functions perform the sequence scan on the + * hash table, while the sequence scan support for dshash table is only available + * for PG 15 and onwards. + * So until we figure out the way to release the locks acquired internally by dshash + * in case of an error while ignoring the error at the same time, we will keep using + * the classic shared memory hash table. + */ #ifdef USE_DYNAMIC_HASH #define PGSM_HASH_TABLE dshash_table #define PGSM_HASH_TABLE_HANDLE dshash_table_handle @@ -355,28 +389,23 @@ typedef struct pgssSharedState TimestampTz bucket_start_time[MAX_BUCKETS]; /* start time of the bucket */ LWLock *errors_lock; /* protects errors hashtable * search/modification */ - /* - * These variables are used when pgsm_overflow_target is ON. - * - * overflow is set to true when the query buffer overflows. - * - * n_bucket_cycles counts the number of times we changed bucket since the - * query buffer overflowed. When it reaches pgsm_max_buckets we remove the - * dump file, also reset the counter. - * - * This allows us to avoid having a large file on disk that would also - * slowdown queries to the pg_stat_monitor view. - */ - size_t n_bucket_cycles; int hash_tranche_id; - void *raw_dsa_area; + void *raw_dsa_area; /* DSA area pointer to store query texts. + * dshash also lives in this memory when + * USE_DYNAMIC_HASH is enabled */ PGSM_HASH_TABLE_HANDLE hash_handle; + /* hash table handle. can be either + * classic shared memory hash or dshash + * (if we are using USE_DYNAMIC_HASH) + */ } pgssSharedState; typedef struct pgsmLocalState { pgssSharedState *shared_pgssState; - dsa_area *dsa; + dsa_area *dsa; /* local dsa area for backend attached to the + * dsa area created by postmaster at startup. + */ PGSM_HASH_TABLE *shared_hash; }pgsmLocalState;