PG-286: Avoid duplicate queries in text buffer.

The memory area reserved for query text (pgsm_query_shared_buffer) was
divided evenly among the buckets, which allowed the same query,
e.g. "SELECT 1", to be duplicated in different buckets, thus wasting space.

This commit fixes the query text duplication by adding a new hash table
whose only purpose is to check whether a given query has already been added
to the buffer (by using the queryID).

This allows different buckets that share the same query to point to a
unique entry in the query buffer (pgss_qbuf).

When pg_stat_monitor moves to a new bucket id, it can also save some CPU
time by not re-adding a query that already exists in the buffer.
This commit is contained in:
Diego Fronza
2021-11-29 11:09:47 -03:00
committed by Hamid Akhtar
parent b3c7ba8c60
commit c21a3de00d
3 changed files with 93 additions and 141 deletions

View File

@@ -87,9 +87,8 @@
#define MAX_QUERY_BUF (PGSM_QUERY_SHARED_BUFFER * 1024 * 1024)
#define MAX_BUCKETS_MEM (PGSM_MAX * 1024 * 1024)
#define BUCKETS_MEM_OVERFLOW() ((hash_get_num_entries(pgss_hash) * sizeof(pgssEntry)) >= MAX_BUCKETS_MEM)
#define MAX_QUERY_BUFFER_BUCKET MAX_QUERY_BUF / PGSM_MAX_BUCKETS
#define MAX_BUCKET_ENTRIES (MAX_BUCKETS_MEM / sizeof(pgssEntry))
#define QUERY_BUFFER_OVERFLOW(x,y) ((x + y + sizeof(uint64) + sizeof(uint64)) > MAX_QUERY_BUFFER_BUCKET)
#define QUERY_BUFFER_OVERFLOW(x,y) ((x + y + sizeof(uint64) + sizeof(uint64)) > MAX_QUERY_BUF)
#define QUERY_MARGIN 100
#define MIN_QUERY_LEN 10
#define SQLCODE_LEN 20
@@ -161,7 +160,7 @@ typedef enum AGG_KEY
#define MAX_QUERY_LEN 1024
/* shared nenory storage for the query */
/* shared memory storage for the query */
typedef struct CallTime
{
double total_time; /* total execution time, in msec */
@@ -171,21 +170,19 @@ typedef struct CallTime
double sum_var_time; /* sum of variances in execution time in msec */
} CallTime;
/*
 * Hash key for a query entry: the bucket number combined with the query,
 * user, database, client address and application identifiers.
 */
typedef struct pgssQueryHashKey
{
uint64 bucket_id; /* bucket number */
uint64 queryid; /* query identifier */
uint64 userid; /* user OID */
uint64 dbid; /* database OID */
uint64 ip; /* client ip address */
uint64 appid; /* hash of application name */
} pgssQueryHashKey;
/*
* Entry type for queries hash table (query ID).
*
* We use a hash table to keep track of query IDs that have their
* corresponding query text added to the query buffer (pgsm_query_shared_buffer).
*
* This allows us to avoid adding duplicated queries to the buffer, therefore
* leaving more space for other queries and saving some CPU.
*/
typedef struct pgssQueryEntry
{
pgssQueryHashKey key; /* hash key of entry - MUST BE FIRST */
uint64 pos; /* bucket number */
uint64 state;
uint64 queryid; /* query identifier, also the key. */
size_t query_pos; /* query location within query buffer */
} pgssQueryEntry;
typedef struct PlanInfo
@@ -208,10 +205,6 @@ typedef struct pgssHashKey
typedef struct QueryInfo
{
uint64 queryid; /* query identifier */
Oid userid; /* user OID */
Oid dbid; /* database OID */
uint host; /* client IP */
uint64 parentid; /* parent queryid of current query*/
int64 type; /* type of query, options are query, info, warning, error, fatal */
char application_name[APPLICATIONNAME_LEN];
@@ -311,7 +304,6 @@ typedef struct pgssSharedState
pg_atomic_uint64 current_wbucket;
pg_atomic_uint64 prev_bucket_usec;
uint64 bucket_entry[MAX_BUCKETS];
int64 query_buf_size_bucket;
char bucket_start_time[MAX_BUCKETS][60]; /* start time of the bucket */
} pgssSharedState;
@@ -382,21 +374,20 @@ int pgsm_get_bucket_size(void);
pgssSharedState* pgsm_get_ss(void);
HTAB *pgsm_get_plan_hash(void);
HTAB *pgsm_get_hash(void);
HTAB *pgsm_get_query_hash(void);
HTAB *pgsm_get_plan_hash(void);
void hash_entry_reset(void);
void hash_query_entryies_reset(void);
void hash_query_entries();
void hash_query_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_buffer[]);
void hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_buffer[]);
void hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_buffer);
pgssEntry* hash_entry_alloc(pgssSharedState *pgss, pgssHashKey *key, int encoding);
Size hash_memsize(void);
int read_query_buffer(int bucket_id, uint64 queryid, char *query_txt, size_t pos);
uint64 read_query(unsigned char *buf, uint64 queryid, char * query, size_t pos);
pgssQueryEntry* hash_find_query_entry(uint64 bucket_id, uint64 queryid, uint64 dbid, uint64 userid, uint64 ip, uint64 appid);
pgssQueryEntry* hash_create_query_entry(uint64 bucket_id, uint64 queryid, uint64 dbid, uint64 userid, uint64 ip, uint64 appid);
void pgss_startup(void);
void set_qbuf(int i, unsigned char *);
void set_qbuf(unsigned char *);
/* hash_query.c */
void pgss_startup(void);