/*-------------------------------------------------------------------------
 *
 * pg_stat_monitor.h
 *		Track statement execution times across a whole database cluster.
 *
 * Portions Copyright © 2018-2024, Percona LLC and/or its affiliates
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 *
 * Portions Copyright (c) 1994, The Regents of the University of California
 *
 * IDENTIFICATION
 *	  contrib/pg_stat_monitor/pg_stat_monitor.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef __PG_STAT_MONITOR_H__
#define __PG_STAT_MONITOR_H__

#include "postgres.h"

/* System headers */
#include <arpa/inet.h>
#include <math.h>
#include <sys/stat.h>
#include <unistd.h>
#include <time.h>
#include <sys/time.h>
#include <sys/resource.h>

/* Dynamic shared memory containers */
#include "lib/dshash.h"
#include "utils/dsa.h"

/* PostgreSQL server headers */
#include "access/hash.h"
#include "catalog/pg_authid.h"
#include "executor/instrument.h"
#include "common/ip.h"
#include "jit/jit.h"
#include "funcapi.h"
#include "access/twophase.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "optimizer/planner.h"
#include "postmaster/bgworker.h"
#include "parser/analyze.h"
#include "parser/parsetree.h"
#include "parser/scanner.h"
#include "parser/scansup.h"
#include "pgstat.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/spin.h"
#include "tcop/utility.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/timestamp.h"
#include "utils/lsyscache.h"
#include "utils/guc.h"
#include "utils/guc_tables.h"
#include "utils/palloc.h"

/* Sum of regular backends, auxiliary processes and prepared transactions. */
#define MAX_BACKEND_PROCESES	(MaxBackends + NUM_AUXILIARY_PROCS + max_prepared_xacts)
/* Thin alias for intarray_get_datum(). */
#define IntArrayGetTextDatum(x,y) intarray_get_datum(x,y)

/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
#define USAGE_EXEC(duration)	(1.0)	/* currently ignores its argument */
#define USAGE_INIT				(1.0)	/* including initial planning */
#define ASSUMED_LENGTH_INIT		1024	/* initial assumed mean query length */
#define USAGE_DECREASE_FACTOR	(0.99)	/* decreased every entry_dealloc */
#define STICKY_DECREASE_FACTOR	(0.50)	/* factor for sticky entries */
#define USAGE_DEALLOC_PERCENT	5		/* free this % of entries at once */

#define JUMBLE_SIZE				1024	/* query serialization buffer size */

/* Fixed limits and buffer lengths used by the statistics structures. */
#define HISTOGRAM_MAX_TIME		50000000
#define MAX_RESPONSE_BUCKET		50
#define INVALID_BUCKET_ID		(-1)
#define TEXT_LEN				255
#define ERROR_MESSAGE_LEN		100
#define REL_TYPENAME_LEN		64
#define REL_LST					10
/*
 * REL_LEN is REL_TYPENAME_LEN * 2 (schema + relname) plus 4 extra bytes;
 * per the original note these cover the view indication, the separating
 * dot and the string terminator.
 */
#define REL_LEN					132
#define CMD_LST					10
#define CMD_LEN					20
#define APPLICATIONNAME_LEN		NAMEDATALEN
#define COMMENTS_LEN			256
#define PGSM_OVER_FLOW_MAX		10
#define PLAN_TEXT_LEN			1024
/* the assumption of query max nested level */
#define DEFAULT_MAX_NESTED_LEVEL	10

/*
 * Memory limits derived from GUCs.  pgsm_query_shared_buffer and pgsm_max
 * are multiplied by 1024 * 1024 in 64-bit arithmetic to get byte counts.
 */
#define MAX_QUERY_BUF			((int64)pgsm_query_shared_buffer * 1024 * 1024)
#define MAX_BUCKETS_MEM			((int64)pgsm_max * 1024 * 1024)
/* True once the entries in pgsm_hash occupy at least MAX_BUCKETS_MEM bytes. */
#define BUCKETS_MEM_OVERFLOW()	((hash_get_num_entries(pgsm_hash) * sizeof(pgsmEntry)) >= MAX_BUCKETS_MEM)
/* Number of pgsmEntry structs that fit in MAX_BUCKETS_MEM. */
#define MAX_BUCKET_ENTRIES		(MAX_BUCKETS_MEM / sizeof(pgsmEntry))
/*
 * True when x + y plus two uint64-sized fields exceeds MAX_QUERY_BUF.
 * Arguments are parenthesized so that expression arguments (e.g. a
 * conditional expression) are summed as a whole.
 */
#define QUERY_BUFFER_OVERFLOW(x,y)	(((x) + (y) + sizeof(uint64) + sizeof(uint64)) > MAX_QUERY_BUF)
#define QUERY_MARGIN			100
#define MIN_QUERY_LEN			10
#define SQLCODE_LEN				20
#define TOTAL_RELS_LENGTH		(REL_LST * REL_LEN)

/* MAX_SETTINGS is 15 on PostgreSQL 13 and later, 14 on older versions. */
#if PG_VERSION_NUM >= 130000
#define MAX_SETTINGS			15
#else
#define MAX_SETTINGS			14
#endif

/* Update this if an enum GUC with more options is needed. */
#define MAX_ENUM_OPTIONS		6

/*
 * pg_stat_monitor uses the hash structure to store all query statistics
 * except the query text, which gets stored out of line in the raw DSA area.
 * Enabling USE_DYNAMIC_HASH uses the dshash for storing the query statistics
 * that get created in the DSA area and can grow to any size.
 *
 * The only issue with using the dshash is that the newly created hash entries
 * are explicitly locked by dshash, and its caller is required to release the
 * lock.  That works well as long as we do not want to swallow the errors
 * thrown from dshash functions, since the lightweight locks acquired
 * internally by dshash automatically get released on error.
 * But throwing an error from pg_stat_monitor would mean erroring out the user
 * query, which is not acceptable for any stat collector extension.
 *
 * Moreover, some of the pg_stat_monitor functions perform a sequential scan
 * on the hash table, while sequential scan support for dshash tables is only
 * available for PG 15 and onwards.
 * So until we figure out a way to release the locks acquired internally by
 * dshash in case of an error while ignoring the error at the same time, we
 * will keep using the classic shared memory hash table.
 *
 * Note that PGSM_HASH_TABLE_HANDLE is a macro, not a typedef: in the HTAB
 * case it expands to "HTAB*", so declare one variable per declaration.
 */
#ifdef USE_DYNAMIC_HASH
#define PGSM_HASH_TABLE			dshash_table
#define PGSM_HASH_TABLE_HANDLE	dshash_table_handle
#define PGSM_HASH_SEQ_STATUS	dshash_seq_status
#else
#define PGSM_HASH_TABLE			HTAB
#define PGSM_HASH_TABLE_HANDLE	HTAB*
#define PGSM_HASH_SEQ_STATUS	HASH_SEQ_STATUS
#endif

#if PG_VERSION_NUM < 130000
|
|
typedef struct WalUsage
|
|
{
|
|
long wal_records; /* # of WAL records produced */
|
|
long wal_fpi; /* # of WAL full page images produced */
|
|
uint64 wal_bytes; /* size of WAL records produced */
|
|
} WalUsage;
|
|
#endif
|
|
|
|
|
|
/*
 * Kind of event being stored for a statement.
 */
typedef enum pgsmStoreKind
{
	PGSM_INVALID = -1,

	/*
	 * The numeric values matter: PGSM_PARSE is 0, so PGSM_PLAN and PGSM_EXEC
	 * are 1 and 2 respectively.  Per the original note, these values are
	 * used to reference the underlying values in the arrays in the Counters
	 * struct, and this order is required in pg_stat_monitor_internal(), so
	 * do not reorder the members.
	 */
	PGSM_PARSE = 0,
	PGSM_PLAN,
	PGSM_EXEC,
	PGSM_STORE,
	PGSM_ERROR,

	PGSM_NUMKIND				/* Must be last value of this enum */
} pgsmStoreKind;

/*
 * Type of aggregate keys
 */
typedef enum AGG_KEY
{
	AGG_KEY_DATABASE = 0,
	AGG_KEY_USER,
	AGG_KEY_HOST
} AGG_KEY;

#define MAX_QUERY_LEN 1024

/*
 * Timing aggregates for one statement, kept in shared memory.
 * All values are in milliseconds.
 */
typedef struct CallTime
{
	double		total_time;		/* total execution time, in msec */
	double		min_time;		/* minimum execution time in msec */
	double		max_time;		/* maximum execution time in msec */
	double		mean_time;		/* mean execution time in msec */
	double		sum_var_time;	/* sum of variances in execution time in msec */
} CallTime;

typedef struct PlanInfo
|
|
{
|
|
uint64 planid; /* plan identifier */
|
|
char plan_text[PLAN_TEXT_LEN]; /* plan text */
|
|
size_t plan_len; /* strlen(plan_text) */
|
|
} PlanInfo;
|
|
|
|
typedef struct pgsmHashKey
|
|
{
|
|
uint64 bucket_id; /* bucket number */
|
|
uint64 queryid; /* query identifier */
|
|
uint64 planid; /* plan identifier */
|
|
uint64 appid; /* hash of application name */
|
|
Oid userid; /* user OID */
|
|
Oid dbid; /* database OID */
|
|
uint32 ip; /* client ip address */
|
|
bool toplevel; /* query executed at top level */
|
|
uint64 parentid; /* parent queryid of current query */
|
|
} pgsmHashKey;
|
|
|
|
typedef struct QueryInfo
|
|
{
|
|
dsa_pointer parent_query;
|
|
int64 type; /* type of query, options are query, info,
|
|
* warning, error, fatal */
|
|
char application_name[APPLICATIONNAME_LEN];
|
|
char comments[COMMENTS_LEN];
|
|
char relations[REL_LST][REL_LEN]; /* List of relation involved
|
|
* in the query */
|
|
int num_relations; /* Number of relation in the query */
|
|
CmdType cmd_type; /* query command type
|
|
* SELECT/UPDATE/DELETE/INSERT */
|
|
} QueryInfo;
|
|
|
|
typedef struct ErrorInfo
|
|
{
|
|
int64 elevel; /* error elevel */
|
|
char sqlcode[SQLCODE_LEN]; /* error sqlcode */
|
|
char message[ERROR_MESSAGE_LEN]; /* error message text */
|
|
} ErrorInfo;
|
|
|
|
typedef struct Calls
|
|
{
|
|
int64 calls; /* # of times executed */
|
|
int64 rows; /* total # of retrieved or affected rows */
|
|
double usage; /* usage factor */
|
|
} Calls;
|
|
|
|
|
|
typedef struct Blocks
|
|
{
|
|
int64 shared_blks_hit; /* # of shared buffer hits */
|
|
int64 shared_blks_read; /* # of shared disk blocks read */
|
|
int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
|
|
int64 shared_blks_written; /* # of shared disk blocks written */
|
|
int64 local_blks_hit; /* # of local buffer hits */
|
|
int64 local_blks_read; /* # of local disk blocks read */
|
|
int64 local_blks_dirtied; /* # of local disk blocks dirtied */
|
|
int64 local_blks_written; /* # of local disk blocks written */
|
|
int64 temp_blks_read; /* # of temp blocks read */
|
|
int64 temp_blks_written; /* # of temp blocks written */
|
|
double shared_blk_read_time; /* time spent reading shared blocks,
|
|
* in msec */
|
|
double shared_blk_write_time; /* time spent writing shared blocks,
|
|
* in msec */
|
|
double local_blk_read_time; /* time spent reading local blocks, in
|
|
* msec */
|
|
double local_blk_write_time; /* time spent writing local blocks, in
|
|
* msec */
|
|
double temp_blk_read_time; /* time spent reading temp blocks, in msec */
|
|
double temp_blk_write_time; /* time spent writing temp blocks, in
|
|
* msec */
|
|
|
|
/*
|
|
* Variables for local entry. The values to be passed to pgsm_update_entry
|
|
* from pgsm_store.
|
|
*/
|
|
instr_time instr_shared_blk_read_time; /* time spent reading shared
|
|
* blocks */
|
|
instr_time instr_shared_blk_write_time; /* time spent writing shared
|
|
* blocks */
|
|
instr_time instr_local_blk_read_time; /* time spent reading local blocks */
|
|
instr_time instr_local_blk_write_time; /* time spent writing local blocks */
|
|
instr_time instr_temp_blk_read_time; /* time spent reading temp blocks */
|
|
instr_time instr_temp_blk_write_time; /* time spent writing temp blocks */
|
|
} Blocks;
|
|
|
|
typedef struct JitInfo
|
|
{
|
|
int64 jit_functions; /* total number of JIT functions emitted */
|
|
double jit_generation_time; /* total time to generate jit code */
|
|
int64 jit_inlining_count; /* number of times inlining time has been
|
|
* > 0 */
|
|
double jit_deform_time; /* total time to deform tuples in jit code */
|
|
int64 jit_deform_count; /* number of times deform time has been >
|
|
* 0 */
|
|
double jit_inlining_time; /* total time to inline jit code */
|
|
int64 jit_optimization_count; /* number of times optimization time
|
|
* has been > 0 */
|
|
double jit_optimization_time; /* total time to optimize jit code */
|
|
int64 jit_emission_count; /* number of times emission time has been
|
|
* > 0 */
|
|
double jit_emission_time; /* total time to emit jit code */
|
|
|
|
/*
|
|
* Variables for local entry. The values to be passed to pgsm_update_entry
|
|
* from pgsm_store.
|
|
*/
|
|
instr_time instr_generation_counter; /* generation counter */
|
|
instr_time instr_inlining_counter; /* inlining counter */
|
|
instr_time instr_deform_counter; /* deform counter */
|
|
instr_time instr_optimization_counter; /* optimization counter */
|
|
instr_time instr_emission_counter; /* emission counter */
|
|
} JitInfo;
|
|
|
|
/*
 * CPU time consumed, split into user and system time.
 */
typedef struct SysInfo
{
	double		utime;			/* user cpu time */
	double		stime;			/* system cpu time */
} SysInfo;

typedef struct Wal_Usage
|
|
{
|
|
int64 wal_records; /* # of WAL records generated */
|
|
int64 wal_fpi; /* # of WAL full page images generated */
|
|
uint64 wal_bytes; /* total amount of WAL bytes generated */
|
|
} Wal_Usage;
|
|
|
|
typedef struct Counters
|
|
{
|
|
Calls calls;
|
|
QueryInfo info;
|
|
CallTime time;
|
|
|
|
Calls plancalls;
|
|
CallTime plantime;
|
|
PlanInfo planinfo;
|
|
|
|
Blocks blocks;
|
|
SysInfo sysinfo;
|
|
JitInfo jitinfo;
|
|
ErrorInfo error;
|
|
Wal_Usage walusage;
|
|
int resp_calls[MAX_RESPONSE_BUCKET]; /* execution time's in
|
|
* msec */
|
|
} Counters;
|
|
|
|
/* Some global structure to get the cpu usage, really don't like the idea of global variable */
|
|
|
|
/*
|
|
* Statistics per statement
|
|
*/
|
|
typedef struct pgsmEntry
|
|
{
|
|
pgsmHashKey key; /* hash key of entry - MUST BE FIRST */
|
|
uint64 pgsm_query_id; /* pgsm generate normalized query hash */
|
|
char datname[NAMEDATALEN]; /* database name */
|
|
char username[NAMEDATALEN]; /* user name */
|
|
Counters counters; /* the statistics for this query */
|
|
int encoding; /* query text encoding */
|
|
TimestampTz stats_since; /* timestamp of entry allocation */
|
|
TimestampTz minmax_stats_since; /* timestamp of last min/max values reset */
|
|
slock_t mutex; /* protects the counters only */
|
|
union
|
|
{
|
|
dsa_pointer query_pos; /* query location within query buffer */
|
|
char *query_pointer;
|
|
} query_text;
|
|
} pgsmEntry;
|
|
|
|
/*
|
|
* Global shared state
|
|
*/
|
|
typedef struct pgsmSharedState
|
|
{
|
|
LWLock *lock; /* protects hashtable search/modification */
|
|
slock_t mutex; /* protects following fields only: */
|
|
pg_atomic_uint64 current_wbucket;
|
|
pg_atomic_uint64 prev_bucket_sec;
|
|
int hash_tranche_id;
|
|
void *raw_dsa_area; /* DSA area pointer to store query texts.
|
|
* dshash also lives in this memory when
|
|
* USE_DYNAMIC_HASH is enabled */
|
|
PGSM_HASH_TABLE_HANDLE hash_handle;
|
|
|
|
/*
|
|
* hash table handle. can be either classic shared memory hash or dshash
|
|
* (if we are using USE_DYNAMIC_HASH)
|
|
*/
|
|
|
|
bool pgsm_oom;
|
|
TimestampTz bucket_start_time[]; /* start time of the bucket */
|
|
} pgsmSharedState;
|
|
|
|
typedef struct pgsmLocalState
|
|
{
|
|
pgsmSharedState *shared_pgsmState;
|
|
dsa_area *dsa; /* local dsa area for backend attached to the
|
|
* dsa area created by postmaster at startup. */
|
|
PGSM_HASH_TABLE *shared_hash;
|
|
MemoryContext pgsm_mem_cxt;
|
|
|
|
} pgsmLocalState;
|
|
|
|
#if PG_VERSION_NUM < 140000
|
|
/*
|
|
* Struct for tracking locations/lengths of constants during normalization
|
|
*/
|
|
typedef struct LocationLen
|
|
{
|
|
int location; /* start offset in query text */
|
|
int length; /* length in bytes, or -1 to ignore */
|
|
} LocationLen;
|
|
|
|
/*
|
|
* Working state for computing a query jumble and producing a normalized
|
|
* query string
|
|
*/
|
|
typedef struct JumbleState
|
|
{
|
|
/* Jumble of current query tree */
|
|
unsigned char *jumble;
|
|
|
|
/* Number of bytes used in jumble[] */
|
|
Size jumble_len;
|
|
|
|
/* Array of locations of constants that should be removed */
|
|
LocationLen *clocations;
|
|
|
|
/* Allocated length of clocations array */
|
|
int clocations_buf_size;
|
|
|
|
/* Current number of valid entries in clocations array */
|
|
int clocations_count;
|
|
|
|
/* highest Param id we've seen, in order to start normalization correctly */
|
|
int highest_extern_param_id;
|
|
} JumbleState;
|
|
#endif
|
|
|
|
/* guc.c */
|
|
void init_guc(void);
|
|
|
|
/* hash_create.c */
|
|
dsa_area *get_dsa_area_for_query_text(void);
|
|
PGSM_HASH_TABLE *get_pgsmHash(void);
|
|
|
|
void pgsm_attach_shmem(void);
|
|
bool IsHashInitialize(void);
|
|
bool IsSystemOOM(void);
|
|
void pgsm_shmem_startup(void);
|
|
void pgsm_shmem_shutdown(int code, Datum arg);
|
|
int pgsm_get_bucket_size(void);
|
|
pgsmSharedState *pgsm_get_ss(void);
|
|
void hash_query_entries();
|
|
void hash_query_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_buffer[]);
|
|
void hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_buffer);
|
|
pgsmEntry *hash_entry_alloc(pgsmSharedState *pgsm, pgsmHashKey *key, int encoding);
|
|
Size pgsm_ShmemSize(void);
|
|
void pgsm_startup(void);
|
|
|
|
/* hash_query.c */
|
|
void pgsm_startup(void);
|
|
MemoryContext GetPgsmMemoryContext(void);
|
|
|
|
/* guc.c */
|
|
void init_guc(void);
|
|
|
|
/*---- GUC variables ----*/

/*
 * Statement tracking levels.
 *
 * PSGM_TRACK_NONE is a historical misspelling and is kept so existing code
 * keeps compiling; PGSM_TRACK_NONE is a correctly spelled alias with the
 * same value.
 */
typedef enum
{
	PSGM_TRACK_NONE = 0,		/* track no statements */
	PGSM_TRACK_NONE = PSGM_TRACK_NONE,	/* correctly spelled alias */
	PGSM_TRACK_TOP,				/* only top level statements */
	PGSM_TRACK_ALL				/* all statements, including nested ones */
} PGSMTrackLevel;

static const struct config_enum_entry track_options[] =
|
|
{
|
|
{"none", PSGM_TRACK_NONE, false},
|
|
{"top", PGSM_TRACK_TOP, false},
|
|
{"all", PGSM_TRACK_ALL, false},
|
|
{NULL, 0, false}
|
|
};
|
|
|
|
/*
 * Histogram timing selectors; HISTOGRAM_COUNT is last and therefore equals
 * the number of preceding members.
 */
typedef enum
{
	HISTOGRAM_START,
	HISTOGRAM_END,
	HISTOGRAM_COUNT
} HistogramTimingType;

/* GUC-backed variables; declared here, defined elsewhere. */
extern int	pgsm_max;
extern int	pgsm_query_max_len;
extern int	pgsm_bucket_time;
extern int	pgsm_max_buckets;
extern int	pgsm_histogram_buckets;
extern double pgsm_histogram_min;
extern double pgsm_histogram_max;
extern int	pgsm_query_shared_buffer;
extern bool pgsm_track_planning;
extern bool pgsm_extract_comments;
extern bool pgsm_enable_query_plan;
extern bool pgsm_enable_overflow;
extern bool pgsm_normalized_query;
extern bool pgsm_track_utility;
extern bool pgsm_track_application_names;
extern bool pgsm_enable_pgsm_query_id;
extern int	pgsm_track;

#define DECLARE_HOOK(hook, ...) \
|
|
static hook(__VA_ARGS__);
|
|
#define HOOK(name) name
|
|
#define HOOK_STATS_SIZE 0
|
|
#endif
|
|
|
|
void *pgsm_hash_find_or_insert(PGSM_HASH_TABLE * shared_hash, pgsmHashKey *key, bool *found);
|
|
void *pgsm_hash_find(PGSM_HASH_TABLE * shared_hash, pgsmHashKey *key, bool *found);
|
|
void pgsm_hash_seq_init(PGSM_HASH_SEQ_STATUS * hstat, PGSM_HASH_TABLE * shared_hash, bool lock);
|
|
void *pgsm_hash_seq_next(PGSM_HASH_SEQ_STATUS * hstat);
|
|
void pgsm_hash_seq_term(PGSM_HASH_SEQ_STATUS * hstat);
|
|
void pgsm_hash_delete_current(PGSM_HASH_SEQ_STATUS * hstat, PGSM_HASH_TABLE * shared_hash, void *key);
|