PG-545: pg_stat_monitor: Same query text should generate same queryid

Regardless of the database or the user, the same query will yield the
same query ID. As part of this, a new column, 'pgsm_query_id', is added.

* pgsm_query_id:
pgsm_query_id has the same data type of int8 as the queryid column. If
the incoming SQL command includes any constants, it internally normalizes
the query to remove those constant values with placeholders. Otherwise,
it uses the query directly to generate the query hash.

Since we no longer depend on the server's parse tree mechanism, we can
generate the same hash for the same query text for all server versions.

Also, it is important to note that the hash being calculated is a database,
schema and user independent. So same query text in different databases
will generate the same hash.

This column is not part of the key; rather, for observability purposes only.

* Regression
SQL test case pgsm_query_id.sql is added to the SQL regression.
pull/341/head
Hamid Akhtar 2022-12-28 14:14:26 +05:00
parent f8866272a2
commit b20eda7066
7 changed files with 188 additions and 15 deletions

View File

@ -12,7 +12,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
TAP_TESTS = 1 TAP_TESTS = 1
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_monitor/pg_stat_monitor.conf --inputdir=regression REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_monitor/pg_stat_monitor.conf --inputdir=regression
REGRESS = basic version guc functions counters relations database error_insert application_name application_name_unique top_query cmd_type error rows tags REGRESS = basic version guc pgsm_query_id functions counters relations database error_insert application_name application_name_unique top_query cmd_type error rows tags
# Disabled because these tests require "shared_preload_libraries=pg_stat_statements", # Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
# which typical installcheck users do not have (e.g. buildfarm clients). # which typical installcheck users do not have (e.g. buildfarm clients).

View File

@ -22,6 +22,7 @@ CREATE FUNCTION pg_stat_monitor_internal(
OUT planid text, OUT planid text,
OUT query text, OUT query text,
OUT query_plan text, OUT query_plan text,
OUT pgsm_query_id int8,
OUT top_queryid text, OUT top_queryid text,
OUT top_query text, OUT top_query text,
OUT application_name text, OUT application_name text,

View File

@ -91,6 +91,7 @@ CREATE FUNCTION pg_stat_monitor_internal(
OUT planid text, OUT planid text,
OUT query text, OUT query text,
OUT query_plan text, OUT query_plan text,
OUT pgsm_query_id int8,
OUT top_queryid text, OUT top_queryid text,
OUT top_query text, OUT top_query text,
OUT application_name text, OUT application_name text,
@ -170,6 +171,7 @@ CREATE VIEW pg_stat_monitor AS SELECT
userid::regrole, userid::regrole,
datname, datname,
'0.0.0.0'::inet + client_ip AS client_ip, '0.0.0.0'::inet + client_ip AS client_ip,
pgsm_query_id,
queryid, queryid,
top_queryid, top_queryid,
query, query,
@ -223,6 +225,7 @@ CREATE VIEW pg_stat_monitor AS SELECT
userid::regrole, userid::regrole,
datname, datname,
'0.0.0.0'::inet + client_ip AS client_ip, '0.0.0.0'::inet + client_ip AS client_ip,
pgsm_query_id,
queryid, queryid,
toplevel, toplevel,
top_queryid, top_queryid,
@ -286,6 +289,7 @@ CREATE VIEW pg_stat_monitor AS SELECT
userid::regrole, userid::regrole,
datname, datname,
'0.0.0.0'::inet + client_ip AS client_ip, '0.0.0.0'::inet + client_ip AS client_ip,
pgsm_query_id,
queryid, queryid,
toplevel, toplevel,
top_queryid, top_queryid,
@ -349,6 +353,7 @@ CREATE VIEW pg_stat_monitor AS SELECT
userid::regrole, userid::regrole,
datname, datname,
'0.0.0.0'::inet + client_ip AS client_ip, '0.0.0.0'::inet + client_ip AS client_ip,
pgsm_query_id,
queryid, queryid,
toplevel, toplevel,
top_queryid, top_queryid,

View File

@ -39,8 +39,8 @@ PG_MODULE_MAGIC;
/* Number of output arguments (columns) for various API versions */ /* Number of output arguments (columns) for various API versions */
#define PG_STAT_MONITOR_COLS_V1_0 52 #define PG_STAT_MONITOR_COLS_V1_0 52
#define PG_STAT_MONITOR_COLS_V2_0 61 #define PG_STAT_MONITOR_COLS_V2_0 62
#define PG_STAT_MONITOR_COLS 61 /* maximum of above */ #define PG_STAT_MONITOR_COLS 62 /* maximum of above */
#define PGSM_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "pg_stat_monitor_query" #define PGSM_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "pg_stat_monitor_query"
@ -160,7 +160,6 @@ DECLARE_HOOK(void pgss_ProcessUtility, PlannedStmt *pstmt, const char *queryStri
ParamListInfo params, QueryEnvironment *queryEnv, ParamListInfo params, QueryEnvironment *queryEnv,
DestReceiver *dest, DestReceiver *dest,
QueryCompletion *qc); QueryCompletion *qc);
static uint64 pgss_hash_string(const char *str, int len);
#else #else
static void BufferUsageAccumDiff(BufferUsage *bufusage, BufferUsage *pgBufferUsage, BufferUsage *bufusage_start); static void BufferUsageAccumDiff(BufferUsage *bufusage, BufferUsage *pgBufferUsage, BufferUsage *bufusage_start);
@ -170,6 +169,7 @@ DECLARE_HOOK(void pgss_ProcessUtility, PlannedStmt *pstmt, const char *queryStri
DestReceiver *dest, DestReceiver *dest,
char *completionTag); char *completionTag);
#endif #endif
static uint64 pgss_hash_string(const char *str, int len);
char *unpack_sql_state(int sql_state); char *unpack_sql_state(int sql_state);
#define PGSM_HANDLED_UTILITY(n) (!IsA(n, ExecuteStmt) && \ #define PGSM_HANDLED_UTILITY(n) (!IsA(n, ExecuteStmt) && \
@ -635,7 +635,7 @@ pgss_ExecutorEnd(QueryDesc *queryDesc)
MemoryContext mct = MemoryContextSwitchTo(TopMemoryContext); MemoryContext mct = MemoryContextSwitchTo(TopMemoryContext);
plan_info.plan_len = snprintf(plan_info.plan_text, PLAN_TEXT_LEN, "%s", pgss_explain(queryDesc)); plan_info.plan_len = snprintf(plan_info.plan_text, PLAN_TEXT_LEN, "%s", pgss_explain(queryDesc));
plan_info.planid = DatumGetUInt64(hash_any_extended((const unsigned char *) plan_info.plan_text, plan_info.plan_len, 0)); plan_info.planid = pgss_hash_string(plan_info.plan_text, plan_info.plan_len);
plan_ptr = &plan_info; plan_ptr = &plan_info;
MemoryContextSwitchTo(mct); MemoryContextSwitchTo(mct);
} }
@ -1073,7 +1073,6 @@ BufferUsageAccumDiff(BufferUsage *bufusage, BufferUsage *pgBufferUsage, BufferUs
} }
#endif #endif
#if PG_VERSION_NUM < 140000
/* /*
* Given an arbitrarily long query string, produce a hash for the purposes of * Given an arbitrarily long query string, produce a hash for the purposes of
* identifying the query, without normalizing constants. Used when hashing * identifying the query, without normalizing constants. Used when hashing
@ -1085,7 +1084,6 @@ pgss_hash_string(const char *str, int len)
return DatumGetUInt64(hash_any_extended((const unsigned char *) str, return DatumGetUInt64(hash_any_extended((const unsigned char *) str,
len, 0)); len, 0));
} }
#endif
static PgBackendStatus * static PgBackendStatus *
pg_get_backend_status(void) pg_get_backend_status(void)
@ -1388,13 +1386,15 @@ pgss_store(uint64 queryid,
char app_name[APPLICATIONNAME_LEN] = ""; char app_name[APPLICATIONNAME_LEN] = "";
int app_name_len = 0; int app_name_len = 0;
bool reset = false; bool reset = false;
uint64 pgsm_query_id = 0;
uint64 bucketid; uint64 bucketid;
uint64 prev_bucket_id; uint64 prev_bucket_id;
uint64 userid; uint64 userid;
uint64 planid; uint64 planid;
uint64 appid = 0; uint64 appid = 0;
char comments[512] = ""; int norm_query_len = 0;
char *norm_query = NULL; char *norm_query = NULL;
char comments[512] = "";
bool found_app_name = false; bool found_app_name = false;
bool found_client_addr = false; bool found_client_addr = false;
uint client_addr = 0; uint client_addr = 0;
@ -1506,14 +1506,31 @@ pgss_store(uint64 queryid,
* in the interval where we don't hold the lock below. That case is * in the interval where we don't hold the lock below. That case is
* handled by entry_alloc. * handled by entry_alloc.
*/ */
if (jstate && PGSM_NORMALIZED_QUERY) if (jstate && jstate->clocations_count > 0)
{ {
norm_query_len = query_len;
LWLockRelease(pgss->lock); LWLockRelease(pgss->lock);
norm_query = generate_normalized_query(jstate, query, norm_query = generate_normalized_query(jstate, query,
query_location, query_location,
&query_len, &norm_query_len,
GetDatabaseEncoding()); GetDatabaseEncoding());
LWLockAcquire(pgss->lock, LW_SHARED); LWLockAcquire(pgss->lock, LW_SHARED);
pgsm_query_id = pgss_hash_string(norm_query, norm_query_len);
/* Free up norm_query if we don't intend to show normalized version in the view */
if (!PGSM_NORMALIZED_QUERY)
{
if (norm_query)
pfree(norm_query);
norm_query = NULL;
}
}
else
{
pgsm_query_id = pgss_hash_string(query, query_len);
} }
query_entry = hash_search(pgss_query_hash, &queryid, HASH_ENTER_NULL, &query_found); query_entry = hash_search(pgss_query_hash, &queryid, HASH_ENTER_NULL, &query_found);
@ -1542,7 +1559,7 @@ pgss_store(uint64 queryid,
queryid, queryid,
pgss_qbuf, pgss_qbuf,
norm_query ? norm_query : query, norm_query ? norm_query : query,
query_len, norm_query ? norm_query_len : query_len,
&query_entry->query_pos)) &query_entry->query_pos))
{ {
LWLockRelease(pgss->lock); LWLockRelease(pgss->lock);
@ -1575,6 +1592,7 @@ pgss_store(uint64 queryid,
return; return;
} }
entry->query_pos = query_entry->query_pos; entry->query_pos = query_entry->query_pos;
entry->pgsm_query_id = pgsm_query_id;
} }
if (jstate == NULL) if (jstate == NULL)
@ -1739,6 +1757,7 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo,
uint64 userid = entry->key.userid; uint64 userid = entry->key.userid;
int64 ip = entry->key.ip; int64 ip = entry->key.ip;
uint64 planid = entry->key.planid; uint64 planid = entry->key.planid;
uint64 pgsm_query_id = entry->pgsm_query_id;
#if PG_VERSION_NUM < 140000 #if PG_VERSION_NUM < 140000
bool toplevel = 1; bool toplevel = 1;
bool is_allowed_role = is_member_of_role(GetUserId(), DEFAULT_ROLE_READ_ALL_STATS); bool is_allowed_role = is_member_of_role(GetUserId(), DEFAULT_ROLE_READ_ALL_STATS);
@ -1856,6 +1875,8 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo,
values[i++] = CStringGetTextDatum("<insufficient privilege>"); values[i++] = CStringGetTextDatum("<insufficient privilege>");
} }
values[i++] = Int64GetDatumFast(pgsm_query_id);
/* state at column number 8 for V1.0 API*/ /* state at column number 8 for V1.0 API*/
if (api_version <= PGSM_V1_0) if (api_version <= PGSM_V1_0)
values[i++] = Int64GetDatumFast(tmp.state); values[i++] = Int64GetDatumFast(tmp.state);
@ -2190,8 +2211,7 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
{ {
uint64 start_hash; uint64 start_hash;
start_hash = DatumGetUInt64(hash_any_extended(jumble, start_hash = pgss_hash_string((char *)jumble, JUMBLE_SIZE);
JUMBLE_SIZE, 0));
memcpy(jumble, &start_hash, sizeof(start_hash)); memcpy(jumble, &start_hash, sizeof(start_hash));
jumble_len = sizeof(start_hash); jumble_len = sizeof(start_hash);
} }
@ -3334,7 +3354,7 @@ pgsm_emit_log_hook(ErrorData *edata)
uint64 queryid = 0; uint64 queryid = 0;
if (debug_query_string) if (debug_query_string)
queryid = DatumGetUInt64(hash_any_extended((const unsigned char *) debug_query_string, strlen(debug_query_string), 0)); queryid = pgss_hash_string(debug_query_string, strlen(debug_query_string));
pgss_store_error(queryid, pgss_store_error(queryid,
debug_query_string ? debug_query_string : "", debug_query_string ? debug_query_string : "",
@ -3646,7 +3666,7 @@ get_query_id(JumbleState *jstate, Query *query)
/* Compute query ID and mark the Query node with it */ /* Compute query ID and mark the Query node with it */
JumbleQuery(jstate, query); JumbleQuery(jstate, query);
queryid = DatumGetUInt64(hash_any_extended(jstate->jumble, jstate->jumble_len, 0)); queryid = pgss_hash_string((const char *)jstate->jumble, jstate->jumble_len);
return queryid; return queryid;
} }
#endif #endif

View File

@ -319,6 +319,7 @@ typedef struct Counters
typedef struct pgssEntry typedef struct pgssEntry
{ {
pgssHashKey key; /* hash key of entry - MUST BE FIRST */ pgssHashKey key; /* hash key of entry - MUST BE FIRST */
uint64 pgsm_query_id; /* pgsm generate normalized query hash */
Counters counters; /* the statistics for this query */ Counters counters; /* the statistics for this query */
int encoding; /* query text encoding */ int encoding; /* query text encoding */
slock_t mutex; /* protects the counters only */ slock_t mutex; /* protects the counters only */

View File

@ -0,0 +1,91 @@
CREATE EXTENSION pg_stat_monitor;
CREATE DATABASE db1;
CREATE DATABASE db2;
\c db1
CREATE TABLE t1 (a int);
CREATE TABLE t2 (b int);
CREATE FUNCTION add(integer, integer) RETURNS integer
AS 'select $1 + $2;'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;
\c db2
CREATE TABLE t1 (a int);
CREATE TABLE t3 (c int);
CREATE FUNCTION add(integer, integer) RETURNS integer
AS 'select $1 + $2;'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;
\c contrib_regression
SELECT pg_stat_monitor_reset();
pg_stat_monitor_reset
-----------------------
(1 row)
\c db1
SELECT * FROM t1;
a
---
(0 rows)
SELECT *, ADD(1, 2) FROM t1;
a | add
---+-----
(0 rows)
SELECT * FROM t2;
b
---
(0 rows)
\c db2
SELECT * FROM t1;
a
---
(0 rows)
SELECT *, ADD(1, 2) FROM t1;
a | add
---+-----
(0 rows)
SELECT * FROM t3;
c
---
(0 rows)
\c contrib_regression
SELECT datname, pgsm_query_id, query FROM pg_stat_monitor ORDER BY pgsm_query_id, query, datname;
datname | pgsm_query_id | query
--------------------+----------------------+--------------------------------
db2 | -5029137034974447432 | SELECT * FROM t3
contrib_regression | 689150021118383254 | SELECT pg_stat_monitor_reset()
db1 | 1897482803466821995 | SELECT * FROM t2
db1 | 1988437669671417938 | SELECT * FROM t1
db2 | 1988437669671417938 | SELECT * FROM t1
db1 | 2864453209316739369 | select $1 + $2
db2 | 2864453209316739369 | select $1 + $2
db1 | 8140395000078788481 | SELECT *, ADD(1, 2) FROM t1
db2 | 8140395000078788481 | SELECT *, ADD(1, 2) FROM t1
(9 rows)
SELECT pg_stat_monitor_reset();
pg_stat_monitor_reset
-----------------------
(1 row)
\c db1
DROP TABLE t1;
DROP TABLE t2;
DROP FUNCTION ADD;
\c db2
DROP TABLE t1;
DROP TABLE t3;
DROP FUNCTION ADD;
\c contrib_regression
DROP DATABASE db1;
DROP DATABASE db2;
DROP EXTENSION pg_stat_monitor;

View File

@ -0,0 +1,55 @@
CREATE EXTENSION pg_stat_monitor;
CREATE DATABASE db1;
CREATE DATABASE db2;
\c db1
CREATE TABLE t1 (a int);
CREATE TABLE t2 (b int);
CREATE FUNCTION add(integer, integer) RETURNS integer
AS 'select $1 + $2;'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;
\c db2
CREATE TABLE t1 (a int);
CREATE TABLE t3 (c int);
CREATE FUNCTION add(integer, integer) RETURNS integer
AS 'select $1 + $2;'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;
\c contrib_regression
SELECT pg_stat_monitor_reset();
\c db1
SELECT * FROM t1;
SELECT *, ADD(1, 2) FROM t1;
SELECT * FROM t2;
\c db2
SELECT * FROM t1;
SELECT *, ADD(1, 2) FROM t1;
SELECT * FROM t3;
\c contrib_regression
SELECT datname, pgsm_query_id, query FROM pg_stat_monitor ORDER BY pgsm_query_id, query, datname;
SELECT pg_stat_monitor_reset();
\c db1
DROP TABLE t1;
DROP TABLE t2;
DROP FUNCTION ADD;
\c db2
DROP TABLE t1;
DROP TABLE t3;
DROP FUNCTION ADD;
\c contrib_regression
DROP DATABASE db1;
DROP DATABASE db2;
DROP EXTENSION pg_stat_monitor;