PG-545: pg_stat_monitor: Same query text should generate same queryid

Regardless of the database or the user, the same query will yield the
same query ID. As part of this, a new column, 'pgsm_query_id', is added.

* pgsm_query_id:
pgsm_query_id has the same data type of int8 as the queryid column. If
the incoming SQL command includes any constants, it internally normalizes
the query to remove those constant values with placeholders. Otherwise,
it uses the query directly to generate the query hash.

Since we no longer depend on the server's parse tree mechanism, we can
generate the same hash for the same query text for all server versions.

Also, it is important to note that the hash being calculated is a database,
schema and user independent. So same query text in different databases
will generate the same hash.

This column is not part of the key; rather, for observability purposes only.

* Regression
SQL test case pgsm_query_id.sql is added to the SQL regression.
This commit is contained in:
Hamid Akhtar
2022-12-28 14:14:26 +05:00
parent f8866272a2
commit b20eda7066
7 changed files with 188 additions and 15 deletions

View File

@@ -39,8 +39,8 @@ PG_MODULE_MAGIC;
/* Number of output arguments (columns) for various API versions */
#define PG_STAT_MONITOR_COLS_V1_0 52
#define PG_STAT_MONITOR_COLS_V2_0 61
#define PG_STAT_MONITOR_COLS 61 /* maximum of above */
#define PG_STAT_MONITOR_COLS_V2_0 62
#define PG_STAT_MONITOR_COLS 62 /* maximum of above */
#define PGSM_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "pg_stat_monitor_query"
@@ -160,7 +160,6 @@ DECLARE_HOOK(void pgss_ProcessUtility, PlannedStmt *pstmt, const char *queryStri
ParamListInfo params, QueryEnvironment *queryEnv,
DestReceiver *dest,
QueryCompletion *qc);
static uint64 pgss_hash_string(const char *str, int len);
#else
static void BufferUsageAccumDiff(BufferUsage *bufusage, BufferUsage *pgBufferUsage, BufferUsage *bufusage_start);
@@ -170,6 +169,7 @@ DECLARE_HOOK(void pgss_ProcessUtility, PlannedStmt *pstmt, const char *queryStri
DestReceiver *dest,
char *completionTag);
#endif
static uint64 pgss_hash_string(const char *str, int len);
char *unpack_sql_state(int sql_state);
#define PGSM_HANDLED_UTILITY(n) (!IsA(n, ExecuteStmt) && \
@@ -635,7 +635,7 @@ pgss_ExecutorEnd(QueryDesc *queryDesc)
MemoryContext mct = MemoryContextSwitchTo(TopMemoryContext);
plan_info.plan_len = snprintf(plan_info.plan_text, PLAN_TEXT_LEN, "%s", pgss_explain(queryDesc));
plan_info.planid = DatumGetUInt64(hash_any_extended((const unsigned char *) plan_info.plan_text, plan_info.plan_len, 0));
plan_info.planid = pgss_hash_string(plan_info.plan_text, plan_info.plan_len);
plan_ptr = &plan_info;
MemoryContextSwitchTo(mct);
}
@@ -1073,7 +1073,6 @@ BufferUsageAccumDiff(BufferUsage *bufusage, BufferUsage *pgBufferUsage, BufferUs
}
#endif
#if PG_VERSION_NUM < 140000
/*
* Given an arbitrarily long query string, produce a hash for the purposes of
* identifying the query, without normalizing constants. Used when hashing
@@ -1085,7 +1084,6 @@ pgss_hash_string(const char *str, int len)
return DatumGetUInt64(hash_any_extended((const unsigned char *) str,
len, 0));
}
#endif
static PgBackendStatus *
pg_get_backend_status(void)
@@ -1388,13 +1386,15 @@ pgss_store(uint64 queryid,
char app_name[APPLICATIONNAME_LEN] = "";
int app_name_len = 0;
bool reset = false;
uint64 pgsm_query_id = 0;
uint64 bucketid;
uint64 prev_bucket_id;
uint64 userid;
uint64 planid;
uint64 appid = 0;
char comments[512] = "";
int norm_query_len = 0;
char *norm_query = NULL;
char comments[512] = "";
bool found_app_name = false;
bool found_client_addr = false;
uint client_addr = 0;
@@ -1506,14 +1506,31 @@ pgss_store(uint64 queryid,
* in the interval where we don't hold the lock below. That case is
* handled by entry_alloc.
*/
if (jstate && PGSM_NORMALIZED_QUERY)
if (jstate && jstate->clocations_count > 0)
{
norm_query_len = query_len;
LWLockRelease(pgss->lock);
norm_query = generate_normalized_query(jstate, query,
query_location,
&query_len,
&norm_query_len,
GetDatabaseEncoding());
LWLockAcquire(pgss->lock, LW_SHARED);
pgsm_query_id = pgss_hash_string(norm_query, norm_query_len);
/* Free up norm_query if we don't intend to show normalized version in the view */
if (!PGSM_NORMALIZED_QUERY)
{
if (norm_query)
pfree(norm_query);
norm_query = NULL;
}
}
else
{
pgsm_query_id = pgss_hash_string(query, query_len);
}
query_entry = hash_search(pgss_query_hash, &queryid, HASH_ENTER_NULL, &query_found);
@@ -1542,7 +1559,7 @@ pgss_store(uint64 queryid,
queryid,
pgss_qbuf,
norm_query ? norm_query : query,
query_len,
norm_query ? norm_query_len : query_len,
&query_entry->query_pos))
{
LWLockRelease(pgss->lock);
@@ -1575,6 +1592,7 @@ pgss_store(uint64 queryid,
return;
}
entry->query_pos = query_entry->query_pos;
entry->pgsm_query_id = pgsm_query_id;
}
if (jstate == NULL)
@@ -1739,6 +1757,7 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo,
uint64 userid = entry->key.userid;
int64 ip = entry->key.ip;
uint64 planid = entry->key.planid;
uint64 pgsm_query_id = entry->pgsm_query_id;
#if PG_VERSION_NUM < 140000
bool toplevel = 1;
bool is_allowed_role = is_member_of_role(GetUserId(), DEFAULT_ROLE_READ_ALL_STATS);
@@ -1856,6 +1875,8 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo,
values[i++] = CStringGetTextDatum("<insufficient privilege>");
}
values[i++] = Int64GetDatumFast(pgsm_query_id);
/* state at column number 8 for V1.0 API*/
if (api_version <= PGSM_V1_0)
values[i++] = Int64GetDatumFast(tmp.state);
@@ -2190,8 +2211,7 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
{
uint64 start_hash;
start_hash = DatumGetUInt64(hash_any_extended(jumble,
JUMBLE_SIZE, 0));
start_hash = pgss_hash_string((char *)jumble, JUMBLE_SIZE);
memcpy(jumble, &start_hash, sizeof(start_hash));
jumble_len = sizeof(start_hash);
}
@@ -3334,7 +3354,7 @@ pgsm_emit_log_hook(ErrorData *edata)
uint64 queryid = 0;
if (debug_query_string)
queryid = DatumGetUInt64(hash_any_extended((const unsigned char *) debug_query_string, strlen(debug_query_string), 0));
queryid = pgss_hash_string(debug_query_string, strlen(debug_query_string));
pgss_store_error(queryid,
debug_query_string ? debug_query_string : "",
@@ -3646,7 +3666,7 @@ get_query_id(JumbleState *jstate, Query *query)
/* Compute query ID and mark the Query node with it */
JumbleQuery(jstate, query);
queryid = DatumGetUInt64(hash_any_extended(jstate->jumble, jstate->jumble_len, 0));
queryid = pgss_hash_string((const char *)jstate->jumble, jstate->jumble_len);
return queryid;
}
#endif