diff --git a/Makefile b/Makefile index de2682e..a4289d1 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS)) TAP_TESTS = 1 REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_monitor/pg_stat_monitor.conf --inputdir=regression -REGRESS = basic version guc pgsm_query_id functions counters relations database error_insert application_name application_name_unique top_query different_parent_queries cmd_type error rows tags user level_tracking +REGRESS = basic version guc pgsm_query_id functions counters relations database error_insert application_name application_name_unique top_query different_parent_queries cmd_type error rows tags user level_tracking denorm_prepared_statements # Disabled because these tests require "shared_preload_libraries=pg_stat_statements", # which typical installcheck users do not have (e.g. buildfarm clients). @@ -40,4 +40,4 @@ update-typedefs: indent: pgindent --typedefs=typedefs-full.list . -.PHONY: update-typedefs indent \ No newline at end of file +.PHONY: update-typedefs indent diff --git a/pg_stat_monitor.c b/pg_stat_monitor.c index 310ea8c..a3822d1 100644 --- a/pg_stat_monitor.c +++ b/pg_stat_monitor.c @@ -20,9 +20,11 @@ #include "nodes/pg_list.h" #include "utils/guc.h" #include +#include #include "pgstat.h" #include "commands/dbcommands.h" #include "commands/explain.h" +#include "lib/stringinfo.h" #include "pg_stat_monitor.h" /* @@ -206,6 +208,10 @@ static void pgsm_add_to_list(pgsmEntry *entry, char *query_text, int query_len); static pgsmEntry *pgsm_get_entry_for_query(uint64 queryid, PlanInfo *plan_info, const char *query_text, int query_len, bool create); static uint64 get_pgsm_query_id_hash(const char *norm_query, int len); +static void get_param_value(const ParamListInfo plist, int idx, StringInfoData *buffer); + +static StringInfoData get_denormalized_query(const ParamListInfo paramlist, const char *query_text); + static void pgsm_cleanup_callback(void *arg); static void pgsm_store_error(const char *query, ErrorData *edata); @@ -232,7 +238,14 @@ static void pgsm_update_entry(pgsmEntry *entry, const struct JitInstrumentation *jitusage, bool reset, pgsmStoreKind kind); -static void pgsm_store(pgsmEntry *entry); +static void pgsm_store_ex(pgsmEntry *entry, ParamListInfo params); + +/* Stores query entry in normalized form */ +static inline void +pgsm_store(pgsmEntry *entry) +{ + pgsm_store_ex(entry, NULL); +} static void pg_stat_monitor_internal(FunctionCallInfo fcinfo, pgsmVersion api_version, @@ -692,12 +705,12 @@ pgsm_ExecutorEnd(QueryDesc *queryDesc) PlanInfo plan_info; PlanInfo *plan_ptr = NULL; pgsmEntry *entry = NULL; + MemoryContext oldctx; /* Extract the plan information in case of SELECT statement */ if (queryDesc->operation == CMD_SELECT && pgsm_enable_query_plan) { int rv; - MemoryContext oldctx; /* * Making sure it is a per query context so that there's no memory @@ -775,7 +788,7 @@ pgsm_ExecutorEnd(QueryDesc *queryDesc) false, /* reset */ PGSM_EXEC); /* kind */ - pgsm_store(entry); + pgsm_store_ex(entry, queryDesc->params); } if (prev_ExecutorEnd) @@ -1861,7 +1874,6 @@ pgsm_create_hash_entry(uint64 bucket_id, uint64 queryid, PlanInfo *plan_info) return entry; } - /* * Store some statistics for a statement. * @@ -1871,9 +1883,15 @@ pgsm_create_hash_entry(uint64 bucket_id, uint64 queryid, PlanInfo *plan_info) * If jstate is not NULL then we're trying to create an entry for which * we have no statistics as yet; we just want to record the normalized * query string. total_time, rows, bufusage are ignored in this case. + * + * If params argument is not null and pgsm_normalized_query is off then we + * denormalize the query using it's actual arguments found in params. + * The denormalization is done during the first time the query is + * inserted or if the time to execute the query exceeds the average + * time computed for the same query. */ -static void -pgsm_store(pgsmEntry *entry) +void +pgsm_store_ex(pgsmEntry *entry, ParamListInfo params) { pgsmEntry *shared_hash_entry; pgsmSharedState *pgsm; @@ -1888,6 +1906,7 @@ pgsm_store(pgsmEntry *entry) JitInstrumentation jitusage; char comments[COMMENTS_LEN] = {0}; int comments_len; + StringInfoData query_info; /* Safety check... */ if (!IsSystemInitialized()) @@ -1976,6 +1995,14 @@ pgsm_store(pgsmEntry *entry) dsa_area *query_dsa_area; char *query_buff; + /* Denormalize the query if normalization is off */ + if (!pgsm_normalized_query && params != NULL) + { + query_info = get_denormalized_query(params, query); + query = query_info.data; + query_len = query_info.len; + } + /* New query, truncate length if necessary. */ if (query_len > pgsm_query_max_len) query_len = pgsm_query_max_len; @@ -2065,6 +2092,50 @@ pgsm_store(pgsmEntry *entry) snprintf(shared_hash_entry->username, sizeof(shared_hash_entry->username), "%s", entry->username); } + /* + * Entry already exists, if query normalization is disabled and the query + * execution time exceeds the mean time for this query, then we + * denormalize the query so users can inspect which arguments caused the + * query to take more time to execute + */ + else if ( + !pgsm_normalized_query && + params != NULL && + entry->counters.time.total_time > shared_hash_entry->counters.time.mean_time + ) + { + dsa_pointer dsa_query_pointer; + dsa_area *query_dsa_area; + char *query_buff; + + query_info = get_denormalized_query(params, query); + query = query_info.data; + query_len = query_info.len; + + /* truncate length if necessary. */ + if (query_len > pgsm_query_max_len) + query_len = pgsm_query_max_len; + + /* Save the query text in raw dsa area */ + query_dsa_area = get_dsa_area_for_query_text(); + dsa_query_pointer = dsa_allocate_extended(query_dsa_area, query_len + 1, DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO); + if (DsaPointerIsValid(dsa_query_pointer)) + { + /* + * Get the memory address from DSA pointer and copy the query text + * to it. + */ + query_buff = dsa_get_address(query_dsa_area, dsa_query_pointer); + memcpy(query_buff, query, query_len); + + /* release previous query from shared memory */ + if (DsaPointerIsValid(shared_hash_entry->query_text.query_pos)) + dsa_free(query_dsa_area, shared_hash_entry->query_text.query_pos); + + shared_hash_entry->query_text.query_pos = dsa_query_pointer; + } + } + pgsm_update_entry(shared_hash_entry, /* entry */ query, /* query */ comments, /* comments */ @@ -4018,3 +4089,89 @@ get_query_id(JumbleState *jstate, Query *query) return queryid; } #endif + +/* + * extract parameter value (Datum) from plist->params[idx], cast it to string then + * append the resulting string to the buffer. + */ +void +get_param_value(const ParamListInfo plist, int idx, StringInfoData *buffer) +{ + Oid typoutput; + bool typisvarlena; + char *pstring; + ParamExternData *param; + + Assert(idx < plist->numParams); + + param = &plist->params[idx]; + + if (param->isnull || !OidIsValid(param->ptype)) + { + appendStringInfoString(buffer, "NULL"); + return; + } + + getTypeOutputInfo(param->ptype, &typoutput, &typisvarlena); + pstring = OidOutputFunctionCall(typoutput, param->value); + appendStringInfo(buffer, "%s", pstring); +} + +/* denormalize the query, replace placeholders with actual values */ +StringInfoData +get_denormalized_query(const ParamListInfo paramlist, const char *query_text) +{ + int current_param; + const char *cursor_ori; + const char *cursor_curr; + StringInfoData result_buf; + ptrdiff_t len; + + current_param = 0; + cursor_ori = query_text; + cursor_curr = cursor_ori; + + initStringInfo(&result_buf); + + do + { + /* advance cursor until detecting a placeholder '$' start. */ + while (*cursor_ori && *cursor_ori != '$') + ++cursor_ori; + + /* calculate length of query text before placeholder. */ + len = cursor_ori - cursor_curr; + + /* check if end of string is reached */ + if (!*cursor_ori) + { + /* there may have remaining query data to append */ + if (len > 0) + appendBinaryStringInfo(&result_buf, cursor_curr, len); + + break; + } + + /* append query text before the '$' sign found. */ + if (len > 0) + appendBinaryStringInfo(&result_buf, cursor_curr, len); + + /* skip '$' */ + ++cursor_ori; + + /* skip the placeholder */ + while (*cursor_ori && *cursor_ori >= '0' && *cursor_ori <= '9') + cursor_ori++; + + /* advance current cursor */ + cursor_curr = cursor_ori; + + /* replace the placeholder with actual value */ + get_param_value(paramlist, current_param, &result_buf); + + ++current_param; + } while (*cursor_ori != '\0'); + + + return result_buf; +} diff --git a/regression/expected/denorm_prepared_statements.out b/regression/expected/denorm_prepared_statements.out new file mode 100644 index 0000000..c0b3811 --- /dev/null +++ b/regression/expected/denorm_prepared_statements.out @@ -0,0 +1,63 @@ +CREATE EXTENSION pg_stat_monitor; +Set pg_stat_monitor.pgsm_normalized_query='off'; +CREATE TABLE t1 (a TEXT, b TEXT, c TEXT); +SELECT pg_stat_monitor_reset(); + pg_stat_monitor_reset +----------------------- + +(1 row) + +-- First test, execute cheap query then heavy query. +-- Ensure denormalized heavy query replaces the cheaper one. +PREPARE prepstmt(TEXT, TEXT, TEXT) AS INSERT INTO t1(a, b, c) VALUES($1, $2, $3); +EXECUTE prepstmt('A', 'B', 'C'); +SELECT SUBSTRING(query, 0, 128), calls FROM pg_stat_monitor ORDER BY query COLLATE "C"; + substring | calls +--------------------------------------------------------------------------------+------- + PREPARE prepstmt(TEXT, TEXT, TEXT) AS INSERT INTO t1(a, b, c) VALUES(A, B, C); | 1 + SELECT pg_stat_monitor_reset() | 1 +(2 rows) + +EXECUTE prepstmt(REPEAT('XYZ', 8192), md5(random()::text), REPEAT('RANDOM', 4096)); +SELECT SUBSTRING(query, 0, 128), calls FROM pg_stat_monitor ORDER BY query COLLATE "C"; + substring | calls +---------------------------------------------------------------------------------------------------------------------------------+------- + PREPARE prepstmt(TEXT, TEXT, TEXT) AS INSERT INTO t1(a, b, c) VALUES(XYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZX | 2 + SELECT SUBSTRING(query, 0, 128), calls FROM pg_stat_monitor ORDER BY query COLLATE "C" | 1 + SELECT pg_stat_monitor_reset() | 1 +(3 rows) + +TRUNCATE TABLE t1; +SELECT pg_stat_monitor_reset(); + pg_stat_monitor_reset +----------------------- + +(1 row) + +-- Second test, execute heavy query then cheap query. +-- Ensure denormalized heavy query is not replaced by the cheaper one. +EXECUTE prepstmt(REPEAT('XYZ', 8192), md5(random()::text), REPEAT('RANDOM', 4096)); +SELECT SUBSTRING(query, 0, 128), calls FROM pg_stat_monitor ORDER BY query COLLATE "C"; + substring | calls +---------------------------------------------------------------------------------------------------------------------------------+------- + PREPARE prepstmt(TEXT, TEXT, TEXT) AS INSERT INTO t1(a, b, c) VALUES(XYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZX | 1 + SELECT pg_stat_monitor_reset() | 1 +(2 rows) + +EXECUTE prepstmt('A', 'B', 'C'); +SELECT SUBSTRING(query, 0, 128), calls FROM pg_stat_monitor ORDER BY query COLLATE "C"; + substring | calls +---------------------------------------------------------------------------------------------------------------------------------+------- + PREPARE prepstmt(TEXT, TEXT, TEXT) AS INSERT INTO t1(a, b, c) VALUES(XYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZXYZX | 2 + SELECT SUBSTRING(query, 0, 128), calls FROM pg_stat_monitor ORDER BY query COLLATE "C" | 1 + SELECT pg_stat_monitor_reset() | 1 +(3 rows) + +DROP TABLE t1; +SELECT pg_stat_monitor_reset(); + pg_stat_monitor_reset +----------------------- + +(1 row) + +DROP EXTENSION pg_stat_monitor; diff --git a/regression/expected/pgsm_query_id.out b/regression/expected/pgsm_query_id.out index 9dc2938..db920d2 100644 --- a/regression/expected/pgsm_query_id.out +++ b/regression/expected/pgsm_query_id.out @@ -30,7 +30,7 @@ SELECT * FROM t1; --- (0 rows) -SELECT *, ADD(1, 2) FROM t1; +SELECT *, ADD(1234, 1000) FROM t1; a | add ---+----- (0 rows) @@ -62,7 +62,7 @@ SELECT * FROM t1; --- (0 rows) -SELECT *, ADD(1, 2) FROM t1; +SELECT *, ADD(1234, 1000) FROM t1; a | add ---+----- (0 rows) @@ -87,12 +87,12 @@ SELECT datname, pgsm_query_id, query, calls FROM pg_stat_monitor ORDER BY pgsm_q db1 | 1897482803466821995 | SELECT * FROM t2 | 3 db1 | 1988437669671417938 | SELECT * FROM t1 | 1 db2 | 1988437669671417938 | SELECT * FROM t1 | 1 - db1 | 2864453209316739369 | select $1 + $2 | 1 - db2 | 2864453209316739369 | select $1 + $2 | 1 + db1 | 2864453209316739369 | select 1234 + 1000 | 1 + db2 | 2864453209316739369 | select 1234 + 1000 | 1 db2 | 6220142855706866455 | set pg_stat_monitor.pgsm_enable_pgsm_query_id = on | 1 db2 | 6633979598391393345 | SELECT * FROM t3 where c = 20 | 1 - db1 | 8140395000078788481 | SELECT *, ADD(1, 2) FROM t1 | 1 - db2 | 8140395000078788481 | SELECT *, ADD(1, 2) FROM t1 | 1 + db1 | 8140395000078788481 | SELECT *, ADD(1234, 1000) FROM t1 | 1 + db2 | 8140395000078788481 | SELECT *, ADD(1234, 1000) FROM t1 | 1 db2 | | SELECT * FROM t3 | 1 db2 | | set pg_stat_monitor.pgsm_enable_pgsm_query_id = off | 1 (12 rows) diff --git a/regression/expected/pgsm_query_id_1.out b/regression/expected/pgsm_query_id_1.out index ecd519b..73759d9 100644 --- a/regression/expected/pgsm_query_id_1.out +++ b/regression/expected/pgsm_query_id_1.out @@ -30,7 +30,7 @@ SELECT * FROM t1; --- (0 rows) -SELECT *, ADD(1, 2) FROM t1; +SELECT *, ADD(1234, 1000) FROM t1; a | add ---+----- (0 rows) @@ -62,7 +62,7 @@ SELECT * FROM t1; --- (0 rows) -SELECT *, ADD(1, 2) FROM t1; +SELECT *, ADD(1234, 1000) FROM t1; a | add ---+----- (0 rows) @@ -89,8 +89,8 @@ SELECT datname, pgsm_query_id, query, calls FROM pg_stat_monitor ORDER BY pgsm_q db2 | 1988437669671417938 | SELECT * FROM t1 | 1 db2 | 6220142855706866455 | set pg_stat_monitor.pgsm_enable_pgsm_query_id = on | 1 db2 | 6633979598391393345 | SELECT * FROM t3 where c = 20 | 1 - db1 | 8140395000078788481 | SELECT *, ADD(1, 2) FROM t1 | 1 - db2 | 8140395000078788481 | SELECT *, ADD(1, 2) FROM t1 | 1 + db1 | 8140395000078788481 | SELECT *, ADD(1234, 1000) FROM t1 | 1 + db2 | 8140395000078788481 | SELECT *, ADD(1234, 1000) FROM t1 | 1 db2 | | SELECT * FROM t3 | 1 db2 | | set pg_stat_monitor.pgsm_enable_pgsm_query_id = off | 1 (10 rows) diff --git a/regression/expected/top_query.out b/regression/expected/top_query.out index 38f0b37..2f3c3ca 100644 --- a/regression/expected/top_query.out +++ b/regression/expected/top_query.out @@ -26,7 +26,7 @@ SELECT add2(1,2); SELECT query, top_query FROM pg_stat_monitor ORDER BY query COLLATE "C"; query | top_query --------------------------------------------------------------+------------------- - (select $1 + $2) | SELECT add2(1,2); + (select NULL + NULL) | SELECT add2(1,2); CREATE OR REPLACE FUNCTION add(int, int) RETURNS INTEGER AS +| $$ +| BEGIN +| diff --git a/regression/expected/top_query_1.out b/regression/expected/top_query_1.out index de7df82..bcf5203 100644 --- a/regression/expected/top_query_1.out +++ b/regression/expected/top_query_1.out @@ -37,7 +37,7 @@ SELECT query, top_query FROM pg_stat_monitor ORDER BY query COLLATE "C"; return add($1,$2); +| END; +| $$ language plpgsql | - SELECT (select $1 + $2) | SELECT add2(1,2); + SELECT (select NULL + NULL) | SELECT add2(1,2); SELECT add2(1,2) | SELECT pg_stat_monitor_reset() | (5 rows) diff --git a/regression/sql/denorm_prepared_statements.sql b/regression/sql/denorm_prepared_statements.sql new file mode 100644 index 0000000..440f9a2 --- /dev/null +++ b/regression/sql/denorm_prepared_statements.sql @@ -0,0 +1,33 @@ +CREATE EXTENSION pg_stat_monitor; +Set pg_stat_monitor.pgsm_normalized_query='off'; + +CREATE TABLE t1 (a TEXT, b TEXT, c TEXT); + +SELECT pg_stat_monitor_reset(); + +-- First test, execute cheap query then heavy query. +-- Ensure denormalized heavy query replaces the cheaper one. +PREPARE prepstmt(TEXT, TEXT, TEXT) AS INSERT INTO t1(a, b, c) VALUES($1, $2, $3); + +EXECUTE prepstmt('A', 'B', 'C'); +SELECT SUBSTRING(query, 0, 128), calls FROM pg_stat_monitor ORDER BY query COLLATE "C"; + +EXECUTE prepstmt(REPEAT('XYZ', 8192), md5(random()::text), REPEAT('RANDOM', 4096)); +SELECT SUBSTRING(query, 0, 128), calls FROM pg_stat_monitor ORDER BY query COLLATE "C"; + +TRUNCATE TABLE t1; +SELECT pg_stat_monitor_reset(); + +-- Second test, execute heavy query then cheap query. +-- Ensure denormalized heavy query is not replaced by the cheaper one. + +EXECUTE prepstmt(REPEAT('XYZ', 8192), md5(random()::text), REPEAT('RANDOM', 4096)); +SELECT SUBSTRING(query, 0, 128), calls FROM pg_stat_monitor ORDER BY query COLLATE "C"; + +EXECUTE prepstmt('A', 'B', 'C'); +SELECT SUBSTRING(query, 0, 128), calls FROM pg_stat_monitor ORDER BY query COLLATE "C"; + +DROP TABLE t1; + +SELECT pg_stat_monitor_reset(); +DROP EXTENSION pg_stat_monitor; diff --git a/regression/sql/pgsm_query_id.sql b/regression/sql/pgsm_query_id.sql index f0d47c2..ba2288a 100644 --- a/regression/sql/pgsm_query_id.sql +++ b/regression/sql/pgsm_query_id.sql @@ -27,7 +27,7 @@ CREATE FUNCTION add(integer, integer) RETURNS integer SELECT pg_stat_monitor_reset(); \c db1 SELECT * FROM t1; -SELECT *, ADD(1, 2) FROM t1; +SELECT *, ADD(1234, 1000) FROM t1; SELECT * FROM t2; -- Check that spaces and comments do not generate a different pgsm_query_id SELECT * FROM t2 --WHATEVER; @@ -40,7 +40,7 @@ More comments to check for spaces. \c db2 SELECT * FROM t1; -SELECT *, ADD(1, 2) FROM t1; +SELECT *, ADD(1234, 1000) FROM t1; set pg_stat_monitor.pgsm_enable_pgsm_query_id = off; SELECT * FROM t3;