From fa55eb333d160bb597c2baa0f7afebfe237e375f Mon Sep 17 00:00:00 2001 From: Diego Fronza Date: Mon, 16 Aug 2021 17:28:36 -0400 Subject: [PATCH 1/4] PG-220: Fix pgsm_overflow_target defaults. The GUC variable pgsm_overflow_target was pointing to the wrong index (12, pgsm_track_planning) in the "GucVariable conf[MAX_SETTINGS]" array. Adjusted it to the right index, 11. --- pg_stat_monitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pg_stat_monitor.h b/pg_stat_monitor.h index 2ddfe01..a1ab4d5 100644 --- a/pg_stat_monitor.h +++ b/pg_stat_monitor.h @@ -411,7 +411,7 @@ void pgss_startup(void); #define PGSM_HISTOGRAM_MAX get_conf(8)->guc_variable #define PGSM_HISTOGRAM_BUCKETS get_conf(9)->guc_variable #define PGSM_QUERY_SHARED_BUFFER get_conf(10)->guc_variable -#define PGSM_OVERFLOW_TARGET get_conf(12)->guc_variable +#define PGSM_OVERFLOW_TARGET get_conf(11)->guc_variable #define PGSM_QUERY_PLAN get_conf(12)->guc_variable #define PGSM_TRACK_PLANNING get_conf(13)->guc_variable From c3d167e4524d37d92fe4dcc73ccdebf8bd9885ae Mon Sep 17 00:00:00 2001 From: Diego Fronza Date: Mon, 16 Aug 2021 18:11:17 -0400 Subject: [PATCH 2/4] PG-220: Check possible query buffer overflow. In SaveQueryText() we check for a possible overflow in the query buffer, but if overflow would happen and pgsm_overflow_target value is 1 (the default), then we dump the query buffer to a temporary file and reset the buffer (start saving queries from the start of the buffer). The problem is that after resetting the buffer we don't check if the current query length would exceed the buffer size of MAX_QUERY_BUFFER_BUCKET, if that is the case the buffer would overflow and probably crash the process or in the worst case become an attack vector for exploitation. This commit fix the problem by adding an additional check for overflow after resetting the query buffer. --- pg_stat_monitor.c | 48 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/pg_stat_monitor.c b/pg_stat_monitor.c index 41c23c8..127d6a3 100644 --- a/pg_stat_monitor.c +++ b/pg_stat_monitor.c @@ -76,7 +76,7 @@ static struct pg_hook_stats_t *pg_hook_stats; static void extract_query_comments(const char *query, char *comments, size_t max_len); static int get_histogram_bucket(double q_time); static bool IsSystemInitialized(void); -static void dump_queries_buffer(int bucket_id, unsigned char *buf, int buf_len); +static bool dump_queries_buffer(int bucket_id, unsigned char *buf, int buf_len); static double time_diff(struct timeval end, struct timeval start); @@ -3092,11 +3092,39 @@ SaveQueryText(uint64 bucketid, return false; case OVERFLOW_TARGET_DISK: { - dump_queries_buffer(bucketid, buf, MAX_QUERY_BUFFER_BUCKET); + bool dump_ok; + + /* + * If the query buffer is empty, there is nothing to dump, this also + * means that the current query length exceeds MAX_QUERY_BUFFER_BUCKET. + */ + if (buf_len <= sizeof (uint64)) + return false; + + dump_ok = dump_queries_buffer(bucketid, buf, buf_len); buf_len = sizeof (uint64); + + /* + * We must check for overflow again, as the query length may + * exceed the size allocated to the buffer (MAX_QUERY_BUFFER_BUCKET). + */ + if (QUERY_BUFFER_OVERFLOW(buf_len, query_len)) + { + /* + * If we successfully dumped the query buffer to disk, then + * reset the buffer, otherwise we could end up dumping the + * same buffer again. + */ + if (dump_ok) + *(uint64 *)buf = 0; + + return false; + } + } break; default: + Assert(false); break; } } @@ -3287,27 +3315,37 @@ IsSystemInitialized(void) return (system_init && IsHashInitialize()); } -static void +static bool dump_queries_buffer(int bucket_id, unsigned char *buf, int buf_len) { - int fd = 0; + int fd = 0; char file_name[1024]; + bool success = true; snprintf(file_name, 1024, "%s.%d", PGSM_TEXT_FILE, bucket_id); fd = OpenTransientFile(file_name, O_RDWR | O_CREAT | O_APPEND | PG_BINARY); - if (fd < 0) + if (fd < 0) + { ereport(LOG, (errcode_for_file_access(), errmsg("could not write file \"%s\": %m", file_name))); + return false; + } if (write(fd, buf, buf_len) != buf_len) + { ereport(LOG, (errcode_for_file_access(), errmsg("could not write file \"%s\": %m", file_name))); + success = false; + } + if (fd > 0) CloseTransientFile(fd); + + return success; } int From e593dbccc33978e3297bb5c91df5a0ac058e8989 Mon Sep 17 00:00:00 2001 From: Diego Fronza Date: Tue, 17 Aug 2021 11:19:22 -0400 Subject: [PATCH 3/4] PG-220: Fix GUC regression test (pgsm_overflow_target). Updated expected output for pgsm_overflow_target to match the default value of 1. --- regression/expected/guc.out | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regression/expected/guc.out b/regression/expected/guc.out index 934c719..0da2986 100644 --- a/regression/expected/guc.out +++ b/regression/expected/guc.out @@ -23,7 +23,7 @@ SELECT * FROM pg_stat_monitor_settings ORDER BY name COLLATE "C"; pg_stat_monitor.pgsm_max | 100 | 100 | Sets the maximum size of shared memory in (MB) used for statement's metadata tracked by pg_stat_monitor. | 1 | 1000 | 1 pg_stat_monitor.pgsm_max_buckets | 10 | 10 | Sets the maximum number of buckets. | 1 | 10 | 1 pg_stat_monitor.pgsm_normalized_query | 1 | 1 | Selects whether save query in normalized format. | 0 | 0 | 0 - pg_stat_monitor.pgsm_overflow_target | 0 | 1 | Sets the overflow target for pg_stat_monitor | 0 | 1 | 1 + pg_stat_monitor.pgsm_overflow_target | 1 | 1 | Sets the overflow target for pg_stat_monitor | 0 | 1 | 1 pg_stat_monitor.pgsm_query_max_len | 1024 | 1024 | Sets the maximum length of query. | 1024 | 2147483647 | 1 pg_stat_monitor.pgsm_query_shared_buffer | 20 | 20 | Sets the maximum size of shared memory in (MB) used for query tracked by pg_stat_monitor. | 1 | 10000 | 1 pg_stat_monitor.pgsm_track_planning | 1 | 1 | Selects whether planning statistics are tracked. | 0 | 0 | 0 From f66b45afd60756c5c6604664984b754fcf0f3e4f Mon Sep 17 00:00:00 2001 From: Diego Fronza Date: Mon, 25 Oct 2021 16:55:30 -0300 Subject: [PATCH 4/4] PG-220: Fix read/write of dumped query buffer to files. This commit fix some issues when the query buffer overflows and pg_stat_monitor attempts to dump its contents to a file. The dump process is now as follows: 1. The dump will always be a full dump of the current query buffer, meaning pg_stat_monitor will dump MAX_QUERY_BUFFER_BUCKET bytes to the dump file. 2. When scanning the dump file, read chunks of size MAX_QUERY_BUFFER_BUCKET, then look for the query ID using that chunk and the query position metadata, this allows pg_stat_monitor to avoid scanning the whole chunk when looking for a query ID. The code in charge to read from/write to the dump file now takes into account that read() and write() may return less bytes than what it was asked for, the code now ensures that we actually read or write the amount of bytes required (MAX_QUERY_BUFFER_BUCKET), also it handles rare but posssible interrupts when doing those operations. --- pg_stat_monitor.c | 125 ++++++++++++++++++++++++++++++++-------------- pg_stat_monitor.h | 2 +- 2 files changed, 88 insertions(+), 39 deletions(-) diff --git a/pg_stat_monitor.c b/pg_stat_monitor.c index 127d6a3..93789c3 100644 --- a/pg_stat_monitor.c +++ b/pg_stat_monitor.c @@ -1701,9 +1701,9 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, if (read_query(buf, queryid, query_txt, entry->query_pos) == 0) { - int len; - len = read_query_buffer(bucketid, queryid, query_txt); - if (len != MAX_QUERY_BUFFER_BUCKET) + int rc; + rc = read_query_buffer(bucketid, queryid, query_txt, entry->query_pos); + if (rc != 1) snprintf(query_txt, 32, "%s", ""); } @@ -1726,11 +1726,11 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, if (tmp.info.parentid != UINT64CONST(0)) { - int len = 0; + int rc = 0; if (read_query(buf, tmp.info.parentid, parent_query_txt, 0) == 0) { - len = read_query_buffer(bucketid, tmp.info.parentid, parent_query_txt); - if (len != MAX_QUERY_BUFFER_BUCKET) + rc = read_query_buffer(bucketid, tmp.info.parentid, parent_query_txt, 0); + if (rc != 1) snprintf(parent_query_txt, 32, "%s", ""); } } @@ -3101,7 +3101,7 @@ SaveQueryText(uint64 bucketid, if (buf_len <= sizeof (uint64)) return false; - dump_ok = dump_queries_buffer(bucketid, buf, buf_len); + dump_ok = dump_queries_buffer(bucketid, buf, MAX_QUERY_BUFFER_BUCKET); buf_len = sizeof (uint64); /* @@ -3321,6 +3321,8 @@ dump_queries_buffer(int bucket_id, unsigned char *buf, int buf_len) int fd = 0; char file_name[1024]; bool success = true; + int off = 0; + int tries = 0; snprintf(file_name, 1024, "%s.%d", PGSM_TEXT_FILE, bucket_id); fd = OpenTransientFile(file_name, O_RDWR | O_CREAT | O_APPEND | PG_BINARY); @@ -3333,14 +3335,24 @@ dump_queries_buffer(int bucket_id, unsigned char *buf, int buf_len) return false; } - if (write(fd, buf, buf_len) != buf_len) - { + /* Loop until write buf_len bytes to the file. */ + do { + ssize_t nwrite = write(fd, buf + off, buf_len - off); + if (nwrite == -1) + { + if (errno == EINTR && tries++ < 3) + continue; + + success = false; + break; + } + off += nwrite; + } while (off < buf_len); + + if (!success) ereport(LOG, - (errcode_for_file_access(), - errmsg("could not write file \"%s\": %m", - file_name))); - success = false; - } + (errcode_for_file_access(), + errmsg("could not write file \"%s\": %m", file_name))); if (fd > 0) CloseTransientFile(fd); @@ -3348,14 +3360,25 @@ dump_queries_buffer(int bucket_id, unsigned char *buf, int buf_len) return success; } +/* + * Try to locate query text in a dumped file for bucket_id. + * + * Returns: + * 1 Query sucessfully read, query_text will contain the query text. + * 0 Query not found. + * -1 I/O Error. + */ int -read_query_buffer(int bucket_id, uint64 queryid, char *query_txt) +read_query_buffer(int bucket_id, uint64 queryid, char *query_txt, size_t pos) { int fd = 0; - int buf_len = 0; char file_name[1024]; unsigned char *buf = NULL; + ssize_t nread = 0; int off = 0; + int tries = 0; + bool done = false; + bool found = false; snprintf(file_name, 1024, "%s.%d", PGSM_TEXT_FILE, bucket_id); fd = OpenTransientFile(file_name, O_RDONLY | PG_BINARY); @@ -3363,40 +3386,66 @@ read_query_buffer(int bucket_id, uint64 queryid, char *query_txt) goto exit; buf = (unsigned char*) palloc(MAX_QUERY_BUFFER_BUCKET); - for(;;) + while (!done) { - if (lseek(fd, off, SEEK_SET) != off) - goto exit; + off = 0; + /* read a chunck of MAX_QUERY_BUFFER_BUCKET size. */ + do { + nread = read(fd, buf + off, MAX_QUERY_BUFFER_BUCKET - off); + if (nread == -1) + { + if (errno == EINTR && tries++ < 3) /* read() was interrupted, attempt to read again (max attempts=3) */ + continue; - buf_len = read(fd, buf, MAX_QUERY_BUFFER_BUCKET); - if (buf_len != MAX_QUERY_BUFFER_BUCKET) - { - if (errno != ENOENT) goto exit; - - if (buf_len == 0) + } + else if (nread == 0) /* EOF */ + { + done = true; break; + } + + off += nread; + } while (off < MAX_QUERY_BUFFER_BUCKET); + + if (off == MAX_QUERY_BUFFER_BUCKET) + { + /* we have a chunck, scan it looking for queryid. */ + if (read_query(buf, queryid, query_txt, pos) != 0) + { + + found = true; + /* query was found, don't need to read another chunck. */ + break; + } } - off += buf_len; - if (read_query(buf, queryid, query_txt, 0)) + else + /* + * Either done=true or file has a size not multiple of MAX_QUERY_BUFFER_BUCKET. + * It is safe to assume that the file was truncated or corrupted. + */ break; } - if (fd > 0) - CloseTransientFile(fd); - if (buf) - pfree(buf); - return buf_len; exit: - ereport(LOG, - (errcode_for_file_access(), - errmsg("could not read file \"%s\": %m", - file_name))); - if (fd > 0) + if (fd < 0 || nread == -1) + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + file_name))); + + if (fd >= 0) CloseTransientFile(fd); + if (buf) pfree(buf); - return buf_len; + + if (found) + return 1; + else if (fd == -1 || nread == -1) + return -1; /* I/O error. */ + else + return 0; /* Not found. */ } static double diff --git a/pg_stat_monitor.h b/pg_stat_monitor.h index a1ab4d5..be32103 100644 --- a/pg_stat_monitor.h +++ b/pg_stat_monitor.h @@ -390,7 +390,7 @@ void hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *que pgssEntry* hash_entry_alloc(pgssSharedState *pgss, pgssHashKey *key, int encoding); Size hash_memsize(void); -int read_query_buffer(int bucket_id, uint64 queryid, char *query_txt); +int read_query_buffer(int bucket_id, uint64 queryid, char *query_txt, size_t pos); uint64 read_query(unsigned char *buf, uint64 queryid, char * query, size_t pos); pgssQueryEntry* hash_find_query_entry(uint64 bucket_id, uint64 queryid, uint64 dbid, uint64 userid, uint64 ip, uint64 appid); pgssQueryEntry* hash_create_query_entry(uint64 bucket_id, uint64 queryid, uint64 dbid, uint64 userid, uint64 ip, uint64 appid);