From 2f2c40ed22182e1ddf4ad0d7b4f341db9f9fa0eb Mon Sep 17 00:00:00 2001 From: Muhammad Usama Date: Mon, 21 Nov 2022 18:27:21 +0500 Subject: [PATCH 01/15] PG-555 :Infrastructure to allow multiple SQL APIs (#320) Creating the infrastructure that'll allow using newer versions of the loadable module with old SQL declarations. Also updating the build version to 2.0.0-dev --- pg_stat_monitor--1.0--2.0.sql | 2 +- pg_stat_monitor--2.0.sql | 2 +- pg_stat_monitor.c | 35 +++++++++++++++++++++++++++++++++-- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/pg_stat_monitor--1.0--2.0.sql b/pg_stat_monitor--1.0--2.0.sql index c3302a7..21a1d41 100644 --- a/pg_stat_monitor--1.0--2.0.sql +++ b/pg_stat_monitor--1.0--2.0.sql @@ -72,7 +72,7 @@ CREATE FUNCTION pg_stat_monitor_internal( OUT toplevel BOOLEAN ) RETURNS SETOF record -AS 'MODULE_PATHNAME', 'pg_stat_monitor' +AS 'MODULE_PATHNAME', 'pg_stat_monitor_2_0' LANGUAGE C STRICT VOLATILE PARALLEL SAFE; -- Register a view on the function for ease of use. diff --git a/pg_stat_monitor--2.0.sql b/pg_stat_monitor--2.0.sql index bc0a0f9..2d6df69 100644 --- a/pg_stat_monitor--2.0.sql +++ b/pg_stat_monitor--2.0.sql @@ -168,7 +168,7 @@ CREATE FUNCTION pg_stat_monitor_internal( OUT toplevel BOOLEAN ) RETURNS SETOF record -AS 'MODULE_PATHNAME', 'pg_stat_monitor' +AS 'MODULE_PATHNAME', 'pg_stat_monitor_2_0' LANGUAGE C STRICT VOLATILE PARALLEL SAFE; -- Register a view on the function for ease of use. 
diff --git a/pg_stat_monitor.c b/pg_stat_monitor.c index 41bcb4f..6fddf45 100644 --- a/pg_stat_monitor.c +++ b/pg_stat_monitor.c @@ -23,9 +23,19 @@ #include "commands/explain.h" #include "pg_stat_monitor.h" + /* + * Extension version number, for supporting older extension versions' objects + */ + typedef enum pgsmVersion + { + PGSM_V1_0 = 0, + PGSM_V2_0 + } pgsmVersion; + + PG_MODULE_MAGIC; -#define BUILD_VERSION "1.1.1" +#define BUILD_VERSION "2.0.0-dev" #define PG_STAT_STATEMENTS_COLS 52 /* maximum of above */ #define PGSM_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "pg_stat_monitor_query" @@ -107,6 +117,8 @@ static ExecutorCheckPerms_hook_type prev_ExecutorCheckPerms_hook = NULL; PG_FUNCTION_INFO_V1(pg_stat_monitor_version); PG_FUNCTION_INFO_V1(pg_stat_monitor_reset); +PG_FUNCTION_INFO_V1(pg_stat_monitor_1_0); +PG_FUNCTION_INFO_V1(pg_stat_monitor_2_0); PG_FUNCTION_INFO_V1(pg_stat_monitor); PG_FUNCTION_INFO_V1(pg_stat_monitor_settings); PG_FUNCTION_INFO_V1(get_histogram_timings); @@ -178,6 +190,7 @@ static void pgss_store(uint64 queryid, pgssStoreKind kind); static void pg_stat_monitor_internal(FunctionCallInfo fcinfo, + pgsmVersion api_version, bool showtext); #if PG_VERSION_NUM < 140000 @@ -1573,10 +1586,27 @@ pg_stat_monitor_reset(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +Datum +pg_stat_monitor_1_0(PG_FUNCTION_ARGS) +{ + pg_stat_monitor_internal(fcinfo, PGSM_V1_0, true); + return (Datum) 0; +} + +Datum +pg_stat_monitor_2_0(PG_FUNCTION_ARGS) +{ + pg_stat_monitor_internal(fcinfo, PGSM_V2_0, true); + return (Datum) 0; +} + +/* + * Legacy entry point for pg_stat_monitor() API versions 1.0 + */ Datum pg_stat_monitor(PG_FUNCTION_ARGS) { - pg_stat_monitor_internal(fcinfo, true); + pg_stat_monitor_internal(fcinfo, PGSM_V1_0, true); return (Datum) 0; } @@ -1603,6 +1633,7 @@ IsBucketValid(uint64 bucketid) /* Common code for all versions of pg_stat_statements() */ static void pg_stat_monitor_internal(FunctionCallInfo fcinfo, + pgsmVersion api_version, bool showtext) { 
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; From fe83f56ab7634c00019ab5117da5412fc258abf3 Mon Sep 17 00:00:00 2001 From: Naeem Akhter <40981522+Naeem-Akhter@users.noreply.github.com> Date: Wed, 23 Nov 2022 02:20:43 +0500 Subject: [PATCH 02/15] PG-554: Remove redundant files and fix regression. (#319) PG-554: Remove reduntant files and fix regression. Removed old files with same name and add these files to fix sql regression on PG 14 & 15. 1- regression/expected/error_1.out 2- regression/expected/error_insert_1.out 3- regression/expected/top_query_1.out --- regression/expected/error_1.out | 6 +++++- regression/expected/error_insert_1.out | 3 ++- regression/expected/top_query_1.out | 23 +++++++++++++++++------ 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/regression/expected/error_1.out b/regression/expected/error_1.out index 389f7b6..3d74fd0 100644 --- a/regression/expected/error_1.out +++ b/regression/expected/error_1.out @@ -27,11 +27,15 @@ SELECT query, elevel, sqlcode, message FROM pg_stat_monitor ORDER BY query COLLA SELECT * FROM unknown; | 21 | 42P01 | relation "unknown" does not exist SELECT 1/0; | 21 | 22012 | division by zero SELECT pg_stat_monitor_reset() | 0 | | + do $$ +| 0 | | + BEGIN +| | | + RAISE WARNING 'warning message';+| | | + END $$ | | | do $$ +| 19 | 01000 | warning message BEGIN +| | | RAISE WARNING 'warning message';+| | | END $$; | | | -(5 rows) +(6 rows) SELECT pg_stat_monitor_reset(); pg_stat_monitor_reset diff --git a/regression/expected/error_insert_1.out b/regression/expected/error_insert_1.out index fc8cba7..e201a36 100644 --- a/regression/expected/error_insert_1.out +++ b/regression/expected/error_insert_1.out @@ -19,10 +19,11 @@ Drop Table if exists Company; SELECT query, elevel, sqlcode, message FROM pg_stat_monitor ORDER BY query COLLATE "C",elevel; query | elevel | sqlcode | message 
-------------------------------------------------------+--------+---------+--------------------------------------------------------------- + Drop Table if exists Company | 0 | | INSERT INTO Company(ID, Name) VALUES (1, 'Percona') | 0 | | INSERT INTO Company(ID, Name) VALUES (1, 'Percona'); | 21 | 23505 | duplicate key value violates unique constraint "company_pkey" SELECT pg_stat_monitor_reset() | 0 | | -(3 rows) +(4 rows) SELECT pg_stat_monitor_reset(); pg_stat_monitor_reset diff --git a/regression/expected/top_query_1.out b/regression/expected/top_query_1.out index eb09470..9109771 100644 --- a/regression/expected/top_query_1.out +++ b/regression/expected/top_query_1.out @@ -24,12 +24,23 @@ SELECT add2(1,2); (1 row) SELECT query, top_query FROM pg_stat_monitor ORDER BY query COLLATE "C"; - query | top_query ---------------------------------+------------------ - (select $1 + $2) | SELECT add2(1,2) - SELECT add2(1,2) | - SELECT pg_stat_monitor_reset() | -(3 rows) + query | top_query +-------------------------------------------------------------+------------------ + (select $1 + $2) | SELECT add2(1,2) + CREATE OR REPLACE FUNCTION add(int, int) RETURNS INTEGER AS+| + $$ +| + BEGIN +| + return (select $1 + $2); +| + END; $$ language plpgsql | + CREATE OR REPLACE function add2(int, int) RETURNS int as +| + $$ +| + BEGIN +| + return add($1,$2); +| + END; +| + $$ language plpgsql | + SELECT add2(1,2) | + SELECT pg_stat_monitor_reset() | +(5 rows) SELECT pg_stat_monitor_reset(); pg_stat_monitor_reset From 8e265b9bfb67a16d9cc988c80032d85e460d324f Mon Sep 17 00:00:00 2001 From: Naeem Akhter <40981522+Naeem-Akhter@users.noreply.github.com> Date: Wed, 23 Nov 2022 02:21:33 +0500 Subject: [PATCH 03/15] PG-556: Fix expected output of test case version. 
(#322) --- regression/expected/version.out | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regression/expected/version.out b/regression/expected/version.out index 1a494bf..4a3511b 100644 --- a/regression/expected/version.out +++ b/regression/expected/version.out @@ -2,7 +2,7 @@ CREATE EXTENSION pg_stat_monitor; SELECT pg_stat_monitor_version(); pg_stat_monitor_version ------------------------- - 1.1.1 + 2.0.0-dev (1 row) DROP EXTENSION pg_stat_monitor; From b4ab2ccc84d4afbdb7c36c489896cdf9ee9bc272 Mon Sep 17 00:00:00 2001 From: Naeem Akhter <40981522+Naeem-Akhter@users.noreply.github.com> Date: Wed, 23 Nov 2022 02:22:13 +0500 Subject: [PATCH 04/15] PG-557: Update PGSM+PMM GH workflows to pick intended target branch. (#323) --- .github/workflows/postgresql-11-pmm.yaml | 12 ++++++++++-- .github/workflows/postgresql-12-pmm.yaml | 12 ++++++++++-- .github/workflows/postgresql-13-pmm.yaml | 12 ++++++++++-- .github/workflows/postgresql-14-pmm.yaml | 12 ++++++++++-- .github/workflows/postgresql-15-pmm.yaml | 12 ++++++++++-- 5 files changed, 50 insertions(+), 10 deletions(-) diff --git a/.github/workflows/postgresql-11-pmm.yaml b/.github/workflows/postgresql-11-pmm.yaml index d33e5c1..1791955 100644 --- a/.github/workflows/postgresql-11-pmm.yaml +++ b/.github/workflows/postgresql-11-pmm.yaml @@ -8,7 +8,7 @@ jobs: timeout-minutes: 30 steps: - name: Clone QA Integration repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: repository: 'Percona-Lab/qa-integration' ref: 'main' @@ -16,9 +16,17 @@ jobs: # print branch and Repo name - name: Get branch and Repo Name run: echo 'The branch and Repo Name is' ${{ github.head_ref }} ${{ github.actor }}/pg_stat_monitor + + - name: "Set TARGET_BRANCH variable for a PR run" + if: github.event_name == 'pull_request' + run: echo "TARGET_BRANCH=${{ github.event.pull_request.base.ref }}" >> $GITHUB_ENV + + - name: "Set TARGET_BRANCH variable for a PUSH run" + if: github.event_name == 'push' + run: echo 
"TARGET_BRANCH=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV - name: Run PMM & PGSM Setup, E2E Tests - run: bash -xe ./pmm_pgsm_setup/pmm_pgsm_setup.sh --pgsql-version=11 --pgstat-monitor-branch=REL_1_1_1 + run: bash -xe ./pmm_pgsm_setup/pmm_pgsm_setup.sh --pgsql-version=11 --pgstat-monitor-branch=${{ env.TARGET_BRANCH }} - name: Get PMM-Agent Logs from the Container if: success() || failure() # run this step even if previous step failed diff --git a/.github/workflows/postgresql-12-pmm.yaml b/.github/workflows/postgresql-12-pmm.yaml index 60cb6e4..96bf832 100644 --- a/.github/workflows/postgresql-12-pmm.yaml +++ b/.github/workflows/postgresql-12-pmm.yaml @@ -8,7 +8,7 @@ jobs: timeout-minutes: 30 steps: - name: Clone QA Integration repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: repository: 'Percona-Lab/qa-integration' ref: 'main' @@ -16,9 +16,17 @@ jobs: # print branch and Repo name - name: Get branch and Repo Name run: echo 'The branch and Repo Name is' ${{ github.head_ref }} ${{ github.actor }}/pg_stat_monitor + + - name: "Set TARGET_BRANCH variable for a PR run" + if: github.event_name == 'pull_request' + run: echo "TARGET_BRANCH=${{ github.event.pull_request.base.ref }}" >> $GITHUB_ENV + + - name: "Set TARGET_BRANCH variable for a PUSH run" + if: github.event_name == 'push' + run: echo "TARGET_BRANCH=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV - name: Run PMM & PGSM Setup, E2E Tests - run: bash -xe ./pmm_pgsm_setup/pmm_pgsm_setup.sh --pgsql-version=12 --pgstat-monitor-branch=REL_1_1_1 + run: bash -xe ./pmm_pgsm_setup/pmm_pgsm_setup.sh --pgsql-version=12 --pgstat-monitor-branch=${{ env.TARGET_BRANCH }} - name: Get PMM-Agent Logs from the Container if: success() || failure() # run this step even if previous step failed diff --git a/.github/workflows/postgresql-13-pmm.yaml b/.github/workflows/postgresql-13-pmm.yaml index 93c5cd0..5d6f108 100644 --- a/.github/workflows/postgresql-13-pmm.yaml +++ b/.github/workflows/postgresql-13-pmm.yaml @@ -8,7 +8,7 @@ 
jobs: timeout-minutes: 30 steps: - name: Clone QA Integration repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: repository: 'Percona-Lab/qa-integration' ref: 'main' @@ -17,8 +17,16 @@ jobs: - name: Get branch and Repo Name run: echo 'The branch and Repo Name is' ${{ github.head_ref }} ${{ github.actor }}/pg_stat_monitor + - name: "Set TARGET_BRANCH variable for a PR run" + if: github.event_name == 'pull_request' + run: echo "TARGET_BRANCH=${{ github.event.pull_request.base.ref }}" >> $GITHUB_ENV + + - name: "Set TARGET_BRANCH variable for a PUSH run" + if: github.event_name == 'push' + run: echo "TARGET_BRANCH=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV + - name: Run PMM & PGSM Setup, E2E Tests - run: bash -xe ./pmm_pgsm_setup/pmm_pgsm_setup.sh --pgsql-version=13 --pgstat-monitor-branch=REL_1_1_1 + run: bash -xe ./pmm_pgsm_setup/pmm_pgsm_setup.sh --pgsql-version=13 --pgstat-monitor-branch=${{ env.TARGET_BRANCH }} - name: Get PMM-Agent Logs from the Container if: success() || failure() # run this step even if previous step failed diff --git a/.github/workflows/postgresql-14-pmm.yaml b/.github/workflows/postgresql-14-pmm.yaml index 20d0d11..1f83111 100644 --- a/.github/workflows/postgresql-14-pmm.yaml +++ b/.github/workflows/postgresql-14-pmm.yaml @@ -8,7 +8,7 @@ jobs: timeout-minutes: 30 steps: - name: Clone QA Integration repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: repository: 'Percona-Lab/qa-integration' ref: 'main' @@ -17,8 +17,16 @@ jobs: - name: Get branch and Repo Name run: echo 'The branch and Repo Name is' ${{ github.head_ref }} ${{ github.actor }}/pg_stat_monitor + - name: "Set TARGET_BRANCH variable for a PR run" + if: github.event_name == 'pull_request' + run: echo "TARGET_BRANCH=${{ github.event.pull_request.base.ref }}" >> $GITHUB_ENV + + - name: "Set TARGET_BRANCH variable for a PUSH run" + if: github.event_name == 'push' + run: echo "TARGET_BRANCH=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV + - name: Run PMM & 
PGSM Setup, E2E Tests - run: bash -xe ./pmm_pgsm_setup/pmm_pgsm_setup.sh --pgsql-version=14 --pgstat-monitor-branch=REL_1_1_1 + run: bash -xe ./pmm_pgsm_setup/pmm_pgsm_setup.sh --pgsql-version=14 --pgstat-monitor-branch=${{ env.TARGET_BRANCH }} - name: Get PMM-Agent Logs from the Container if: success() || failure() # run this step even if previous step failed diff --git a/.github/workflows/postgresql-15-pmm.yaml b/.github/workflows/postgresql-15-pmm.yaml index 073fa1b..40e4930 100644 --- a/.github/workflows/postgresql-15-pmm.yaml +++ b/.github/workflows/postgresql-15-pmm.yaml @@ -8,7 +8,7 @@ jobs: timeout-minutes: 30 steps: - name: Clone QA Integration repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: repository: 'Percona-Lab/qa-integration' ref: 'main' @@ -17,8 +17,16 @@ jobs: - name: Get branch and Repo Name run: echo 'The branch and Repo Name is' ${{ github.head_ref }} ${{ github.actor }}/pg_stat_monitor + - name: "Set TARGET_BRANCH variable for a PR run" + if: github.event_name == 'pull_request' + run: echo "TARGET_BRANCH=${{ github.event.pull_request.base.ref }}" >> $GITHUB_ENV + + - name: "Set TARGET_BRANCH variable for a PUSH run" + if: github.event_name == 'push' + run: echo "TARGET_BRANCH=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV + - name: Run PMM & PGSM Setup, E2E Tests - run: bash -xe ./pmm_pgsm_setup/pmm_pgsm_setup.sh --pgsql-version=15 --pgstat-monitor-branch=REL_1_1_1 + run: bash -xe ./pmm_pgsm_setup/pmm_pgsm_setup.sh --pgsql-version=15 --pgstat-monitor-branch=${{ env.TARGET_BRANCH }} - name: Get PMM-Agent Logs from the Container if: success() || failure() # run this step even if previous step failed From f7860b472f9e69219c61f54031b249951de0c5b0 Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Wed, 23 Nov 2022 02:23:28 +0500 Subject: [PATCH 05/15] =?UTF-8?q?PG-310:=20Bucket=20is=20=E2=80=9CDone?= =?UTF-8?q?=E2=80=9D=20vs=20still=20being=20current/last.=20(#321)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit A new column is added to mention that bucket is active or done. there is     some timing based adjustment was required with that too.    Co-authored-by: Hamid Akhtar --- pg_stat_monitor--1.0--2.0.sql | 7 +++++-- pg_stat_monitor--1.0.sql | 3 ++- pg_stat_monitor--2.0.sql | 10 +++++++--- pg_stat_monitor.c | 31 +++++++++++++++++++------------ pg_stat_monitor.h | 4 +--- 5 files changed, 34 insertions(+), 21 deletions(-) diff --git a/pg_stat_monitor--1.0--2.0.sql b/pg_stat_monitor--1.0--2.0.sql index 21a1d41..89cdc98 100644 --- a/pg_stat_monitor--1.0--2.0.sql +++ b/pg_stat_monitor--1.0--2.0.sql @@ -69,7 +69,8 @@ CREATE FUNCTION pg_stat_monitor_internal( OUT wal_fpi int8, OUT wal_bytes numeric, OUT comments TEXT, - OUT toplevel BOOLEAN + OUT toplevel BOOLEAN, + OUT bucket_done BOOLEAN ) RETURNS SETOF record AS 'MODULE_PATHNAME', 'pg_stat_monitor_2_0' @@ -124,7 +125,8 @@ CREATE VIEW pg_stat_monitor AS SELECT cpu_sys_time, wal_records, wal_fpi, - wal_bytes + wal_bytes, + bucket_done FROM pg_stat_monitor_internal(TRUE) p, pg_database d WHERE dbid = oid ORDER BY bucket_start_time; RETURN 0; @@ -242,6 +244,7 @@ CREATE VIEW pg_stat_monitor AS SELECT wal_records, wal_fpi, wal_bytes, + bucket_done, plans_calls, total_plan_time, diff --git a/pg_stat_monitor--1.0.sql b/pg_stat_monitor--1.0.sql index 467d6b5..2d8a411 100644 --- a/pg_stat_monitor--1.0.sql +++ b/pg_stat_monitor--1.0.sql @@ -178,7 +178,8 @@ CREATE FUNCTION pg_stat_monitor_internal( OUT wal_fpi int8, OUT wal_bytes numeric, OUT comments TEXT, - OUT toplevel BOOLEAN + OUT toplevel BOOLEAN, + OUT bucket_done BOOLEAN ) RETURNS SETOF record AS 'MODULE_PATHNAME', 'pg_stat_monitor' diff --git a/pg_stat_monitor--2.0.sql b/pg_stat_monitor--2.0.sql index 2d6df69..b98876a 100644 --- a/pg_stat_monitor--2.0.sql +++ b/pg_stat_monitor--2.0.sql @@ -126,7 +126,7 @@ CREATE FUNCTION pg_stat_monitor_internal( OUT elevel int, OUT sqlcode TEXT, OUT message text, - OUT bucket_start_time timestamp, + OUT 
bucket_start_time timestamptz, OUT calls int8, -- 16 @@ -165,7 +165,8 @@ CREATE FUNCTION pg_stat_monitor_internal( OUT wal_fpi int8, OUT wal_bytes numeric, OUT comments TEXT, - OUT toplevel BOOLEAN + OUT toplevel BOOLEAN, + OUT bucket_done BOOLEAN ) RETURNS SETOF record AS 'MODULE_PATHNAME', 'pg_stat_monitor_2_0' @@ -219,7 +220,8 @@ CREATE VIEW pg_stat_monitor AS SELECT cpu_sys_time, wal_records, wal_fpi, - wal_bytes + wal_bytes, + bucket_done FROM pg_stat_monitor_internal(TRUE) p, pg_database d WHERE dbid = oid ORDER BY bucket_start_time; RETURN 0; @@ -276,6 +278,7 @@ CREATE VIEW pg_stat_monitor AS SELECT wal_records, wal_fpi, wal_bytes, + bucket_done, -- PostgreSQL-13 Specific Coulumns plans_calls, total_plan_time, @@ -338,6 +341,7 @@ CREATE VIEW pg_stat_monitor AS SELECT wal_records, wal_fpi, wal_bytes, + bucket_done, plans_calls, total_plan_time, diff --git a/pg_stat_monitor.c b/pg_stat_monitor.c index 6fddf45..59c88c9 100644 --- a/pg_stat_monitor.c +++ b/pg_stat_monitor.c @@ -36,7 +36,7 @@ PG_MODULE_MAGIC; #define BUILD_VERSION "2.0.0-dev" -#define PG_STAT_STATEMENTS_COLS 52 /* maximum of above */ +#define PG_STAT_STATEMENTS_COLS 53 /* maximum of above */ #define PGSM_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "pg_stat_monitor_query" #define roundf(x,d) ((floor(((x)*pow(10,d))+.5))/pow(10,d)) @@ -1613,15 +1613,12 @@ pg_stat_monitor(PG_FUNCTION_ARGS) static bool IsBucketValid(uint64 bucketid) { - struct tm tm; time_t bucket_t, current_t; double diff_t; pgssSharedState *pgss = pgsm_get_ss(); - memset(&tm, 0, sizeof(tm)); - strptime(pgss->bucket_start_time[bucketid], "%Y-%m-%d %H:%M:%S", &tm); - bucket_t = mktime(&tm); + bucket_t = mktime(&pgss->bucket_start_time[bucketid]); time(¤t_t); diff_t = difftime(current_t, bucket_t); @@ -1674,7 +1671,7 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "pg_stat_monitor: return type must be a row type"); - if (tupdesc->natts != 51) + if 
(tupdesc->natts != 52) elog(ERROR, "pg_stat_monitor: incorrect number of output arguments, required %d", tupdesc->natts); tupstore = tuplestore_begin_heap(true, false, work_mem); @@ -1749,7 +1746,7 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, { if (read_query(pgss_qbuf, tmp.info.parentid, parent_query_txt, 0) == 0) { - int rc = read_query_buffer(bucketid, tmp.info.parentid, parent_query_txt, 0); + int rc = read_query_buffer(bucketid, tmp.info.parentid, parent_query_txt, 0); if (rc != 1) snprintf(parent_query_txt, 32, "%s", ""); @@ -1892,7 +1889,11 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, values[i++] = CStringGetTextDatum(tmp.error.message); /* bucket_start_time at column number 15 */ - values[i++] = CStringGetDatum(pgss->bucket_start_time[entry->key.bucket_id]); + { + TimestampTz tm; + tm2timestamp((struct pg_tm*) &pgss->bucket_start_time[entry->key.bucket_id], 0, NULL, &tm); + values[i++] = TimestampGetDatum(tm); + } if (tmp.calls.calls == 0) { /* Query of pg_stat_monitor itslef started from zero count */ @@ -2003,6 +2004,9 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, nulls[i++] = true; } values[i++] = BoolGetDatum(toplevel); + values[i++] = BoolGetDatum(pg_atomic_read_u64(&pgss->current_wbucket) != bucketid); + + /* clean up and return the tuplestore */ tuplestore_putvalues(tupstore, tupdesc, values, nulls); } /* clean up and return the tuplestore */ @@ -2086,13 +2090,16 @@ get_next_wbucket(pgssSharedState *pgss) tv.tv_sec = (tv.tv_sec) - (tv.tv_sec % PGSM_BUCKET_TIME); lt = localtime(&tv.tv_sec); + /* + * Year is 1900 behind and month is 0 based, therefore we need to + * adjust that. 
+ */ + lt->tm_year += 1900; + lt->tm_mon += 1; /* Allign the value in prev_bucket_sec to the bucket start time */ pg_atomic_exchange_u64(&pgss->prev_bucket_sec, (uint64)tv.tv_sec); - - snprintf(pgss->bucket_start_time[new_bucket_id], sizeof(pgss->bucket_start_time[new_bucket_id]), - "%04d-%02d-%02d %02d:%02d:%02d", lt->tm_year + 1900, lt->tm_mon + 1, lt->tm_mday, lt->tm_hour, lt->tm_min, lt->tm_sec); - + memcpy(&pgss->bucket_start_time[new_bucket_id], lt, sizeof(struct tm)); return new_bucket_id; } diff --git a/pg_stat_monitor.h b/pg_stat_monitor.h index fd6c3cd..e816ca3 100644 --- a/pg_stat_monitor.h +++ b/pg_stat_monitor.h @@ -317,11 +317,9 @@ typedef struct pgssSharedState pg_atomic_uint64 current_wbucket; pg_atomic_uint64 prev_bucket_sec; uint64 bucket_entry[MAX_BUCKETS]; - char bucket_start_time[MAX_BUCKETS][60]; /* start time of the - * bucket */ + struct tm bucket_start_time[MAX_BUCKETS]; /* start time of the bucket */ LWLock *errors_lock; /* protects errors hashtable * search/modification */ - /* * These variables are used when pgsm_overflow_target is ON. * From 4a254a538b79e2129a5f1caf569a51b686aec6f8 Mon Sep 17 00:00:00 2001 From: Naeem Akhter Date: Wed, 30 Nov 2022 16:47:24 +0500 Subject: [PATCH 06/15] PG-553: Add a testcase to verify columns names in PGSM. 
--- t/018_column_names.pl | 100 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 t/018_column_names.pl diff --git a/t/018_column_names.pl b/t/018_column_names.pl new file mode 100644 index 0000000..e67320b --- /dev/null +++ b/t/018_column_names.pl @@ -0,0 +1,100 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use File::Basename; +use File::Compare; +use File::Copy; +use String::Util qw(trim); +use Test::More; +use lib 't'; +use pgsm; + +# Get filename and create out file name and dirs where requried +PGSM::setup_files_dir(basename($0)); + +# Create new PostgreSQL node and do initdb +my $node = PGSM->pgsm_init_pg(); +my $pgdata = $node->data_dir; + +# Update postgresql.conf to include/load pg_stat_monitor library +open my $conf, '>>', "$pgdata/postgresql.conf"; +print $conf "shared_preload_libraries = 'pg_stat_monitor'\n"; +close $conf; + +# Dictionary for expected PGSM columns names on different PG server versions +my %pg_versions_pgsm_columns = ( 15 => "application_name,blk_read_time," . + "blk_write_time,bucket,bucket_done,bucket_start_time,calls," . + "client_ip,cmd_type,cmd_type_text,comments,cpu_sys_time,cpu_user_time," . + "datname,elevel,local_blks_dirtied,local_blks_hit,local_blks_read," . + "local_blks_written,max_exec_time,max_plan_time,mean_exec_time," . + "mean_plan_time,message,min_exec_time,min_plan_time,planid," . + "plans_calls,query,query_plan,queryid,relations,resp_calls," . + "rows_retrieved,shared_blks_dirtied,shared_blks_hit,shared_blks_read," . + "shared_blks_written,sqlcode,stddev_exec_time,stddev_plan_time," . + "temp_blks_read,temp_blks_written,top_query,top_queryid,toplevel," . + "total_exec_time,total_plan_time,userid,wal_bytes,wal_fpi,wal_records", + 14 => "application_name,blk_read_time," . + "blk_write_time,bucket,bucket_done,bucket_start_time,calls," . + "client_ip,cmd_type,cmd_type_text,comments,cpu_sys_time,cpu_user_time," . 
+ "datname,elevel,local_blks_dirtied,local_blks_hit,local_blks_read," . + "local_blks_written,max_exec_time,max_plan_time,mean_exec_time," . + "mean_plan_time,message,min_exec_time,min_plan_time,planid," . + "plans_calls,query,query_plan,queryid,relations,resp_calls," . + "rows_retrieved,shared_blks_dirtied,shared_blks_hit,shared_blks_read," . + "shared_blks_written,sqlcode,stddev_exec_time,stddev_plan_time," . + "temp_blks_read,temp_blks_written,top_query,top_queryid,toplevel," . + "total_exec_time,total_plan_time,userid,wal_bytes,wal_fpi,wal_records", + 13 => "application_name,blk_read_time," . + "blk_write_time,bucket,bucket_done,bucket_start_time,calls," . + "client_ip,cmd_type,cmd_type_text,comments,cpu_sys_time,cpu_user_time," . + "datname,elevel,local_blks_dirtied,local_blks_hit,local_blks_read," . + "local_blks_written,max_exec_time,max_plan_time,mean_exec_time," . + "mean_plan_time,message,min_exec_time,min_plan_time,planid," . + "plans_calls,query,query_plan,queryid,relations,resp_calls," . + "rows_retrieved,shared_blks_dirtied,shared_blks_hit,shared_blks_read," . + "shared_blks_written,sqlcode,stddev_exec_time,stddev_plan_time," . + "temp_blks_read,temp_blks_written,top_query,top_queryid,toplevel," . + "total_exec_time,total_plan_time,userid,wal_bytes,wal_fpi,wal_records", + 12 => "application_name,blk_read_time,blk_write_time,bucket,bucket_done," . + "bucket_start_time,calls,client_ip,cmd_type,cmd_type_text,comments," . + "cpu_sys_time,cpu_user_time,datname,elevel,local_blks_dirtied," . + "local_blks_hit,local_blks_read,local_blks_written,max_time,mean_time," . + "message,min_time,planid,query,query_plan,queryid,relations,resp_calls," . + "rows_retrieved,shared_blks_dirtied,shared_blks_hit,shared_blks_read," . + "shared_blks_written,sqlcode,stddev_time,temp_blks_read,temp_blks_written," . 
+ "top_query,top_queryid,total_time,userid,wal_bytes,wal_fpi,wal_records" + ); + +# Start server +my $rt_value = $node->start; +ok($rt_value == 1, "Start Server"); + +# Create extension and change out file permissions +my ($cmdret, $stdout, $stderr) = $node->psql('postgres', 'CREATE EXTENSION pg_stat_monitor;', extra_params => ['-a']); +ok($cmdret == 0, "Create PGSM Extension"); +PGSM::append_to_file($stdout . "\n"); + +# Get PGSM columns names from PGSM installation in server +($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS WHERE table_name = 'pg_stat_monitor' order by column_name;", extra_params => ['-A', '-R,', '-Ptuples_only=on']); +ok($cmdret == 0, "Get columns names in PGSM installation for PG version $PGSM::PG_MAJOR_VERSION"); +PGSM::append_to_file($stdout . "\n"); + +# Compare PGSM column names in installation to expected column names +ok($stdout eq $pg_versions_pgsm_columns{$PGSM::PG_MAJOR_VERSION}, "Compare supported columns names for PG version $PGSM::PG_MAJOR_VERSION against expected"); + +# Run Select statement against expected column names +($cmdret, $stdout, $stderr) = $node->psql('postgres', "Select $pg_versions_pgsm_columns{$PGSM::PG_MAJOR_VERSION} from pg_stat_monitor;", extra_params => ['-a', '-Pformat=aligned','-Ptuples_only=off']); +ok($cmdret == 0, "Select statement against expected column names"); +PGSM::append_to_file($stdout); + +# Drop extension +$stdout = $node->safe_psql('postgres', 'Drop extension pg_stat_monitor;', extra_params => ['-a']); +ok($cmdret == 0, "Drop PGSM Extension"); +PGSM::append_to_file($stdout); + +# Stop the server +$node->stop; + +# Done testing for this testcase file. 
+done_testing(); From 913064b68d79487680625c19b1b09e5ff520c5ca Mon Sep 17 00:00:00 2001 From: Muhammad Usama Date: Wed, 7 Dec 2022 15:40:13 +0500 Subject: [PATCH 07/15] PG-435: Adding new counters that are available in PG15 (#329) In line with pg_stat_statments for PG15, This commit adds eight new cumulative counters for jit operations, making it easier to diagnose how JIT is used in an installation. And two new columns, temp_blk_read_time, and temp_blk_write_time, respectively, show the time spent reading and writing temporary file blocks on disk. Moreover, The commit also contains a few indentations and API adjustments. --- pg_stat_monitor--1.0--2.0.sql | 110 +++++++++++++++++++++++++++++++--- pg_stat_monitor--2.0.sql | 104 ++++++++++++++++++++++++++++++-- pg_stat_monitor.c | 89 ++++++++++++++++++++++----- pg_stat_monitor.h | 23 ++++++- t/018_column_names.pl | 10 +++- 5 files changed, 303 insertions(+), 33 deletions(-) diff --git a/pg_stat_monitor--1.0--2.0.sql b/pg_stat_monitor--1.0--2.0.sql index 89cdc98..b8ac5f3 100644 --- a/pg_stat_monitor--1.0--2.0.sql +++ b/pg_stat_monitor--1.0--2.0.sql @@ -62,15 +62,28 @@ CREATE FUNCTION pg_stat_monitor_internal( OUT temp_blks_written int8, OUT blk_read_time float8, OUT blk_write_time float8, + OUT temp_blk_read_time float8, + OUT temp_blk_write_time float8, + OUT resp_calls text, -- 41 OUT cpu_user_time float8, OUT cpu_sys_time float8, - OUT wal_records int8, - OUT wal_fpi int8, - OUT wal_bytes numeric, - OUT comments TEXT, - OUT toplevel BOOLEAN, - OUT bucket_done BOOLEAN + OUT wal_records int8, + OUT wal_fpi int8, + OUT wal_bytes numeric, + OUT comments TEXT, + + OUT jit_functions int8, + OUT jit_generation_time float8, + OUT jit_inlining_count int8, + OUT jit_inlining_time float8, + OUT jit_optimization_count int8, + OUT jit_optimization_time float8, + OUT jit_emission_count int8, + OUT jit_emission_time float8, + + OUT toplevel BOOLEAN, + OUT bucket_done BOOLEAN ) RETURNS SETOF record AS 'MODULE_PATHNAME', 
'pg_stat_monitor_2_0' @@ -258,18 +271,97 @@ RETURN 0; END; $$ LANGUAGE plpgsql; +CREATE FUNCTION pgsm_create_15_view() RETURNS INT AS +$$ +BEGIN +CREATE VIEW pg_stat_monitor AS SELECT + bucket, + bucket_start_time AS bucket_start_time, + userid::regrole, + datname, + '0.0.0.0'::inet + client_ip AS client_ip, + queryid, + toplevel, + top_queryid, + query, + comments, + planid, + query_plan, + top_query, + application_name, + string_to_array(relations, ',') AS relations, + cmd_type, + get_cmd_type(cmd_type) AS cmd_type_text, + elevel, + sqlcode, + message, + calls, + total_exec_time, + min_exec_time, + max_exec_time, + mean_exec_time, + stddev_exec_time, + rows_retrieved, + shared_blks_hit, + shared_blks_read, + shared_blks_dirtied, + shared_blks_written, + local_blks_hit, + local_blks_read, + local_blks_dirtied, + local_blks_written, + temp_blks_read, + temp_blks_written, + blk_read_time, + blk_write_time, + temp_blk_read_time, + temp_blk_write_time, + + (string_to_array(resp_calls, ',')) resp_calls, + cpu_user_time, + cpu_sys_time, + wal_records, + wal_fpi, + wal_bytes, + bucket_done, + + plans_calls, + total_plan_time, + min_plan_time, + max_plan_time, + mean_plan_time, + stddev_plan_time, + + jit_functions, + jit_generation_time, + jit_inlining_count, + jit_inlining_time, + jit_optimization_count, + jit_optimization_time, + jit_emission_count, + jit_emission_time + +FROM pg_stat_monitor_internal(TRUE) p, pg_database d WHERE dbid = oid +ORDER BY bucket_start_time; +RETURN 0; +END; +$$ LANGUAGE plpgsql; + CREATE FUNCTION pgsm_create_view() RETURNS INT AS $$ DECLARE ver integer; BEGIN SELECT current_setting('server_version_num') INTO ver; - IF (ver >= 14000) THEN + IF (ver >= 150000) THEN + return pgsm_create_15_view(); + END IF; + IF (ver >= 140000) THEN return pgsm_create_14_view(); END IF; - IF (ver >= 13000) THEN + IF (ver >= 130000) THEN return pgsm_create_13_view(); END IF; - IF (ver >= 11000) THEN + IF (ver >= 110000) THEN return pgsm_create_11_view(); END 
IF; RETURN 0; diff --git a/pg_stat_monitor--2.0.sql b/pg_stat_monitor--2.0.sql index b98876a..4104cc0 100644 --- a/pg_stat_monitor--2.0.sql +++ b/pg_stat_monitor--2.0.sql @@ -116,7 +116,6 @@ CREATE FUNCTION pg_stat_monitor_internal( OUT planid text, OUT query text, OUT query_plan text, - OUT state_code int8, OUT top_queryid text, OUT top_query text, OUT application_name text, @@ -158,15 +157,29 @@ CREATE FUNCTION pg_stat_monitor_internal( OUT temp_blks_written int8, OUT blk_read_time float8, OUT blk_write_time float8, + + OUT temp_blk_read_time float8, + OUT temp_blk_write_time float8, + OUT resp_calls text, -- 41 OUT cpu_user_time float8, OUT cpu_sys_time float8, - OUT wal_records int8, - OUT wal_fpi int8, - OUT wal_bytes numeric, - OUT comments TEXT, + OUT wal_records int8, + OUT wal_fpi int8, + OUT wal_bytes numeric, + OUT comments TEXT, + + OUT jit_functions int8, + OUT jit_generation_time float8, + OUT jit_inlining_count int8, + OUT jit_inlining_time float8, + OUT jit_optimization_count int8, + OUT jit_optimization_time float8, + OUT jit_emission_count int8, + OUT jit_emission_time float8, + OUT toplevel BOOLEAN, - OUT bucket_done BOOLEAN + OUT bucket_done BOOLEAN ) RETURNS SETOF record AS 'MODULE_PATHNAME', 'pg_stat_monitor_2_0' @@ -355,11 +368,90 @@ RETURN 0; END; $$ LANGUAGE plpgsql; +CREATE FUNCTION pgsm_create_15_view() RETURNS INT AS +$$ +BEGIN +CREATE VIEW pg_stat_monitor AS SELECT + bucket, + bucket_start_time AS bucket_start_time, + userid::regrole, + datname, + '0.0.0.0'::inet + client_ip AS client_ip, + queryid, + toplevel, + top_queryid, + query, + comments, + planid, + query_plan, + top_query, + application_name, + string_to_array(relations, ',') AS relations, + cmd_type, + get_cmd_type(cmd_type) AS cmd_type_text, + elevel, + sqlcode, + message, + calls, + total_exec_time, + min_exec_time, + max_exec_time, + mean_exec_time, + stddev_exec_time, + rows_retrieved, + shared_blks_hit, + shared_blks_read, + shared_blks_dirtied, + shared_blks_written, + 
local_blks_hit, + local_blks_read, + local_blks_dirtied, + local_blks_written, + temp_blks_read, + temp_blks_written, + blk_read_time, + blk_write_time, + temp_blk_read_time, + temp_blk_write_time, + + (string_to_array(resp_calls, ',')) resp_calls, + cpu_user_time, + cpu_sys_time, + wal_records, + wal_fpi, + wal_bytes, + bucket_done, + + plans_calls, + total_plan_time, + min_plan_time, + max_plan_time, + mean_plan_time, + stddev_plan_time, + + jit_functions, + jit_generation_time, + jit_inlining_count, + jit_inlining_time, + jit_optimization_count, + jit_optimization_time, + jit_emission_count, + jit_emission_time + +FROM pg_stat_monitor_internal(TRUE) p, pg_database d WHERE dbid = oid +ORDER BY bucket_start_time; +RETURN 0; +END; +$$ LANGUAGE plpgsql; + CREATE FUNCTION pgsm_create_view() RETURNS INT AS $$ DECLARE ver integer; BEGIN SELECT current_setting('server_version_num') INTO ver; + IF (ver >= 150000) THEN + return pgsm_create_15_view(); + END IF; IF (ver >= 140000) THEN return pgsm_create_14_view(); END IF; diff --git a/pg_stat_monitor.c b/pg_stat_monitor.c index 59c88c9..ddf1765 100644 --- a/pg_stat_monitor.c +++ b/pg_stat_monitor.c @@ -36,7 +36,12 @@ PG_MODULE_MAGIC; #define BUILD_VERSION "2.0.0-dev" -#define PG_STAT_STATEMENTS_COLS 53 /* maximum of above */ + +/* Number of output arguments (columns) for various API versions */ +#define PG_STAT_MONITOR_COLS_V1_0 52 +#define PG_STAT_MONITOR_COLS_V2_0 61 +#define PG_STAT_MONITOR_COLS 61 /* maximum of above */ + #define PGSM_TEXT_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "pg_stat_monitor_query" #define roundf(x,d) ((floor(((x)*pow(10,d))+.5))/pow(10,d)) @@ -186,6 +191,7 @@ static void pgss_store(uint64 queryid, uint64 rows, BufferUsage *bufusage, WalUsage *walusage, + const struct JitInstrumentation *jitusage, JumbleState *jstate, pgssStoreKind kind); @@ -241,7 +247,7 @@ _PG_init(void) * In order to create our shared memory area, we have to be loaded via * shared_preload_libraries. 
If not, fall out without hooking into any of * the main system. (We don't throw error here because it seems useful to - * allow the pg_stat_statements functions to be created even when the + * allow the pg_stat_monitor functions to be created even when the * module isn't active. The functions must protect themselves against * being called then, however.) */ @@ -432,6 +438,7 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate) 0, /* rows */ NULL, /* bufusage */ NULL, /* walusage */ + NULL, /* jitusage */ jstate, /* JumbleState */ PGSS_PARSE); /* pgssStoreKind */ } @@ -490,6 +497,7 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query) 0, /* rows */ NULL, /* bufusage */ NULL, /* walusage */ + NULL, /* jitusage */ &jstate, /* JumbleState */ PGSS_PARSE); /* pgssStoreKind */ } @@ -661,6 +669,11 @@ pgss_ExecutorEnd(QueryDesc *queryDesc) &queryDesc->totaltime->walusage, /* walusage */ #else NULL, +#endif +#if PG_VERSION_NUM >= 150000 + queryDesc->estate->es_jit ? 
&queryDesc->estate->es_jit->instr : NULL, +#else + NULL, #endif NULL, PGSS_FINISHED); /* pgssStoreKind */ @@ -804,6 +817,7 @@ pgss_planner_hook(Query *parse, const char *query_string, int cursorOptions, Par &bufusage, /* bufusage */ &walusage, /* walusage */ NULL, /* JumbleState */ + NULL, PGSS_PLAN); /* pgssStoreKind */ } else @@ -993,6 +1007,7 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, #else NULL, /* walusage, NULL for PG <= 12 */ #endif + NULL, NULL, /* JumbleState */ PGSS_FINISHED); /* pgssStoreKind */ } @@ -1139,7 +1154,7 @@ pg_get_client_addr(bool *ok) static void pgss_update_entry(pgssEntry *entry, - int bucketid, + uint64 bucketid, uint64 queryid, const char *query, const char *comments, @@ -1151,6 +1166,7 @@ pgss_update_entry(pgssEntry *entry, uint64 rows, BufferUsage *bufusage, WalUsage *walusage, + const struct JitInstrumentation *jitusage, bool reset, pgssStoreKind kind, const char *app_name, @@ -1272,6 +1288,10 @@ pgss_update_entry(pgssEntry *entry, e->counters.blocks.temp_blks_written += bufusage->temp_blks_written; e->counters.blocks.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time); e->counters.blocks.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time); + #if PG_VERSION_NUM >= 150000 + e->counters.blocks.temp_blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_read_time); + e->counters.blocks.temp_blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->temp_blk_write_time); + #endif } e->counters.calls.usage += USAGE_EXEC(total_time); if (sys_info) @@ -1285,6 +1305,23 @@ pgss_update_entry(pgssEntry *entry, e->counters.walusage.wal_fpi += walusage->wal_fpi; e->counters.walusage.wal_bytes += walusage->wal_bytes; } + if (jitusage) + { + e->counters.jitinfo.jit_functions += jitusage->created_functions; + e->counters.jitinfo.jit_generation_time += INSTR_TIME_GET_MILLISEC(jitusage->generation_counter); + + if (INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter)) + 
e->counters.jitinfo.jit_inlining_count++; + e->counters.jitinfo.jit_inlining_time += INSTR_TIME_GET_MILLISEC(jitusage->inlining_counter); + + if (INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter)) + e->counters.jitinfo.jit_optimization_count++; + e->counters.jitinfo.jit_optimization_time += INSTR_TIME_GET_MILLISEC(jitusage->optimization_counter); + + if (INSTR_TIME_GET_MILLISEC(jitusage->emission_counter)) + e->counters.jitinfo.jit_emission_count++; + e->counters.jitinfo.jit_emission_time += INSTR_TIME_GET_MILLISEC(jitusage->emission_counter); + } SpinLockRelease(&e->mutex); } } @@ -1313,6 +1350,7 @@ pgss_store_error(uint64 queryid, NULL, /* bufusage */ NULL, /* walusage */ NULL, /* JumbleState */ + NULL, PGSS_ERROR); /* pgssStoreKind */ } @@ -1339,6 +1377,7 @@ pgss_store(uint64 queryid, uint64 rows, BufferUsage *bufusage, WalUsage *walusage, + const struct JitInstrumentation *jitusage, JumbleState *jstate, pgssStoreKind kind) { @@ -1553,6 +1592,7 @@ pgss_store(uint64 queryid, rows, /* rows */ bufusage, /* bufusage */ walusage, /* walusage */ + jitusage, reset, /* reset */ kind, /* kind */ app_name_ptr, @@ -1627,7 +1667,7 @@ IsBucketValid(uint64 bucketid) return true; } -/* Common code for all versions of pg_stat_statements() */ +/* Common code for all versions of pg_stat_monitor() */ static void pg_stat_monitor_internal(FunctionCallInfo fcinfo, pgsmVersion api_version, @@ -1645,6 +1685,7 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, HTAB *pgss_hash = pgsm_get_hash(); char *query_txt = (char *) palloc0(PGSM_QUERY_MAX_LEN + 1); char *parent_query_txt = (char *) palloc0(PGSM_QUERY_MAX_LEN + 1); + int expected_columns = (api_version >= PGSM_V2_0)?PG_STAT_MONITOR_COLS_V2_0:PG_STAT_MONITOR_COLS_V1_0; /* Safety check... 
*/ if (!IsSystemInitialized()) @@ -1671,7 +1712,7 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "pg_stat_monitor: return type must be a row type"); - if (tupdesc->natts != 52) + if (tupdesc->natts != expected_columns) elog(ERROR, "pg_stat_monitor: incorrect number of output arguments, required %d", tupdesc->natts); tupstore = tuplestore_begin_heap(true, false, work_mem); @@ -1686,18 +1727,18 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, hash_seq_init(&hash_seq, pgss_hash); while ((entry = hash_seq_search(&hash_seq)) != NULL) { - Datum values[PG_STAT_STATEMENTS_COLS] = {0}; - bool nulls[PG_STAT_STATEMENTS_COLS] = {0}; + Datum values[PG_STAT_MONITOR_COLS] = {0}; + bool nulls[PG_STAT_MONITOR_COLS] = {0}; int i = 0; Counters tmp; double stddev; char queryid_text[32] = {0}; char planid_text[32] = {0}; uint64 queryid = entry->key.queryid; - uint64 bucketid = entry->key.bucket_id; + int64 bucketid = entry->key.bucket_id; uint64 dbid = entry->key.dbid; uint64 userid = entry->key.userid; - uint64 ip = entry->key.ip; + int64 ip = entry->key.ip; uint64 planid = entry->key.planid; #if PG_VERSION_NUM < 140000 bool toplevel = 1; @@ -1816,8 +1857,9 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, values[i++] = CStringGetTextDatum(""); } - /* state at column number 8 */ - values[i++] = Int64GetDatumFast(tmp.state); + /* state at column number 8 for V1.0 API*/ + if (api_version <= PGSM_V1_0) + values[i++] = Int64GetDatumFast(tmp.state); /* parentid at column number 9 */ if (tmp.info.parentid != UINT64CONST(0)) @@ -1871,7 +1913,7 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, if (tmp.info.cmd_type == CMD_NOTHING) nulls[i++] = true; else - values[i++] = Int64GetDatumFast(tmp.info.cmd_type); + values[i++] = Int64GetDatumFast((int64)tmp.info.cmd_type); /* elevel at column number 12 */ values[i++] = Int64GetDatumFast(tmp.error.elevel); @@ -1969,6 +2011,12 @@ 
pg_stat_monitor_internal(FunctionCallInfo fcinfo, values[i++] = Float8GetDatumFast(tmp.blocks.blk_read_time); values[i++] = Float8GetDatumFast(tmp.blocks.blk_write_time); + if (api_version >= PGSM_V2_0) + { + values[i++] = Float8GetDatumFast(tmp.blocks.temp_blk_read_time); + values[i++] = Float8GetDatumFast(tmp.blocks.temp_blk_write_time); + } + /* resp_calls at column number 41 */ values[i++] = IntArrayGetTextDatum(tmp.resp_calls, PGSM_HISTOGRAM_BUCKETS); @@ -2002,6 +2050,19 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, values[i++] = CStringGetTextDatum(tmp.info.comments); else nulls[i++] = true; + + if (api_version >= PGSM_V2_0) + { + values[i++] = Int64GetDatumFast(tmp.jitinfo.jit_functions); + values[i++] = Float8GetDatumFast(tmp.jitinfo.jit_generation_time); + values[i++] = Int64GetDatumFast(tmp.jitinfo.jit_inlining_count); + values[i++] = Float8GetDatumFast(tmp.jitinfo.jit_inlining_time); + values[i++] = Int64GetDatumFast(tmp.jitinfo.jit_optimization_count); + values[i++] = Float8GetDatumFast(tmp.jitinfo.jit_optimization_time); + values[i++] = Int64GetDatumFast(tmp.jitinfo.jit_emission_count); + values[i++] = Float8GetDatumFast(tmp.jitinfo.jit_emission_time); + } + } values[i++] = BoolGetDatum(toplevel); values[i++] = BoolGetDatum(pg_atomic_read_u64(&pgss->current_wbucket) != bucketid); @@ -3343,8 +3404,8 @@ pg_stat_monitor_settings(PG_FUNCTION_ARGS) } else { - values[j++] = Int64GetDatumFast(get_conf(i)->guc_min); - values[j++] = Int64GetDatumFast(get_conf(i)->guc_max); + values[j++] = Int32GetDatum(get_conf(i)->guc_min); + values[j++] = Int32GetDatum(get_conf(i)->guc_max); } if (conf->type == PGC_ENUM) diff --git a/pg_stat_monitor.h b/pg_stat_monitor.h index e816ca3..dd66c9d 100644 --- a/pg_stat_monitor.h +++ b/pg_stat_monitor.h @@ -31,6 +31,7 @@ #include "catalog/pg_authid.h" #include "executor/instrument.h" #include "common/ip.h" +#include "jit/jit.h" #include "funcapi.h" #include "access/twophase.h" #include "mb/pg_wchar.h" @@ -255,8 +256,27 @@ 
typedef struct Blocks int64 temp_blks_written; /* # of temp blocks written */ double blk_read_time; /* time spent reading, in msec */ double blk_write_time; /* time spent writing, in msec */ + + double temp_blk_read_time; /* time spent reading temp blocks, in msec */ + double temp_blk_write_time; /* time spent writing temp blocks, in + * msec */ } Blocks; +typedef struct JitInfo +{ + int64 jit_functions; /* total number of JIT functions emitted */ + double jit_generation_time; /* total time to generate jit code */ + int64 jit_inlining_count; /* number of times inlining time has been + * > 0 */ + double jit_inlining_time; /* total time to inline jit code */ + int64 jit_optimization_count; /* number of times optimization time + * has been > 0 */ + double jit_optimization_time; /* total time to optimize jit code */ + int64 jit_emission_count; /* number of times emission time has been + * > 0 */ + double jit_emission_time; /* total time to emit jit code */ +} JitInfo; + typedef struct SysInfo { float utime; /* user cpu time */ @@ -283,11 +303,12 @@ typedef struct Counters Blocks blocks; SysInfo sysinfo; + JitInfo jitinfo; ErrorInfo error; Wal_Usage walusage; int resp_calls[MAX_RESPONSE_BUCKET]; /* execution time's in * msec */ - uint64 state; /* query state */ + int64 state; /* query state */ } Counters; /* Some global structure to get the cpu usage, really don't like the idea of global variable */ diff --git a/t/018_column_names.pl b/t/018_column_names.pl index e67320b..eedade4 100644 --- a/t/018_column_names.pl +++ b/t/018_column_names.pl @@ -26,14 +26,18 @@ close $conf; my %pg_versions_pgsm_columns = ( 15 => "application_name,blk_read_time," . "blk_write_time,bucket,bucket_done,bucket_start_time,calls," . "client_ip,cmd_type,cmd_type_text,comments,cpu_sys_time,cpu_user_time," . - "datname,elevel,local_blks_dirtied,local_blks_hit,local_blks_read," . + "datname,elevel,jit_emission_count,jit_emission_time,jit_functions," . 
+ "jit_generation_time,jit_inlining_count,jit_inlining_time," . + "jit_optimization_count,jit_optimization_time," . + "local_blks_dirtied,local_blks_hit,local_blks_read," . "local_blks_written,max_exec_time,max_plan_time,mean_exec_time," . "mean_plan_time,message,min_exec_time,min_plan_time,planid," . "plans_calls,query,query_plan,queryid,relations,resp_calls," . "rows_retrieved,shared_blks_dirtied,shared_blks_hit,shared_blks_read," . "shared_blks_written,sqlcode,stddev_exec_time,stddev_plan_time," . - "temp_blks_read,temp_blks_written,top_query,top_queryid,toplevel," . - "total_exec_time,total_plan_time,userid,wal_bytes,wal_fpi,wal_records", + "temp_blk_read_time,temp_blk_write_time,temp_blks_read,temp_blks_written," . + "top_query,top_queryid,toplevel,total_exec_time,total_plan_time," . + "userid,wal_bytes,wal_fpi,wal_records", 14 => "application_name,blk_read_time," . "blk_write_time,bucket,bucket_done,bucket_start_time,calls," . "client_ip,cmd_type,cmd_type_text,comments,cpu_sys_time,cpu_user_time," . From 5ae0f3a0bb1a0ae6ac6ebd3f9cab3aabc5fab30c Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Wed, 7 Dec 2022 14:52:45 +0000 Subject: [PATCH 08/15] PG-518: Internal Functions should NOT be visible in PGSM API. 
--- pg_stat_monitor--1.0--2.0.sql | 7 +++++++ pg_stat_monitor--1.0.sql | 6 ++++++ pg_stat_monitor--2.0.sql | 6 ++++++ 3 files changed, 19 insertions(+) diff --git a/pg_stat_monitor--1.0--2.0.sql b/pg_stat_monitor--1.0--2.0.sql index b8ac5f3..6e7e2b0 100644 --- a/pg_stat_monitor--1.0--2.0.sql +++ b/pg_stat_monitor--1.0--2.0.sql @@ -369,11 +369,18 @@ $$ $$ LANGUAGE plpgsql; SELECT pgsm_create_view(); + REVOKE ALL ON FUNCTION range FROM PUBLIC; REVOKE ALL ON FUNCTION get_cmd_type FROM PUBLIC; REVOKE ALL ON FUNCTION pg_stat_monitor_settings FROM PUBLIC; REVOKE ALL ON FUNCTION decode_error_level FROM PUBLIC; REVOKE ALL ON FUNCTION pg_stat_monitor_internal FROM PUBLIC; +REVOKE ALL ON FUNCTION get_histogram_timings FROM PUBLIC; +REVOKE ALL ON FUNCTION pgsm_create_view FROM PUBLIC; +REVOKE ALL ON FUNCTION pgsm_create_11_view FROM PUBLIC; +REVOKE ALL ON FUNCTION pgsm_create_13_view FROM PUBLIC; +REVOKE ALL ON FUNCTION pgsm_create_14_view FROM PUBLIC; +REVOKE ALL ON FUNCTION pgsm_create_15_view FROM PUBLIC; GRANT SELECT ON pg_stat_monitor TO PUBLIC; diff --git a/pg_stat_monitor--1.0.sql b/pg_stat_monitor--1.0.sql index 2d8a411..3242e16 100644 --- a/pg_stat_monitor--1.0.sql +++ b/pg_stat_monitor--1.0.sql @@ -397,6 +397,12 @@ REVOKE ALL ON FUNCTION get_cmd_type FROM PUBLIC; REVOKE ALL ON FUNCTION pg_stat_monitor_settings FROM PUBLIC; REVOKE ALL ON FUNCTION decode_error_level FROM PUBLIC; REVOKE ALL ON FUNCTION pg_stat_monitor_internal FROM PUBLIC; +REVOKE ALL ON FUNCTION get_histogram_timings FROM PUBLIC; +REVOKE ALL ON FUNCTION pgsm_create_view FROM PUBLIC; +REVOKE ALL ON FUNCTION pgsm_create_11_view FROM PUBLIC; +REVOKE ALL ON FUNCTION pgsm_create_13_view FROM PUBLIC; +REVOKE ALL ON FUNCTION pgsm_create_14_view FROM PUBLIC; +REVOKE ALL ON FUNCTION pgsm_create_15_view FROM PUBLIC; GRANT SELECT ON pg_stat_monitor TO PUBLIC; diff --git a/pg_stat_monitor--2.0.sql b/pg_stat_monitor--2.0.sql index 4104cc0..477585f 100644 --- a/pg_stat_monitor--2.0.sql +++ 
b/pg_stat_monitor--2.0.sql @@ -471,6 +471,12 @@ REVOKE ALL ON FUNCTION get_cmd_type FROM PUBLIC; REVOKE ALL ON FUNCTION pg_stat_monitor_settings FROM PUBLIC; REVOKE ALL ON FUNCTION decode_error_level FROM PUBLIC; REVOKE ALL ON FUNCTION pg_stat_monitor_internal FROM PUBLIC; +REVOKE ALL ON FUNCTION get_histogram_timings FROM PUBLIC; +REVOKE ALL ON FUNCTION pgsm_create_view FROM PUBLIC; +REVOKE ALL ON FUNCTION pgsm_create_11_view FROM PUBLIC; +REVOKE ALL ON FUNCTION pgsm_create_13_view FROM PUBLIC; +REVOKE ALL ON FUNCTION pgsm_create_14_view FROM PUBLIC; +REVOKE ALL ON FUNCTION pgsm_create_15_view FROM PUBLIC; GRANT SELECT ON pg_stat_monitor TO PUBLIC; From 3076d5bf5c845a60ddb513cd2681854528c3e0d6 Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Wed, 7 Dec 2022 15:03:14 +0000 Subject: [PATCH 09/15] PG-373: Remove wal fields for PG12 and below. --- pg_stat_monitor--1.0--2.0.sql | 3 --- pg_stat_monitor--2.0.sql | 3 --- 2 files changed, 6 deletions(-) diff --git a/pg_stat_monitor--1.0--2.0.sql b/pg_stat_monitor--1.0--2.0.sql index b8ac5f3..992d727 100644 --- a/pg_stat_monitor--1.0--2.0.sql +++ b/pg_stat_monitor--1.0--2.0.sql @@ -136,9 +136,6 @@ CREATE VIEW pg_stat_monitor AS SELECT (string_to_array(resp_calls, ',')) resp_calls, cpu_user_time, cpu_sys_time, - wal_records, - wal_fpi, - wal_bytes, bucket_done FROM pg_stat_monitor_internal(TRUE) p, pg_database d WHERE dbid = oid ORDER BY bucket_start_time; diff --git a/pg_stat_monitor--2.0.sql b/pg_stat_monitor--2.0.sql index 4104cc0..25df1eb 100644 --- a/pg_stat_monitor--2.0.sql +++ b/pg_stat_monitor--2.0.sql @@ -231,9 +231,6 @@ CREATE VIEW pg_stat_monitor AS SELECT (string_to_array(resp_calls, ',')) resp_calls, cpu_user_time, cpu_sys_time, - wal_records, - wal_fpi, - wal_bytes, bucket_done FROM pg_stat_monitor_internal(TRUE) p, pg_database d WHERE dbid = oid ORDER BY bucket_start_time; From 1037fb08a8586a6093ddfa6c793b4cd70f67599b Mon Sep 17 00:00:00 2001 From: Naeem Akhter Date: Mon, 12 Dec 2022 15:41:09 +0500 Subject: 
[PATCH 10/15] PG-558: Create test case to verify the function names and count in PGSM. --- Makefile | 2 +- regression/expected/functions.out | 41 +++++++++++++++++++++++++++++++ regression/sql/functions.sql | 15 +++++++++++ 3 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 regression/expected/functions.out create mode 100644 regression/sql/functions.sql diff --git a/Makefile b/Makefile index 2a1e11a..471988c 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS)) TAP_TESTS = 1 REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_monitor/pg_stat_monitor.conf --inputdir=regression -REGRESS = basic version guc counters relations database error_insert application_name application_name_unique top_query cmd_type error rows tags +REGRESS = basic version guc functions counters relations database error_insert application_name application_name_unique top_query cmd_type error rows tags # Disabled because these tests require "shared_preload_libraries=pg_stat_statements", # which typical installcheck users do not have (e.g. buildfarm clients). 
diff --git a/regression/expected/functions.out b/regression/expected/functions.out new file mode 100644 index 0000000..2e94b48 --- /dev/null +++ b/regression/expected/functions.out @@ -0,0 +1,41 @@ +CREATE USER su WITH SUPERUSER; +SET ROLE su; +CREATE EXTENSION pg_stat_monitor; +CREATE USER u1; +SELECT pg_stat_monitor_reset(); + pg_stat_monitor_reset +----------------------- + +(1 row) + +SELECT routine_schema, routine_name, routine_type, data_type FROM information_schema.routines WHERE routine_schema = 'public' ORDER BY routine_name COLLATE "C"; + routine_schema | routine_name | routine_type | data_type +----------------+--------------------------+--------------+----------- + public | decode_error_level | FUNCTION | text + public | get_cmd_type | FUNCTION | text + public | get_histogram_timings | FUNCTION | text + public | histogram | FUNCTION | record + public | pg_stat_monitor_internal | FUNCTION | record + public | pg_stat_monitor_reset | FUNCTION | void + public | pg_stat_monitor_settings | FUNCTION | record + public | pg_stat_monitor_version | FUNCTION | text + public | pgsm_create_11_view | FUNCTION | integer + public | pgsm_create_13_view | FUNCTION | integer + public | pgsm_create_14_view | FUNCTION | integer + public | pgsm_create_15_view | FUNCTION | integer + public | pgsm_create_view | FUNCTION | integer + public | range | FUNCTION | ARRAY +(14 rows) + +SET ROLE u1; +SELECT routine_schema, routine_name, routine_type, data_type FROM information_schema.routines WHERE routine_schema = 'public' ORDER BY routine_name COLLATE "C"; + routine_schema | routine_name | routine_type | data_type +----------------+-------------------------+--------------+----------- + public | histogram | FUNCTION | record + public | pg_stat_monitor_reset | FUNCTION | void + public | pg_stat_monitor_version | FUNCTION | text +(3 rows) + +set role su; +DROP USER u1; +DROP EXTENSION pg_stat_monitor; diff --git a/regression/sql/functions.sql b/regression/sql/functions.sql new file mode 
100644 index 0000000..27db119 --- /dev/null +++ b/regression/sql/functions.sql @@ -0,0 +1,15 @@ +CREATE USER su WITH SUPERUSER; +SET ROLE su; + +CREATE EXTENSION pg_stat_monitor; +CREATE USER u1; + +SELECT pg_stat_monitor_reset(); +SELECT routine_schema, routine_name, routine_type, data_type FROM information_schema.routines WHERE routine_schema = 'public' ORDER BY routine_name COLLATE "C"; + +SET ROLE u1; +SELECT routine_schema, routine_name, routine_type, data_type FROM information_schema.routines WHERE routine_schema = 'public' ORDER BY routine_name COLLATE "C"; + +set role su; +DROP USER u1; +DROP EXTENSION pg_stat_monitor; From 2917ae6805a90f8869564daff9f1b8ceb903bd7b Mon Sep 17 00:00:00 2001 From: Hamid Akhtar Date: Tue, 13 Dec 2022 17:05:46 +0500 Subject: [PATCH 11/15] PG-354: pg_stat_monitor: Remove pg_stat_monitor_settings view Removing the view for 2.0. Updating the required SQL files to manage the upgrade. Downgrade from 2.x to 1.x is not supported. Also part of this fix is the SQL regression. This does not update the tap test cases. --- pg_stat_monitor--1.0--2.0.sql | 2 +- pg_stat_monitor--2.0.sql | 26 --- pg_stat_monitor.c | 131 -------------- regression/expected/guc.out | 313 ++++++++++++++++++++++++++++++---- regression/sql/guc.sql | 33 +++- 5 files changed, 307 insertions(+), 198 deletions(-) diff --git a/pg_stat_monitor--1.0--2.0.sql b/pg_stat_monitor--1.0--2.0.sql index 1b7388a..6199ab3 100644 --- a/pg_stat_monitor--1.0--2.0.sql +++ b/pg_stat_monitor--1.0--2.0.sql @@ -8,6 +8,7 @@ DROP FUNCTION pgsm_create_11_view CASCADE; DROP FUNCTION pgsm_create_13_view CASCADE; DROP FUNCTION pgsm_create_14_view CASCADE; DROP FUNCTION pgsm_create_view CASCADE; +DROP FUNCTION pg_stat_monitor_settings CASCADE; -- pg_stat_monitor internal function, must not call outside from this file. 
CREATE FUNCTION pg_stat_monitor_internal( @@ -369,7 +370,6 @@ SELECT pgsm_create_view(); REVOKE ALL ON FUNCTION range FROM PUBLIC; REVOKE ALL ON FUNCTION get_cmd_type FROM PUBLIC; -REVOKE ALL ON FUNCTION pg_stat_monitor_settings FROM PUBLIC; REVOKE ALL ON FUNCTION decode_error_level FROM PUBLIC; REVOKE ALL ON FUNCTION pg_stat_monitor_internal FROM PUBLIC; REVOKE ALL ON FUNCTION get_histogram_timings FROM PUBLIC; diff --git a/pg_stat_monitor--2.0.sql b/pg_stat_monitor--2.0.sql index e23a4e3..ff16d71 100644 --- a/pg_stat_monitor--2.0.sql +++ b/pg_stat_monitor--2.0.sql @@ -41,31 +41,6 @@ SELECT $$ LANGUAGE SQL PARALLEL SAFE; -CREATE FUNCTION pg_stat_monitor_settings( - OUT name text, - OUT value text, - OUT default_value text, - OUT description text, - OUT minimum INTEGER, - OUT maximum INTEGER, - OUT options text, - OUT restart text -) -RETURNS SETOF record -AS 'MODULE_PATHNAME', 'pg_stat_monitor_settings' -LANGUAGE C STRICT VOLATILE PARALLEL SAFE; - -CREATE VIEW pg_stat_monitor_settings AS SELECT - name, - value, - default_value, - description, - minimum, - maximum, - options, - restart -FROM pg_stat_monitor_settings(); - CREATE FUNCTION decode_error_level(elevel int) RETURNS text AS @@ -465,7 +440,6 @@ $$ LANGUAGE plpgsql; SELECT pgsm_create_view(); REVOKE ALL ON FUNCTION range FROM PUBLIC; REVOKE ALL ON FUNCTION get_cmd_type FROM PUBLIC; -REVOKE ALL ON FUNCTION pg_stat_monitor_settings FROM PUBLIC; REVOKE ALL ON FUNCTION decode_error_level FROM PUBLIC; REVOKE ALL ON FUNCTION pg_stat_monitor_internal FROM PUBLIC; REVOKE ALL ON FUNCTION get_histogram_timings FROM PUBLIC; diff --git a/pg_stat_monitor.c b/pg_stat_monitor.c index ddf1765..88ccada 100644 --- a/pg_stat_monitor.c +++ b/pg_stat_monitor.c @@ -125,7 +125,6 @@ PG_FUNCTION_INFO_V1(pg_stat_monitor_reset); PG_FUNCTION_INFO_V1(pg_stat_monitor_1_0); PG_FUNCTION_INFO_V1(pg_stat_monitor_2_0); PG_FUNCTION_INFO_V1(pg_stat_monitor); -PG_FUNCTION_INFO_V1(pg_stat_monitor_settings); 
PG_FUNCTION_INFO_V1(get_histogram_timings); PG_FUNCTION_INFO_V1(pg_stat_monitor_hook_stats); @@ -3304,136 +3303,6 @@ SaveQueryText(uint64 bucketid, return true; } -Datum -pg_stat_monitor_settings(PG_FUNCTION_ARGS) -{ - ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; - TupleDesc tupdesc; - Tuplestorestate *tupstore; - MemoryContext per_query_ctx; - MemoryContext oldcontext; - int i; - - /* Safety check... */ - if (!IsSystemInitialized()) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("pg_stat_monitor: must be loaded via shared_preload_libraries"))); - - /* check to see if caller supports us returning a tuplestore */ - if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("pg_stat_monitor: set-valued function called in context that cannot accept a set"))); - - /* Switch into long-lived context to construct returned data structures */ - per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; - oldcontext = MemoryContextSwitchTo(per_query_ctx); - - /* Build a tuple descriptor for our result type */ - if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) - { - elog(ERROR, "pg_stat_monitor_settings: return type must be a row type"); - return (Datum) 0; - } - - if (tupdesc->natts != 8) - { - elog(ERROR, "pg_stat_monitor_settings: incorrect number of output arguments, required: 7, found %d", tupdesc->natts); - return (Datum) 0; - } - - tupstore = tuplestore_begin_heap(true, false, work_mem); - rsinfo->returnMode = SFRM_Materialize; - rsinfo->setResult = tupstore; - rsinfo->setDesc = tupdesc; - - MemoryContextSwitchTo(oldcontext); - - for (i = 0; i < MAX_SETTINGS; i++) - { - Datum values[8]; - bool nulls[8]; - int j = 0; - char options[1024] = ""; - GucVariable *conf; - - memset(values, 0, sizeof(values)); - memset(nulls, 0, sizeof(nulls)); - - conf = get_conf(i); - - values[j++] = CStringGetTextDatum(conf->guc_name); - - /* Handle current and 
default values. */ - switch (conf->type) - { - case PGC_ENUM: - values[j++] = CStringGetTextDatum(conf->guc_options[conf->guc_variable]); - values[j++] = CStringGetTextDatum(conf->guc_options[conf->guc_default]); - break; - - case PGC_INT: - { - char value[32]; - - sprintf(value, "%d", conf->guc_variable); - values[j++] = CStringGetTextDatum(value); - - sprintf(value, "%d", conf->guc_default); - values[j++] = CStringGetTextDatum(value); - break; - } - - case PGC_BOOL: - values[j++] = CStringGetTextDatum(conf->guc_variable ? "yes" : "no"); - values[j++] = CStringGetTextDatum(conf->guc_default ? "yes" : "no"); - break; - - default: - Assert(false); - } - - values[j++] = CStringGetTextDatum(get_conf(i)->guc_desc); - - /* Minimum and maximum displayed only for integers or real numbers. */ - if (conf->type != PGC_INT) - { - nulls[j++] = true; - nulls[j++] = true; - } - else - { - values[j++] = Int32GetDatum(get_conf(i)->guc_min); - values[j++] = Int32GetDatum(get_conf(i)->guc_max); - } - - if (conf->type == PGC_ENUM) - { - size_t i; - - strcat(options, conf->guc_options[0]); - for (i = 1; i < conf->n_options; ++i) - { - strcat(options, ", "); - strcat(options, conf->guc_options[i]); - } - } - else if (conf->type == PGC_BOOL) - { - strcat(options, "yes, no"); - } - - values[j++] = CStringGetTextDatum(options); - values[j++] = CStringGetTextDatum(get_conf(i)->guc_restart ? 
"yes" : "no"); - tuplestore_putvalues(tupstore, tupdesc, values, nulls); - } - /* clean up and return the tuplestore */ - tuplestore_donestoring(tupstore); - return (Datum) 0; -} - - Datum pg_stat_monitor_hook_stats(PG_FUNCTION_ARGS) { diff --git a/regression/expected/guc.out b/regression/expected/guc.out index 56bdaab..4e1fa54 100644 --- a/regression/expected/guc.out +++ b/regression/expected/guc.out @@ -1,39 +1,280 @@ CREATE EXTENSION pg_stat_monitor; -SELECT pg_stat_monitor_reset(); - pg_stat_monitor_reset ------------------------ - -(1 row) - -select pg_sleep(.5); - pg_sleep ----------- - -(1 row) - -SELECT * FROM pg_stat_monitor_settings WHERE name NOT LIKE 'pg_stat_monitor.pgsm_track_planning' ORDER BY name COLLATE "C"; - name | value | default_value | description | minimum | maximum | options | restart -------------------------------------------+--------+---------------+----------------------------------------------------------------------------------------------------------+---------+------------+----------------+--------- - pg_stat_monitor.pgsm_bucket_time | 60 | 60 | Sets the time in seconds per bucket. | 1 | 2147483647 | | yes - pg_stat_monitor.pgsm_enable_query_plan | no | no | Enable/Disable query plan monitoring | | | yes, no | no - pg_stat_monitor.pgsm_extract_comments | no | no | Enable/Disable extracting comments from queries. | | | yes, no | no - pg_stat_monitor.pgsm_histogram_buckets | 10 | 10 | Sets the maximum number of histogram buckets | 2 | 50 | | yes - pg_stat_monitor.pgsm_histogram_max | 100000 | 100000 | Sets the time in millisecond. | 10 | 2147483647 | | yes - pg_stat_monitor.pgsm_histogram_min | 0 | 0 | Sets the time in millisecond. | 0 | 2147483647 | | yes - pg_stat_monitor.pgsm_max | 100 | 100 | Sets the maximum size of shared memory in (MB) used for statement's metadata tracked by pg_stat_monitor. | 1 | 1000 | | yes - pg_stat_monitor.pgsm_max_buckets | 10 | 10 | Sets the maximum number of buckets. 
| 1 | 10 | | yes - pg_stat_monitor.pgsm_normalized_query | no | no | Selects whether save query in normalized format. | | | yes, no | no - pg_stat_monitor.pgsm_overflow_target | 1 | 1 | Sets the overflow target for pg_stat_monitor | 0 | 1 | | yes - pg_stat_monitor.pgsm_query_max_len | 2048 | 2048 | Sets the maximum length of query. | 1024 | 2147483647 | | yes - pg_stat_monitor.pgsm_query_shared_buffer | 20 | 20 | Sets the maximum size of shared memory in (MB) used for query tracked by pg_stat_monitor. | 1 | 10000 | | yes - pg_stat_monitor.pgsm_track | top | top | Selects which statements are tracked by pg_stat_monitor. | | | none, top, all | no - pg_stat_monitor.pgsm_track_utility | yes | yes | Selects whether utility commands are tracked. | | | yes, no | no -(14 rows) - -SELECT pg_stat_monitor_reset(); - pg_stat_monitor_reset ------------------------ - -(1 row) +\x +SELECT name + , setting + , unit + , category + , short_desc + , extra_desc + , context + , vartype + , source + , min_val + , max_val + , enumvals + , boot_val + , reset_val + , sourcefile + , sourceline + , pending_restart +FROM pg_settings +WHERE name LIKE 'pg_stat_monitor.%' + AND name NOT LIKE 'pg_stat_monitor.pgsm_track_planning' +ORDER +BY name +COLLATE "C"; +-[ RECORD 1 ]---+--------------------------------------------------------------------------------------------------------- +name | pg_stat_monitor.pgsm_bucket_time +setting | 60 +unit | +category | Customized Options +short_desc | Sets the time in seconds per bucket. 
+extra_desc | +context | postmaster +vartype | integer +source | default +min_val | 1 +max_val | 2147483647 +enumvals | +boot_val | 60 +reset_val | 60 +sourcefile | +sourceline | +pending_restart | f +-[ RECORD 2 ]---+--------------------------------------------------------------------------------------------------------- +name | pg_stat_monitor.pgsm_enable_query_plan +setting | off +unit | +category | Customized Options +short_desc | Enable/Disable query plan monitoring +extra_desc | +context | user +vartype | bool +source | default +min_val | +max_val | +enumvals | +boot_val | off +reset_val | off +sourcefile | +sourceline | +pending_restart | f +-[ RECORD 3 ]---+--------------------------------------------------------------------------------------------------------- +name | pg_stat_monitor.pgsm_extract_comments +setting | off +unit | +category | Customized Options +short_desc | Enable/Disable extracting comments from queries. +extra_desc | +context | user +vartype | bool +source | default +min_val | +max_val | +enumvals | +boot_val | off +reset_val | off +sourcefile | +sourceline | +pending_restart | f +-[ RECORD 4 ]---+--------------------------------------------------------------------------------------------------------- +name | pg_stat_monitor.pgsm_histogram_buckets +setting | 10 +unit | +category | Customized Options +short_desc | Sets the maximum number of histogram buckets +extra_desc | +context | postmaster +vartype | integer +source | default +min_val | 2 +max_val | 50 +enumvals | +boot_val | 10 +reset_val | 10 +sourcefile | +sourceline | +pending_restart | f +-[ RECORD 5 ]---+--------------------------------------------------------------------------------------------------------- +name | pg_stat_monitor.pgsm_histogram_max +setting | 100000 +unit | +category | Customized Options +short_desc | Sets the time in millisecond. 
+extra_desc | +context | postmaster +vartype | integer +source | default +min_val | 10 +max_val | 2147483647 +enumvals | +boot_val | 100000 +reset_val | 100000 +sourcefile | +sourceline | +pending_restart | f +-[ RECORD 6 ]---+--------------------------------------------------------------------------------------------------------- +name | pg_stat_monitor.pgsm_histogram_min +setting | 0 +unit | +category | Customized Options +short_desc | Sets the time in millisecond. +extra_desc | +context | postmaster +vartype | integer +source | default +min_val | 0 +max_val | 2147483647 +enumvals | +boot_val | 0 +reset_val | 0 +sourcefile | +sourceline | +pending_restart | f +-[ RECORD 7 ]---+--------------------------------------------------------------------------------------------------------- +name | pg_stat_monitor.pgsm_max +setting | 100 +unit | MB +category | Customized Options +short_desc | Sets the maximum size of shared memory in (MB) used for statement's metadata tracked by pg_stat_monitor. +extra_desc | +context | postmaster +vartype | integer +source | default +min_val | 1 +max_val | 1000 +enumvals | +boot_val | 100 +reset_val | 100 +sourcefile | +sourceline | +pending_restart | f +-[ RECORD 8 ]---+--------------------------------------------------------------------------------------------------------- +name | pg_stat_monitor.pgsm_max_buckets +setting | 10 +unit | +category | Customized Options +short_desc | Sets the maximum number of buckets. +extra_desc | +context | postmaster +vartype | integer +source | default +min_val | 1 +max_val | 10 +enumvals | +boot_val | 10 +reset_val | 10 +sourcefile | +sourceline | +pending_restart | f +-[ RECORD 9 ]---+--------------------------------------------------------------------------------------------------------- +name | pg_stat_monitor.pgsm_normalized_query +setting | off +unit | +category | Customized Options +short_desc | Selects whether save query in normalized format. 
+extra_desc | +context | user +vartype | bool +source | default +min_val | +max_val | +enumvals | +boot_val | off +reset_val | off +sourcefile | +sourceline | +pending_restart | f +-[ RECORD 10 ]--+--------------------------------------------------------------------------------------------------------- +name | pg_stat_monitor.pgsm_overflow_target +setting | 1 +unit | +category | Customized Options +short_desc | Sets the overflow target for pg_stat_monitor +extra_desc | +context | postmaster +vartype | integer +source | default +min_val | 0 +max_val | 1 +enumvals | +boot_val | 1 +reset_val | 1 +sourcefile | +sourceline | +pending_restart | f +-[ RECORD 11 ]--+--------------------------------------------------------------------------------------------------------- +name | pg_stat_monitor.pgsm_query_max_len +setting | 2048 +unit | +category | Customized Options +short_desc | Sets the maximum length of query. +extra_desc | +context | postmaster +vartype | integer +source | default +min_val | 1024 +max_val | 2147483647 +enumvals | +boot_val | 2048 +reset_val | 2048 +sourcefile | +sourceline | +pending_restart | f +-[ RECORD 12 ]--+--------------------------------------------------------------------------------------------------------- +name | pg_stat_monitor.pgsm_query_shared_buffer +setting | 20 +unit | MB +category | Customized Options +short_desc | Sets the maximum size of shared memory in (MB) used for query tracked by pg_stat_monitor. +extra_desc | +context | postmaster +vartype | integer +source | default +min_val | 1 +max_val | 10000 +enumvals | +boot_val | 20 +reset_val | 20 +sourcefile | +sourceline | +pending_restart | f +-[ RECORD 13 ]--+--------------------------------------------------------------------------------------------------------- +name | pg_stat_monitor.pgsm_track +setting | top +unit | +category | Customized Options +short_desc | Selects which statements are tracked by pg_stat_monitor. 
+extra_desc | +context | user +vartype | enum +source | default +min_val | +max_val | +enumvals | {none,top,all} +boot_val | top +reset_val | top +sourcefile | +sourceline | +pending_restart | f +-[ RECORD 14 ]--+--------------------------------------------------------------------------------------------------------- +name | pg_stat_monitor.pgsm_track_utility +setting | on +unit | +category | Customized Options +short_desc | Selects whether utility commands are tracked. +extra_desc | +context | user +vartype | bool +source | default +min_val | +max_val | +enumvals | +boot_val | on +reset_val | on +sourcefile | +sourceline | +pending_restart | f +\x DROP EXTENSION pg_stat_monitor; diff --git a/regression/sql/guc.sql b/regression/sql/guc.sql index ee22e5e..9743b34 100644 --- a/regression/sql/guc.sql +++ b/regression/sql/guc.sql @@ -1,6 +1,31 @@ CREATE EXTENSION pg_stat_monitor; -SELECT pg_stat_monitor_reset(); -select pg_sleep(.5); -SELECT * FROM pg_stat_monitor_settings WHERE name NOT LIKE 'pg_stat_monitor.pgsm_track_planning' ORDER BY name COLLATE "C"; -SELECT pg_stat_monitor_reset(); + +\x + +SELECT name + , setting + , unit + , category + , short_desc + , extra_desc + , context + , vartype + , source + , min_val + , max_val + , enumvals + , boot_val + , reset_val + , sourcefile + , sourceline + , pending_restart +FROM pg_settings +WHERE name LIKE 'pg_stat_monitor.%' + AND name NOT LIKE 'pg_stat_monitor.pgsm_track_planning' +ORDER +BY name +COLLATE "C"; + +\x + DROP EXTENSION pg_stat_monitor; From 5a6b824737a3cf65ee797d3d271a60692e9c36d3 Mon Sep 17 00:00:00 2001 From: Naeem Akhter <40981522+Naeem-Akhter@users.noreply.github.com> Date: Wed, 14 Dec 2022 12:50:29 +0500 Subject: [PATCH 12/15] PG-373: Update test case - Remove WAL fields for PG12 and below. 
(#335) --- t/018_column_names.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/018_column_names.pl b/t/018_column_names.pl index eedade4..8cb03a3 100644 --- a/t/018_column_names.pl +++ b/t/018_column_names.pl @@ -67,7 +67,7 @@ my %pg_versions_pgsm_columns = ( 15 => "application_name,blk_read_time," . "message,min_time,planid,query,query_plan,queryid,relations,resp_calls," . "rows_retrieved,shared_blks_dirtied,shared_blks_hit,shared_blks_read," . "shared_blks_written,sqlcode,stddev_time,temp_blks_read,temp_blks_written," . - "top_query,top_queryid,total_time,userid,wal_bytes,wal_fpi,wal_records" + "top_query,top_queryid,total_time,userid" ); # Start server From df0580b7413836285e001e8ef9c45855a503118e Mon Sep 17 00:00:00 2001 From: Muhammad Usama Date: Tue, 20 Dec 2022 17:29:15 +0500 Subject: [PATCH 13/15] PG-488: pg_stat_monitor: Overflow management. Reimplement the storage mechanism of buckets and query texts using Dynamic shared memory. Since the dynamic shared memory can grow into a swap area, so we get the overflow out of the box. oreover the new design saves the query pointer inside the bucket and eventually, the query text gets evicted with the bucket recycle. 
Finally, the dynamic shared memory hash has a built-in locking mechanism so we can revisit the whole locking in pg_stat_monitor has potential for lots of performance improvements --- hash_query.c | 225 ++++++++++++++------- pg_stat_monitor.c | 490 ++++++++-------------------------------------- pg_stat_monitor.h | 44 ++--- 3 files changed, 246 insertions(+), 513 deletions(-) diff --git a/hash_query.c b/hash_query.c index 3139716..48e9532 100644 --- a/hash_query.c +++ b/hash_query.c @@ -16,59 +16,93 @@ */ #include "postgres.h" #include "nodes/pg_list.h" - #include "pg_stat_monitor.h" +static pgsmLocalState pgsmStateLocal; -static pgssSharedState *pgss; -static HTAB *pgss_hash; -static HTAB *pgss_query_hash; +/* parameter for the shared hash */ + static dshash_parameters dsh_params = { + sizeof(pgssHashKey), + sizeof(pgssEntry), + dshash_memcmp, + dshash_memhash + }; +static void pgsm_proc_exit(int code, Datum arg); - -static HTAB * -hash_init(const char *hash_name, int key_size, int entry_size, int hash_size) +static Size +pgsm_query_area_size(void) { - HASHCTL info; - - memset(&info, 0, sizeof(info)); - info.keysize = key_size; - info.entrysize = entry_size; - return ShmemInitHash(hash_name, hash_size, hash_size, &info, HASH_ELEM | HASH_BLOBS); + Size sz = MAXALIGN(MAX_QUERY_BUF); + return MAXALIGN(sz); } +Size +pgsm_ShmemSize(void) + { + Size sz = MAXALIGN(sizeof(pgssSharedState)); + sz = add_size(sz, pgsm_query_area_size()); + sz = add_size(sz, hash_estimate_size(MAX_BUCKET_ENTRIES, sizeof(pgssEntry))); + return sz; + } + void pgss_startup(void) { bool found = false; - + pgssSharedState *pgss; /* reset in case this is a restart within the postmaster */ - - pgss = NULL; - pgss_hash = NULL; + pgsmStateLocal.dsa = NULL; + pgsmStateLocal.shared_hash = NULL; + pgsmStateLocal.shared_pgssState = NULL; /* * Create or attach to the shared memory state, including hash table */ LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); - pgss = ShmemInitStruct("pg_stat_monitor", 
sizeof(pgssSharedState), &found); + pgss = ShmemInitStruct("pg_stat_monitor", pgsm_ShmemSize(), &found); if (!found) { /* First time through ... */ + dsa_area *dsa; + dshash_table *dsh; + char *p = (char *) pgss; + pgss->lock = &(GetNamedLWLockTranche("pg_stat_monitor"))->lock; SpinLockInit(&pgss->mutex); ResetSharedState(pgss); + /* the allocation of pgssSharedState itself */ + p += MAXALIGN(sizeof(pgssSharedState)); + pgss->raw_dsa_area = p; + dsa = dsa_create_in_place(pgss->raw_dsa_area, + pgsm_query_area_size(), + LWLockNewTrancheId(), 0); + dsa_pin(dsa); + dsa_set_size_limit(dsa, pgsm_query_area_size()); + + pgss->hash_tranche_id = LWLockNewTrancheId(); + + dsh_params.tranche_id = pgss->hash_tranche_id; + dsh = dshash_create(dsa, &dsh_params, 0); + + pgss->hash_handle = dshash_get_hash_table_handle(dsh); + + if (PGSM_OVERFLOW_TARGET == OVERFLOW_TARGET_DISK) + dsa_set_size_limit(dsa, -1); + + pgsmStateLocal.shared_pgssState = pgss; + /* + * Postmaster will never access these again, thus free the local + * dsa/dshash references. 
+ */ + dshash_detach(dsh); + dsa_detach(dsa); } #ifdef BENCHMARK init_hook_stats(); #endif - set_qbuf((unsigned char *) ShmemAlloc(MAX_QUERY_BUF)); - - pgss_hash = hash_init("pg_stat_monitor: bucket hashtable", sizeof(pgssHashKey), sizeof(pgssEntry), MAX_BUCKET_ENTRIES); - pgss_query_hash = hash_init("pg_stat_monitor: queryID hashtable", sizeof(uint64), sizeof(pgssQueryEntry), MAX_BUCKET_ENTRIES); - LWLockRelease(AddinShmemInitLock); /* @@ -78,23 +112,49 @@ pgss_startup(void) on_shmem_exit(pgss_shmem_shutdown, (Datum) 0); } +void +pgsm_attach_shmem(void) +{ + MemoryContext oldcontext; + if (pgsmStateLocal.dsa) + return; + + oldcontext = MemoryContextSwitchTo(TopMemoryContext); + + pgsmStateLocal.dsa = dsa_attach_in_place(pgsmStateLocal.shared_pgssState->raw_dsa_area, + NULL); + dsa_pin_mapping(pgsmStateLocal.dsa); + + dsh_params.tranche_id = pgsmStateLocal.shared_pgssState->hash_tranche_id; + pgsmStateLocal.shared_hash = dshash_attach(pgsmStateLocal.dsa, &dsh_params, + pgsmStateLocal.shared_pgssState->hash_handle, 0); + + on_proc_exit(pgsm_proc_exit, 0); + + MemoryContextSwitchTo(oldcontext); +} + +dsa_area* +get_dsa_area_for_query_text(void) +{ + pgsm_attach_shmem(); + return pgsmStateLocal.dsa; +} + +dshash_table* +get_pgssHash(void) +{ + pgsm_attach_shmem(); + return pgsmStateLocal.shared_hash; +} + pgssSharedState * pgsm_get_ss(void) { - return pgss; + pgsm_attach_shmem(); + return pgsmStateLocal.shared_pgssState; } -HTAB * -pgsm_get_hash(void) -{ - return pgss_hash; -} - -HTAB * -pgsm_get_query_hash(void) -{ - return pgss_query_hash; -} /* * shmem_shutdown hook: Dump statistics into file. @@ -106,26 +166,24 @@ void pgss_shmem_shutdown(int code, Datum arg) { /* Don't try to dump during a crash. */ + elog(LOG,"pgss_shmem_shutdown"); if (code) return; - pgss = NULL; + pgsmStateLocal.shared_pgssState = NULL; /* Safety check ... shouldn't get here unless shmem is set up. 
*/ if (!IsHashInitialize()) return; } -Size -hash_memsize(void) +static void +pgsm_proc_exit(int code, Datum arg) { - Size size; - - size = MAXALIGN(sizeof(pgssSharedState)); - size += MAXALIGN(MAX_QUERY_BUF); - size = add_size(size, hash_estimate_size(MAX_BUCKET_ENTRIES, sizeof(pgssEntry))); - size = add_size(size, hash_estimate_size(MAX_BUCKET_ENTRIES, sizeof(pgssQueryEntry))); - - return size; + Assert(pgsmStateLocal.dsa); + dshash_detach(pgsmStateLocal.shared_hash); + pgsmStateLocal.shared_hash = NULL; + dsa_detach(pgsmStateLocal.dsa); + pgsmStateLocal.dsa = NULL; } pgssEntry * @@ -134,13 +192,9 @@ hash_entry_alloc(pgssSharedState *pgss, pgssHashKey *key, int encoding) pgssEntry *entry = NULL; bool found = false; - if (hash_get_num_entries(pgss_hash) >= MAX_BUCKET_ENTRIES) - { - elog(DEBUG1, "pg_stat_monitor: out of memory"); - return NULL; - } /* Find or create an entry with desired hash code */ - entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER_NULL, &found); + entry = (pgssEntry *) dshash_find_or_insert(pgsmStateLocal.shared_hash, key, &found); + // entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER_NULL, &found); if (entry == NULL) elog(DEBUG1, "hash_entry_alloc: OUT OF MEMORY"); else if (!found) @@ -155,6 +209,7 @@ hash_entry_alloc(pgssSharedState *pgss, pgssHashKey *key, int encoding) /* ... and don't forget the query text metadata */ entry->encoding = encoding; } + dshash_release_lock(pgsmStateLocal.shared_hash, entry); return entry; } @@ -174,17 +229,22 @@ hash_entry_alloc(pgssSharedState *pgss, pgssHashKey *key, int encoding) void hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_buffer) { - HASH_SEQ_STATUS hash_seq; + dshash_seq_status hstat; pgssEntry *entry = NULL; - /* Store pending query ids from the previous bucket. */ List *pending_entries = NIL; ListCell *pending_entry; + if (!pgsmStateLocal.shared_hash) + return; + /* Iterate over the hash table. 
*/ - hash_seq_init(&hash_seq, pgss_hash); - while ((entry = hash_seq_search(&hash_seq)) != NULL) + dshash_seq_init(&hstat, pgsmStateLocal.shared_hash, true); + + while ((entry = dshash_seq_next(&hstat)) != NULL) { + dsa_pointer pdsa; + /* * Remove all entries if new_bucket_id == -1. Otherwise remove entry * in new_bucket_id if it has finished already. @@ -193,16 +253,14 @@ hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_bu (entry->key.bucket_id == new_bucket_id && (entry->counters.state == PGSS_FINISHED || entry->counters.state == PGSS_ERROR))) { - if (new_bucket_id == -1) - { - /* - * pg_stat_monitor_reset(), remove entry from query hash table - * too. - */ - hash_search(pgss_query_hash, &(entry->key.queryid), HASH_REMOVE, NULL); - } + pdsa = entry->query_pos; + dsa_pointer parent_qdsa = entry->counters.info.parent_query; + dshash_delete_current(&hstat); + dsa_free(pgsmStateLocal.dsa, pdsa); + + if (DsaPointerIsValid(parent_qdsa)) + dsa_free(pgsmStateLocal.dsa, parent_qdsa); - entry = hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL); } /* @@ -238,7 +296,11 @@ hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_bu if (entry->counters.calls.calls > 1) entry->counters.state = PGSS_FINISHED; else - entry = hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL); + { + pdsa = entry->query_pos; + dshash_delete_current(&hstat); + dsa_free(pgsmStateLocal.dsa, pdsa); + } continue; } @@ -266,11 +328,15 @@ hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_bu if (entry->counters.calls.calls > 1) entry->counters.state = PGSS_FINISHED; else - entry = hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL); + { + pdsa = entry->query_pos; + dshash_delete_current(&hstat); + dsa_free(pgsmStateLocal.dsa, pdsa); + } } } } - + dshash_seq_term(&hstat); /* * Iterate over the list of pending queries in order to add them back to * the hash table with the updated bucket id. 
@@ -281,7 +347,8 @@ hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_bu pgssEntry *new_entry; pgssEntry *old_entry = (pgssEntry *) lfirst(pending_entry); - new_entry = (pgssEntry *) hash_search(pgss_hash, &old_entry->key, HASH_ENTER_NULL, &found); + + new_entry = (pgssEntry *) dshash_find_or_insert(pgsmStateLocal.shared_hash, &old_entry->key, &found); if (new_entry == NULL) elog(DEBUG1, "%s", "pg_stat_monitor: out of memory"); else if (!found) @@ -292,8 +359,9 @@ hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_bu new_entry->encoding = old_entry->encoding; new_entry->query_pos = old_entry->query_pos; } - free(old_entry); + dshash_release_lock(pgsmStateLocal.shared_hash, entry); + } list_free(pending_entries); @@ -306,16 +374,22 @@ void hash_entry_reset() { pgssSharedState *pgss = pgsm_get_ss(); - HASH_SEQ_STATUS hash_seq; + dshash_seq_status hstat; pgssEntry *entry; LWLockAcquire(pgss->lock, LW_EXCLUSIVE); - hash_seq_init(&hash_seq, pgss_hash); - while ((entry = hash_seq_search(&hash_seq)) != NULL) + dshash_seq_init(&hstat, pgsmStateLocal.shared_hash, true); + + while ((entry = dshash_seq_next(&hstat)) != NULL) { - hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL); + dsa_pointer pdsa = entry->query_pos; + dshash_delete_current(&hstat); + dsa_free(pgsmStateLocal.dsa, pdsa); } + + dshash_seq_term(&hstat); + pg_atomic_write_u64(&pgss->current_wbucket, 0); LWLockRelease(pgss->lock); } @@ -323,6 +397,5 @@ hash_entry_reset() bool IsHashInitialize(void) { - return (pgss != NULL && - pgss_hash != NULL); + return (pgsmStateLocal.shared_pgssState != NULL); } diff --git a/pg_stat_monitor.c b/pg_stat_monitor.c index ddf1765..bfb9619 100644 --- a/pg_stat_monitor.c +++ b/pg_stat_monitor.c @@ -32,7 +32,6 @@ PGSM_V2_0 } pgsmVersion; - PG_MODULE_MAGIC; #define BUILD_VERSION "2.0.0-dev" @@ -78,6 +77,7 @@ static int plan_nested_level = 0; /* The array to store outer layer query id*/ uint64 *nested_queryids; +char 
**nested_query_txts; /* Regex object used to extract query comments. */ static regex_t preg_query_comments; @@ -88,13 +88,11 @@ static struct rusage rusage_start; static struct rusage rusage_end; /* Query buffer, store queries' text. */ -static unsigned char *pgss_qbuf = NULL; static char *pgss_explain(QueryDesc *queryDesc); static void extract_query_comments(const char *query, char *comments, size_t max_len); static int get_histogram_bucket(double q_time); static bool IsSystemInitialized(void); -static bool dump_queries_buffer(int bucket_id, unsigned char *buf, int buf_len); static double time_diff(struct timeval end, struct timeval start); static void request_additional_shared_resources(void); @@ -230,7 +228,6 @@ static uint64 djb2_hash(unsigned char *str, size_t len); /* Same as above, but stores the calculated string length into *out_len (small optimization) */ static uint64 djb2_hash_str(unsigned char *str, int *out_len); - /* * Module load callback */ @@ -239,7 +236,6 @@ void _PG_init(void) { int rc; - char file_name[1024]; elog(DEBUG2, "pg_stat_monitor: %s()", __FUNCTION__); @@ -266,8 +262,6 @@ _PG_init(void) EnableQueryId(); #endif - snprintf(file_name, 1024, "%s", PGSM_TEXT_FILE); - unlink(file_name); EmitWarningsOnPlaceholders("pg_stat_monitor"); @@ -313,6 +307,7 @@ _PG_init(void) ExecutorCheckPerms_hook = HOOK(pgss_ExecutorCheckPerms); nested_queryids = (uint64 *) malloc(sizeof(uint64) * max_stack_depth); + nested_query_txts = (char **) malloc(sizeof(char*) * max_stack_depth); system_init = true; } @@ -335,6 +330,7 @@ _PG_fini(void) emit_log_hook = prev_emit_log_hook; free(nested_queryids); + free(nested_query_txts); regfree(&preg_query_comments); hash_entry_reset(); @@ -363,7 +359,7 @@ request_additional_shared_resources(void) * the postmaster process.) We'll allocate or attach to the shared * resources in pgss_shmem_startup(). 
*/ - RequestAddinShmemSpace(hash_memsize() + HOOK_STATS_SIZE); + RequestAddinShmemSpace(pgsm_ShmemSize() + HOOK_STATS_SIZE); RequestNamedLWLockTranche("pg_stat_monitor", 1); } /* @@ -554,7 +550,11 @@ pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once) { if (exec_nested_level >= 0 && exec_nested_level < max_stack_depth) + { nested_queryids[exec_nested_level] = queryDesc->plannedstmt->queryId; + nested_query_txts[exec_nested_level] = strdup(queryDesc->sourceText); + } + exec_nested_level++; PG_TRY(); { @@ -564,13 +564,23 @@ pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, standard_ExecutorRun(queryDesc, direction, count, execute_once); exec_nested_level--; if (exec_nested_level >= 0 && exec_nested_level < max_stack_depth) + { nested_queryids[exec_nested_level] = UINT64CONST(0); + if(nested_query_txts[exec_nested_level]) + free(nested_query_txts[exec_nested_level]); + nested_query_txts[exec_nested_level] = NULL; + } } PG_CATCH(); { exec_nested_level--; if (exec_nested_level >= 0 && exec_nested_level < max_stack_depth) + { nested_queryids[exec_nested_level] = UINT64CONST(0); + if(nested_query_txts[exec_nested_level]) + free(nested_query_txts[exec_nested_level]); + nested_query_txts[exec_nested_level] = NULL; + } PG_RE_THROW(); } PG_END_TRY(); @@ -1260,11 +1270,29 @@ pgss_update_entry(pgssEntry *entry, if (exec_nested_level > 0) { if (exec_nested_level >= 0 && exec_nested_level < max_stack_depth) + { + int parent_query_len = nested_query_txts[exec_nested_level - 1]? 
+ strlen(nested_query_txts[exec_nested_level - 1]): 0; e->counters.info.parentid = nested_queryids[exec_nested_level - 1]; + if (parent_query_len > 0) + { + char *qry_buff; + dsa_area *query_dsa_area = get_dsa_area_for_query_text(); + dsa_pointer qry = dsa_allocate(query_dsa_area, parent_query_len+1); + qry_buff = dsa_get_address(query_dsa_area, qry); + memcpy(qry_buff, nested_query_txts[exec_nested_level - 1], parent_query_len); + qry_buff[parent_query_len] = 0; + e->counters.info.parent_query = qry; + } + else + e->counters.info.parent_query = InvalidDsaPointer; + + } } else { e->counters.info.parentid = UINT64CONST(0); + e->counters.info.parent_query = InvalidDsaPointer; } if (error_info) @@ -1381,7 +1409,6 @@ pgss_store(uint64 queryid, JumbleState *jstate, pgssStoreKind kind) { - HTAB *pgss_hash; pgssHashKey key; pgssEntry *entry; pgssSharedState *pgss = pgsm_get_ss(); @@ -1486,19 +1513,15 @@ pgss_store(uint64 queryid, #else key.toplevel = ((exec_nested_level + plan_nested_level) == 0); #endif - pgss_hash = pgsm_get_hash(); LWLockAcquire(pgss->lock, LW_SHARED); - entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL); + entry = (pgssEntry *) dshash_find(get_pgssHash(), &key, false); if (!entry) { - pgssQueryEntry *query_entry; - bool query_found = false; - uint64 prev_qbuf_len = 0; - HTAB *pgss_query_hash; + dsa_pointer dsa_query_pointer; + char* query_buff; - pgss_query_hash = pgsm_get_query_hash(); /* * Create a new, normalized query string if caller asked. 
We don't @@ -1509,74 +1532,41 @@ pgss_store(uint64 queryid, */ if (jstate && PGSM_NORMALIZED_QUERY) { - LWLockRelease(pgss->lock); norm_query = generate_normalized_query(jstate, query, query_location, &query_len, GetDatabaseEncoding()); - LWLockAcquire(pgss->lock, LW_SHARED); } - query_entry = hash_search(pgss_query_hash, &queryid, HASH_ENTER_NULL, &query_found); - if (query_entry == NULL) - { - LWLockRelease(pgss->lock); - if (norm_query) - pfree(norm_query); - elog(DEBUG1, "pgss_store: out of memory (pgss_query_hash)."); - return; - } - else if (!query_found) - { - /* New query, truncate length if necessary. */ - if (query_len > PGSM_QUERY_MAX_LEN) - query_len = PGSM_QUERY_MAX_LEN; - } + /* New query, truncate length if necessary. */ + if (query_len > PGSM_QUERY_MAX_LEN) + query_len = PGSM_QUERY_MAX_LEN; /* Need exclusive lock to make a new hashtable entry - promote */ LWLockRelease(pgss->lock); LWLockAcquire(pgss->lock, LW_EXCLUSIVE); - if (!query_found) - { - if (!SaveQueryText(bucketid, - queryid, - pgss_qbuf, - norm_query ? norm_query : query, - query_len, - &query_entry->query_pos)) - { - LWLockRelease(pgss->lock); - if (norm_query) - pfree(norm_query); - elog(DEBUG1, "pgss_store: insufficient shared space for query."); - return; - } - - /* - * Save current query buffer length, if we fail to add a new new - * entry to the hash table then we must restore the original - * length. - */ - memcpy(&prev_qbuf_len, pgss_qbuf, sizeof(prev_qbuf_len)); - } + /* Save the query text in raw dsa area */ + dsa_area* query_dsa_area = get_dsa_area_for_query_text(); + dsa_query_pointer = dsa_allocate(query_dsa_area, query_len+1); + query_buff = dsa_get_address(query_dsa_area, dsa_query_pointer); + memcpy(query_buff, norm_query ? norm_query : query, query_len); + query_buff[query_len] = 0; /* OK to create a new hashtable entry */ entry = hash_entry_alloc(pgss, &key, GetDatabaseEncoding()); if (entry == NULL) { - if (!query_found) - { - /* Restore previous query buffer length. 
*/ - memcpy(pgss_qbuf, &prev_qbuf_len, sizeof(prev_qbuf_len)); - } LWLockRelease(pgss->lock); if (norm_query) pfree(norm_query); return; } - entry->query_pos = query_entry->query_pos; + entry->query_pos = dsa_query_pointer; } + else + dshash_release_lock(get_pgssHash(), entry); + if (jstate == NULL) pgss_update_entry(entry, /* entry */ @@ -1619,9 +1609,6 @@ pg_stat_monitor_reset(PG_FUNCTION_ARGS) LWLockAcquire(pgss->lock, LW_EXCLUSIVE); hash_entry_dealloc(-1, -1, NULL); - /* Reset query buffer. */ - *(uint64 *) pgss_qbuf = 0; - LWLockRelease(pgss->lock); PG_RETURN_VOID(); } @@ -1678,13 +1665,12 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, Tuplestorestate *tupstore; MemoryContext per_query_ctx; MemoryContext oldcontext; - HASH_SEQ_STATUS hash_seq; + dshash_seq_status hstat; pgssEntry *entry; char parentid_txt[32]; pgssSharedState *pgss = pgsm_get_ss(); - HTAB *pgss_hash = pgsm_get_hash(); - char *query_txt = (char *) palloc0(PGSM_QUERY_MAX_LEN + 1); - char *parent_query_txt = (char *) palloc0(PGSM_QUERY_MAX_LEN + 1); + char *query_txt = NULL; + char *parent_query_txt = NULL; int expected_columns = (api_version >= PGSM_V2_0)?PG_STAT_MONITOR_COLS_V2_0:PG_STAT_MONITOR_COLS_V1_0; /* Safety check... 
*/ @@ -1722,10 +1708,11 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, MemoryContextSwitchTo(oldcontext); - LWLockAcquire(pgss->lock, LW_SHARED); + // LWLockAcquire(pgss->lock, LW_SHARED); - hash_seq_init(&hash_seq, pgss_hash); - while ((entry = hash_seq_search(&hash_seq)) != NULL) + dshash_seq_init(&hstat, get_pgssHash(), false); + + while ((entry = dshash_seq_next(&hstat)) != NULL) { Datum values[PG_STAT_MONITOR_COLS] = {0}; bool nulls[PG_STAT_MONITOR_COLS] = {0}; @@ -1740,6 +1727,8 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, uint64 userid = entry->key.userid; int64 ip = entry->key.ip; uint64 planid = entry->key.planid; + dsa_area *query_dsa_area; + char *query_ptr; #if PG_VERSION_NUM < 140000 bool toplevel = 1; bool is_allowed_role = is_member_of_role(GetUserId(), DEFAULT_ROLE_READ_ALL_STATS); @@ -1747,15 +1736,10 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, bool is_allowed_role = is_member_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS); bool toplevel = entry->key.toplevel; #endif - - if (read_query(pgss_qbuf, queryid, query_txt, entry->query_pos) == 0) - { - int rc; - - rc = read_query_buffer(bucketid, queryid, query_txt, entry->query_pos); - if (rc != 1) - snprintf(query_txt, 32, "%s", ""); - } + /* Load the query text from dsa area */ + query_dsa_area = get_dsa_area_for_query_text(); + query_ptr = dsa_get_address(query_dsa_area, entry->query_pos); + query_txt = pstrdup(query_ptr); /* copy counters to a local variable to keep locking time short */ { @@ -1783,15 +1767,17 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, if (tmp.state == PGSS_PARSE || tmp.state == PGSS_PLAN) continue; + /* read the parent query text if any */ if (tmp.info.parentid != UINT64CONST(0)) { - if (read_query(pgss_qbuf, tmp.info.parentid, parent_query_txt, 0) == 0) + if (DsaPointerIsValid(tmp.info.parent_query)) { - int rc = read_query_buffer(bucketid, tmp.info.parentid, parent_query_txt, 0); - - if (rc != 1) - snprintf(parent_query_txt, 32, "%s", ""); + 
query_dsa_area = get_dsa_area_for_query_text(); + query_ptr = dsa_get_address(query_dsa_area, tmp.info.parent_query); + parent_query_txt = pstrdup(query_ptr); } + else + parent_query_txt = pstrdup("parent query text not available"); } /* bucketid at column number 0 */ values[i++] = Int64GetDatumFast(bucketid); @@ -2071,10 +2057,12 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, tuplestore_putvalues(tupstore, tupdesc, values, nulls); } /* clean up and return the tuplestore */ - LWLockRelease(pgss->lock); + dshash_seq_term(&hstat); - pfree(query_txt); - pfree(parent_query_txt); + if(query_txt) + pfree(query_txt); + if(parent_query_txt) + pfree(parent_query_txt); tuplestore_donestoring(tupstore); } @@ -2120,7 +2108,6 @@ get_next_wbucket(pgssSharedState *pgss) if (update_bucket) { - char file_name[1024]; new_bucket_id = (tv.tv_sec / PGSM_BUCKET_TIME) % PGSM_MAX_BUCKETS; @@ -2128,24 +2115,7 @@ get_next_wbucket(pgssSharedState *pgss) prev_bucket_id = pg_atomic_exchange_u64(&pgss->current_wbucket, new_bucket_id); LWLockAcquire(pgss->lock, LW_EXCLUSIVE); - hash_entry_dealloc(new_bucket_id, prev_bucket_id, pgss_qbuf); - - if (pgss->overflow) - { - pgss->n_bucket_cycles += 1; - if (pgss->n_bucket_cycles >= PGSM_MAX_BUCKETS) - { - /* - * A full rotation of PGSM_MAX_BUCKETS buckets happened since - * we detected a query buffer overflow. - * Reset overflow state and remove the dump file. 
- */ - pgss->overflow = false; - pgss->n_bucket_cycles = 0; - snprintf(file_name, 1024, "%s", PGSM_TEXT_FILE); - unlink(file_name); - } - } + hash_entry_dealloc(new_bucket_id, prev_bucket_id, NULL); LWLockRelease(pgss->lock); @@ -3144,165 +3114,6 @@ intarray_get_datum(int32 arr[], int len) } -uint64 -read_query(unsigned char *buf, uint64 queryid, char *query, size_t pos) -{ - bool found = false; - uint64 query_id = 0; - uint64 query_len = 0; - uint64 rlen = 0; - uint64 buf_len = 0; - - memcpy(&buf_len, buf, sizeof(uint64)); - if (buf_len <= 0) - goto exit; - - /* If a position hint is given, try to locate the query directly. */ - if (pos != 0 && (pos + sizeof(uint64) + sizeof(uint64)) < buf_len) - { - memcpy(&query_id, &buf[pos], sizeof(uint64)); - if (query_id != queryid) - return 0; - - pos += sizeof(uint64); - - memcpy(&query_len, &buf[pos], sizeof(uint64)); /* query len */ - pos += sizeof(uint64); - - if (pos + query_len > buf_len) /* avoid reading past buffer's length. */ - return 0; - - memcpy(query, &buf[pos], query_len); /* Actual query */ - query[query_len] = '\0'; - - return queryid; - } - - rlen = sizeof(uint64); /* Move forwad to skip length bytes */ - for (;;) - { - if (rlen >= buf_len) - goto exit; - - memcpy(&query_id, &buf[rlen], sizeof(uint64)); /* query id */ - if (query_id == queryid) - found = true; - - rlen += sizeof(uint64); - if (buf_len <= rlen) - continue; - - memcpy(&query_len, &buf[rlen], sizeof(uint64)); /* query len */ - rlen += sizeof(uint64); - if (buf_len < rlen + query_len) - goto exit; - if (found) - { - if (query != NULL) - { - memcpy(query, &buf[rlen], query_len); /* Actual query */ - query[query_len] = 0; - } - return query_id; - } - rlen += query_len; - } -exit: - if (PGSM_OVERFLOW_TARGET == OVERFLOW_TARGET_NONE) - { - sprintf(query, "%s", ""); - return -1; - } - return 0; -} - -bool -SaveQueryText(uint64 bucketid, - uint64 queryid, - unsigned char *buf, - const char *query, - uint64 query_len, - size_t *query_pos) -{ - uint64 
buf_len = 0; - - memcpy(&buf_len, buf, sizeof(uint64)); - if (buf_len == 0) - buf_len += sizeof(uint64); - - if (QUERY_BUFFER_OVERFLOW(buf_len, query_len)) - { - switch (PGSM_OVERFLOW_TARGET) - { - case OVERFLOW_TARGET_NONE: - return false; - case OVERFLOW_TARGET_DISK: - { - bool dump_ok; - pgssSharedState *pgss = pgsm_get_ss(); - - if (pgss->overflow) - { - elog(DEBUG1, "query buffer overflowed twice"); - return false; - } - - /* - * If the query buffer is empty, there is nothing to dump, - * this also means that the current query length exceeds - * MAX_QUERY_BUF. - */ - if (buf_len <= sizeof(uint64)) - return false; - - dump_ok = dump_queries_buffer(bucketid, buf, MAX_QUERY_BUF); - buf_len = sizeof(uint64); - - if (dump_ok) - { - pgss->overflow = true; - pgss->n_bucket_cycles = 0; - } - - /* - * We must check for overflow again, as the query length - * may exceed the total size allocated to the buffer - * (MAX_QUERY_BUF). - */ - if (QUERY_BUFFER_OVERFLOW(buf_len, query_len)) - { - /* - * If we successfully dumped the query buffer to disk, - * then reset the buffer, otherwise we could end up - * dumping the same buffer again. 
- */ - if (dump_ok) - *(uint64 *) buf = 0; - - return false; - } - - } - break; - default: - Assert(false); - break; - } - } - - *query_pos = buf_len; - - memcpy(&buf[buf_len], &queryid, sizeof(uint64)); /* query id */ - buf_len += sizeof(uint64); - - memcpy(&buf[buf_len], &query_len, sizeof(uint64)); /* query length */ - buf_len += sizeof(uint64); - - memcpy(&buf[buf_len], query, query_len); /* query */ - buf_len += query_len; - memcpy(buf, &buf_len, sizeof(uint64)); - return true; -} Datum pg_stat_monitor_settings(PG_FUNCTION_ARGS) @@ -3440,12 +3251,6 @@ pg_stat_monitor_hook_stats(PG_FUNCTION_ARGS) return (Datum) 0; } -void -set_qbuf(unsigned char *buf) -{ - pgss_qbuf = buf; - *(uint64 *) pgss_qbuf = 0; -} void pgsm_emit_log_hook(ErrorData *edata) @@ -3482,145 +3287,6 @@ IsSystemInitialized(void) return (system_init && IsHashInitialize()); } -static bool -dump_queries_buffer(int bucket_id, unsigned char *buf, int buf_len) -{ - int fd = 0; - char file_name[1024]; - bool success = true; - int off = 0; - int tries = 0; - - snprintf(file_name, 1024, "%s", PGSM_TEXT_FILE); - fd = OpenTransientFile(file_name, O_RDWR | O_CREAT | O_APPEND | PG_BINARY); - if (fd < 0) - { - ereport(LOG, - (errcode_for_file_access(), - errmsg("could not write file \"%s\": %m", - file_name))); - return false; - } - - /* Loop until write buf_len bytes to the file. */ - do - { - ssize_t nwrite = write(fd, buf + off, buf_len - off); - - if (nwrite == -1) - { - if (errno == EINTR && tries++ < 3) - continue; - - success = false; - break; - } - off += nwrite; - } while (off < buf_len); - - if (!success) - ereport(LOG, - (errcode_for_file_access(), - errmsg("could not write file \"%s\": %m", file_name))); - - if (fd > 0) - CloseTransientFile(fd); - - return success; -} - -/* - * Try to locate query text in a dumped file for bucket_id. - * - * Returns: - * 1 Query sucessfully read, query_text will contain the query text. - * 0 Query not found. - * -1 I/O Error. 
- */ -int -read_query_buffer(int bucket_id, uint64 queryid, char *query_txt, size_t pos) -{ - int fd = 0; - char file_name[1024]; - unsigned char *buf = NULL; - ssize_t nread = 0; - int off = 0; - int tries = 0; - bool done = false; - bool found = false; - - snprintf(file_name, 1024, "%s", PGSM_TEXT_FILE); - fd = OpenTransientFile(file_name, O_RDONLY | PG_BINARY); - if (fd < 0) - goto exit; - - buf = (unsigned char *) palloc(MAX_QUERY_BUF); - while (!done) - { - off = 0; - /* read a chunck of MAX_QUERY_BUF size. */ - do - { - nread = read(fd, buf + off, MAX_QUERY_BUF - off); - if (nread == -1) - { - if (errno == EINTR && tries++ < 3) /* read() was interrupted, - * attempt to read again - * (max attempts=3) */ - continue; - - goto exit; - } - else if (nread == 0) /* EOF */ - { - done = true; - break; - } - - off += nread; - } while (off < MAX_QUERY_BUF); - - if (off == MAX_QUERY_BUF) - { - /* we have a chunck, scan it looking for queryid. */ - if (read_query(buf, queryid, query_txt, pos) != 0) - { - - found = true; - /* query was found, don't need to read another chunck. */ - break; - } - } - else - - /* - * Either done=true or file has a size not multiple of - * MAX_QUERY_BUF. It is safe to assume that the file was truncated - * or corrupted. - */ - break; - } - -exit: - if (fd < 0 || nread == -1) - ereport(LOG, - (errcode_for_file_access(), - errmsg("could not read file \"%s\": %m", - file_name))); - - if (fd >= 0) - CloseTransientFile(fd); - - if (buf) - pfree(buf); - - if (found) - return 1; - else if (fd == -1 || nread == -1) - return -1; /* I/O error. */ - else - return 0; /* Not found. 
*/ -} static double time_diff(struct timeval end, struct timeval start) diff --git a/pg_stat_monitor.h b/pg_stat_monitor.h index dd66c9d..78d8183 100644 --- a/pg_stat_monitor.h +++ b/pg_stat_monitor.h @@ -27,6 +27,9 @@ #include #include +#include "lib/dshash.h" +#include "utils/dsa.h" + #include "access/hash.h" #include "catalog/pg_authid.h" #include "executor/instrument.h" @@ -179,20 +182,6 @@ typedef struct CallTime double sum_var_time; /* sum of variances in execution time in msec */ } CallTime; -/* - * Entry type for queries hash table (query ID). - * - * We use a hash table to keep track of query IDs that have their - * corresponding query text added to the query buffer (pgsm_query_shared_buffer). - * - * This allow us to avoid adding duplicated queries to the buffer, therefore - * leaving more space for other queries and saving some CPU. - */ -typedef struct pgssQueryEntry -{ - uint64 queryid; /* query identifier, also the key. */ - size_t query_pos; /* query location within query buffer */ -} pgssQueryEntry; typedef struct PlanInfo { @@ -216,6 +205,7 @@ typedef struct pgssHashKey typedef struct QueryInfo { uint64 parentid; /* parent queryid of current query */ + dsa_pointer parent_query; int64 type; /* type of query, options are query, info, * warning, error, fatal */ char application_name[APPLICATIONNAME_LEN]; @@ -322,7 +312,7 @@ typedef struct pgssEntry Counters counters; /* the statistics for this query */ int encoding; /* query text encoding */ slock_t mutex; /* protects the counters only */ - size_t query_pos; /* query location within query buffer */ + dsa_pointer query_pos; /* query location within query buffer */ } pgssEntry; /* @@ -353,10 +343,19 @@ typedef struct pgssSharedState * This allows us to avoid having a large file on disk that would also * slowdown queries to the pg_stat_monitor view. 
*/ - bool overflow; size_t n_bucket_cycles; + int hash_tranche_id; + void *raw_dsa_area; + dshash_table_handle hash_handle; } pgssSharedState; +typedef struct pgsmLocalState +{ + pgssSharedState *shared_pgssState; + dsa_area *dsa; + dshash_table *shared_hash; +}pgsmLocalState; + #define ResetSharedState(x) \ do { \ x->cur_median_usage = ASSUMED_MEDIAN_INIT; \ @@ -418,27 +417,22 @@ void init_guc(void); GucVariable *get_conf(int i); /* hash_create.c */ +dsa_area *get_dsa_area_for_query_text(void); +dshash_table *get_pgssHash(void); +void pgsm_attach_shmem(void); bool IsHashInitialize(void); void pgss_shmem_startup(void); void pgss_shmem_shutdown(int code, Datum arg); int pgsm_get_bucket_size(void); pgssSharedState *pgsm_get_ss(void); -HTAB *pgsm_get_plan_hash(void); -HTAB *pgsm_get_hash(void); -HTAB *pgsm_get_query_hash(void); -HTAB *pgsm_get_plan_hash(void); void hash_entry_reset(void); void hash_query_entryies_reset(void); void hash_query_entries(); void hash_query_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_buffer[]); void hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_buffer); pgssEntry *hash_entry_alloc(pgssSharedState *pgss, pgssHashKey *key, int encoding); -Size hash_memsize(void); - -int read_query_buffer(int bucket_id, uint64 queryid, char *query_txt, size_t pos); -uint64 read_query(unsigned char *buf, uint64 queryid, char *query, size_t pos); +Size pgsm_ShmemSize(void); void pgss_startup(void); -void set_qbuf(unsigned char *); /* hash_query.c */ void pgss_startup(void); From 802774a2a771ef2912b04f4fee8bafcca61b0a3b Mon Sep 17 00:00:00 2001 From: Ibrar Ahmed Date: Thu, 22 Dec 2022 19:15:14 +0500 Subject: [PATCH 14/15] PG-488: Revert pg_stat_monitor: Overflow management. (#338) PG-488: Revert pg_stat_monitor: Overflow management. This patch does not work for < PostgreSQL - 15. More work required. 
--- hash_query.c | 225 +++++++-------------- pg_stat_monitor.c | 490 ++++++++++++++++++++++++++++++++++++++-------- pg_stat_monitor.h | 44 +++-- 3 files changed, 513 insertions(+), 246 deletions(-) diff --git a/hash_query.c b/hash_query.c index 48e9532..3139716 100644 --- a/hash_query.c +++ b/hash_query.c @@ -16,93 +16,59 @@ */ #include "postgres.h" #include "nodes/pg_list.h" + #include "pg_stat_monitor.h" -static pgsmLocalState pgsmStateLocal; -/* parameter for the shared hash */ - static dshash_parameters dsh_params = { - sizeof(pgssHashKey), - sizeof(pgssEntry), - dshash_memcmp, - dshash_memhash - }; -static void pgsm_proc_exit(int code, Datum arg); +static pgssSharedState *pgss; +static HTAB *pgss_hash; +static HTAB *pgss_query_hash; -static Size -pgsm_query_area_size(void) + +static HTAB * +hash_init(const char *hash_name, int key_size, int entry_size, int hash_size) { - Size sz = MAXALIGN(MAX_QUERY_BUF); - return MAXALIGN(sz); -} + HASHCTL info; -Size -pgsm_ShmemSize(void) - { - Size sz = MAXALIGN(sizeof(pgssSharedState)); - sz = add_size(sz, pgsm_query_area_size()); - sz = add_size(sz, hash_estimate_size(MAX_BUCKET_ENTRIES, sizeof(pgssEntry))); - return sz; - } + memset(&info, 0, sizeof(info)); + info.keysize = key_size; + info.entrysize = entry_size; + return ShmemInitHash(hash_name, hash_size, hash_size, &info, HASH_ELEM | HASH_BLOBS); +} void pgss_startup(void) { bool found = false; - pgssSharedState *pgss; + /* reset in case this is a restart within the postmaster */ - pgsmStateLocal.dsa = NULL; - pgsmStateLocal.shared_hash = NULL; - pgsmStateLocal.shared_pgssState = NULL; + + pgss = NULL; + pgss_hash = NULL; /* * Create or attach to the shared memory state, including hash table */ LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); - pgss = ShmemInitStruct("pg_stat_monitor", pgsm_ShmemSize(), &found); + pgss = ShmemInitStruct("pg_stat_monitor", sizeof(pgssSharedState), &found); if (!found) { /* First time through ... 
*/ - dsa_area *dsa; - dshash_table *dsh; - char *p = (char *) pgss; - pgss->lock = &(GetNamedLWLockTranche("pg_stat_monitor"))->lock; SpinLockInit(&pgss->mutex); ResetSharedState(pgss); - /* the allocation of pgssSharedState itself */ - p += MAXALIGN(sizeof(pgssSharedState)); - pgss->raw_dsa_area = p; - dsa = dsa_create_in_place(pgss->raw_dsa_area, - pgsm_query_area_size(), - LWLockNewTrancheId(), 0); - dsa_pin(dsa); - dsa_set_size_limit(dsa, pgsm_query_area_size()); - - pgss->hash_tranche_id = LWLockNewTrancheId(); - - dsh_params.tranche_id = pgss->hash_tranche_id; - dsh = dshash_create(dsa, &dsh_params, 0); - - pgss->hash_handle = dshash_get_hash_table_handle(dsh); - - if (PGSM_OVERFLOW_TARGET == OVERFLOW_TARGET_DISK) - dsa_set_size_limit(dsa, -1); - - pgsmStateLocal.shared_pgssState = pgss; - /* - * Postmaster will never access these again, thus free the local - * dsa/dshash references. - */ - dshash_detach(dsh); - dsa_detach(dsa); } #ifdef BENCHMARK init_hook_stats(); #endif + set_qbuf((unsigned char *) ShmemAlloc(MAX_QUERY_BUF)); + + pgss_hash = hash_init("pg_stat_monitor: bucket hashtable", sizeof(pgssHashKey), sizeof(pgssEntry), MAX_BUCKET_ENTRIES); + pgss_query_hash = hash_init("pg_stat_monitor: queryID hashtable", sizeof(uint64), sizeof(pgssQueryEntry), MAX_BUCKET_ENTRIES); + LWLockRelease(AddinShmemInitLock); /* @@ -112,49 +78,23 @@ pgss_startup(void) on_shmem_exit(pgss_shmem_shutdown, (Datum) 0); } -void -pgsm_attach_shmem(void) -{ - MemoryContext oldcontext; - if (pgsmStateLocal.dsa) - return; - - oldcontext = MemoryContextSwitchTo(TopMemoryContext); - - pgsmStateLocal.dsa = dsa_attach_in_place(pgsmStateLocal.shared_pgssState->raw_dsa_area, - NULL); - dsa_pin_mapping(pgsmStateLocal.dsa); - - dsh_params.tranche_id = pgsmStateLocal.shared_pgssState->hash_tranche_id; - pgsmStateLocal.shared_hash = dshash_attach(pgsmStateLocal.dsa, &dsh_params, - pgsmStateLocal.shared_pgssState->hash_handle, 0); - - on_proc_exit(pgsm_proc_exit, 0); - - 
MemoryContextSwitchTo(oldcontext); -} - -dsa_area* -get_dsa_area_for_query_text(void) -{ - pgsm_attach_shmem(); - return pgsmStateLocal.dsa; -} - -dshash_table* -get_pgssHash(void) -{ - pgsm_attach_shmem(); - return pgsmStateLocal.shared_hash; -} - pgssSharedState * pgsm_get_ss(void) { - pgsm_attach_shmem(); - return pgsmStateLocal.shared_pgssState; + return pgss; } +HTAB * +pgsm_get_hash(void) +{ + return pgss_hash; +} + +HTAB * +pgsm_get_query_hash(void) +{ + return pgss_query_hash; +} /* * shmem_shutdown hook: Dump statistics into file. @@ -166,24 +106,26 @@ void pgss_shmem_shutdown(int code, Datum arg) { /* Don't try to dump during a crash. */ - elog(LOG,"pgss_shmem_shutdown"); if (code) return; - pgsmStateLocal.shared_pgssState = NULL; + pgss = NULL; /* Safety check ... shouldn't get here unless shmem is set up. */ if (!IsHashInitialize()) return; } -static void -pgsm_proc_exit(int code, Datum arg) +Size +hash_memsize(void) { - Assert(pgsmStateLocal.dsa); - dshash_detach(pgsmStateLocal.shared_hash); - pgsmStateLocal.shared_hash = NULL; - dsa_detach(pgsmStateLocal.dsa); - pgsmStateLocal.dsa = NULL; + Size size; + + size = MAXALIGN(sizeof(pgssSharedState)); + size += MAXALIGN(MAX_QUERY_BUF); + size = add_size(size, hash_estimate_size(MAX_BUCKET_ENTRIES, sizeof(pgssEntry))); + size = add_size(size, hash_estimate_size(MAX_BUCKET_ENTRIES, sizeof(pgssQueryEntry))); + + return size; } pgssEntry * @@ -192,9 +134,13 @@ hash_entry_alloc(pgssSharedState *pgss, pgssHashKey *key, int encoding) pgssEntry *entry = NULL; bool found = false; + if (hash_get_num_entries(pgss_hash) >= MAX_BUCKET_ENTRIES) + { + elog(DEBUG1, "pg_stat_monitor: out of memory"); + return NULL; + } /* Find or create an entry with desired hash code */ - entry = (pgssEntry *) dshash_find_or_insert(pgsmStateLocal.shared_hash, key, &found); - // entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER_NULL, &found); + entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER_NULL, &found); if (entry 
== NULL) elog(DEBUG1, "hash_entry_alloc: OUT OF MEMORY"); else if (!found) @@ -209,7 +155,6 @@ hash_entry_alloc(pgssSharedState *pgss, pgssHashKey *key, int encoding) /* ... and don't forget the query text metadata */ entry->encoding = encoding; } - dshash_release_lock(pgsmStateLocal.shared_hash, entry); return entry; } @@ -229,22 +174,17 @@ hash_entry_alloc(pgssSharedState *pgss, pgssHashKey *key, int encoding) void hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_buffer) { - dshash_seq_status hstat; + HASH_SEQ_STATUS hash_seq; pgssEntry *entry = NULL; + /* Store pending query ids from the previous bucket. */ List *pending_entries = NIL; ListCell *pending_entry; - if (!pgsmStateLocal.shared_hash) - return; - /* Iterate over the hash table. */ - dshash_seq_init(&hstat, pgsmStateLocal.shared_hash, true); - - while ((entry = dshash_seq_next(&hstat)) != NULL) + hash_seq_init(&hash_seq, pgss_hash); + while ((entry = hash_seq_search(&hash_seq)) != NULL) { - dsa_pointer pdsa; - /* * Remove all entries if new_bucket_id == -1. Otherwise remove entry * in new_bucket_id if it has finished already. @@ -253,14 +193,16 @@ hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_bu (entry->key.bucket_id == new_bucket_id && (entry->counters.state == PGSS_FINISHED || entry->counters.state == PGSS_ERROR))) { - pdsa = entry->query_pos; - dsa_pointer parent_qdsa = entry->counters.info.parent_query; - dshash_delete_current(&hstat); - dsa_free(pgsmStateLocal.dsa, pdsa); - - if (DsaPointerIsValid(parent_qdsa)) - dsa_free(pgsmStateLocal.dsa, parent_qdsa); + if (new_bucket_id == -1) + { + /* + * pg_stat_monitor_reset(), remove entry from query hash table + * too. 
+ */ + hash_search(pgss_query_hash, &(entry->key.queryid), HASH_REMOVE, NULL); + } + entry = hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL); } /* @@ -296,11 +238,7 @@ hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_bu if (entry->counters.calls.calls > 1) entry->counters.state = PGSS_FINISHED; else - { - pdsa = entry->query_pos; - dshash_delete_current(&hstat); - dsa_free(pgsmStateLocal.dsa, pdsa); - } + entry = hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL); continue; } @@ -328,15 +266,11 @@ hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_bu if (entry->counters.calls.calls > 1) entry->counters.state = PGSS_FINISHED; else - { - pdsa = entry->query_pos; - dshash_delete_current(&hstat); - dsa_free(pgsmStateLocal.dsa, pdsa); - } + entry = hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL); } } } - dshash_seq_term(&hstat); + /* * Iterate over the list of pending queries in order to add them back to * the hash table with the updated bucket id. 
@@ -347,8 +281,7 @@ hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_bu pgssEntry *new_entry; pgssEntry *old_entry = (pgssEntry *) lfirst(pending_entry); - - new_entry = (pgssEntry *) dshash_find_or_insert(pgsmStateLocal.shared_hash, &old_entry->key, &found); + new_entry = (pgssEntry *) hash_search(pgss_hash, &old_entry->key, HASH_ENTER_NULL, &found); if (new_entry == NULL) elog(DEBUG1, "%s", "pg_stat_monitor: out of memory"); else if (!found) @@ -359,9 +292,8 @@ hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_bu new_entry->encoding = old_entry->encoding; new_entry->query_pos = old_entry->query_pos; } - free(old_entry); - dshash_release_lock(pgsmStateLocal.shared_hash, entry); + free(old_entry); } list_free(pending_entries); @@ -374,22 +306,16 @@ void hash_entry_reset() { pgssSharedState *pgss = pgsm_get_ss(); - dshash_seq_status hstat; + HASH_SEQ_STATUS hash_seq; pgssEntry *entry; LWLockAcquire(pgss->lock, LW_EXCLUSIVE); - dshash_seq_init(&hstat, pgsmStateLocal.shared_hash, true); - - while ((entry = dshash_seq_next(&hstat)) != NULL) + hash_seq_init(&hash_seq, pgss_hash); + while ((entry = hash_seq_search(&hash_seq)) != NULL) { - dsa_pointer pdsa = entry->query_pos; - dshash_delete_current(&hstat); - dsa_free(pgsmStateLocal.dsa, pdsa); + hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL); } - - dshash_seq_term(&hstat); - pg_atomic_write_u64(&pgss->current_wbucket, 0); LWLockRelease(pgss->lock); } @@ -397,5 +323,6 @@ hash_entry_reset() bool IsHashInitialize(void) { - return (pgsmStateLocal.shared_pgssState != NULL); + return (pgss != NULL && + pgss_hash != NULL); } diff --git a/pg_stat_monitor.c b/pg_stat_monitor.c index d4b4ca9..88ccada 100644 --- a/pg_stat_monitor.c +++ b/pg_stat_monitor.c @@ -32,6 +32,7 @@ PGSM_V2_0 } pgsmVersion; + PG_MODULE_MAGIC; #define BUILD_VERSION "2.0.0-dev" @@ -77,7 +78,6 @@ static int plan_nested_level = 0; /* The array to store outer layer query id*/ uint64 
*nested_queryids; -char **nested_query_txts; /* Regex object used to extract query comments. */ static regex_t preg_query_comments; @@ -88,11 +88,13 @@ static struct rusage rusage_start; static struct rusage rusage_end; /* Query buffer, store queries' text. */ +static unsigned char *pgss_qbuf = NULL; static char *pgss_explain(QueryDesc *queryDesc); static void extract_query_comments(const char *query, char *comments, size_t max_len); static int get_histogram_bucket(double q_time); static bool IsSystemInitialized(void); +static bool dump_queries_buffer(int bucket_id, unsigned char *buf, int buf_len); static double time_diff(struct timeval end, struct timeval start); static void request_additional_shared_resources(void); @@ -227,6 +229,7 @@ static uint64 djb2_hash(unsigned char *str, size_t len); /* Same as above, but stores the calculated string length into *out_len (small optimization) */ static uint64 djb2_hash_str(unsigned char *str, int *out_len); + /* * Module load callback */ @@ -235,6 +238,7 @@ void _PG_init(void) { int rc; + char file_name[1024]; elog(DEBUG2, "pg_stat_monitor: %s()", __FUNCTION__); @@ -261,6 +265,8 @@ _PG_init(void) EnableQueryId(); #endif + snprintf(file_name, 1024, "%s", PGSM_TEXT_FILE); + unlink(file_name); EmitWarningsOnPlaceholders("pg_stat_monitor"); @@ -306,7 +312,6 @@ _PG_init(void) ExecutorCheckPerms_hook = HOOK(pgss_ExecutorCheckPerms); nested_queryids = (uint64 *) malloc(sizeof(uint64) * max_stack_depth); - nested_query_txts = (char **) malloc(sizeof(char*) * max_stack_depth); system_init = true; } @@ -329,7 +334,6 @@ _PG_fini(void) emit_log_hook = prev_emit_log_hook; free(nested_queryids); - free(nested_query_txts); regfree(&preg_query_comments); hash_entry_reset(); @@ -358,7 +362,7 @@ request_additional_shared_resources(void) * the postmaster process.) We'll allocate or attach to the shared * resources in pgss_shmem_startup(). 
*/ - RequestAddinShmemSpace(pgsm_ShmemSize() + HOOK_STATS_SIZE); + RequestAddinShmemSpace(hash_memsize() + HOOK_STATS_SIZE); RequestNamedLWLockTranche("pg_stat_monitor", 1); } /* @@ -549,11 +553,7 @@ pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once) { if (exec_nested_level >= 0 && exec_nested_level < max_stack_depth) - { nested_queryids[exec_nested_level] = queryDesc->plannedstmt->queryId; - nested_query_txts[exec_nested_level] = strdup(queryDesc->sourceText); - } - exec_nested_level++; PG_TRY(); { @@ -563,23 +563,13 @@ pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count, standard_ExecutorRun(queryDesc, direction, count, execute_once); exec_nested_level--; if (exec_nested_level >= 0 && exec_nested_level < max_stack_depth) - { nested_queryids[exec_nested_level] = UINT64CONST(0); - if(nested_query_txts[exec_nested_level]) - free(nested_query_txts[exec_nested_level]); - nested_query_txts[exec_nested_level] = NULL; - } } PG_CATCH(); { exec_nested_level--; if (exec_nested_level >= 0 && exec_nested_level < max_stack_depth) - { nested_queryids[exec_nested_level] = UINT64CONST(0); - if(nested_query_txts[exec_nested_level]) - free(nested_query_txts[exec_nested_level]); - nested_query_txts[exec_nested_level] = NULL; - } PG_RE_THROW(); } PG_END_TRY(); @@ -1269,29 +1259,11 @@ pgss_update_entry(pgssEntry *entry, if (exec_nested_level > 0) { if (exec_nested_level >= 0 && exec_nested_level < max_stack_depth) - { - int parent_query_len = nested_query_txts[exec_nested_level - 1]? 
- strlen(nested_query_txts[exec_nested_level - 1]): 0; e->counters.info.parentid = nested_queryids[exec_nested_level - 1]; - if (parent_query_len > 0) - { - char *qry_buff; - dsa_area *query_dsa_area = get_dsa_area_for_query_text(); - dsa_pointer qry = dsa_allocate(query_dsa_area, parent_query_len+1); - qry_buff = dsa_get_address(query_dsa_area, qry); - memcpy(qry_buff, nested_query_txts[exec_nested_level - 1], parent_query_len); - qry_buff[parent_query_len] = 0; - e->counters.info.parent_query = qry; - } - else - e->counters.info.parent_query = InvalidDsaPointer; - - } } else { e->counters.info.parentid = UINT64CONST(0); - e->counters.info.parent_query = InvalidDsaPointer; } if (error_info) @@ -1408,6 +1380,7 @@ pgss_store(uint64 queryid, JumbleState *jstate, pgssStoreKind kind) { + HTAB *pgss_hash; pgssHashKey key; pgssEntry *entry; pgssSharedState *pgss = pgsm_get_ss(); @@ -1512,15 +1485,19 @@ pgss_store(uint64 queryid, #else key.toplevel = ((exec_nested_level + plan_nested_level) == 0); #endif + pgss_hash = pgsm_get_hash(); LWLockAcquire(pgss->lock, LW_SHARED); - entry = (pgssEntry *) dshash_find(get_pgssHash(), &key, false); + entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL); if (!entry) { - dsa_pointer dsa_query_pointer; - char* query_buff; + pgssQueryEntry *query_entry; + bool query_found = false; + uint64 prev_qbuf_len = 0; + HTAB *pgss_query_hash; + pgss_query_hash = pgsm_get_query_hash(); /* * Create a new, normalized query string if caller asked. We don't @@ -1531,41 +1508,74 @@ pgss_store(uint64 queryid, */ if (jstate && PGSM_NORMALIZED_QUERY) { + LWLockRelease(pgss->lock); norm_query = generate_normalized_query(jstate, query, query_location, &query_len, GetDatabaseEncoding()); + LWLockAcquire(pgss->lock, LW_SHARED); } - /* New query, truncate length if necessary. 
*/ - if (query_len > PGSM_QUERY_MAX_LEN) - query_len = PGSM_QUERY_MAX_LEN; + query_entry = hash_search(pgss_query_hash, &queryid, HASH_ENTER_NULL, &query_found); + if (query_entry == NULL) + { + LWLockRelease(pgss->lock); + if (norm_query) + pfree(norm_query); + elog(DEBUG1, "pgss_store: out of memory (pgss_query_hash)."); + return; + } + else if (!query_found) + { + /* New query, truncate length if necessary. */ + if (query_len > PGSM_QUERY_MAX_LEN) + query_len = PGSM_QUERY_MAX_LEN; + } /* Need exclusive lock to make a new hashtable entry - promote */ LWLockRelease(pgss->lock); LWLockAcquire(pgss->lock, LW_EXCLUSIVE); - /* Save the query text in raw dsa area */ - dsa_area* query_dsa_area = get_dsa_area_for_query_text(); - dsa_query_pointer = dsa_allocate(query_dsa_area, query_len+1); - query_buff = dsa_get_address(query_dsa_area, dsa_query_pointer); - memcpy(query_buff, norm_query ? norm_query : query, query_len); - query_buff[query_len] = 0; + if (!query_found) + { + if (!SaveQueryText(bucketid, + queryid, + pgss_qbuf, + norm_query ? norm_query : query, + query_len, + &query_entry->query_pos)) + { + LWLockRelease(pgss->lock); + if (norm_query) + pfree(norm_query); + elog(DEBUG1, "pgss_store: insufficient shared space for query."); + return; + } + + /* + * Save current query buffer length, if we fail to add a new new + * entry to the hash table then we must restore the original + * length. + */ + memcpy(&prev_qbuf_len, pgss_qbuf, sizeof(prev_qbuf_len)); + } /* OK to create a new hashtable entry */ entry = hash_entry_alloc(pgss, &key, GetDatabaseEncoding()); if (entry == NULL) { + if (!query_found) + { + /* Restore previous query buffer length. 
*/ + memcpy(pgss_qbuf, &prev_qbuf_len, sizeof(prev_qbuf_len)); + } LWLockRelease(pgss->lock); if (norm_query) pfree(norm_query); return; } - entry->query_pos = dsa_query_pointer; + entry->query_pos = query_entry->query_pos; } - else - dshash_release_lock(get_pgssHash(), entry); - if (jstate == NULL) pgss_update_entry(entry, /* entry */ @@ -1608,6 +1618,9 @@ pg_stat_monitor_reset(PG_FUNCTION_ARGS) LWLockAcquire(pgss->lock, LW_EXCLUSIVE); hash_entry_dealloc(-1, -1, NULL); + /* Reset query buffer. */ + *(uint64 *) pgss_qbuf = 0; + LWLockRelease(pgss->lock); PG_RETURN_VOID(); } @@ -1664,12 +1677,13 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, Tuplestorestate *tupstore; MemoryContext per_query_ctx; MemoryContext oldcontext; - dshash_seq_status hstat; + HASH_SEQ_STATUS hash_seq; pgssEntry *entry; char parentid_txt[32]; pgssSharedState *pgss = pgsm_get_ss(); - char *query_txt = NULL; - char *parent_query_txt = NULL; + HTAB *pgss_hash = pgsm_get_hash(); + char *query_txt = (char *) palloc0(PGSM_QUERY_MAX_LEN + 1); + char *parent_query_txt = (char *) palloc0(PGSM_QUERY_MAX_LEN + 1); int expected_columns = (api_version >= PGSM_V2_0)?PG_STAT_MONITOR_COLS_V2_0:PG_STAT_MONITOR_COLS_V1_0; /* Safety check... 
*/ @@ -1707,11 +1721,10 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, MemoryContextSwitchTo(oldcontext); - // LWLockAcquire(pgss->lock, LW_SHARED); + LWLockAcquire(pgss->lock, LW_SHARED); - dshash_seq_init(&hstat, get_pgssHash(), false); - - while ((entry = dshash_seq_next(&hstat)) != NULL) + hash_seq_init(&hash_seq, pgss_hash); + while ((entry = hash_seq_search(&hash_seq)) != NULL) { Datum values[PG_STAT_MONITOR_COLS] = {0}; bool nulls[PG_STAT_MONITOR_COLS] = {0}; @@ -1726,8 +1739,6 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, uint64 userid = entry->key.userid; int64 ip = entry->key.ip; uint64 planid = entry->key.planid; - dsa_area *query_dsa_area; - char *query_ptr; #if PG_VERSION_NUM < 140000 bool toplevel = 1; bool is_allowed_role = is_member_of_role(GetUserId(), DEFAULT_ROLE_READ_ALL_STATS); @@ -1735,10 +1746,15 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, bool is_allowed_role = is_member_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS); bool toplevel = entry->key.toplevel; #endif - /* Load the query text from dsa area */ - query_dsa_area = get_dsa_area_for_query_text(); - query_ptr = dsa_get_address(query_dsa_area, entry->query_pos); - query_txt = pstrdup(query_ptr); + + if (read_query(pgss_qbuf, queryid, query_txt, entry->query_pos) == 0) + { + int rc; + + rc = read_query_buffer(bucketid, queryid, query_txt, entry->query_pos); + if (rc != 1) + snprintf(query_txt, 32, "%s", ""); + } /* copy counters to a local variable to keep locking time short */ { @@ -1766,17 +1782,15 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, if (tmp.state == PGSS_PARSE || tmp.state == PGSS_PLAN) continue; - /* read the parent query text if any */ if (tmp.info.parentid != UINT64CONST(0)) { - if (DsaPointerIsValid(tmp.info.parent_query)) + if (read_query(pgss_qbuf, tmp.info.parentid, parent_query_txt, 0) == 0) { - query_dsa_area = get_dsa_area_for_query_text(); - query_ptr = dsa_get_address(query_dsa_area, tmp.info.parent_query); - parent_query_txt = 
pstrdup(query_ptr); + int rc = read_query_buffer(bucketid, tmp.info.parentid, parent_query_txt, 0); + + if (rc != 1) + snprintf(parent_query_txt, 32, "%s", ""); } - else - parent_query_txt = pstrdup("parent query text not available"); } /* bucketid at column number 0 */ values[i++] = Int64GetDatumFast(bucketid); @@ -2056,12 +2070,10 @@ pg_stat_monitor_internal(FunctionCallInfo fcinfo, tuplestore_putvalues(tupstore, tupdesc, values, nulls); } /* clean up and return the tuplestore */ - dshash_seq_term(&hstat); + LWLockRelease(pgss->lock); - if(query_txt) - pfree(query_txt); - if(parent_query_txt) - pfree(parent_query_txt); + pfree(query_txt); + pfree(parent_query_txt); tuplestore_donestoring(tupstore); } @@ -2107,6 +2119,7 @@ get_next_wbucket(pgssSharedState *pgss) if (update_bucket) { + char file_name[1024]; new_bucket_id = (tv.tv_sec / PGSM_BUCKET_TIME) % PGSM_MAX_BUCKETS; @@ -2114,7 +2127,24 @@ get_next_wbucket(pgssSharedState *pgss) prev_bucket_id = pg_atomic_exchange_u64(&pgss->current_wbucket, new_bucket_id); LWLockAcquire(pgss->lock, LW_EXCLUSIVE); - hash_entry_dealloc(new_bucket_id, prev_bucket_id, NULL); + hash_entry_dealloc(new_bucket_id, prev_bucket_id, pgss_qbuf); + + if (pgss->overflow) + { + pgss->n_bucket_cycles += 1; + if (pgss->n_bucket_cycles >= PGSM_MAX_BUCKETS) + { + /* + * A full rotation of PGSM_MAX_BUCKETS buckets happened since + * we detected a query buffer overflow. + * Reset overflow state and remove the dump file. 
+ */ + pgss->overflow = false; + pgss->n_bucket_cycles = 0; + snprintf(file_name, 1024, "%s", PGSM_TEXT_FILE); + unlink(file_name); + } + } LWLockRelease(pgss->lock); @@ -3113,6 +3143,165 @@ intarray_get_datum(int32 arr[], int len) } +uint64 +read_query(unsigned char *buf, uint64 queryid, char *query, size_t pos) +{ + bool found = false; + uint64 query_id = 0; + uint64 query_len = 0; + uint64 rlen = 0; + uint64 buf_len = 0; + + memcpy(&buf_len, buf, sizeof(uint64)); + if (buf_len <= 0) + goto exit; + + /* If a position hint is given, try to locate the query directly. */ + if (pos != 0 && (pos + sizeof(uint64) + sizeof(uint64)) < buf_len) + { + memcpy(&query_id, &buf[pos], sizeof(uint64)); + if (query_id != queryid) + return 0; + + pos += sizeof(uint64); + + memcpy(&query_len, &buf[pos], sizeof(uint64)); /* query len */ + pos += sizeof(uint64); + + if (pos + query_len > buf_len) /* avoid reading past buffer's length. */ + return 0; + + memcpy(query, &buf[pos], query_len); /* Actual query */ + query[query_len] = '\0'; + + return queryid; + } + + rlen = sizeof(uint64); /* Move forwad to skip length bytes */ + for (;;) + { + if (rlen >= buf_len) + goto exit; + + memcpy(&query_id, &buf[rlen], sizeof(uint64)); /* query id */ + if (query_id == queryid) + found = true; + + rlen += sizeof(uint64); + if (buf_len <= rlen) + continue; + + memcpy(&query_len, &buf[rlen], sizeof(uint64)); /* query len */ + rlen += sizeof(uint64); + if (buf_len < rlen + query_len) + goto exit; + if (found) + { + if (query != NULL) + { + memcpy(query, &buf[rlen], query_len); /* Actual query */ + query[query_len] = 0; + } + return query_id; + } + rlen += query_len; + } +exit: + if (PGSM_OVERFLOW_TARGET == OVERFLOW_TARGET_NONE) + { + sprintf(query, "%s", ""); + return -1; + } + return 0; +} + +bool +SaveQueryText(uint64 bucketid, + uint64 queryid, + unsigned char *buf, + const char *query, + uint64 query_len, + size_t *query_pos) +{ + uint64 buf_len = 0; + + memcpy(&buf_len, buf, sizeof(uint64)); + 
if (buf_len == 0) + buf_len += sizeof(uint64); + + if (QUERY_BUFFER_OVERFLOW(buf_len, query_len)) + { + switch (PGSM_OVERFLOW_TARGET) + { + case OVERFLOW_TARGET_NONE: + return false; + case OVERFLOW_TARGET_DISK: + { + bool dump_ok; + pgssSharedState *pgss = pgsm_get_ss(); + + if (pgss->overflow) + { + elog(DEBUG1, "query buffer overflowed twice"); + return false; + } + + /* + * If the query buffer is empty, there is nothing to dump, + * this also means that the current query length exceeds + * MAX_QUERY_BUF. + */ + if (buf_len <= sizeof(uint64)) + return false; + + dump_ok = dump_queries_buffer(bucketid, buf, MAX_QUERY_BUF); + buf_len = sizeof(uint64); + + if (dump_ok) + { + pgss->overflow = true; + pgss->n_bucket_cycles = 0; + } + + /* + * We must check for overflow again, as the query length + * may exceed the total size allocated to the buffer + * (MAX_QUERY_BUF). + */ + if (QUERY_BUFFER_OVERFLOW(buf_len, query_len)) + { + /* + * If we successfully dumped the query buffer to disk, + * then reset the buffer, otherwise we could end up + * dumping the same buffer again. 
+					 */
+					if (dump_ok)
+						*(uint64 *) buf = 0;
+
+					return false;
+				}
+
+			}
+			break;
+		default:
+			Assert(false);
+			break;
+	}
+	}
+
+	*query_pos = buf_len;
+
+	memcpy(&buf[buf_len], &queryid, sizeof(uint64));	/* query id */
+	buf_len += sizeof(uint64);
+
+	memcpy(&buf[buf_len], &query_len, sizeof(uint64));	/* query length */
+	buf_len += sizeof(uint64);
+
+	memcpy(&buf[buf_len], query, query_len);	/* query */
+	buf_len += query_len;
+	memcpy(buf, &buf_len, sizeof(uint64));
+	return true;
+}
 
 Datum
 pg_stat_monitor_hook_stats(PG_FUNCTION_ARGS)
@@ -3120,6 +3309,12 @@ pg_stat_monitor_hook_stats(PG_FUNCTION_ARGS)
 	return (Datum) 0;
 }
 
+void
+set_qbuf(unsigned char *buf)
+{
+	pgss_qbuf = buf;
+	*(uint64 *) pgss_qbuf = 0;
+}
 
 void
 pgsm_emit_log_hook(ErrorData *edata)
@@ -3156,6 +3351,145 @@ IsSystemInitialized(void)
 	return (system_init && IsHashInitialize());
 }
 
+static bool
+dump_queries_buffer(int bucket_id, unsigned char *buf, int buf_len)
+{
+	int			fd = 0;
+	char		file_name[1024];
+	bool		success = true;
+	int			off = 0;
+	int			tries = 0;
+
+	snprintf(file_name, 1024, "%s", PGSM_TEXT_FILE);
+	fd = OpenTransientFile(file_name, O_RDWR | O_CREAT | O_APPEND | PG_BINARY);
+	if (fd < 0)
+	{
+		ereport(LOG,
+				(errcode_for_file_access(),
+				 errmsg("could not write file \"%s\": %m",
+						file_name)));
+		return false;
+	}
+
+	/* Loop until write buf_len bytes to the file. */
+	do
+	{
+		ssize_t		nwrite = write(fd, buf + off, buf_len - off);
+
+		if (nwrite == -1)
+		{
+			if (errno == EINTR && tries++ < 3)
+				continue;
+
+			success = false;
+			break;
+		}
+		off += nwrite;
+	} while (off < buf_len);
+
+	if (!success)
+		ereport(LOG,
+				(errcode_for_file_access(),
+				 errmsg("could not write file \"%s\": %m", file_name)));
+
+	if (fd > 0)
+		CloseTransientFile(fd);
+
+	return success;
+}
+
+/*
+ * Try to locate query text in a dumped file for bucket_id.
+ *
+ * Returns:
+ * 1	Query successfully read, query_text will contain the query text.
+ * 0	Query not found.
+ * -1	I/O Error.
+ */
+int
+read_query_buffer(int bucket_id, uint64 queryid, char *query_txt, size_t pos)
+{
+	int			fd = 0;
+	char		file_name[1024];
+	unsigned char *buf = NULL;
+	ssize_t		nread = 0;
+	int			off = 0;
+	int			tries = 0;
+	bool		done = false;
+	bool		found = false;
+
+	snprintf(file_name, 1024, "%s", PGSM_TEXT_FILE);
+	fd = OpenTransientFile(file_name, O_RDONLY | PG_BINARY);
+	if (fd < 0)
+		goto exit;
+
+	buf = (unsigned char *) palloc(MAX_QUERY_BUF);
+	while (!done)
+	{
+		off = 0;
+		/* read a chunk of MAX_QUERY_BUF size. */
+		do
+		{
+			nread = read(fd, buf + off, MAX_QUERY_BUF - off);
+			if (nread == -1)
+			{
+				if (errno == EINTR && tries++ < 3)	/* read() was interrupted,
+													 * attempt to read again
+													 * (max attempts=3) */
+					continue;
+
+				goto exit;
+			}
+			else if (nread == 0)	/* EOF */
+			{
+				done = true;
+				break;
+			}
+
+			off += nread;
+		} while (off < MAX_QUERY_BUF);
+
+		if (off == MAX_QUERY_BUF)
+		{
+			/* we have a chunk, scan it looking for queryid. */
+			if (read_query(buf, queryid, query_txt, pos) != 0)
+			{
+
+				found = true;
+				/* query was found, don't need to read another chunk. */
+				break;
+			}
+		}
+		else
+
+			/*
+			 * Either done=true or file has a size not multiple of
+			 * MAX_QUERY_BUF. It is safe to assume that the file was truncated
+			 * or corrupted.
+			 */
+			break;
+	}
+
+exit:
+	if (fd < 0 || nread == -1)
+		ereport(LOG,
+				(errcode_for_file_access(),
+				 errmsg("could not read file \"%s\": %m",
+						file_name)));
+
+	if (fd >= 0)
+		CloseTransientFile(fd);
+
+	if (buf)
+		pfree(buf);
+
+	if (found)
+		return 1;
+	else if (fd == -1 || nread == -1)
+		return -1;				/* I/O error. */
+	else
+		return 0;				/* Not found.
 */
+}
 
 static double
 time_diff(struct timeval end, struct timeval start)
diff --git a/pg_stat_monitor.h b/pg_stat_monitor.h
index 78d8183..dd66c9d 100644
--- a/pg_stat_monitor.h
+++ b/pg_stat_monitor.h
@@ -27,9 +27,6 @@
 #include
 #include
 
-#include "lib/dshash.h"
-#include "utils/dsa.h"
-
 #include "access/hash.h"
 #include "catalog/pg_authid.h"
 #include "executor/instrument.h"
@@ -182,6 +179,20 @@ typedef struct CallTime
 	double		sum_var_time;	/* sum of variances in execution time in msec */
 } CallTime;
 
+/*
+ * Entry type for queries hash table (query ID).
+ *
+ * We use a hash table to keep track of query IDs that have their
+ * corresponding query text added to the query buffer (pgsm_query_shared_buffer).
+ *
+ * This allows us to avoid adding duplicated queries to the buffer, therefore
+ * leaving more space for other queries and saving some CPU.
+ */
+typedef struct pgssQueryEntry
+{
+	uint64		queryid;		/* query identifier, also the key. */
+	size_t		query_pos;		/* query location within query buffer */
+} pgssQueryEntry;
 
 typedef struct PlanInfo
 {
@@ -205,7 +216,6 @@ typedef struct pgssHashKey
 typedef struct QueryInfo
 {
 	uint64		parentid;		/* parent queryid of current query */
-	dsa_pointer parent_query;
 	int64		type;			/* type of query, options are query, info,
 								 * warning, error, fatal */
 	char		application_name[APPLICATIONNAME_LEN];
@@ -312,7 +322,7 @@ typedef struct pgssEntry
 	Counters	counters;		/* the statistics for this query */
 	int			encoding;		/* query text encoding */
 	slock_t		mutex;			/* protects the counters only */
-	dsa_pointer query_pos;		/* query location within query buffer */
+	size_t		query_pos;		/* query location within query buffer */
 } pgssEntry;
 
 /*
@@ -343,19 +353,10 @@ typedef struct pgssSharedState
 	 * This allows us to avoid having a large file on disk that would also
 	 * slowdown queries to the pg_stat_monitor view.
*/ + bool overflow; size_t n_bucket_cycles; - int hash_tranche_id; - void *raw_dsa_area; - dshash_table_handle hash_handle; } pgssSharedState; -typedef struct pgsmLocalState -{ - pgssSharedState *shared_pgssState; - dsa_area *dsa; - dshash_table *shared_hash; -}pgsmLocalState; - #define ResetSharedState(x) \ do { \ x->cur_median_usage = ASSUMED_MEDIAN_INIT; \ @@ -417,22 +418,27 @@ void init_guc(void); GucVariable *get_conf(int i); /* hash_create.c */ -dsa_area *get_dsa_area_for_query_text(void); -dshash_table *get_pgssHash(void); -void pgsm_attach_shmem(void); bool IsHashInitialize(void); void pgss_shmem_startup(void); void pgss_shmem_shutdown(int code, Datum arg); int pgsm_get_bucket_size(void); pgssSharedState *pgsm_get_ss(void); +HTAB *pgsm_get_plan_hash(void); +HTAB *pgsm_get_hash(void); +HTAB *pgsm_get_query_hash(void); +HTAB *pgsm_get_plan_hash(void); void hash_entry_reset(void); void hash_query_entryies_reset(void); void hash_query_entries(); void hash_query_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_buffer[]); void hash_entry_dealloc(int new_bucket_id, int old_bucket_id, unsigned char *query_buffer); pgssEntry *hash_entry_alloc(pgssSharedState *pgss, pgssHashKey *key, int encoding); -Size pgsm_ShmemSize(void); +Size hash_memsize(void); + +int read_query_buffer(int bucket_id, uint64 queryid, char *query_txt, size_t pos); +uint64 read_query(unsigned char *buf, uint64 queryid, char *query, size_t pos); void pgss_startup(void); +void set_qbuf(unsigned char *); /* hash_query.c */ void pgss_startup(void); From 0656d5f22df320ee9847e41a4be3c55a24422522 Mon Sep 17 00:00:00 2001 From: Naeem Akhter Date: Fri, 23 Dec 2022 01:00:13 +0500 Subject: [PATCH 15/15] PG-354: Update expected output file for functions testcase. 
--- regression/expected/functions.out | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/regression/expected/functions.out b/regression/expected/functions.out index 2e94b48..2a9c5c0 100644 --- a/regression/expected/functions.out +++ b/regression/expected/functions.out @@ -17,7 +17,6 @@ SELECT routine_schema, routine_name, routine_type, data_type FROM information_sc public | histogram | FUNCTION | record public | pg_stat_monitor_internal | FUNCTION | record public | pg_stat_monitor_reset | FUNCTION | void - public | pg_stat_monitor_settings | FUNCTION | record public | pg_stat_monitor_version | FUNCTION | text public | pgsm_create_11_view | FUNCTION | integer public | pgsm_create_13_view | FUNCTION | integer @@ -25,7 +24,7 @@ SELECT routine_schema, routine_name, routine_type, data_type FROM information_sc public | pgsm_create_15_view | FUNCTION | integer public | pgsm_create_view | FUNCTION | integer public | range | FUNCTION | ARRAY -(14 rows) +(13 rows) SET ROLE u1; SELECT routine_schema, routine_name, routine_type, data_type FROM information_schema.routines WHERE routine_schema = 'public' ORDER BY routine_name COLLATE "C";