# pg_stat_monitor Low-Memory Fork: Phased Implementation Plan

## 🎯 **Objective**

Create a fork that stores statistics in partitioned tables backed by materialized views, ultimately reducing shared memory from 276MB to 1-3MB through small, independently reviewable phases.

## 📊 **Memory Reduction Stages**

### **Current State**

- Default: 256MB + 20MB = **276MB total**

### **Phase 1: Configuration Only**

- `pgsm_max`: 256MB → 10MB (the minimum the stock GUC allows)
- `pgsm_max_buckets`: 10 → 2
- `pgsm_query_shared_buffer`: 20MB → 1MB
- **Result**: 11MB (96% reduction)

(Phase 2 adds the materialized view layer and leaves memory unchanged at 11MB.)

### **Phase 3: Clear After Export**

- Keep only the current bucket in memory
- Clear it immediately after exporting to the table
- **Target**: 1-3MB (99% reduction)

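As a baseline before Phase 1, the current footprint can be confirmed directly with `pg_shmem_allocations` (the same catalog the Phase 3 testing section queries; available since PostgreSQL 13):

```sql
-- Baseline: should show roughly 276MB before any changes
SELECT name, pg_size_pretty(size) AS size
FROM pg_shmem_allocations
WHERE name LIKE '%pg_stat_monitor%';
```
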
## 🏗️ **Phased Implementation Plan**

---

# **PHASE 1: Table Storage (2 hours)**

*Git diff: ~150 lines in a new file + a 2-line hook*

## **Objective**

Export statistics to a partitioned table on bucket rotation, without changing memory usage.

## **Changes**

### **1.1: New File** (`pgsm_table_export.c`)

```c
/*
 * pgsm_table_export.c - Export pg_stat_monitor data to partitioned tables
 * Phase 1: Basic table export functionality
 */
#include "postgres.h"
#include "pg_stat_monitor.h"
#include "executor/spi.h"
#include "utils/builtins.h"
#include "utils/timestamp.h"

/* GUC variable */
bool pgsm_enable_table_export = true;

/* External references */
extern HTAB *pgsm_hash;

/*
 * Export bucket data to the partitioned table.
 * Called from get_next_wbucket() before bucket cleanup.
 */
void
pgsm_export_bucket_to_table(uint64 bucket_id)
{
    HASH_SEQ_STATUS hash_seq;
    pgsmEntry  *entry;
    StringInfoData sql;
    int         ret,
                exported = 0;
    bool        first = true;

    /* Quick exit if disabled */
    if (!pgsm_enable_table_export)
        return;

    /* Check the table exists (it is created by the SQL migration) */
    SPI_connect();
    ret = SPI_execute("SELECT 1 FROM pg_tables WHERE tablename = 'pg_stat_monitor_data'",
                      true, 1);
    if (ret != SPI_OK_SELECT || SPI_processed == 0)
    {
        SPI_finish();
        return;                 /* Table doesn't exist yet */
    }
    SPI_finish();

    /* Build one batch INSERT for all entries in the bucket */
    initStringInfo(&sql);
    appendStringInfo(&sql,
                     "INSERT INTO pg_stat_monitor_data "
                     "(bucket_id, queryid, dbid, userid, calls, rows, "
                     "total_time, mean_time, min_time, max_time, "
                     "shared_blks_hit, shared_blks_read) VALUES ");

    /* Export all entries from this bucket */
    hash_seq_init(&hash_seq, pgsm_hash);
    while ((entry = hash_seq_search(&hash_seq)) != NULL)
    {
        if (entry->key.bucket_id != bucket_id)
            continue;

        if (!first)
            appendStringInfoChar(&sql, ',');
        first = false;

        /*
         * Add entry data. All values are numeric, so no quoting or escaping
         * is needed. Use the portable 64-bit format macros rather than %lu,
         * which is wrong on platforms where long is 32 bits.
         */
        appendStringInfo(&sql,
                         "(" UINT64_FORMAT ", " UINT64_FORMAT ", %u, %u, "
                         INT64_FORMAT ", " INT64_FORMAT ", "
                         "%.3f, %.3f, %.3f, %.3f, "
                         INT64_FORMAT ", " INT64_FORMAT ")",
                         bucket_id,
                         entry->pgsm_query_id,
                         entry->key.dbid,
                         entry->key.userid,
                         entry->counters.calls.calls,
                         entry->counters.calls.rows,
                         entry->counters.time.total_time,
                         entry->counters.time.mean_time,
                         entry->counters.time.min_time,
                         entry->counters.time.max_time,
                         entry->counters.blocks.shared_blks_hit,
                         entry->counters.blocks.shared_blks_read);

        exported++;
    }

    /* Execute if we have data */
    if (exported > 0)
    {
        SPI_connect();
        ret = SPI_execute(sql.data, false, 0);
        SPI_finish();

        elog(DEBUG1, "pg_stat_monitor: exported %d entries from bucket " UINT64_FORMAT,
             exported, bucket_id);
    }

    pfree(sql.data);
}
```

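For reference, a sketch of the statement this builds (all values illustrative; a real statement batches one tuple per tracked entry):

```sql
INSERT INTO pg_stat_monitor_data
    (bucket_id, queryid, dbid, userid, calls, rows,
     total_time, mean_time, min_time, max_time,
     shared_blks_hit, shared_blks_read) VALUES
    (3, 8234567890123456789, 16384, 10, 42, 420,
     123.456, 2.939, 0.102, 9.873, 1024, 37),
    (3, 1193942885159968807, 16384, 10, 7, 7,
     10.512, 1.502, 0.871, 3.004, 256, 2);
```
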
### **1.2: Hook Addition** (`pg_stat_monitor.c`)

```diff
+ /* External function from pgsm_table_export.c */
+ extern void pgsm_export_bucket_to_table(uint64 bucket_id);

  static uint64
  get_next_wbucket(pgsmSharedState *pgsm)
  {
      /* ... around line ~2570 ... */
      if (update_bucket)
      {
          new_bucket_id = (tv.tv_sec / pgsm_bucket_time) % pgsm_max_buckets;
          prev_bucket_id = pg_atomic_exchange_u64(&pgsm->current_wbucket, new_bucket_id);

          pgsm_lock_aquire(pgsm, LW_EXCLUSIVE);

+         /* Export bucket data before deallocation */
+         pgsm_export_bucket_to_table(new_bucket_id);

          hash_entry_dealloc(new_bucket_id, prev_bucket_id, NULL);
          pgsm_lock_release(pgsm);
      }
```

### **1.3: SQL Migration** (`pg_stat_monitor--2.0--2.1.sql`)

```sql
-- Phase 1: Create partitioned table for data export
CREATE TABLE IF NOT EXISTS pg_stat_monitor_data (
    bucket_id bigint,
    queryid bigint,
    dbid oid,
    userid oid,
    calls bigint,
    rows bigint,
    total_time double precision,
    mean_time double precision,
    min_time double precision,
    max_time double precision,
    shared_blks_hit bigint,
    shared_blks_read bigint,
    exported_at timestamptz DEFAULT now()
) PARTITION BY RANGE (exported_at);

-- Create a default partition; time-ranged partitions can be attached later
CREATE TABLE pg_stat_monitor_data_default
    PARTITION OF pg_stat_monitor_data DEFAULT;

-- Create indexes
CREATE INDEX ON pg_stat_monitor_data (queryid);
CREATE INDEX ON pg_stat_monitor_data (exported_at);

-- Reduce memory usage via configuration.
-- NB: ALTER SYSTEM cannot run inside a transaction block, so these three
-- statements must be run manually after the upgrade rather than from this
-- (transactional) extension script.
ALTER SYSTEM SET pg_stat_monitor.pgsm_max = '10MB';
ALTER SYSTEM SET pg_stat_monitor.pgsm_max_buckets = 2;
ALTER SYSTEM SET pg_stat_monitor.pgsm_query_shared_buffer = '1MB';
```

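The migration only creates a default partition, so if time-ranged partitions are wanted (e.g., to make retention a cheap `DROP TABLE`), they would be attached as routine maintenance. A minimal sketch with an illustrative name and date range:

```sql
-- Hypothetical daily partition; rows in this range bypass the default partition
CREATE TABLE IF NOT EXISTS pg_stat_monitor_data_20250101
    PARTITION OF pg_stat_monitor_data
    FOR VALUES FROM ('2025-01-01') TO ('2025-01-02');
```

Note that attaching fails if the default partition already holds rows in that range; those rows would have to be moved out first.
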
### **1.4: Makefile Update**

```diff
- OBJS = hash_query.o guc.o pg_stat_monitor.o $(WIN32RES)
+ OBJS = hash_query.o guc.o pg_stat_monitor.o pgsm_table_export.o $(WIN32RES)
```

### **1.5: GUC Addition** (`guc.c`)

```diff
+ /* Declared in pgsm_table_export.c */
+ extern bool pgsm_enable_table_export;

  void
  init_guc(void)
  {
      /* ... existing GUCs ... */

+     DefineCustomBoolVariable("pg_stat_monitor.pgsm_enable_table_export",
+                              "Enable export to partitioned tables",
+                              NULL,
+                              &pgsm_enable_table_export,
+                              true,
+                              PGC_SIGHUP,
+                              0,
+                              NULL, NULL, NULL);
  }
```

## **Phase 1 Testing**

```sql
-- Verify the export is working
SELECT count(*) FROM pg_stat_monitor_data;

-- Check data contents
SELECT queryid, calls, total_time FROM pg_stat_monitor_data LIMIT 10;
```

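To exercise the export end to end, generate some activity and wait out one bucket rotation (the default `pgsm_bucket_time` is 60 seconds, and rotation happens lazily on the next tracked query):

```sql
-- Generate tracked activity
SELECT count(*) FROM pg_class;

-- Wait one bucket interval, then run any query to trigger rotation/export
SELECT pg_sleep(61);
SELECT 1;

-- Exported rows should now be visible, grouped by bucket
SELECT bucket_id, count(*) FROM pg_stat_monitor_data GROUP BY bucket_id;
```
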
---

# **PHASE 2: Materialized View with Bucket-Synced Refresh (1 hour)**

*Git diff: ~30 lines added to an existing file*

## **Objective**

Replace the existing view with a materialized view that refreshes after each bucket export, synchronized with `pgsm_bucket_time`.

## **Changes**

### **2.1: Add Refresh Function** (`pgsm_table_export.c`)

```c
/*
 * Refresh the materialized view after bucket export.
 * This keeps the view in sync with the data export schedule.
 */
void
pgsm_refresh_materialized_view(void)
{
    int         ret;

    /* Skip if table export is disabled */
    if (!pgsm_enable_table_export)
        return;

    /* Check that the materialized view exists */
    SPI_connect();
    ret = SPI_execute("SELECT 1 FROM pg_matviews WHERE matviewname = 'pg_stat_monitor'",
                      true, 1);
    if (ret != SPI_OK_SELECT || SPI_processed == 0)
    {
        SPI_finish();
        return;                 /* Materialized view doesn't exist yet */
    }

    /*
     * Refresh the view (CONCURRENTLY to avoid blocking readers).
     * NB: REFRESH ... CONCURRENTLY is rejected inside a transaction block,
     * and SPI always runs inside one, so this may need to fall back to a
     * plain REFRESH (which takes an exclusive lock on the matview).
     */
    ret = SPI_execute("REFRESH MATERIALIZED VIEW CONCURRENTLY pg_stat_monitor",
                      false, 0);
    if (ret == SPI_OK_UTILITY)
        elog(DEBUG1, "pg_stat_monitor: refreshed materialized view");
    SPI_finish();
}
```

### **2.2: Hook After Export** (`pg_stat_monitor.c`)

```diff
+ extern void pgsm_refresh_materialized_view(void);

  static uint64
  get_next_wbucket(pgsmSharedState *pgsm)
  {
      if (update_bucket)
      {
          new_bucket_id = (tv.tv_sec / pgsm_bucket_time) % pgsm_max_buckets;
          prev_bucket_id = pg_atomic_exchange_u64(&pgsm->current_wbucket, new_bucket_id);

          pgsm_lock_aquire(pgsm, LW_EXCLUSIVE);

          /* Export bucket data before deallocation */
          pgsm_export_bucket_to_table(new_bucket_id);

+         /* Refresh the materialized view after export (same cadence as the bucket) */
+         pgsm_refresh_materialized_view();

          hash_entry_dealloc(new_bucket_id, prev_bucket_id, NULL);
          pgsm_lock_release(pgsm);
      }
  }
```

### **2.3: SQL Migration** (`pg_stat_monitor--2.1--2.2.sql`)

```sql
-- Phase 2: Replace the view with a materialized view

-- Preserve the existing view definition under a new name.
-- (Creating a SELECT * copy first would leave a dependency that blocks
-- dropping the original view.)
ALTER VIEW pg_stat_monitor RENAME TO pg_stat_monitor_old;

-- Create the materialized view from the table data
CREATE MATERIALIZED VIEW pg_stat_monitor AS
SELECT
    bucket_id,
    queryid,
    dbid,
    userid,
    calls,
    rows,
    total_time,
    mean_time,
    min_time,
    max_time,
    shared_blks_hit,
    shared_blks_read
FROM pg_stat_monitor_data
WHERE exported_at > now() - interval '24 hours';

-- Unique index required for REFRESH ... CONCURRENTLY
CREATE UNIQUE INDEX ON pg_stat_monitor (bucket_id, queryid, dbid, userid);

-- Initial population
REFRESH MATERIALIZED VIEW pg_stat_monitor;
```

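Since refreshes are driven by bucket rotation rather than a scheduler, freshness can be sanity-checked from the base table's `exported_at` column (a simple check; lag should stay under `pgsm_bucket_time`):

```sql
-- How far behind is the newest exported row?
SELECT now() - max(exported_at) AS export_lag
FROM pg_stat_monitor_data;
```
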
## **Phase 2 Testing**

```sql
-- Query the materialized view
SELECT * FROM pg_stat_monitor LIMIT 10;

-- Verify refresh timing matches bucket rotation:
-- with the default pgsm_bucket_time = 60, the view should refresh every 60 seconds.
-- Check the PostgreSQL logs for: "pg_stat_monitor: refreshed materialized view"

-- Test with different bucket timing
ALTER SYSTEM SET pg_stat_monitor.pgsm_bucket_time = 30; -- 30-second buckets
SELECT pg_reload_conf();
-- The view should now refresh every 30 seconds
```

---

# **PHASE 3: Memory Reduction to 1-3MB (2 hours)**

*Git diff: ~100 lines, mostly in isolated functions*

## **Objective**

Clear buckets immediately after export to reach 1-3MB of shared memory.

## **Changes**

### **3.1: Immediate Clear After Export** (`pgsm_table_export.c`)

```c
/*
 * Clear all entries belonging to the exported bucket.
 * This lets us keep only the current bucket's data in memory.
 */
void
pgsm_clear_bucket_after_export(uint64 bucket_id)
{
    HASH_SEQ_STATUS hash_seq;
    pgsmEntry  *entry;
    int         cleared = 0;

    /*
     * Iterate and remove entries from the exported bucket. dynahash permits
     * removing the element a sequential scan is currently positioned on.
     */
    hash_seq_init(&hash_seq, pgsm_hash);
    while ((entry = hash_seq_search(&hash_seq)) != NULL)
    {
        if (entry->key.bucket_id == bucket_id)
        {
            /* Remove from the hash table */
            hash_search(pgsm_hash, &entry->key, HASH_REMOVE, NULL);
            cleared++;
        }
    }

    elog(DEBUG1, "pg_stat_monitor: cleared %d entries from bucket " UINT64_FORMAT,
         cleared, bucket_id);
}
```

### **3.2: Modify Export Flow** (`pg_stat_monitor.c`)

```diff
+ extern void pgsm_clear_bucket_after_export(uint64 bucket_id);
+
  static uint64
  get_next_wbucket(pgsmSharedState *pgsm)
  {
      if (update_bucket)
      {
          new_bucket_id = (tv.tv_sec / pgsm_bucket_time) % pgsm_max_buckets;
          prev_bucket_id = pg_atomic_exchange_u64(&pgsm->current_wbucket, new_bucket_id);

          pgsm_lock_aquire(pgsm, LW_EXCLUSIVE);

          /* Export bucket data before deallocation */
          pgsm_export_bucket_to_table(new_bucket_id);

+         /* Clear the exported bucket immediately to free memory */
+         pgsm_clear_bucket_after_export(new_bucket_id);

-         hash_entry_dealloc(new_bucket_id, prev_bucket_id, NULL);
+         /* Skip the normal dealloc since the bucket was already cleared */

          pgsm_lock_release(pgsm);
      }
  }
```

### **3.3: Single Bucket Mode** (`guc.c`)

```diff
  void
  init_guc(void)
  {
      /* ... existing GUC definitions ... */

+     /*
+      * Override to a single bucket for minimal memory. This must run after
+      * the pgsm_max_buckets GUC has been defined, or the GUC's boot value
+      * will overwrite the override.
+      */
+     if (pgsm_enable_table_export)
+         pgsm_max_buckets = 1;   /* Only the current bucket is needed */
  }
```

### **3.4: Further Memory Reduction**

```sql
-- Phase 3: Aggressive memory reduction.
-- NB: 1MB is below the stock 10MB minimum for pgsm_max, so the fork must
-- also relax the GUC's lower bound.
ALTER SYSTEM SET pg_stat_monitor.pgsm_max = '1MB';
ALTER SYSTEM SET pg_stat_monitor.pgsm_max_buckets = 1;               -- Single bucket
ALTER SYSTEM SET pg_stat_monitor.pgsm_query_shared_buffer = '100kB'; -- Tiny buffer
```

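With history now on disk instead of shared memory, the export table grows without bound unless trimmed. A minimal retention sketch (the 7-day window is illustrative):

```sql
-- Trim exported rows older than 7 days
DELETE FROM pg_stat_monitor_data
WHERE exported_at < now() - interval '7 days';
```

If daily partitions are attached as suggested in Phase 1, dropping an old partition achieves the same result without the DELETE cost.
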
## **Phase 3 Testing**

```sql
-- Check shared memory usage
SELECT name, size FROM pg_shmem_allocations
WHERE name LIKE '%pg_stat_monitor%';

-- Should show ~1-3MB instead of 276MB
```

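Because buckets are cleared as soon as they are exported, history should be read from the table or materialized view rather than shared memory. A quick check that clearing lost nothing:

```sql
-- Exported history remains queryable even though shared memory was cleared
SELECT count(*) AS exported_rows FROM pg_stat_monitor_data;
SELECT count(*) AS visible_rows  FROM pg_stat_monitor;
```
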
---

## **Summary of Changes**

| Phase | Files Changed | Lines Added | Memory Usage | Feature |
|-------|---------------|-------------|--------------|---------|
| **Phase 1** | 4 files | ~150 lines | 11MB | Table export |
| **Phase 2** | 2 files | ~30 lines | 11MB | Mat view + bucket-synced refresh |
| **Phase 3** | 3 files | ~100 lines | **1-3MB** | Clear after export |

**Total git diff: ~300 lines across 5 files**

## **Key Benefits**

1. **Phased approach** - Each phase can be reviewed and tested independently
2. **Minimal core changes** - Hooks into the existing bucket rotation
3. **No cron needed** - Export and refresh piggyback on existing function calls
4. **Easy rollback** - Can be disabled with a single GUC (see the example below)
5. **Rebase friendly** - Changes are isolated in a new file plus minimal hooks
5. **Rebase friendly** - Changes isolated in new file + minimal hooks |