From 2ede755107fc8389ef3b38a0e62f67ce4ae2fc93 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:34:52 -0700 Subject: [PATCH] Initial version of VACUUM --- cstore_tableam.c | 137 ++++++++++++++++++++++++++++++++++++++++- expected/am_vacuum.out | 52 ++++++++++++++++ sql/am_vacuum.sql | 20 ++++++ 3 files changed, 208 insertions(+), 1 deletion(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index 39a0695e2..59df86fb2 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -32,6 +32,7 @@ #include "storage/procarray.h" #include "storage/smgr.h" #include "utils/builtins.h" +#include "utils/pg_rusage.h" #include "utils/rel.h" #include "utils/syscache.h" @@ -40,6 +41,15 @@ #define CSTORE_TABLEAM_NAME "cstore_tableam" +/* + * Timing parameters for truncate locking heuristics. + * + * These are the same values as in src/backend/access/heap/vacuumlazy.c + */ +#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms; NOTE(review): not referenced elsewhere in this patch */ +#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */ +#define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */ + typedef struct CStoreScanDescData { TableScanDescData cs_base; @@ -59,6 +69,9 @@ static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, void *arg); static bool IsCStoreTableAmTable(Oid relationId); + +static void TruncateCStore(Relation rel, int elevel); + static CStoreOptions * CStoreTableAMGetOptions(void) { @@ -575,6 +588,128 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, } +/* + * cstore_vacuum_rel implements VACUUM without the FULL option. + */ +static void +cstore_vacuum_rel(Relation rel, VacuumParams *params, + BufferAccessStrategy bstrategy) +{ + int elevel = (params->options & VACOPT_VERBOSE) ? INFO : DEBUG2; + + /* this should have been resolved by vacuum.c by now */ + Assert(params->truncate != VACOPT_TERNARY_DEFAULT); + + /* + * We don't have updates, deletes, or concurrent updates, so all we + * care about for now is truncating the unused space at the end of storage. 
+ */ + if (params->truncate == VACOPT_TERNARY_ENABLED) + { + TruncateCStore(rel, elevel); + } +} + + +/* + * TruncateCStore truncates the unused space at the end of the main fork of + * a cstore table. This unused space can be created by aborted transactions. + * + * This implementation is based on heap_vacuum_rel in vacuumlazy.c with some + * changes so it suits columnar store relations. + */ +static void +TruncateCStore(Relation rel, int elevel) +{ + PGRUsage ru0; + int lock_retry = 0; + BlockNumber old_rel_pages = 0; + BlockNumber new_rel_pages = 0; + DataFileMetadata *metadata = NULL; + ListCell *stripeMetadataCell = NULL; + + pg_rusage_init(&ru0); + + /* Report that we are now truncating. NOTE(review): no pgstat_progress_start_command() call appears in this patch -- confirm this update is visible. */ + pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, + PROGRESS_VACUUM_PHASE_TRUNCATE); + + /* + * We need an AccessExclusiveLock to do the truncation. + * Loop until we acquire the lock or the retry threshold is reached. + */ + while (true) + { + if (ConditionalLockRelation(rel, AccessExclusiveLock)) + { + break; + } + + /* + * Check for interrupts while trying to (re-)acquire the exclusive + * lock. + */ + CHECK_FOR_INTERRUPTS(); + + if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT / + VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL)) + { + /* + * We failed to establish the lock in the specified number of + * retries. This means we give up truncating. 
+ */ + ereport(elevel, + (errmsg("\"%s\": stopping truncate due to conflicting lock request", + RelationGetRelationName(rel)))); + return; + } + + pg_usleep(VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL * 1000L); + } + + RelationOpenSmgr(rel); + old_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + RelationCloseSmgr(rel); + + /* loop over stripes to find the first block past the last used byte */ + metadata = ReadDataFileMetadata(rel->rd_node.relNode); + foreach(stripeMetadataCell, metadata->stripeMetadataList) + { + StripeMetadata *stripe = lfirst(stripeMetadataCell); + uint64 lastByte = stripe->fileOffset + stripe->dataLength - 1; + SmgrAddr addr = logical_to_smgr(lastByte); + new_rel_pages = Max(new_rel_pages, addr.blockno + 1); + } + + if (new_rel_pages == old_rel_pages) + { + UnlockRelation(rel, AccessExclusiveLock); + return; + } + + /* + * Truncate the storage. Note that RelationTruncate() takes care of + * write-ahead logging. + */ + RelationTruncate(rel, new_rel_pages); + + /* + * We can release the exclusive lock as soon as we have truncated. + * Other backends can't safely access the relation until they have + * processed the smgr invalidation that smgrtruncate sent out ... but + * that should happen as part of standard invalidation processing once + * they acquire a lock on the relation. 
+ */ + UnlockRelation(rel, AccessExclusiveLock); + + ereport(elevel, + (errmsg("\"%s\": truncated %u to %u pages", + RelationGetRelationName(rel), + old_rel_pages, new_rel_pages), + errdetail_internal("%s", pg_rusage_show(&ru0)))); +} + + static bool cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy) @@ -853,7 +988,7 @@ static const TableAmRoutine cstore_am_methods = { .relation_nontransactional_truncate = cstore_relation_nontransactional_truncate, .relation_copy_data = cstore_relation_copy_data, .relation_copy_for_cluster = cstore_relation_copy_for_cluster, - .relation_vacuum = heap_vacuum_rel, + .relation_vacuum = cstore_vacuum_rel, .scan_analyze_next_block = cstore_scan_analyze_next_block, .scan_analyze_next_tuple = cstore_scan_analyze_next_tuple, .index_build_range_scan = cstore_index_build_range_scan, diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index dbeddca2b..7a1ff2777 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -95,6 +95,58 @@ SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; 1 (1 row) +-- do this in a transaction so concurrent autovacuum doesn't interfere with results +BEGIN; +SAVEPOINT s1; +SELECT count(*) FROM t; + count +------- + 2530 +(1 row) + +SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 16 kB +(1 row) + +INSERT INTO t SELECT i FROM generate_series(1, 10000) i; +SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 56 kB +(1 row) + +SELECT count(*) FROM t; + count +------- + 12530 +(1 row) + +ROLLBACK TO SAVEPOINT s1; +-- not truncated by VACUUM or autovacuum yet (being in transaction ensures this), +-- so relation size should be same as before. 
+SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 56 kB +(1 row) + +COMMIT; +-- vacuum should truncate the relation to the usable space +VACUUM t; +SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 16 kB +(1 row) + +SELECT count(*) FROM t; + count +------- + 2530 +(1 row) + DROP TABLE t; -- Make sure we cleaned the metadata for t too SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; diff --git a/sql/am_vacuum.sql b/sql/am_vacuum.sql index 8cb70167d..10d1c7f6c 100644 --- a/sql/am_vacuum.sql +++ b/sql/am_vacuum.sql @@ -41,6 +41,26 @@ SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cs -- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; +-- do this in a transaction so concurrent autovacuum doesn't interfere with results +BEGIN; +SAVEPOINT s1; +SELECT count(*) FROM t; +SELECT pg_size_pretty(pg_relation_size('t')); +INSERT INTO t SELECT i FROM generate_series(1, 10000) i; +SELECT pg_size_pretty(pg_relation_size('t')); +SELECT count(*) FROM t; +ROLLBACK TO SAVEPOINT s1; + +-- not truncated by VACUUM or autovacuum yet (being in transaction ensures this), +-- so relation size should be same as before. +SELECT pg_size_pretty(pg_relation_size('t')); +COMMIT; + +-- vacuum should truncate the relation to the usable space +VACUUM t; +SELECT pg_size_pretty(pg_relation_size('t')); +SELECT count(*) FROM t; + DROP TABLE t; -- Make sure we cleaned the metadata for t too