From 2ede755107fc8389ef3b38a0e62f67ce4ae2fc93 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:34:52 -0700 Subject: [PATCH 1/6] Initial version of VACUUM --- cstore_tableam.c | 137 ++++++++++++++++++++++++++++++++++++++++- expected/am_vacuum.out | 52 ++++++++++++++++ sql/am_vacuum.sql | 20 ++++++ 3 files changed, 208 insertions(+), 1 deletion(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index 39a0695e2..59df86fb2 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -32,6 +32,7 @@ #include "storage/procarray.h" #include "storage/smgr.h" #include "utils/builtins.h" +#include "utils/pg_rusage.h" #include "utils/rel.h" #include "utils/syscache.h" @@ -40,6 +41,15 @@ #define CSTORE_TABLEAM_NAME "cstore_tableam" +/* + * Timing parameters for truncate locking heuristics. + * + * These are the same values from src/backend/access/heap/vacuumlazy.c + */ +#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */ +#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */ +#define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */ + typedef struct CStoreScanDescData { TableScanDescData cs_base; @@ -59,6 +69,9 @@ static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, void *arg); static bool IsCStoreTableAmTable(Oid relationId); + +static void TruncateCStore(Relation rel, int elevel); + static CStoreOptions * CStoreTableAMGetOptions(void) { @@ -575,6 +588,128 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, } +/* + * cstore_vacuum_rel implements VACUUM without FULL option. + */ +static void +cstore_vacuum_rel(Relation rel, VacuumParams *params, + BufferAccessStrategy bstrategy) +{ + int elevel = (params->options & VACOPT_VERBOSE) ? INFO : DEBUG2; + + /* this should have been resolved by vacuum.c until now */ + Assert(params->truncate != VACOPT_TERNARY_DEFAULT); + + /* + * We don't have updates, deletes, or concurrent updates, so all we + * care for now is truncating the unused space at the end of storage. + */ + if (params->truncate == VACOPT_TERNARY_ENABLED) + { + TruncateCStore(rel, elevel); + } +} + + +/* + * TruncateCStore truncates the unused space at the end of main fork for + * a cstore table. This unused space can be created by aborted transactions. + * + * This implementation is based on heap_vacuum_rel in vacuumlazy.c with some + * changes so it suits columnar store relations. + */ +static void +TruncateCStore(Relation rel, int elevel) +{ + PGRUsage ru0; + int lock_retry = 0; + BlockNumber old_rel_pages = 0; + BlockNumber new_rel_pages = 0; + DataFileMetadata *metadata = NULL; + ListCell *stripeMetadataCell = NULL; + + pg_rusage_init(&ru0); + + /* Report that we are now truncating */ + pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, + PROGRESS_VACUUM_PHASE_TRUNCATE); + + /* + * We need an ExclusiveLock to do the truncation. + * Loop until we acquire a lock or retry threshold is reached. + */ + while (true) + { + if (ConditionalLockRelation(rel, AccessExclusiveLock)) + { + break; + } + + /* + * Check for interrupts while trying to (re-)acquire the exclusive + * lock. + */ + CHECK_FOR_INTERRUPTS(); + + if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT / + VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL)) + { + /* + * We failed to establish the lock in the specified number of + * retries. This means we give up truncating. + */ + ereport(elevel, + (errmsg("\"%s\": stopping truncate due to conflicting lock request", + RelationGetRelationName(rel)))); + return; + } + + pg_usleep(VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL * 1000L); + } + + RelationOpenSmgr(rel); + old_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + RelationCloseSmgr(rel); + + /* loop over stripes and find max used block */ + metadata = ReadDataFileMetadata(rel->rd_node.relNode); + foreach(stripeMetadataCell, metadata->stripeMetadataList) + { + StripeMetadata *stripe = lfirst(stripeMetadataCell); + uint64 lastByte = stripe->fileOffset + stripe->dataLength - 1; + SmgrAddr addr = logical_to_smgr(lastByte); + new_rel_pages = Max(new_rel_pages, addr.blockno + 1); + } + + if (new_rel_pages == old_rel_pages) + { + UnlockRelation(rel, AccessExclusiveLock); + return; + } + + /* + * Truncate the storage. Note that RelationTruncate() takes care of + * Write Ahead Logging. + */ + RelationTruncate(rel, new_rel_pages); + + /* + * We can release the exclusive lock as soon as we have truncated. + * Other backends can't safely access the relation until they have + * processed the smgr invalidation that smgrtruncate sent out ... but + * that should happen as part of standard invalidation processing once + * they acquire lock on the relation. + */ + UnlockRelation(rel, AccessExclusiveLock); + + ereport(elevel, + (errmsg("\"%s\": truncated %u to %u pages", + RelationGetRelationName(rel), + old_rel_pages, new_rel_pages), + errdetail_internal("%s", pg_rusage_show(&ru0)))); +} + + static bool cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy) @@ -853,7 +988,7 @@ static const TableAmRoutine cstore_am_methods = { .relation_nontransactional_truncate = cstore_relation_nontransactional_truncate, .relation_copy_data = cstore_relation_copy_data, .relation_copy_for_cluster = cstore_relation_copy_for_cluster, - .relation_vacuum = heap_vacuum_rel, + .relation_vacuum = cstore_vacuum_rel, .scan_analyze_next_block = cstore_scan_analyze_next_block, .scan_analyze_next_tuple = cstore_scan_analyze_next_tuple, .index_build_range_scan = cstore_index_build_range_scan, diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index dbeddca2b..7a1ff2777 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -95,6 +95,58 @@ SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; 1 (1 row) +-- do this in a transaction so concurrent autovacuum doesn't interfere with results +BEGIN; +SAVEPOINT s1; +SELECT count(*) FROM t; + count +------- + 2530 +(1 row) + +SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 16 kB +(1 row) + +INSERT INTO t SELECT i FROM generate_series(1, 10000) i; +SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 56 kB +(1 row) + +SELECT count(*) FROM t; + count +------- + 12530 +(1 row) + +ROLLBACK TO SAVEPOINT s1; +-- not truncated by VACUUM or autovacuum yet (being in transaction ensures this), +-- so relation size should be same as before. +SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 56 kB +(1 row) + +COMMIT; +-- vacuum should truncate the relation to the usable space +VACUUM t; +SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 16 kB +(1 row) + +SELECT count(*) FROM t; + count +------- + 2530 +(1 row) + DROP TABLE t; -- Make sure we cleaned the metadata for t too SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; diff --git a/sql/am_vacuum.sql b/sql/am_vacuum.sql index 8cb70167d..10d1c7f6c 100644 --- a/sql/am_vacuum.sql +++ b/sql/am_vacuum.sql @@ -41,6 +41,26 @@ SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cs -- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; +-- do this in a transaction so concurrent autovacuum doesn't interfere with results +BEGIN; +SAVEPOINT s1; +SELECT count(*) FROM t; +SELECT pg_size_pretty(pg_relation_size('t')); +INSERT INTO t SELECT i FROM generate_series(1, 10000) i; +SELECT pg_size_pretty(pg_relation_size('t')); +SELECT count(*) FROM t; +ROLLBACK TO SAVEPOINT s1; + +-- not truncated by VACUUM or autovacuum yet (being in transaction ensures this), +-- so relation size should be same as before. +SELECT pg_size_pretty(pg_relation_size('t')); +COMMIT; + +-- vacuum should truncate the relation to the usable space +VACUUM t; +SELECT pg_size_pretty(pg_relation_size('t')); +SELECT count(*) FROM t; + DROP TABLE t; -- Make sure we cleaned the metadata for t too From 74dd1facf36250e661e8b79f05bc562549ad9da3 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:43:03 -0700 Subject: [PATCH 2/6] add isolation tests --- Makefile | 18 ++++++++++ expected/am_vacuum_vs_insert.out | 58 ++++++++++++++++++++++++++++++++ expected/create.out | 6 ++++ specs/am_vacuum_vs_insert.spec | 47 ++++++++++++++++++++++++++ specs/create.spec | 8 +++++ 5 files changed, 137 insertions(+) create mode 100644 expected/am_vacuum_vs_insert.out create mode 100644 expected/create.out create mode 100644 specs/am_vacuum_vs_insert.spec create mode 100644 specs/create.spec diff --git a/Makefile b/Makefile index 60d8855f8..58340450f 100644 --- a/Makefile +++ b/Makefile @@ -35,6 +35,7 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs cstore_fdw--1.0--1.1.sql cstore_fdw--1.7--1.8.sql REGRESS = extension_create +ISOLATION = create EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ sql/block_filtering.sql sql/create.sql sql/data_types.sql sql/load.sql \ sql/copyto.sql expected/block_filtering.out expected/create.out \ @@ -54,6 +55,7 @@ ifeq ($(USE_TABLEAM),yes) OBJS += cstore_tableam.o REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ am_drop am_insert am_copyto am_alter am_rollback am_truncate am_vacuum am_clean + ISOLATION += am_vacuum_vs_insert endif ifeq ($(enable_coverage),yes) @@ -76,6 +78,22 @@ PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) +# command for getting postgres source directory is taken from citus/configure.in +POSTGRES_SRCDIR=$(shell grep ^abs_top_srcdir $(shell dirname $(shell $(PG_CONFIG) --pgxs))/../Makefile.global|cut -d ' ' -f3-) +PGXS_ISOLATION_TESTER=$(top_builddir)/src/test/isolation/pg_isolation_regress + +# If postgres installation doesn't include pg_isolation_regress, try using the +# one in postgres source directory. +ifeq (,$(wildcard $(PGXS_ISOLATION_TESTER))) + pg_isolation_regress_installcheck = \ + $(POSTGRES_SRCDIR)/src/test/isolation/pg_isolation_regress \ + --inputdir=$(srcdir) $(EXTRA_REGRESS_OPTS) +else + pg_isolation_regress_installcheck = \ + $(PGXS_ISOLATION_TESTER) \ + --inputdir=$(srcdir) $(EXTRA_REGRESS_OPTS) +endif + installcheck: reindent: diff --git a/expected/am_vacuum_vs_insert.out b/expected/am_vacuum_vs_insert.out new file mode 100644 index 000000000..8ef78bfa4 --- /dev/null +++ b/expected/am_vacuum_vs_insert.out @@ -0,0 +1,58 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-insert s1-begin s1-insert s2-vacuum s1-commit s2-select +step s1-insert: + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s1-begin: + BEGIN; + +step s1-insert: + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s2-vacuum: + VACUUM test_vacuum_vs_insert; + +step s1-commit: + COMMIT; + +step s2-select: + SELECT * FROM test_vacuum_vs_insert; + +a b + +1 2 +2 4 +3 6 +1 2 +2 4 +3 6 + +starting permutation: s1-insert s1-begin s1-insert s2-vacuum-full s1-commit s2-select +step s1-insert: + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s1-begin: + BEGIN; + +step s1-insert: + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s2-vacuum-full: + VACUUM FULL test_vacuum_vs_insert; + +step s1-commit: + COMMIT; + +step s2-vacuum-full: <... completed> +step s2-select: + SELECT * FROM test_vacuum_vs_insert; + +a b + +1 2 +2 4 +3 6 +1 2 +2 4 +3 6 diff --git a/expected/create.out b/expected/create.out new file mode 100644 index 000000000..39b477c81 --- /dev/null +++ b/expected/create.out @@ -0,0 +1,6 @@ +Parsed test spec with 1 sessions + +starting permutation: s1a +step s1a: + CREATE EXTENSION cstore_fdw; + diff --git a/specs/am_vacuum_vs_insert.spec b/specs/am_vacuum_vs_insert.spec new file mode 100644 index 000000000..57105e1dd --- /dev/null +++ b/specs/am_vacuum_vs_insert.spec @@ -0,0 +1,47 @@ +setup +{ + CREATE TABLE test_vacuum_vs_insert (a int, b int) USING cstore_tableam; +} + +teardown +{ + DROP TABLE IF EXISTS test_vacuum_vs_insert CASCADE; +} + +session "s1" + +step "s1-begin" +{ + BEGIN; +} + +step "s1-insert" +{ + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; +} + +step "s1-commit" +{ + COMMIT; +} + +session "s2" + +step "s2-vacuum" +{ + VACUUM test_vacuum_vs_insert; +} + +step "s2-vacuum-full" +{ + VACUUM FULL test_vacuum_vs_insert; +} + +step "s2-select" +{ + SELECT * FROM test_vacuum_vs_insert; +} + +permutation "s1-insert" "s1-begin" "s1-insert" "s2-vacuum" "s1-commit" "s2-select" +permutation "s1-insert" "s1-begin" "s1-insert" "s2-vacuum-full" "s1-commit" "s2-select" + diff --git a/specs/create.spec b/specs/create.spec new file mode 100644 index 000000000..f8e874678 --- /dev/null +++ b/specs/create.spec @@ -0,0 +1,8 @@ +session "s1" +step "s1a" +{ + CREATE EXTENSION cstore_fdw; +} + +permutation "s1a" + From 37e3845e6afdb91bfd77940770da74ecd07ca698 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:47:09 -0700 Subject: [PATCH 3/6] Address Nils feedback --- cstore_tableam.c | 96 ++++++++++++++++++++++---------- expected/am_vacuum_vs_insert.out | 9 ++- specs/am_vacuum_vs_insert.spec | 4 +- 3 files changed, 76 insertions(+), 33 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index 59df86fb2..f6b51b770 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -46,7 +46,6 @@ * * These are the same values from src/backend/access/heap/vacuumlazy.c */ -#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */ #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */ #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */ @@ -68,6 +67,8 @@ static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, objectId, int subId, void *arg); static bool IsCStoreTableAmTable(Oid relationId); +static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, + int timeout, int retryInterval); static void TruncateCStore(Relation rel, int elevel); @@ -622,7 +623,6 @@ static void TruncateCStore(Relation rel, int elevel) { PGRUsage ru0; - int lock_retry = 0; BlockNumber old_rel_pages = 0; BlockNumber new_rel_pages = 0; DataFileMetadata *metadata = NULL; @@ -634,45 +634,46 @@ TruncateCStore(Relation rel, int elevel) pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_TRUNCATE); + /* - * We need an ExclusiveLock to do the truncation. - * Loop until we acquire a lock or retry threshold is reached. + * We need access exclusive lock on the relation in order to do + * truncation. If we can't get it, give up rather than waiting --- we + * don't want to block other backends, and we don't want to deadlock + * (which is quite possible considering we already hold a lower-grade + * lock). + * + * The decisions for AccessExclusiveLock and conditional lock with + * a timeout is based on lazy_truncate_heap in vacuumlazy.c. */ - while (true) + if (!ConditionalLockRelationWithTimeout(rel, AccessExclusiveLock, + VACUUM_TRUNCATE_LOCK_TIMEOUT, + VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL)) { - if (ConditionalLockRelation(rel, AccessExclusiveLock)) - { - break; - } - /* - * Check for interrupts while trying to (re-)acquire the exclusive - * lock. + * We failed to establish the lock in the specified number of + * retries. This means we give up truncating. */ - CHECK_FOR_INTERRUPTS(); - - if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT / - VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL)) - { - /* - * We failed to establish the lock in the specified number of - * retries. This means we give up truncating. - */ - ereport(elevel, - (errmsg("\"%s\": stopping truncate due to conflicting lock request", - RelationGetRelationName(rel)))); - return; - } - - pg_usleep(VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL * 1000L); + ereport(elevel, + (errmsg("\"%s\": stopping truncate due to conflicting lock request", + RelationGetRelationName(rel)))); + return; } RelationOpenSmgr(rel); old_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); RelationCloseSmgr(rel); - /* loop over stripes and find max used block */ + /* + * Get metadata as viewed in latest snapshot. Reading metadata in transaction + * snapshot is not enough, since stripes could have been created between + * current transaction start and lock acquisition time. Ignoring those + * stripes can destory data. + */ + PushActiveSnapshot(GetLatestSnapshot()); metadata = ReadDataFileMetadata(rel->rd_node.relNode); + PopActiveSnapshot(); + + /* loop over stripes and find max used block */ foreach(stripeMetadataCell, metadata->stripeMetadataList) { StripeMetadata *stripe = lfirst(stripeMetadataCell); @@ -710,6 +711,43 @@ TruncateCStore(Relation rel, int elevel) } +/* + * ConditionalLockRelationWithTimeout tries to acquire a relation lock until + * it either succeeds or timesout. It doesn't enter wait queue and instead it + * sleeps between lock tries. + * + * This is based on the lock loop in lazy_truncate_heap(). + */ +static bool +ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, int timeout, + int retryInterval) +{ + int lock_retry = 0; + + while (true) + { + if (ConditionalLockRelation(rel, lockMode)) + { + break; + } + + /* + * Check for interrupts while trying to (re-)acquire the lock + */ + CHECK_FOR_INTERRUPTS(); + + if (++lock_retry > (timeout / retryInterval)) + { + return false; + } + + pg_usleep(retryInterval * 1000L); + } + + return true; +} + + static bool cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy) diff --git a/expected/am_vacuum_vs_insert.out b/expected/am_vacuum_vs_insert.out index 8ef78bfa4..ae23d9a26 100644 --- a/expected/am_vacuum_vs_insert.out +++ b/expected/am_vacuum_vs_insert.out @@ -10,8 +10,9 @@ step s1-begin: step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; +s2: INFO: "test_vacuum_vs_insert": stopping truncate due to conflicting lock request step s2-vacuum: - VACUUM test_vacuum_vs_insert; + VACUUM VERBOSE test_vacuum_vs_insert; step s1-commit: COMMIT; @@ -39,11 +40,15 @@ step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; step s2-vacuum-full: - VACUUM FULL test_vacuum_vs_insert; + VACUUM FULL VERBOSE test_vacuum_vs_insert; step s1-commit: COMMIT; +s2: INFO: vacuuming "public.test_vacuum_vs_insert" +s2: INFO: "test_vacuum_vs_insert": found 0 removable, 6 nonremovable row versions in 1 pages +DETAIL: 0 dead row versions cannot be removed yet. +CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s. step s2-vacuum-full: <... completed> step s2-select: SELECT * FROM test_vacuum_vs_insert; diff --git a/specs/am_vacuum_vs_insert.spec b/specs/am_vacuum_vs_insert.spec index 57105e1dd..ac2d83667 100644 --- a/specs/am_vacuum_vs_insert.spec +++ b/specs/am_vacuum_vs_insert.spec @@ -29,12 +29,12 @@ session "s2" step "s2-vacuum" { - VACUUM test_vacuum_vs_insert; + VACUUM VERBOSE test_vacuum_vs_insert; } step "s2-vacuum-full" { - VACUUM FULL test_vacuum_vs_insert; + VACUUM FULL VERBOSE test_vacuum_vs_insert; } step "s2-select" From 55885c81dd5cdb9c60cd0e23a27d681a4df97034 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:48:34 -0700 Subject: [PATCH 4/6] log stats on verbose --- cstore_tableam.c | 106 ++++++++++++++++++++++++++++++- expected/am_vacuum.out | 58 ++++++++++++++++- expected/am_vacuum_vs_insert.out | 5 ++ specs/am_vacuum_vs_insert.spec | 1 - specs/create.spec | 1 - sql/am_vacuum.sql | 38 ++++++++++- 6 files changed, 203 insertions(+), 6 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index f6b51b770..fa3cd8739 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -69,8 +69,8 @@ static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, static bool IsCStoreTableAmTable(Oid relationId); static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, int timeout, int retryInterval); - - +static void LogRelationStats(Relation rel, int elevel); +static char * CompressionTypeStr(CompressionType type); static void TruncateCStore(Relation rel, int elevel); static CStoreOptions * @@ -609,6 +609,108 @@ cstore_vacuum_rel(Relation rel, VacuumParams *params, { TruncateCStore(rel, elevel); } + + LogRelationStats(rel, elevel); +} + + +static void +LogRelationStats(Relation rel, int elevel) +{ + DataFileMetadata *datafileMetadata = NULL; + ListCell *stripeMetadataCell = NULL; + Oid relfilenode = rel->rd_node.relNode; + StringInfo infoBuf = makeStringInfo(); + + int compressionStats[COMPRESSION_COUNT] = { 0 }; + uint64 totalStripeLength = 0; + uint64 tupleCount = 0; + uint64 blockCount = 0; + uint64 relPages = 0; + int stripeCount = 0; + TupleDesc tupdesc = RelationGetDescr(rel); + uint64 droppedBlocksWithData = 0; + + datafileMetadata = ReadDataFileMetadata(relfilenode); + stripeCount = list_length(datafileMetadata->stripeMetadataList); + + foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList) + { + StripeMetadata *stripe = lfirst(stripeMetadataCell); + StripeSkipList *skiplist = ReadStripeSkipList(relfilenode, stripe->id, + RelationGetDescr(rel), + stripe->blockCount); + for (uint32 column = 0; column < skiplist->columnCount; column++) + { + bool attrDropped = tupdesc->attrs[column].attisdropped; + for (uint32 block = 0; block < skiplist->blockCount; block++) + { + ColumnBlockSkipNode *skipnode = + &skiplist->blockSkipNodeArray[column][block]; + + /* ignore zero length blocks for dropped attributes */ + if (skipnode->valueLength > 0) + { + compressionStats[skipnode->valueCompressionType]++; + blockCount++; + + if (attrDropped) + { + droppedBlocksWithData++; + } + } + } + } + + tupleCount += stripe->rowCount; + totalStripeLength += stripe->dataLength; + } + + RelationOpenSmgr(rel); + relPages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + RelationCloseSmgr(rel); + + appendStringInfo(infoBuf, "total file size: %ld, total data size: %ld\n", + relPages * BLCKSZ, totalStripeLength); + appendStringInfo(infoBuf, + "total row count: %ld, stripe count: %d, " + "average rows per stripe: %ld\n", + tupleCount, stripeCount, tupleCount / stripeCount); + appendStringInfo(infoBuf, + "block count: %ld" + ", containing data for dropped columns: %ld", + blockCount, droppedBlocksWithData); + for (int compressionType = 0; compressionType < COMPRESSION_COUNT; compressionType++) + { + appendStringInfo(infoBuf, + ", %s compressed: %d", + CompressionTypeStr(compressionType), + compressionStats[compressionType]); + } + appendStringInfoString(infoBuf, "\n"); + + ereport(elevel, (errmsg("statistics for \"%s\":\n%s", RelationGetRelationName(rel), + infoBuf->data))); +} + + +/* + * CompressionTypeStr returns string representation of a compression type. + */ +static char * +CompressionTypeStr(CompressionType type) +{ + switch (type) + { + case COMPRESSION_NONE: + return "none"; + + case COMPRESSION_PG_LZ: + return "pglz"; + + default: + return "unknown"; + } } diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index 7a1ff2777..9552f6ade 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -134,7 +134,14 @@ SELECT pg_size_pretty(pg_relation_size('t')); COMMIT; -- vacuum should truncate the relation to the usable space -VACUUM t; +VACUUM VERBOSE t; +INFO: "t": truncated 7 to 2 pages +DETAIL: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +INFO: statistics for "t": +total file size: 16384, total data size: 10754 +total row count: 2530, stripe count: 3, average rows per stripe: 843 +block count: 3, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 0 + SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty ---------------- @@ -147,6 +154,55 @@ SELECT count(*) FROM t; 2530 (1 row) +-- add some stripes with different compression types and create some gaps, +-- then vacuum to print stats +BEGIN; +SET cstore.block_row_count TO 1000; +SET cstore.stripe_row_count TO 2000; +SET cstore.compression TO "pglz"; +SAVEPOINT s1; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s1; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +SET cstore.compression TO "none"; +SAVEPOINT s2; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s2; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +COMMIT; +VACUUM VERBOSE t; +INFO: statistics for "t": +total file size: 24576, total data size: 18808 +total row count: 5530, stripe count: 5, average rows per stripe: 1106 +block count: 7, containing data for dropped columns: 0, none compressed: 5, pglz compressed: 2 + +SELECT count(*) FROM t; + count +------- + 5530 +(1 row) + +-- check that we report blocks with data for dropped columns +ALTER TABLE t ADD COLUMN c int; +INSERT INTO t SELECT 1, i / 5 FROM generate_series(1, 1500) i; +ALTER TABLE t DROP COLUMN c; +VACUUM VERBOSE t; +INFO: statistics for "t": +total file size: 32768, total data size: 31372 +total row count: 7030, stripe count: 6, average rows per stripe: 1171 +block count: 11, containing data for dropped columns: 2, none compressed: 9, pglz compressed: 2 + +-- vacuum full should remove blocks for dropped columns +-- note that, a block will be stored in non-compressed for if compression +-- doesn't reduce its size. +SET cstore.compression TO "pglz"; +VACUUM FULL t; +VACUUM VERBOSE t; +INFO: statistics for "t": +total file size: 16384, total data size: 15728 +total row count: 7030, stripe count: 4, average rows per stripe: 1757 +block count: 8, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 6 + DROP TABLE t; -- Make sure we cleaned the metadata for t too SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; diff --git a/expected/am_vacuum_vs_insert.out b/expected/am_vacuum_vs_insert.out index ae23d9a26..767604251 100644 --- a/expected/am_vacuum_vs_insert.out +++ b/expected/am_vacuum_vs_insert.out @@ -11,6 +11,11 @@ step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; s2: INFO: "test_vacuum_vs_insert": stopping truncate due to conflicting lock request +s2: INFO: statistics for "test_vacuum_vs_insert": +total file size: 8192, total data size: 26 +total row count: 3, stripe count: 1, average rows per stripe: 3 +block count: 2, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 0 + step s2-vacuum: VACUUM VERBOSE test_vacuum_vs_insert; diff --git a/specs/am_vacuum_vs_insert.spec b/specs/am_vacuum_vs_insert.spec index ac2d83667..59c7274d5 100644 --- a/specs/am_vacuum_vs_insert.spec +++ b/specs/am_vacuum_vs_insert.spec @@ -44,4 +44,3 @@ step "s2-select" permutation "s1-insert" "s1-begin" "s1-insert" "s2-vacuum" "s1-commit" "s2-select" permutation "s1-insert" "s1-begin" "s1-insert" "s2-vacuum-full" "s1-commit" "s2-select" - diff --git a/specs/create.spec b/specs/create.spec index f8e874678..09fc32131 100644 --- a/specs/create.spec +++ b/specs/create.spec @@ -5,4 +5,3 @@ step "s1a" } permutation "s1a" - diff --git a/sql/am_vacuum.sql b/sql/am_vacuum.sql index 10d1c7f6c..f7f9d77bd 100644 --- a/sql/am_vacuum.sql +++ b/sql/am_vacuum.sql @@ -57,10 +57,46 @@ SELECT pg_size_pretty(pg_relation_size('t')); COMMIT; -- vacuum should truncate the relation to the usable space -VACUUM t; +VACUUM VERBOSE t; SELECT pg_size_pretty(pg_relation_size('t')); SELECT count(*) FROM t; +-- add some stripes with different compression types and create some gaps, +-- then vacuum to print stats + +BEGIN; +SET cstore.block_row_count TO 1000; +SET cstore.stripe_row_count TO 2000; +SET cstore.compression TO "pglz"; +SAVEPOINT s1; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s1; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +SET cstore.compression TO "none"; +SAVEPOINT s2; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s2; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +COMMIT; + +VACUUM VERBOSE t; + +SELECT count(*) FROM t; + +-- check that we report blocks with data for dropped columns +ALTER TABLE t ADD COLUMN c int; +INSERT INTO t SELECT 1, i / 5 FROM generate_series(1, 1500) i; +ALTER TABLE t DROP COLUMN c; + +VACUUM VERBOSE t; + +-- vacuum full should remove blocks for dropped columns +-- note that, a block will be stored in non-compressed for if compression +-- doesn't reduce its size. +SET cstore.compression TO "pglz"; +VACUUM FULL t; +VACUUM VERBOSE t; + DROP TABLE t; -- Make sure we cleaned the metadata for t too From 76a71aa61a283e2e973d235d211d9db328fab425 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 14:18:29 -0700 Subject: [PATCH 5/6] Use SnapshotDirty for reading metadata in truncation --- cstore.h | 1 + cstore_metadata_tables.c | 3 ++- cstore_tableam.c | 12 ++---------- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/cstore.h b/cstore.h index 489a4839b..c84588627 100644 --- a/cstore.h +++ b/cstore.h @@ -20,6 +20,7 @@ #include "storage/bufpage.h" #include "storage/lockdefs.h" #include "utils/relcache.h" +#include "utils/snapmgr.h" /* Defines for valid option names */ #define OPTION_NAME_COMPRESSION_TYPE "compression" diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 7c214eed5..565a37b07 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -377,7 +377,8 @@ ReadDataFileMetadata(Oid relfilenode, bool missingOk) index = index_open(CStoreStripesIndexRelationId(), AccessShareLock); tupleDescriptor = RelationGetDescr(cstoreStripes); - scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, NULL, 1, scanKey); + scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, NULL, 1, + scanKey); while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) { diff --git a/cstore_tableam.c b/cstore_tableam.c index fa3cd8739..4e9d47260 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -631,7 +631,7 @@ LogRelationStats(Relation rel, int elevel) TupleDesc tupdesc = RelationGetDescr(rel); uint64 droppedBlocksWithData = 0; - datafileMetadata = ReadDataFileMetadata(relfilenode); + datafileMetadata = ReadDataFileMetadata(relfilenode, false); stripeCount = list_length(datafileMetadata->stripeMetadataList); foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList) @@ -765,15 +765,7 @@ TruncateCStore(Relation rel, int elevel) old_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); RelationCloseSmgr(rel); - /* - * Get metadata as viewed in latest snapshot. Reading metadata in transaction - * snapshot is not enough, since stripes could have been created between - * current transaction start and lock acquisition time. Ignoring those - * stripes can destory data. - */ - PushActiveSnapshot(GetLatestSnapshot()); - metadata = ReadDataFileMetadata(rel->rd_node.relNode); - PopActiveSnapshot(); + metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); /* loop over stripes and find max used block */ foreach(stripeMetadataCell, metadata->stripeMetadataList) From e481e73d18722121c67a625b31e7732f01f545c6 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Thu, 8 Oct 2020 16:02:45 -0700 Subject: [PATCH 6/6] Encapsulate snapshot used for reading stripes in cstore_metadata_tables --- cstore.h | 1 + cstore_metadata_tables.c | 68 +++++++++++++++++++++++++------- cstore_tableam.c | 25 ++++++------ expected/am_vacuum.out | 6 +-- expected/am_vacuum_vs_insert.out | 2 +- 5 files changed, 70 insertions(+), 32 deletions(-) diff --git a/cstore.h b/cstore.h index c84588627..8a64730c8 100644 --- a/cstore.h +++ b/cstore.h @@ -285,6 +285,7 @@ extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode); extern void InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount); extern void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk); +extern uint64 GetHighestUsedAddress(Oid relfilenode); extern void SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor); diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 565a37b07..d5ad28388 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -43,6 +43,7 @@ typedef struct EState *estate; } ModifyState; +static List * ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot); static Oid CStoreStripesRelationId(void); static Oid CStoreStripesIndexRelationId(void); static Oid CStoreDataFilesRelationId(void); @@ -345,17 +346,8 @@ InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe) DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk) { - Oid cstoreStripesOid = InvalidOid; - Relation cstoreStripes = NULL; - Relation index = NULL; - TupleDesc tupleDescriptor = NULL; - ScanKeyData scanKey[1]; - SysScanDesc scanDescriptor = NULL; - HeapTuple heapTuple; - bool found = false; - DataFileMetadata *datafileMetadata = palloc0(sizeof(DataFileMetadata)); - found = ReadCStoreDataFiles(relfilenode, &datafileMetadata->blockRowCount); + bool found = ReadCStoreDataFiles(relfilenode, &datafileMetadata->blockRowCount); if (!found) { if (!missingOk) @@ -369,6 +361,56 @@ ReadDataFileMetadata(Oid relfilenode, bool missingOk) } } + datafileMetadata->stripeMetadataList = + ReadDataFileStripeList(relfilenode, GetTransactionSnapshot()); + + return datafileMetadata; +} + + +/* + * GetHighestUsedAddress returns the highest used address for the given + * relfilenode across all active and inactive transactions. + */ +uint64 +GetHighestUsedAddress(Oid relfilenode) +{ + uint64 highestUsedAddress = 0; + ListCell *stripeMetadataCell = NULL; + List *stripeMetadataList = NIL; + + SnapshotData SnapshotDirty; + InitDirtySnapshot(SnapshotDirty); + + stripeMetadataList = ReadDataFileStripeList(relfilenode, &SnapshotDirty); + + foreach(stripeMetadataCell, stripeMetadataList) + { + StripeMetadata *stripe = lfirst(stripeMetadataCell); + uint64 lastByte = stripe->fileOffset + stripe->dataLength - 1; + highestUsedAddress = Max(highestUsedAddress, lastByte); + } + + return highestUsedAddress; +} + + +/* + * ReadDataFileStripeList reads the stripe list for a given relfilenode + * in the given snapshot. + */ +static List * +ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot) +{ + List *stripeMetadataList = NIL; + Oid cstoreStripesOid = InvalidOid; + Relation cstoreStripes = NULL; + Relation index = NULL; + TupleDesc tupleDescriptor = NULL; + ScanKeyData scanKey[1]; + SysScanDesc scanDescriptor = NULL; + HeapTuple heapTuple; + ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relfilenode, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); @@ -403,16 +445,14 @@ ReadDataFileMetadata(Oid relfilenode, bool missingOk) stripeMetadata->rowCount = DatumGetInt64( datumArray[Anum_cstore_stripes_row_count - 1]); - datafileMetadata->stripeMetadataList = lappend( - datafileMetadata->stripeMetadataList, - stripeMetadata); + stripeMetadataList = lappend(stripeMetadataList, stripeMetadata); } systable_endscan_ordered(scanDescriptor); index_close(index, NoLock); heap_close(cstoreStripes, NoLock); - return datafileMetadata; + return stripeMetadataList; } diff --git a/cstore_tableam.c b/cstore_tableam.c index 4e9d47260..0840436ec 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -601,6 +601,8 @@ cstore_vacuum_rel(Relation rel, VacuumParams *params, /* this should have been resolved by vacuum.c until now */ Assert(params->truncate != VACOPT_TERNARY_DEFAULT); + LogRelationStats(rel, elevel); + /* * We don't have updates, deletes, or concurrent updates, so all we * care for now is truncating the unused space at the end of storage. @@ -609,8 +611,6 @@ cstore_vacuum_rel(Relation rel, VacuumParams *params, { TruncateCStore(rel, elevel); } - - LogRelationStats(rel, elevel); } @@ -727,8 +727,7 @@ TruncateCStore(Relation rel, int elevel) PGRUsage ru0; BlockNumber old_rel_pages = 0; BlockNumber new_rel_pages = 0; - DataFileMetadata *metadata = NULL; - ListCell *stripeMetadataCell = NULL; + SmgrAddr highestPhysicalAddress; pg_rusage_init(&ru0); @@ -765,17 +764,15 @@ TruncateCStore(Relation rel, int elevel) old_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); RelationCloseSmgr(rel); - metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); - - /* loop over stripes and find max used block */ - foreach(stripeMetadataCell, metadata->stripeMetadataList) - { - StripeMetadata *stripe = lfirst(stripeMetadataCell); - uint64 lastByte = stripe->fileOffset + stripe->dataLength - 1; - SmgrAddr addr = logical_to_smgr(lastByte); - new_rel_pages = Max(new_rel_pages, addr.blockno + 1); - } + /* + * Due to the AccessExclusive lock there's no danger that + * new stripes be added beyond highestPhysicalAddress while + * we're truncating. + */ + highestPhysicalAddress = + logical_to_smgr(GetHighestUsedAddress(rel->rd_node.relNode)); + new_rel_pages = highestPhysicalAddress.blockno + 1; if (new_rel_pages == old_rel_pages) { UnlockRelation(rel, AccessExclusiveLock); diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index 9552f6ade..3db30a761 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -135,13 +135,13 @@ SELECT pg_size_pretty(pg_relation_size('t')); COMMIT; -- vacuum should truncate the relation to the usable space VACUUM VERBOSE t; -INFO: "t": truncated 7 to 2 pages -DETAIL: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s INFO: statistics for "t": -total file size: 16384, total data size: 10754 +total file size: 57344, total data size: 10754 total row count: 2530, stripe count: 3, average rows per stripe: 843 block count: 3, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 0 +INFO: "t": truncated 7 to 2 pages +DETAIL: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty ---------------- diff --git a/expected/am_vacuum_vs_insert.out b/expected/am_vacuum_vs_insert.out index 767604251..a3eb0fb89 100644 --- a/expected/am_vacuum_vs_insert.out +++ b/expected/am_vacuum_vs_insert.out @@ -10,12 +10,12 @@ step s1-begin: step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; -s2: INFO: "test_vacuum_vs_insert": stopping truncate due to conflicting lock request s2: INFO: statistics for "test_vacuum_vs_insert": total file size: 8192, total data size: 26 total row count: 3, stripe count: 1, average rows per stripe: 3 block count: 2, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 0 +s2: INFO: "test_vacuum_vs_insert": stopping truncate due to conflicting lock request step s2-vacuum: VACUUM VERBOSE test_vacuum_vs_insert;