diff --git a/cstore_tableam.c b/cstore_tableam.c
index f6b51b770..fa3cd8739 100644
--- a/cstore_tableam.c
+++ b/cstore_tableam.c
@@ -69,8 +69,8 @@ static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId,
 static bool IsCStoreTableAmTable(Oid relationId);
 static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode,
 											   int timeout, int retryInterval);
-
-
+static void LogRelationStats(Relation rel, int elevel);
+static const char * CompressionTypeStr(CompressionType type);
 static void TruncateCStore(Relation rel, int elevel);
 
 static CStoreOptions *
@@ -609,6 +609,140 @@ cstore_vacuum_rel(Relation rel, VacuumParams *params,
 	{
 		TruncateCStore(rel, elevel);
 	}
+
+	LogRelationStats(rel, elevel);
+}
+
+
+/*
+ * LogRelationStats gathers storage statistics for the given relation and
+ * reports them through ereport() at the log level given by elevel.
+ *
+ * The message lists the size of the main fork (block count times BLCKSZ), the
+ * summed data length and row count of all stripes, the stripe count with the
+ * average number of rows per stripe, the number of non-empty blocks, how many
+ * of those blocks belong to dropped columns, and the number of non-empty
+ * blocks per compression type.
+ */
+static void
+LogRelationStats(Relation rel, int elevel)
+{
+	Oid relfilenode = rel->rd_node.relNode;
+	TupleDesc tupdesc = RelationGetDescr(rel);
+	StringInfo infoBuf = makeStringInfo();
+	DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relfilenode);
+	int stripeCount = list_length(datafileMetadata->stripeMetadataList);
+	ListCell *stripeMetadataCell = NULL;
+
+	int compressionStats[COMPRESSION_COUNT] = { 0 };
+	uint64 totalStripeLength = 0;
+	uint64 tupleCount = 0;
+	uint64 blockCount = 0;
+	uint64 droppedBlocksWithData = 0;
+	uint64 averageRowsPerStripe = 0;
+	uint64 relPages = 0;
+
+	foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList)
+	{
+		StripeMetadata *stripe = lfirst(stripeMetadataCell);
+		StripeSkipList *skiplist = ReadStripeSkipList(relfilenode, stripe->id,
+													  tupdesc,
+													  stripe->blockCount);
+
+		for (uint32 column = 0; column < skiplist->columnCount; column++)
+		{
+			bool attrDropped = TupleDescAttr(tupdesc, column)->attisdropped;
+
+			for (uint32 block = 0; block < skiplist->blockCount; block++)
+			{
+				ColumnBlockSkipNode *skipnode =
+					&skiplist->blockSkipNodeArray[column][block];
+				int typeIndex = (int) skipnode->valueCompressionType;
+
+				/* zero length blocks carry no data and are not counted */
+				if (skipnode->valueLength > 0)
+				{
+					blockCount++;
+
+					/*
+					 * Only count types that have a slot in compressionStats,
+					 * so that an unexpected type value cannot write outside
+					 * of the array.
+					 */
+					if (typeIndex >= 0 && typeIndex < COMPRESSION_COUNT)
+					{
+						compressionStats[typeIndex]++;
+					}
+
+					if (attrDropped)
+					{
+						droppedBlocksWithData++;
+					}
+				}
+			}
+		}
+
+		tupleCount += stripe->rowCount;
+		totalStripeLength += stripe->dataLength;
+	}
+
+	/* a relation without stripes must not cause a division by zero */
+	if (stripeCount > 0)
+	{
+		averageRowsPerStripe = tupleCount / stripeCount;
+	}
+
+	RelationOpenSmgr(rel);
+	relPages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM);
+	RelationCloseSmgr(rel);
+
+	/* UINT64_FORMAT keeps the conversions in sync with the uint64 arguments */
+	appendStringInfo(infoBuf,
+					 "total file size: " UINT64_FORMAT
+					 ", total data size: " UINT64_FORMAT "\n",
+					 relPages * BLCKSZ, totalStripeLength);
+	appendStringInfo(infoBuf,
+					 "total row count: " UINT64_FORMAT ", stripe count: %d, "
+					 "average rows per stripe: " UINT64_FORMAT "\n",
+					 tupleCount, stripeCount, averageRowsPerStripe);
+	appendStringInfo(infoBuf,
+					 "block count: " UINT64_FORMAT
+					 ", containing data for dropped columns: " UINT64_FORMAT,
+					 blockCount, droppedBlocksWithData);
+	for (int compressionType = 0; compressionType < COMPRESSION_COUNT; compressionType++)
+	{
+		appendStringInfo(infoBuf,
+						 ", %s compressed: %d",
+						 CompressionTypeStr(compressionType),
+						 compressionStats[compressionType]);
+	}
+	appendStringInfoString(infoBuf, "\n");
+
+	ereport(elevel, (errmsg("statistics for \"%s\":\n%s", RelationGetRelationName(rel),
+							infoBuf->data)));
+}
+
+
+/*
+ * CompressionTypeStr returns the string representation of a compression type:
+ * "none" for COMPRESSION_NONE, "pglz" for COMPRESSION_PG_LZ and "unknown" for
+ * any other value. The result points to a string literal, so callers must not
+ * modify or free it.
+ */
+static const char *
+CompressionTypeStr(CompressionType type)
+{
+	switch (type)
+	{
+		case COMPRESSION_NONE:
+			return "none";
+
+		case COMPRESSION_PG_LZ:
+			return "pglz";
+
+		default:
+			return "unknown";
+	}
 }
 
 
diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index 7a1ff2777..9552f6ade 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -134,7 +134,14 @@ SELECT pg_size_pretty(pg_relation_size('t')); COMMIT; -- vacuum should truncate the relation to the usable space -VACUUM t; +VACUUM VERBOSE t; +INFO: "t": truncated 7 to 2 pages +DETAIL: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +INFO: statistics for "t": +total file size: 16384, total data size: 10754 +total row count: 2530, stripe count: 3, average rows per stripe: 843 +block count: 3, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 0 + SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty ---------------- @@ -147,6 +154,55 @@ SELECT count(*) FROM t; 2530 (1 row) +-- add some stripes with different compression types and create some gaps, +-- then vacuum to print stats +BEGIN; +SET cstore.block_row_count TO 1000; +SET cstore.stripe_row_count TO 2000; +SET cstore.compression TO "pglz"; +SAVEPOINT s1; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s1; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +SET cstore.compression TO "none"; +SAVEPOINT s2; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s2; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +COMMIT; +VACUUM VERBOSE t; +INFO: statistics for "t": +total file size: 24576, total data size: 18808 +total row count: 5530, stripe count: 5, average rows per stripe: 1106 +block count: 7, containing data for dropped columns: 0, none compressed: 5, pglz compressed: 2 + +SELECT count(*) FROM t; + count +------- + 5530 +(1 row) + +-- check that we report blocks with data for dropped columns +ALTER TABLE t ADD
COLUMN c int; +INSERT INTO t SELECT 1, i / 5 FROM generate_series(1, 1500) i; +ALTER TABLE t DROP COLUMN c; +VACUUM VERBOSE t; +INFO: statistics for "t": +total file size: 32768, total data size: 31372 +total row count: 7030, stripe count: 6, average rows per stripe: 1171 +block count: 11, containing data for dropped columns: 2, none compressed: 9, pglz compressed: 2 + +-- vacuum full should remove blocks for dropped columns +-- note that, a block will be stored in non-compressed form if compression +-- doesn't reduce its size. +SET cstore.compression TO "pglz"; +VACUUM FULL t; +VACUUM VERBOSE t; +INFO: statistics for "t": +total file size: 16384, total data size: 15728 +total row count: 7030, stripe count: 4, average rows per stripe: 1757 +block count: 8, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 6 + DROP TABLE t; -- Make sure we cleaned the metadata for t too SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; diff --git a/expected/am_vacuum_vs_insert.out b/expected/am_vacuum_vs_insert.out index ae23d9a26..767604251 100644 --- a/expected/am_vacuum_vs_insert.out +++ b/expected/am_vacuum_vs_insert.out @@ -11,6 +11,11 @@ step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; s2: INFO: "test_vacuum_vs_insert": stopping truncate due to conflicting lock request +s2: INFO: statistics for "test_vacuum_vs_insert": +total file size: 8192, total data size: 26 +total row count: 3, stripe count: 1, average rows per stripe: 3 +block count: 2, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 0 + step s2-vacuum: VACUUM VERBOSE test_vacuum_vs_insert; diff --git a/specs/am_vacuum_vs_insert.spec b/specs/am_vacuum_vs_insert.spec index ac2d83667..59c7274d5 100644 --- a/specs/am_vacuum_vs_insert.spec +++ b/specs/am_vacuum_vs_insert.spec @@ -44,4 +44,3 @@ step "s2-select" permutation "s1-insert" "s1-begin" "s1-insert" "s2-vacuum" "s1-commit" "s2-select" permutation
"s1-insert" "s1-begin" "s1-insert" "s2-vacuum-full" "s1-commit" "s2-select" - diff --git a/specs/create.spec b/specs/create.spec index f8e874678..09fc32131 100644 --- a/specs/create.spec +++ b/specs/create.spec @@ -5,4 +5,3 @@ step "s1a" } permutation "s1a" - diff --git a/sql/am_vacuum.sql b/sql/am_vacuum.sql index 10d1c7f6c..f7f9d77bd 100644 --- a/sql/am_vacuum.sql +++ b/sql/am_vacuum.sql @@ -57,10 +57,46 @@ SELECT pg_size_pretty(pg_relation_size('t')); COMMIT; -- vacuum should truncate the relation to the usable space -VACUUM t; +VACUUM VERBOSE t; SELECT pg_size_pretty(pg_relation_size('t')); SELECT count(*) FROM t; +-- add some stripes with different compression types and create some gaps, +-- then vacuum to print stats + +BEGIN; +SET cstore.block_row_count TO 1000; +SET cstore.stripe_row_count TO 2000; +SET cstore.compression TO "pglz"; +SAVEPOINT s1; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s1; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +SET cstore.compression TO "none"; +SAVEPOINT s2; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s2; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +COMMIT; + +VACUUM VERBOSE t; + +SELECT count(*) FROM t; + +-- check that we report blocks with data for dropped columns +ALTER TABLE t ADD COLUMN c int; +INSERT INTO t SELECT 1, i / 5 FROM generate_series(1, 1500) i; +ALTER TABLE t DROP COLUMN c; + +VACUUM VERBOSE t; + +-- vacuum full should remove blocks for dropped columns +-- note that, a block will be stored in non-compressed form if compression +-- doesn't reduce its size. +SET cstore.compression TO "pglz"; +VACUUM FULL t; +VACUUM VERBOSE t; + DROP TABLE t; -- Make sure we cleaned the metadata for t too