From 2ede755107fc8389ef3b38a0e62f67ce4ae2fc93 Mon Sep 17 00:00:00 2001
From: Hadi Moshayedi <hadi@moshayedi.net>
Date: Mon, 5 Oct 2020 10:34:52 -0700
Subject: [PATCH] Initial version of VACUUM

---
 cstore_tableam.c       | 137 ++++++++++++++++++++++++++++++++++++++++-
 expected/am_vacuum.out |  52 ++++++++++++++++
 sql/am_vacuum.sql      |  20 ++++++
 3 files changed, 208 insertions(+), 1 deletion(-)

diff --git a/cstore_tableam.c b/cstore_tableam.c
index 39a0695e2..59df86fb2 100644
--- a/cstore_tableam.c
+++ b/cstore_tableam.c
@@ -32,6 +32,7 @@
 #include "storage/procarray.h"
 #include "storage/smgr.h"
 #include "utils/builtins.h"
+#include "utils/pg_rusage.h"
 #include "utils/rel.h"
 #include "utils/syscache.h"
 
@@ -40,6 +41,15 @@
 
 #define CSTORE_TABLEAM_NAME "cstore_tableam"
 
+/*
+ * Timing parameters for truncate locking heuristics.
+ *
+ * These are the same values as in src/backend/access/heap/vacuumlazy.c.
+ */
+#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20      /* ms; not referenced yet */
+#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50       /* ms; sleep between lock tries */
+#define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000               /* ms; total lock wait budget */
+
 typedef struct CStoreScanDescData
 {
 	TableScanDescData cs_base;
@@ -59,6 +69,9 @@ static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId,
 										  void *arg);
 static bool IsCStoreTableAmTable(Oid relationId);
 
+/* Forward declaration: called from cstore_vacuum_rel, defined after it. */
+static void TruncateCStore(Relation rel, int elevel);
+
 static CStoreOptions *
 CStoreTableAMGetOptions(void)
 {
@@ -575,6 +588,128 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 }
 
 
+/*
+ * cstore_vacuum_rel handles VACUUM when the FULL option is not given.
+ */
+static void
+cstore_vacuum_rel(Relation rel, VacuumParams *params,
+				  BufferAccessStrategy bstrategy)
+{
+	bool verbose = (params->options & VACOPT_VERBOSE) != 0;
+
+	/* vacuum.c is expected to have resolved the default setting by now */
+	Assert(params->truncate != VACOPT_TERNARY_DEFAULT);
+
+	/*
+	 * No updates or deletes to clean up; just trim unused space at the end.
+	 */
+	if (params->truncate != VACOPT_TERNARY_ENABLED)
+	{
+		return;
+	}
+	TruncateCStore(rel, verbose ? INFO : DEBUG2);
+}
+
+
+/*
+ * TruncateCStore truncates the unused space at the end of main fork for
+ * a cstore table. This unused space can be created by aborted transactions.
+ * Messages are reported at elevel. If the AccessExclusiveLock cannot be
+ * obtained within the retry limit, the function returns without truncating.
+ * Based on the heap truncation logic in vacuumlazy.c, adapted for cstore.
+ */
+static void
+TruncateCStore(Relation rel, int elevel)
+{
+	PGRUsage ru0;
+	int lock_retry = 0;
+	BlockNumber old_rel_pages = 0;
+	BlockNumber new_rel_pages = 0;
+	DataFileMetadata *metadata = NULL;
+	ListCell *stripeMetadataCell = NULL;
+
+	pg_rusage_init(&ru0);
+
+	/* Report that we are now truncating */
+	pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
+								 PROGRESS_VACUUM_PHASE_TRUNCATE);
+
+	/*
+	 * We need an AccessExclusiveLock to do the truncation.
+	 * Loop until we acquire the lock or the retry threshold is reached.
+	 */
+	while (true)
+	{
+		if (ConditionalLockRelation(rel, AccessExclusiveLock))
+		{
+			break;
+		}
+
+		/* Check for interrupts while trying to acquire the exclusive lock. */
+		CHECK_FOR_INTERRUPTS();
+
+		if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
+							VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
+		{
+			/*
+			 * We failed to establish the lock in the specified number of
+			 * retries. This means we give up truncating.
+			 */
+			ereport(elevel,
+					(errmsg("\"%s\": stopping truncate due to conflicting lock request",
+							RelationGetRelationName(rel))));
+			return;
+		}
+
+		pg_usleep(VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL * 1000L);
+	}
+
+	RelationOpenSmgr(rel);
+	old_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM);
+	RelationCloseSmgr(rel);
+
+	/* loop over stripes and find max used block */
+	metadata = ReadDataFileMetadata(rel->rd_node.relNode);
+	foreach(stripeMetadataCell, metadata->stripeMetadataList)
+	{
+		StripeMetadata *stripe = lfirst(stripeMetadataCell);
+		uint64 lastByte = stripe->fileOffset + stripe->dataLength - 1;
+		SmgrAddr addr = logical_to_smgr(lastByte);
+		new_rel_pages = Max(new_rel_pages, addr.blockno + 1);
+	}
+
+	/*
+	 * Nothing to truncate. Use >= rather than == so that we never ask for a
+	 * truncation to more pages than the relation currently has.
+	 */
+	if (new_rel_pages >= old_rel_pages)
+	{
+		UnlockRelation(rel, AccessExclusiveLock);
+		return;
+	}
+
+	/*
+	 * Truncate the storage. RelationTruncate() takes care of WAL-logging.
+	 */
+	RelationTruncate(rel, new_rel_pages);
+
+	/*
+	 * We can release the exclusive lock as soon as we have truncated.
+	 * Other backends can't safely access the relation until they have
+	 * processed the smgr invalidation that smgrtruncate sent out ... but
+	 * that should happen as part of standard invalidation processing once
+	 * they acquire lock on the relation.
+	 */
+	UnlockRelation(rel, AccessExclusiveLock);
+
+	ereport(elevel,
+			(errmsg("\"%s\": truncated %u to %u pages",
+					RelationGetRelationName(rel),
+					old_rel_pages, new_rel_pages),
+			 errdetail_internal("%s", pg_rusage_show(&ru0))));
+}
+
+
 static bool
 cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
 							   BufferAccessStrategy bstrategy)
@@ -853,7 +988,7 @@ static const TableAmRoutine cstore_am_methods = {
 	.relation_nontransactional_truncate = cstore_relation_nontransactional_truncate,
 	.relation_copy_data = cstore_relation_copy_data,
 	.relation_copy_for_cluster = cstore_relation_copy_for_cluster,
-	.relation_vacuum = heap_vacuum_rel,
+	.relation_vacuum = cstore_vacuum_rel,
 	.scan_analyze_next_block = cstore_scan_analyze_next_block,
 	.scan_analyze_next_tuple = cstore_scan_analyze_next_tuple,
 	.index_build_range_scan = cstore_index_build_range_scan,
diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out
index dbeddca2b..7a1ff2777 100644
--- a/expected/am_vacuum.out
+++ b/expected/am_vacuum.out
@@ -95,6 +95,58 @@ SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files;
         1
 (1 row)
 
+-- do this in a transaction so concurrent autovacuum doesn't interfere with results
+BEGIN;
+SAVEPOINT s1;
+SELECT count(*) FROM t;
+ count 
+-------
+  2530
+(1 row)
+
+SELECT pg_size_pretty(pg_relation_size('t'));
+ pg_size_pretty 
+----------------
+ 16 kB
+(1 row)
+
+INSERT INTO t SELECT i FROM generate_series(1, 10000) i;
+SELECT pg_size_pretty(pg_relation_size('t'));
+ pg_size_pretty 
+----------------
+ 56 kB
+(1 row)
+
+SELECT count(*) FROM t;
+ count 
+-------
+ 12530
+(1 row)
+
+ROLLBACK TO SAVEPOINT s1;
+-- not truncated by VACUUM or autovacuum yet (being in transaction ensures this),
+-- so relation size should be same as before.
+SELECT pg_size_pretty(pg_relation_size('t'));
+ pg_size_pretty 
+----------------
+ 56 kB
+(1 row)
+
+COMMIT;
+-- vacuum should truncate the relation to the usable space
+VACUUM t;
+SELECT pg_size_pretty(pg_relation_size('t'));
+ pg_size_pretty 
+----------------
+ 16 kB
+(1 row)
+
+SELECT count(*) FROM t;
+ count 
+-------
+  2530
+(1 row)
+
 DROP TABLE t;
 -- Make sure we cleaned the metadata for t too
 SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files;
diff --git a/sql/am_vacuum.sql b/sql/am_vacuum.sql
index 8cb70167d..10d1c7f6c 100644
--- a/sql/am_vacuum.sql
+++ b/sql/am_vacuum.sql
@@ -41,6 +41,26 @@ SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cs
 -- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands
 SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files;
 
+-- do this in a transaction so concurrent autovacuum doesn't interfere with results
+BEGIN;
+SAVEPOINT s1;
+SELECT count(*) FROM t;
+SELECT pg_size_pretty(pg_relation_size('t'));
+INSERT INTO t SELECT i FROM generate_series(1, 10000) i;
+SELECT pg_size_pretty(pg_relation_size('t'));
+SELECT count(*) FROM t;
+ROLLBACK TO SAVEPOINT s1;
+
+-- not truncated by VACUUM or autovacuum yet (being in transaction ensures this),
+-- so relation size should be same as before.
+SELECT pg_size_pretty(pg_relation_size('t'));
+COMMIT;
+
+-- vacuum should truncate the relation to the usable space
+VACUUM t;
+SELECT pg_size_pretty(pg_relation_size('t'));
+SELECT count(*) FROM t;
+
 DROP TABLE t;
 
 -- Make sure we cleaned the metadata for t too