From 94f30a04284d7170c21071809a3ad717dc910d89 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Tue, 1 Jun 2021 02:19:55 +0300 Subject: [PATCH 1/7] Refactor index check in ColumnarProcessUtility --- src/backend/columnar/columnar_tableam.c | 24 ++++++++++++------------ src/backend/distributed/commands/index.c | 3 +-- src/include/distributed/commands.h | 1 + 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/backend/columnar/columnar_tableam.c b/src/backend/columnar/columnar_tableam.c index 9977a66a8..e6cf2c0e3 100644 --- a/src/backend/columnar/columnar_tableam.c +++ b/src/backend/columnar/columnar_tableam.c @@ -1348,24 +1348,24 @@ ColumnarProcessUtility(PlannedStmt *pstmt, { IndexStmt *indexStmt = (IndexStmt *) parsetree; - /* - * We should reject CREATE INDEX CONCURRENTLY before DefineIndex() is - * called. Erroring in callbacks called from DefineIndex() will create - * the index and mark it as INVALID, which will cause segfault during - * inserts. - */ - if (indexStmt->concurrent) + Relation rel = relation_openrv(indexStmt->relation, + GetCreateIndexRelationLockMode(indexStmt)); + if (rel->rd_tableam == GetColumnarTableAmRoutine()) { - Relation rel = relation_openrv(indexStmt->relation, - ShareUpdateExclusiveLock); - if (rel->rd_tableam == GetColumnarTableAmRoutine()) + /* + * We should reject CREATE INDEX CONCURRENTLY before DefineIndex() is + * called. Erroring in callbacks called from DefineIndex() will create + * the index and mark it as INVALID, which will cause segfault during + * inserts. + */ + if (indexStmt->concurrent) { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("indexes not supported for columnar tables"))); } - - RelationClose(rel); } + + RelationClose(rel); } PrevProcessUtilityHook(pstmt, queryString, context, diff --git a/src/backend/distributed/commands/index.c b/src/backend/distributed/commands/index.c index 1f66e512b..ee553de1e 100644 --- a/src/backend/distributed/commands/index.c +++ b/src/backend/distributed/commands/index.c @@ -62,7 +62,6 @@ static List * GenerateIndexParameters(IndexStmt *createIndexStatement); static DDLJob * GenerateCreateIndexDDLJob(IndexStmt *createIndexStatement, const char *createIndexCommand); static Oid CreateIndexStmtGetRelationId(IndexStmt *createIndexStatement); -static LOCKMODE GetCreateIndexRelationLockMode(IndexStmt *createIndexStatement); static List * CreateIndexTaskList(IndexStmt *indexStmt); static List * CreateReindexTaskList(Oid relationId, ReindexStmt *reindexStmt); static void RangeVarCallbackForDropIndex(const RangeVar *rel, Oid relOid, Oid oldRelOid, @@ -502,7 +501,7 @@ CreateIndexStmtGetRelationId(IndexStmt *createIndexStatement) * GetCreateIndexRelationLockMode returns required lock mode to open the * relation that given CREATE INDEX command operates on. 
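+ * (A sketch of the expected mapping, inferred from the lock modes used by
+ * the columnar utility hook above rather than restated from this function's
+ * body: CREATE INDEX CONCURRENTLY should map to ShareUpdateExclusiveLock --
+ * the lock the old columnar check acquired explicitly -- while a plain
+ * CREATE INDEX is expected to map to ShareLock.)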
*/ -static LOCKMODE +LOCKMODE GetCreateIndexRelationLockMode(IndexStmt *createIndexStatement) { if (createIndexStatement->concurrent) diff --git a/src/include/distributed/commands.h b/src/include/distributed/commands.h index 4810e5915..a5070f2ad 100644 --- a/src/include/distributed/commands.h +++ b/src/include/distributed/commands.h @@ -266,6 +266,7 @@ extern char * ChooseIndexName(const char *tabname, Oid namespaceId, bool primary, bool isconstraint); extern char * ChooseIndexNameAddition(List *colnames); extern List * ChooseIndexColumnNames(List *indexElems); +extern LOCKMODE GetCreateIndexRelationLockMode(IndexStmt *createIndexStatement); extern List * PreprocessReindexStmt(Node *ReindexStatement, const char *ReindexCommand, ProcessUtilityContext processUtilityContext); From 10a762aa88be2188bc3fde61635af2b5e29a0f02 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Thu, 15 Apr 2021 01:40:05 +0300 Subject: [PATCH 2/7] Implement columnar index support functions --- src/backend/columnar/columnar_metadata.c | 62 +++- src/backend/columnar/columnar_reader.c | 107 ++++++ src/backend/columnar/columnar_tableam.c | 262 +++++++++++++- src/include/columnar/columnar.h | 5 + src/test/regress/expected/columnar_alter.out | 21 +- src/test/regress/expected/columnar_create.out | 2 - .../regress/expected/columnar_indexes.out | 334 +++++++++++++++++- src/test/regress/sql/columnar_alter.sql | 14 +- src/test/regress/sql/columnar_create.sql | 1 - src/test/regress/sql/columnar_indexes.sql | 260 +++++++++++++- 10 files changed, 1029 insertions(+), 39 deletions(-) diff --git a/src/backend/columnar/columnar_metadata.c b/src/backend/columnar/columnar_metadata.c index e44bccc92..af62227a6 100644 --- a/src/backend/columnar/columnar_metadata.c +++ b/src/backend/columnar/columnar_metadata.c @@ -12,7 +12,7 @@ * min/max values (used for Chunk Group Filtering) * * useful for fast VACUUM operations (e.g. reporting with VACUUM VERBOSE) * * useful for stats/costing - * * TODO: maps logical row numbers to stripe IDs + * * maps logical row numbers to stripe IDs * * TODO: visibility information * *------------------------------------------------------------------------- @@ -77,6 +77,7 @@ static uint32 * ReadChunkGroupRowCounts(uint64 storageId, uint64 stripe, uint32 static Oid ColumnarStorageIdSequenceRelationId(void); static Oid ColumnarStripeRelationId(void); static Oid ColumnarStripePKeyIndexRelationId(void); +static Oid ColumnarStripeFirstRowNumberIndexRelationId(void); static Oid ColumnarOptionsRelationId(void); static Oid ColumnarOptionsIndexRegclass(void); static Oid ColumnarChunkRelationId(void); @@ -620,6 +621,53 @@ ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe, TupleDesc tupleDescri } +/* + * FindStripeByRowNumber returns StripeMetadata for the stripe that has the + * row with rowNumber by doing backward index scan on + * stripe_first_row_number_idx. If no such row exists, then returns NULL. 
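+ *
+ * An illustration with a hypothetical stripe layout (not taken from the
+ * tests): given stripes whose (firstRowNumber, rowCount) pairs are
+ * (1, 150000) and (150001, 150000), FindStripeByRowNumber(rel, 150005,
+ * snapshot) scans backwards to the greatest firstRowNumber <= 150005,
+ * i.e. 150001, and returns that stripe only after checking that
+ * 150005 < firstRowNumber + rowCount holds.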
+ */ +StripeMetadata * +FindStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot) +{ + StripeMetadata *foundStripeMetadata = NULL; + + uint64 storageId = ColumnarStorageGetStorageId(relation, false); + ScanKeyData scanKey[2]; + ScanKeyInit(&scanKey[0], Anum_columnar_stripe_storageid, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(storageId)); + ScanKeyInit(&scanKey[1], Anum_columnar_stripe_first_row_number, + BTLessEqualStrategyNumber, F_INT8LE, UInt64GetDatum(rowNumber)); + + Relation columnarStripes = table_open(ColumnarStripeRelationId(), AccessShareLock); + Relation index = index_open(ColumnarStripeFirstRowNumberIndexRelationId(), + AccessShareLock); + SysScanDesc scanDescriptor = systable_beginscan_ordered(columnarStripes, index, + snapshot, 2, + scanKey); + + HeapTuple heapTuple = systable_getnext_ordered(scanDescriptor, BackwardScanDirection); + if (HeapTupleIsValid(heapTuple)) + { + TupleDesc tupleDescriptor = RelationGetDescr(columnarStripes); + Datum datumArray[Natts_columnar_stripe]; + bool isNullArray[Natts_columnar_stripe]; + heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); + + StripeMetadata *stripeMetadata = BuildStripeMetadata(datumArray); + if (rowNumber < stripeMetadata->firstRowNumber + stripeMetadata->rowCount) + { + foundStripeMetadata = stripeMetadata; + } + } + + systable_endscan_ordered(scanDescriptor); + index_close(index, AccessShareLock); + table_close(columnarStripes, AccessShareLock); + + return foundStripeMetadata; +} + + /* * ReadChunkGroupRowCounts returns an array of row counts of chunk groups for the * given stripe. @@ -1153,6 +1201,18 @@ ColumnarStripePKeyIndexRelationId(void) } +/* + * ColumnarStripeFirstRowNumberIndexRelationId returns relation id of + * columnar.stripe_first_row_number_idx. + * TODO: should we cache this similar to citus? + */ +static Oid +ColumnarStripeFirstRowNumberIndexRelationId(void) +{ + return get_relname_relid("stripe_first_row_number_idx", ColumnarNamespaceId()); +} + + /* * ColumnarOptionsRelationId returns relation id of columnar.options. */ diff --git a/src/backend/columnar/columnar_reader.c b/src/backend/columnar/columnar_reader.c index 64029a5c6..0f1a1767f 100644 --- a/src/backend/columnar/columnar_reader.c +++ b/src/backend/columnar/columnar_reader.c @@ -35,6 +35,7 @@ #include "columnar/columnar.h" #include "columnar/columnar_storage.h" +#include "columnar/columnar_tableam.h" #include "columnar/columnar_version_compat.h" typedef struct ChunkGroupReadState @@ -85,6 +86,14 @@ struct ColumnarReadState /* static function declarations */ static MemoryContext CreateStripeReadMemoryContext(void); +static void ReadStripeRowByRowNumber(StripeReadState *stripeReadState, + StripeMetadata *stripeMetadata, + uint64 rowNumber, Datum *columnValues, + bool *columnNulls); +static void ReadChunkGroupRowByRowOffset(ChunkGroupReadState *chunkGroupReadState, + StripeMetadata *stripeMetadata, + uint64 stripeRowOffset, Datum *columnValues, + bool *columnNulls); static bool StripeReadInProgress(ColumnarReadState *readState); static bool HasUnreadStripe(ColumnarReadState *readState); static StripeReadState * BeginStripeRead(StripeMetadata *stripeMetadata, Relation rel, @@ -243,6 +252,104 @@ ColumnarReadNextRow(ColumnarReadState *readState, Datum *columnValues, bool *col } +/* + * ColumnarReadRowByRowNumber reads row with rowNumber from given relation + * into columnValues and columnNulls, and returns true. If no such row + * exists, then returns false. 
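+ *
+ * A minimal sketch of a call site (variable names here are hypothetical;
+ * the caller sizes the arrays to the relation's tuple descriptor):
+ *
+ *   Datum columnValues[natts];
+ *   bool columnNulls[natts];
+ *   bool found = ColumnarReadRowByRowNumber(rel, rowNumber,
+ *                                           neededColumnList, columnValues,
+ *                                           columnNulls, snapshot);
+ *
+ * Internally, the stripe is located with FindStripeByRowNumber and the row
+ * is then picked out of the right chunk group by its offset within the
+ * stripe (stripeRowOffset / chunkGroupRowCount selects the chunk group).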
+ */
+bool
+ColumnarReadRowByRowNumber(Relation relation, uint64 rowNumber,
+						   List *neededColumnList, Datum *columnValues,
+						   bool *columnNulls, Snapshot snapshot)
+{
+	StripeMetadata *stripeMetadata = FindStripeByRowNumber(relation, rowNumber, snapshot);
+	if (stripeMetadata == NULL)
+	{
+		/* no such row exists */
+		return false;
+	}
+
+	TupleDesc relationTupleDesc = RelationGetDescr(relation);
+	List *whereClauseList = NIL;
+	List *whereClauseVars = NIL;
+	MemoryContext stripeReadContext = CreateStripeReadMemoryContext();
+	StripeReadState *stripeReadState = BeginStripeRead(stripeMetadata,
+													   relation,
+													   relationTupleDesc,
+													   neededColumnList,
+													   whereClauseList,
+													   whereClauseVars,
+													   stripeReadContext);
+
+	ReadStripeRowByRowNumber(stripeReadState, stripeMetadata, rowNumber,
+							 columnValues, columnNulls);
+
+	EndStripeRead(stripeReadState);
+	MemoryContextReset(stripeReadContext);
+
+	return true;
+}
+
+
+/*
+ * ReadStripeRowByRowNumber reads row with rowNumber from given
+ * stripeReadState into columnValues and columnNulls.
+ * Errors out if no such row exists in the stripe being read.
+ */
+static void
+ReadStripeRowByRowNumber(StripeReadState *stripeReadState,
+						 StripeMetadata *stripeMetadata,
+						 uint64 rowNumber, Datum *columnValues,
+						 bool *columnNulls)
+{
+	if (rowNumber < stripeMetadata->firstRowNumber)
+	{
+		/* not expected but be on the safe side */
+		ereport(ERROR, (errmsg("row offset cannot be negative")));
+	}
+
+	/* find the exact chunk group to be read */
+	uint64 stripeRowOffset = rowNumber - stripeMetadata->firstRowNumber;
+	stripeReadState->chunkGroupIndex = stripeRowOffset /
+									   stripeMetadata->chunkGroupRowCount;
+	stripeReadState->chunkGroupReadState = BeginChunkGroupRead(
+		stripeReadState->stripeBuffers,
+		stripeReadState->chunkGroupIndex,
+		stripeReadState->tupleDescriptor,
+		stripeReadState->projectedColumnList,
+		stripeReadState->stripeReadContext);
+
+	ReadChunkGroupRowByRowOffset(stripeReadState->chunkGroupReadState,
+								 stripeMetadata, stripeRowOffset,
+								 columnValues, columnNulls);
+
+	EndChunkGroupRead(stripeReadState->chunkGroupReadState);
+	stripeReadState->chunkGroupReadState = NULL;
+}
+
+
+/*
+ * ReadChunkGroupRowByRowOffset reads row with stripeRowOffset from given
+ * chunkGroupReadState into columnValues and columnNulls.
+ * Errors out if no such row exists in the chunk group being read.
+ */
+static void
+ReadChunkGroupRowByRowOffset(ChunkGroupReadState *chunkGroupReadState,
+							 StripeMetadata *stripeMetadata,
+							 uint64 stripeRowOffset, Datum *columnValues,
+							 bool *columnNulls)
+{
+	/* set the exact row number to be read from given chunk group */
+	chunkGroupReadState->currentRow = stripeRowOffset %
+									  stripeMetadata->chunkGroupRowCount;
+	if (!ReadChunkGroupNextRow(chunkGroupReadState, columnValues, columnNulls))
+	{
+		/* not expected but be on the safe side */
+		ereport(ERROR, (errmsg("could not find the row in stripe")));
+	}
+}
+
+
 /*
  * StripeReadInProgress returns true if we already started reading a stripe.
*/ diff --git a/src/backend/columnar/columnar_tableam.c b/src/backend/columnar/columnar_tableam.c index e6cf2c0e3..944032b84 100644 --- a/src/backend/columnar/columnar_tableam.c +++ b/src/backend/columnar/columnar_tableam.c @@ -56,6 +56,7 @@ #include "columnar/columnar_version_compat.h" #include "distributed/commands.h" #include "distributed/commands/utility_hook.h" +#include "distributed/listutils.h" #include "distributed/metadata_cache.h" /* @@ -111,7 +112,14 @@ static HeapTuple ColumnarSlotCopyHeapTuple(TupleTableSlot *slot); static void ColumnarCheckLogicalReplication(Relation rel); static Datum * detoast_values(TupleDesc tupleDesc, Datum *orig_values, bool *isnull); static ItemPointerData row_number_to_tid(uint64 rowNumber); +static uint64 tid_to_row_number(ItemPointerData tid); static void ErrorIfInvalidRowNumber(uint64 rowNumber); +static double ColumnarReadRowsIntoIndex(TableScanDesc scan, + Relation indexRelation, + IndexInfo *indexInfo, + IndexBuildCallback indexCallback, + void *indexCallbackState, + EState *estate, ExprState *predicate); /* Custom tuple slot ops used for columnar. Initialized in columnar_tableam_init(). */ static TupleTableSlotOps TTSOpsColumnar; @@ -294,6 +302,21 @@ row_number_to_tid(uint64 rowNumber) } +/* + * tid_to_row_number maps given ItemPointerData to rowNumber. + */ +static uint64 +tid_to_row_number(ItemPointerData tid) +{ + uint64 rowNumber = ItemPointerGetBlockNumber(&tid) * VALID_ITEMPOINTER_OFFSETS + + ItemPointerGetOffsetNumber(&tid) - FirstOffsetNumber; + + ErrorIfInvalidRowNumber(rowNumber); + + return rowNumber; +} + + /* * ErrorIfInvalidRowNumber errors out if given rowNumber is invalid. */ @@ -341,24 +364,34 @@ columnar_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan) static IndexFetchTableData * columnar_index_fetch_begin(Relation rel) { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("indexes not supported for columnar tables"))); + Oid relfilenode = rel->rd_node.relNode; + if (PendingWritesInUpperTransactions(relfilenode, GetCurrentSubTransactionId())) + { + /* XXX: maybe we can just flush the data and continue */ + elog(ERROR, "cannot read from index when there is unflushed data in " + "upper transactions"); + } + + FlushWriteStateForRelfilenode(relfilenode, GetCurrentSubTransactionId()); + + IndexFetchTableData *scan = palloc0(sizeof(IndexFetchTableData)); + scan->rel = rel; + return scan; } static void columnar_index_fetch_reset(IndexFetchTableData *scan) { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("indexes not supported for columnar tables"))); + /* no-op */ } static void columnar_index_fetch_end(IndexFetchTableData *scan) { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("indexes not supported for columnar tables"))); + columnar_index_fetch_reset(scan); + pfree(scan); } @@ -369,8 +402,37 @@ columnar_index_fetch_tuple(struct IndexFetchTableData *scan, TupleTableSlot *slot, bool *call_again, bool *all_dead) { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("indexes not supported for columnar tables"))); + /* no HOT chains are possible in columnar, directly set it to false */ + *call_again = false; + + /* + * No dead tuples are possible in columnar, set it to false if it's + * passed to be non-NULL. 
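+	 *
+	 * (The TID we receive from the index encodes a columnar row number;
+	 * see tid_to_row_number above. As a worked example of that mapping,
+	 * a TID with block number B and offset O decodes to row number
+	 * B * VALID_ITEMPOINTER_OFFSETS + O - FirstOffsetNumber, so row
+	 * numbers wrap into the next block every VALID_ITEMPOINTER_OFFSETS
+	 * rows.)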
+ */ + if (all_dead) + { + *all_dead = false; + } + + ExecClearTuple(slot); + + /* we need all columns */ + int natts = scan->rel->rd_att->natts; + Bitmapset *attr_needed = bms_add_range(NULL, 0, natts - 1); + TupleDesc relationTupleDesc = RelationGetDescr(scan->rel); + List *relationColumnList = NeededColumnsList(relationTupleDesc, attr_needed); + uint64 rowNumber = tid_to_row_number(*tid); + if (!ColumnarReadRowByRowNumber(scan->rel, rowNumber, relationColumnList, + slot->tts_values, slot->tts_isnull, snapshot)) + { + return false; + } + + slot->tts_tableOid = RelationGetRelid(scan->rel); + slot->tts_tid = *tid; + ExecStoreVirtualTuple(slot); + + return true; } @@ -1003,7 +1065,7 @@ columnar_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, static double -columnar_index_build_range_scan(Relation heapRelation, +columnar_index_build_range_scan(Relation columnarRelation, Relation indexRelation, IndexInfo *indexInfo, bool allow_sync, @@ -1015,8 +1077,165 @@ columnar_index_build_range_scan(Relation heapRelation, void *callback_state, TableScanDesc scan) { - ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("indexes not supported for columnar tables"))); + /* + * TODO: Should this function call pgstat_progress_update_param in + * somewhere as heapam_index_build_range_scan ? + */ + + if (start_blockno != 0 || numblocks != InvalidBlockNumber) + { + /* + * Columnar utility hook already errors out for BRIN indexes on columnar + * tables, but be on the safe side. + */ + ereport(ERROR, (errmsg("BRIN indexes on columnar tables are not supported"))); + } + + if (indexInfo->ii_Concurrent) + { + /* we already don't allow CONCURRENTLY syntax but be on the safe side */ + ereport(ERROR, (errmsg("concurrent index builds are not supported " + "for columnar tables"))); + } + + if (scan) + { + /* + * Since we don't support parallel reads on columnar tables, we + * should have already errored out for that, but be on the safe side. + */ + ereport(ERROR, (errmsg("parallel reads on columnar are not supported"))); + } + + /* + * In a normal index build, we use SnapshotAny to retrieve all tuples. In + * a concurrent build or during bootstrap, we take a regular MVCC snapshot + * and index whatever's live according to that. + */ + TransactionId OldestXmin = InvalidTransactionId; + + /* + * We already don't allow concurrent index builds so ii_Concurrent + * will always be false, but let's keep the code close to heapAM. + */ + if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent) + { + /* ignore lazy VACUUM's */ + OldestXmin = GetOldestXmin(columnarRelation, PROCARRAY_FLAGS_VACUUM); + } + + Snapshot snapshot = { 0 }; + bool snapshotRegisteredByUs = false; + if (!scan) + { + /* + * For serial index build, we begin our own scan. We may also need to + * register a snapshot whose lifetime is under our direct control. + */ + if (!TransactionIdIsValid(OldestXmin)) + { + snapshot = RegisterSnapshot(GetTransactionSnapshot()); + snapshotRegisteredByUs = true; + } + else + { + snapshot = SnapshotAny; + } + + int nkeys = 0; + ScanKeyData *scanKey = NULL; + bool allowAccessStrategy = true; + scan = table_beginscan_strat(columnarRelation, snapshot, nkeys, scanKey, + allowAccessStrategy, allow_sync); + } + else + { + /* + * For parallel index build, we don't register/unregister own snapshot + * since snapshot is taken from parallel scan. 
Note that even if we + * don't support parallel index builds, we still continue building the + * index via the main backend and we should still rely on the snapshot + * provided by parallel scan. + */ + snapshot = scan->rs_snapshot; + } + + /* + * Set up execution state for predicate, if any. + * Note that this is only useful for partial indexes. + */ + EState *estate = CreateExecutorState(); + ExprContext *econtext = GetPerTupleExprContext(estate); + econtext->ecxt_scantuple = table_slot_create(columnarRelation, NULL); + ExprState *predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate); + + double reltuples = ColumnarReadRowsIntoIndex(scan, indexRelation, indexInfo, + callback, callback_state, estate, + predicate); + table_endscan(scan); + + if (snapshotRegisteredByUs) + { + UnregisterSnapshot(snapshot); + } + + ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple); + FreeExecutorState(estate); + indexInfo->ii_ExpressionsState = NIL; + indexInfo->ii_PredicateState = NULL; + + return reltuples; +} + + +/* + * ColumnarReadRowsIntoIndex builds indexRelation tuples by reading the + * actual relation based on given "scan" and returns number of tuples + * scanned to build the indexRelation. + */ +static double +ColumnarReadRowsIntoIndex(TableScanDesc scan, Relation indexRelation, + IndexInfo *indexInfo, IndexBuildCallback indexCallback, + void *indexCallbackState, EState *estate, ExprState *predicate) +{ + double reltuples = 0; + + ExprContext *econtext = GetPerTupleExprContext(estate); + TupleTableSlot *slot = econtext->ecxt_scantuple; + while (columnar_getnextslot(scan, ForwardScanDirection, slot)) + { + CHECK_FOR_INTERRUPTS(); + + MemoryContextReset(econtext->ecxt_per_tuple_memory); + + if (predicate != NULL && !ExecQual(predicate, econtext)) + { + /* for partial indexes, discard tuples that don't satisfy the predicate */ + continue; + } + + Datum indexValues[INDEX_MAX_KEYS]; + bool indexNulls[INDEX_MAX_KEYS]; + FormIndexDatum(indexInfo, slot, estate, indexValues, indexNulls); + + ItemPointerData itemPointerData = slot->tts_tid; + + /* currently, columnar tables can't have dead tuples */ + bool tupleIsAlive = true; +#if PG_VERSION_NUM >= PG_VERSION_13 + indexCallback(indexRelation, &itemPointerData, indexValues, indexNulls, + tupleIsAlive, indexCallbackState); +#else + HeapTuple scanTuple = ExecCopySlotHeapTuple(slot); + scanTuple->t_self = itemPointerData; + indexCallback(indexRelation, scanTuple, indexValues, indexNulls, + tupleIsAlive, indexCallbackState); +#endif + + reltuples++; + } + + return reltuples; } @@ -1027,8 +1246,15 @@ columnar_index_validate_scan(Relation heapRelation, Snapshot snapshot, ValidateIndexState *state) { + /* + * This is only called for concurrent index builds, + * see table_index_validate_scan. + * Note that we already error out for concurrent index + * builds in utility hook but be on the safe side. 
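+	 * (Illustration of the guarded path, not of supported usage: a command
+	 * such as "CREATE INDEX CONCURRENTLY hypothetical_idx ON
+	 * some_columnar_table (a);" could only reach this callback if the
+	 * utility-hook check were bypassed; both names are hypothetical.)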
+	 */
 	ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					errmsg("indexes not supported for columnar tables")));
+					errmsg("concurrent index builds are not supported for "
+						   "columnar tables")));
 }
 
 
@@ -1361,7 +1587,17 @@ ColumnarProcessUtility(PlannedStmt *pstmt,
 		if (indexStmt->concurrent)
 		{
 			ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-							errmsg("indexes not supported for columnar tables")));
+							errmsg("concurrent index commands are not "
+								   "supported for columnar tables")));
+		}
+
+		/* for now, we don't support index access methods other than btree & hash */
+		if (strncmp(indexStmt->accessMethod, "btree", NAMEDATALEN) != 0 &&
+			strncmp(indexStmt->accessMethod, "hash", NAMEDATALEN) != 0)
+		{
+			ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							errmsg("only btree and hash indexes are supported on "
+								   "columnar tables")));
 		}
 	}
 
diff --git a/src/include/columnar/columnar.h b/src/include/columnar/columnar.h
index 2689d5947..8288a20aa 100644
--- a/src/include/columnar/columnar.h
+++ b/src/include/columnar/columnar.h
@@ -216,6 +216,9 @@ extern ColumnarReadState * ColumnarBeginRead(Relation relation,
 extern bool ColumnarReadNextRow(ColumnarReadState *state, Datum *columnValues,
 								bool *columnNulls, uint64 *rowNumber);
 extern void ColumnarRescan(ColumnarReadState *readState);
+extern bool ColumnarReadRowByRowNumber(Relation relation, uint64 rowNumber,
+									   List *neededColumnList, Datum *columnValues,
+									   bool *columnNulls, Snapshot snapshot);
 extern void ColumnarEndRead(ColumnarReadState *state);
 extern int64 ColumnarReadChunkGroupsFiltered(ColumnarReadState *state);
 
@@ -251,6 +254,8 @@ extern void SaveChunkGroups(RelFileNode relfilenode, uint64 stripe,
 extern StripeSkipList * ReadStripeSkipList(RelFileNode relfilenode, uint64 stripe,
 										   TupleDesc tupleDescriptor,
 										   uint32 chunkCount);
+extern StripeMetadata * FindStripeByRowNumber(Relation relation, uint64 rowNumber,
+											  Snapshot snapshot);
 extern Datum columnar_relation_storageid(PG_FUNCTION_ARGS);
 
 
diff --git a/src/test/regress/expected/columnar_alter.out b/src/test/regress/expected/columnar_alter.out
index 2bb229786..4706fa386 100644
--- a/src/test/regress/expected/columnar_alter.out
+++ b/src/test/regress/expected/columnar_alter.out
@@ -255,7 +255,6 @@ insert into atacc1 values(1);
 alter table atacc1
 	add column b float8 not null default random(),
 	add primary key(a);
-ERROR: indexes not supported for columnar tables
 -- Add a generate column with an expression value
 create table test_gen_ex (x int) using columnar;
 INSERT INTO test_gen_ex VALUES (1), (2), (3);
@@ -390,30 +389,30 @@ SELECT * FROM products ORDER BY 1;
  3 | pen | 2
 (3 rows)
 
--- Add a UNIQUE constraint (should fail)
-CREATE TABLE products_fail (
+-- Add a UNIQUE constraint
+CREATE TABLE products_unique (
     product_no integer UNIQUE,
     name text,
     price numeric
 ) USING columnar;
-ERROR: indexes not supported for columnar tables
 ALTER TABLE products ADD COLUMN store_id text UNIQUE;
-ERROR: indexes not supported for columnar tables
--- Add a PRIMARY KEY constraint (should fail)
-CREATE TABLE products_fail (
+-- Add a PRIMARY KEY constraint
+CREATE TABLE products_primary (
     product_no integer PRIMARY KEY,
     name text,
     price numeric
 ) USING columnar;
-ERROR: indexes not supported for columnar tables
-ALTER TABLE products ADD COLUMN store_id text PRIMARY KEY;
-ERROR: indexes not supported for columnar tables
+BEGIN;
+  ALTER TABLE products DROP COLUMN store_id;
+  ALTER TABLE products ADD COLUMN store_id text PRIMARY KEY;
+ERROR: column "store_id" contains null values
+ROLLBACK;
 -- Add an
EXCLUSION constraint (should fail) CREATE TABLE circles ( c circle, EXCLUDE USING gist (c WITH &&) ) USING columnar; -ERROR: indexes not supported for columnar tables +ERROR: only btree and hash indexes are supported on columnar tables -- Row level security CREATE TABLE public.row_level_security_col (id int, pgUser CHARACTER VARYING) USING columnar; CREATE USER user1; diff --git a/src/test/regress/expected/columnar_create.out b/src/test/regress/expected/columnar_create.out index 772d6ac64..704c0d932 100644 --- a/src/test/regress/expected/columnar_create.out +++ b/src/test/regress/expected/columnar_create.out @@ -11,9 +11,7 @@ SELECT alter_columnar_table_set('contestant', compression => 'none'); (1 row) --- should fail CREATE INDEX contestant_idx on contestant(handle); -ERROR: indexes not supported for columnar tables -- Create zstd compressed table CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) diff --git a/src/test/regress/expected/columnar_indexes.out b/src/test/regress/expected/columnar_indexes.out index bd1c41802..6062cae34 100644 --- a/src/test/regress/expected/columnar_indexes.out +++ b/src/test/regress/expected/columnar_indexes.out @@ -10,7 +10,7 @@ SET search_path tO columnar_indexes, public; -- create table t(a int, b int) using columnar; create index CONCURRENTLY t_idx on t(a, b); -ERROR: indexes not supported for columnar tables +ERROR: concurrent index commands are not supported for columnar tables \d t Table "columnar_indexes.t" Column | Type | Collation | Nullable | Default @@ -32,16 +32,15 @@ SELECT * FROM t; 1 | 2 (1 row) --- create index without the concurrent option. We should --- error out during index creation. create index t_idx on t(a, b); -ERROR: indexes not supported for columnar tables \d t Table "columnar_indexes.t" Column | Type | Collation | Nullable | Default --------------------------------------------------------------------- a | integer | | | b | integer | | | +Indexes: + "t_idx" btree (a, b) explain insert into t values (1, 2); QUERY PLAN @@ -58,5 +57,332 @@ SELECT * FROM t; 3 | 4 (2 rows) +-- make sure that we test index scan +set columnar.enable_custom_scan to 'off'; +set enable_seqscan to off; +CREATE table columnar_table (a INT, b int) USING columnar; +INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(0, 16000) i; +-- unique -- +BEGIN; + INSERT INTO columnar_table VALUES (100000000); + SAVEPOINT s1; + -- errors out due to unflushed data in upper transaction + CREATE UNIQUE INDEX ON columnar_table (a); +ERROR: cannot read from table when there is unflushed data in upper transactions +ROLLBACK; +CREATE UNIQUE INDEX ON columnar_table (a); +BEGIN; + INSERT INTO columnar_table VALUES (16050); + SAVEPOINT s1; + -- index scan errors out due to unflushed data in upper transaction + SELECT a FROM columnar_table WHERE a = 16050; +ERROR: cannot read from index when there is unflushed data in upper transactions +ROLLBACK; +EXPLAIN (COSTS OFF) SELECT * FROM columnar_table WHERE a=6456; + QUERY PLAN +--------------------------------------------------------------------- + Index Scan using columnar_table_a_idx on columnar_table + Index Cond: (a = 6456) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT a FROM columnar_table WHERE a=6456; + QUERY PLAN +--------------------------------------------------------------------- + Index Only Scan using columnar_table_a_idx on columnar_table + Index Cond: (a = 6456) +(2 rows) + +SELECT (SELECT a FROM columnar_table WHERE a=6456 limit 
1)=6456; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT (SELECT b FROM columnar_table WHERE a=6456 limit 1)=6456*2; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +-- even if a=16050 doesn't exist, we try to insert it twice so this should error out +INSERT INTO columnar_table VALUES (16050), (16050); +ERROR: duplicate key value violates unique constraint "columnar_table_a_idx" +DETAIL: Key (a)=(16050) already exists. +-- should work +INSERT INTO columnar_table VALUES (16050); +-- check edge cases around stripe boundaries, error out +INSERT INTO columnar_table VALUES (16050); +ERROR: duplicate key value violates unique constraint "columnar_table_a_idx" +DETAIL: Key (a)=(16050) already exists. +INSERT INTO columnar_table VALUES (15999); +ERROR: duplicate key value violates unique constraint "columnar_table_a_idx" +DETAIL: Key (a)=(15999) already exists. +DROP INDEX columnar_table_a_idx; +CREATE TABLE partial_unique_idx_test (a INT, b INT) USING columnar; +CREATE UNIQUE INDEX ON partial_unique_idx_test (a) +WHERE b > 500; +-- should work since b =< 500 and our partial index doesn't check this interval +INSERT INTO partial_unique_idx_test VALUES (1, 2), (1, 2); +-- should work since our partial index wouldn't cover the tuples that we inserted above +INSERT INTO partial_unique_idx_test VALUES (1, 800); +INSERT INTO partial_unique_idx_test VALUES (4, 600); +-- should error out due to (4, 600) +INSERT INTO partial_unique_idx_test VALUES (4, 700); +ERROR: duplicate key value violates unique constraint "partial_unique_idx_test_a_idx" +DETAIL: Key (a)=(4) already exists. +-- btree -- +CREATE INDEX ON columnar_table (a); +SELECT (SELECT SUM(b) FROM columnar_table WHERE a>700 and a<965)=439560; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +CREATE INDEX ON columnar_table (b) +WHERE (b > 30000 AND b < 33000); +-- partial index should be way smaller than the non-partial index +SELECT pg_total_relation_size('columnar_table_b_idx') * 5 < + pg_total_relation_size('columnar_table_a_idx'); + ?column? +--------------------------------------------------------------------- + t +(1 row) + +-- can't use index scan due to partial index boundaries +EXPLAIN (COSTS OFF) SELECT b FROM columnar_table WHERE b = 30000; + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on columnar_table + Filter: (b = 30000) +(2 rows) + +-- can use index scan +EXPLAIN (COSTS OFF) SELECT b FROM columnar_table WHERE b = 30001; + QUERY PLAN +--------------------------------------------------------------------- + Index Only Scan using columnar_table_b_idx on columnar_table + Index Cond: (b = 30001) +(2 rows) + +-- some more rows +INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16000, 17000) i; +DROP INDEX columnar_table_a_idx; +TRUNCATE columnar_table; +-- pkey -- +INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16000, 16499) i; +ALTER TABLE columnar_table ADD PRIMARY KEY (a); +INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16500, 17000) i; +BEGIN; + INSERT INTO columnar_table (a) SELECT 1; +ROLLBACK; +-- should work +INSERT INTO columnar_table (a) SELECT 1; +-- error out +INSERT INTO columnar_table VALUES (16100), (16101); +ERROR: duplicate key value violates unique constraint "columnar_table_pkey" +DETAIL: Key (a)=(16100) already exists. 
+INSERT INTO columnar_table VALUES (16999);
+ERROR: duplicate key value violates unique constraint "columnar_table_pkey"
+DETAIL: Key (a)=(16999) already exists.
+BEGIN;
+  REINDEX INDEX columnar_table_pkey;
+  -- should error even after reindex
+  INSERT INTO columnar_table VALUES (16999);
+ERROR: duplicate key value violates unique constraint "columnar_table_pkey"
+DETAIL: Key (a)=(16999) already exists.
+ROLLBACK;
+VACUUM FULL columnar_table;
+-- should error even after vacuum
+INSERT INTO columnar_table VALUES (16999);
+ERROR: duplicate key value violates unique constraint "columnar_table_pkey"
+DETAIL: Key (a)=(16999) already exists.
+TRUNCATE columnar_table;
+INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(1, 160000) i;
+SELECT (SELECT b FROM columnar_table WHERE a = 150000)=300000;
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
+
+TRUNCATE columnar_table;
+ALTER TABLE columnar_table DROP CONSTRAINT columnar_table_pkey;
+-- hash --
+INSERT INTO columnar_table (a, b) SELECT i*2,i FROM generate_series(1, 8000) i;
+CREATE INDEX hash_idx ON columnar_table USING HASH (b);
+BEGIN;
+  CREATE INDEX hash_idx_fill_factor ON columnar_table USING HASH (b) WITH (fillfactor=10);
+  -- same hash index with lower fillfactor should be way bigger
+  SELECT pg_total_relation_size ('hash_idx_fill_factor') >
+         pg_total_relation_size ('hash_idx') * 5;
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+BEGIN;
+  INSERT INTO columnar_table (a, b) SELECT i*3,i FROM generate_series(1, 8000) i;
+ROLLBACK;
+INSERT INTO columnar_table (a, b) SELECT i*4,i FROM generate_series(1, 8000) i;
+SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
+
+BEGIN;
+  REINDEX TABLE columnar_table;
+  SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+VACUUM FULL columnar_table;
+SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
+
+-- exclusion constraints --
+CREATE TABLE exclusion_test (c1 INT,c2 INT, c3 INT, c4 BOX,
+EXCLUDE USING btree (c1 WITH =) INCLUDE(c3,c4) WHERE (c1 < 10)) USING columnar;
+-- error out since "c1" is "1" for all rows to be inserted
+INSERT INTO exclusion_test SELECT 1, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;
+ERROR: conflicting key value violates exclusion constraint "exclusion_test_c1_c3_c4_excl"
+DETAIL: Key (c1)=(1) conflicts with existing key (c1)=(1).
+BEGIN; + INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x; +ROLLBACK; +-- should work +INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x; +INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x; +BEGIN; + -- should work thanks to "where" clause in exclusion constraint + INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x; +ROLLBACK; +REINDEX TABLE exclusion_test; +-- should still work after reindex +INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x; +-- make sure that we respect INCLUDE syntax -- +CREATE TABLE include_test (a INT, b BIGINT, c BIGINT, d BIGINT) USING columnar; +INSERT INTO include_test SELECT i, i, i, i FROM generate_series (1, 1000) i; +CREATE UNIQUE INDEX unique_a ON include_test (a); +-- cannot use index only scan +EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500; + QUERY PLAN +--------------------------------------------------------------------- + Index Scan using unique_a on include_test + Index Cond: (a = 500) +(2 rows) + +CREATE UNIQUE INDEX unique_a_include_b_c_d ON include_test (a) INCLUDE(b, c, d); +-- same unique index that includes other columns should be way bigger +SELECT pg_total_relation_size ('unique_a') * 1.5 < + pg_total_relation_size ('unique_a_include_b_c_d'); + ?column? +--------------------------------------------------------------------- + t +(1 row) + +DROP INDEX unique_a; +-- should use index only scan since unique_a_include_b_c_d includes column "b" too +EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500; + QUERY PLAN +--------------------------------------------------------------------- + Index Only Scan using unique_a_include_b_c_d on include_test + Index Cond: (a = 500) +(2 rows) + +BEGIN; + SET enable_indexonlyscan = OFF; + -- show that we respect enable_indexonlyscan GUC + EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500; + QUERY PLAN +--------------------------------------------------------------------- + Index Scan using unique_a_include_b_c_d on include_test + Index Cond: (a = 500) +(2 rows) + +ROLLBACK; +-- make sure that we read the correct value for "b" when doing index only scan +SELECT b=980 FROM include_test WHERE a = 980; + ?column? 
+---------------------------------------------------------------------
+ t
+(1 row)
+
+-- some tests with distributed & partitioned tables --
+CREATE TABLE dist_part_table(
+    dist_col INT,
+    part_col TIMESTAMPTZ,
+    col1 TEXT
+) PARTITION BY RANGE (part_col);
+-- create an index before creating a columnar partition
+CREATE INDEX dist_part_table_btree ON dist_part_table (col1);
+-- columnar partition
+CREATE TABLE p0 PARTITION OF dist_part_table
+FOR VALUES FROM ('2020-01-01') TO ('2020-02-01')
+USING columnar;
+SELECT create_distributed_table('dist_part_table', 'dist_col');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+-- columnar partition
+CREATE TABLE p1 PARTITION OF dist_part_table
+FOR VALUES FROM ('2020-02-01') TO ('2020-03-01')
+USING columnar;
+-- row partition
+CREATE TABLE p2 PARTITION OF dist_part_table
+FOR VALUES FROM ('2020-03-01') TO ('2020-04-01');
+INSERT INTO dist_part_table VALUES (1, '2020-03-15', 'str1');
+-- insert into columnar partitions
+INSERT INTO dist_part_table VALUES (1, '2020-01-15', 'str2');
+INSERT INTO dist_part_table VALUES (1, '2020-02-15', 'str3');
+-- create another index after creating a columnar partition
+CREATE UNIQUE INDEX dist_part_table_unique ON dist_part_table (dist_col, part_col);
+-- verify that indexes are created on columnar partitions
+SELECT COUNT(*)=2 FROM pg_indexes WHERE tablename = 'p0';
+ ?column?
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT COUNT(*)=2 FROM pg_indexes WHERE tablename = 'p1';
+ ?column?
+--------------------------------------------------------------------- + t +(1 row) + +-- unsupported index types -- +-- gin -- +CREATE TABLE testjsonb (j JSONB) USING columnar; +INSERT INTO testjsonb SELECT CAST('{"f1" : ' ||'"'|| i*4 ||'", ' || '"f2" : '||'"'|| i*10 ||'"}' AS JSON) FROM generate_series(1,10) i; +CREATE INDEX jidx ON testjsonb USING GIN (j); +ERROR: only btree and hash indexes are supported on columnar tables +INSERT INTO testjsonb SELECT CAST('{"f1" : ' ||'"'|| i*4 ||'", ' || '"f2" : '||'"'|| i*10 ||'"}' AS JSON) FROM generate_series(15,20) i; +-- gist -- +CREATE TABLE gist_point_tbl(id INT4, p POINT) USING columnar; +INSERT INTO gist_point_tbl (id, p) SELECT g, point(g*10, g*10) FROM generate_series(1, 10) g; +CREATE INDEX gist_pointidx ON gist_point_tbl USING gist(p); +ERROR: only btree and hash indexes are supported on columnar tables +INSERT INTO gist_point_tbl (id, p) SELECT g, point(g*10, g*10) FROM generate_series(10, 20) g; +-- sp gist -- +CREATE TABLE box_temp (f1 box) USING columnar; +INSERT INTO box_temp SELECT box(point(i, i), point(i * 2, i * 2)) FROM generate_series(1, 10) AS i; +CREATE INDEX box_spgist ON box_temp USING spgist (f1); +ERROR: only btree and hash indexes are supported on columnar tables +INSERT INTO box_temp SELECT box(point(i, i), point(i * 2, i * 2)) FROM generate_series(1, 10) AS i; +-- brin -- +CREATE TABLE brin_summarize (value int) USING columnar; +CREATE INDEX brin_summarize_idx ON brin_summarize USING brin (value) WITH (pages_per_range=2); +ERROR: only btree and hash indexes are supported on columnar tables SET client_min_messages TO WARNING; DROP SCHEMA columnar_indexes CASCADE; diff --git a/src/test/regress/sql/columnar_alter.sql b/src/test/regress/sql/columnar_alter.sql index ed916967e..ecd647fe1 100644 --- a/src/test/regress/sql/columnar_alter.sql +++ b/src/test/regress/sql/columnar_alter.sql @@ -215,21 +215,25 @@ ALTER TABLE products DROP CONSTRAINT dummy_constraint; INSERT INTO products VALUES (3, 'pen', 2); SELECT * FROM products ORDER BY 1; --- Add a UNIQUE constraint (should fail) -CREATE TABLE products_fail ( +-- Add a UNIQUE constraint +CREATE TABLE products_unique ( product_no integer UNIQUE, name text, price numeric ) USING columnar; ALTER TABLE products ADD COLUMN store_id text UNIQUE; --- Add a PRIMARY KEY constraint (should fail) -CREATE TABLE products_fail ( +-- Add a PRIMARY KEY constraint +CREATE TABLE products_primary ( product_no integer PRIMARY KEY, name text, price numeric ) USING columnar; -ALTER TABLE products ADD COLUMN store_id text PRIMARY KEY; + +BEGIN; + ALTER TABLE products DROP COLUMN store_id; + ALTER TABLE products ADD COLUMN store_id text PRIMARY KEY; +ROLLBACK; -- Add an EXCLUSION constraint (should fail) CREATE TABLE circles ( diff --git a/src/test/regress/sql/columnar_create.sql b/src/test/regress/sql/columnar_create.sql index 0c18f2212..f83b1c2a8 100644 --- a/src/test/regress/sql/columnar_create.sql +++ b/src/test/regress/sql/columnar_create.sql @@ -9,7 +9,6 @@ CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, USING columnar; SELECT alter_columnar_table_set('contestant', compression => 'none'); --- should fail CREATE INDEX contestant_idx on contestant(handle); -- Create zstd compressed table diff --git a/src/test/regress/sql/columnar_indexes.sql b/src/test/regress/sql/columnar_indexes.sql index 831699dc4..60991b56a 100644 --- a/src/test/regress/sql/columnar_indexes.sql +++ b/src/test/regress/sql/columnar_indexes.sql @@ -17,13 +17,269 @@ explain insert into t values (1, 2); insert 
into t values (1, 2); SELECT * FROM t; --- create index without the concurrent option. We should --- error out during index creation. create index t_idx on t(a, b); \d t explain insert into t values (1, 2); insert into t values (3, 4); SELECT * FROM t; +-- make sure that we test index scan +set columnar.enable_custom_scan to 'off'; +set enable_seqscan to off; + +CREATE table columnar_table (a INT, b int) USING columnar; +INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(0, 16000) i; + +-- unique -- +BEGIN; + INSERT INTO columnar_table VALUES (100000000); + SAVEPOINT s1; + -- errors out due to unflushed data in upper transaction + CREATE UNIQUE INDEX ON columnar_table (a); +ROLLBACK; + +CREATE UNIQUE INDEX ON columnar_table (a); + +BEGIN; + INSERT INTO columnar_table VALUES (16050); + SAVEPOINT s1; + -- index scan errors out due to unflushed data in upper transaction + SELECT a FROM columnar_table WHERE a = 16050; +ROLLBACK; + +EXPLAIN (COSTS OFF) SELECT * FROM columnar_table WHERE a=6456; +EXPLAIN (COSTS OFF) SELECT a FROM columnar_table WHERE a=6456; +SELECT (SELECT a FROM columnar_table WHERE a=6456 limit 1)=6456; +SELECT (SELECT b FROM columnar_table WHERE a=6456 limit 1)=6456*2; + +-- even if a=16050 doesn't exist, we try to insert it twice so this should error out +INSERT INTO columnar_table VALUES (16050), (16050); + +-- should work +INSERT INTO columnar_table VALUES (16050); + +-- check edge cases around stripe boundaries, error out +INSERT INTO columnar_table VALUES (16050); +INSERT INTO columnar_table VALUES (15999); + +DROP INDEX columnar_table_a_idx; + +CREATE TABLE partial_unique_idx_test (a INT, b INT) USING columnar; +CREATE UNIQUE INDEX ON partial_unique_idx_test (a) +WHERE b > 500; + +-- should work since b =< 500 and our partial index doesn't check this interval +INSERT INTO partial_unique_idx_test VALUES (1, 2), (1, 2); + +-- should work since our partial index wouldn't cover the tuples that we inserted above +INSERT INTO partial_unique_idx_test VALUES (1, 800); + +INSERT INTO partial_unique_idx_test VALUES (4, 600); + +-- should error out due to (4, 600) +INSERT INTO partial_unique_idx_test VALUES (4, 700); + +-- btree -- +CREATE INDEX ON columnar_table (a); +SELECT (SELECT SUM(b) FROM columnar_table WHERE a>700 and a<965)=439560; + +CREATE INDEX ON columnar_table (b) +WHERE (b > 30000 AND b < 33000); + +-- partial index should be way smaller than the non-partial index +SELECT pg_total_relation_size('columnar_table_b_idx') * 5 < + pg_total_relation_size('columnar_table_a_idx'); + +-- can't use index scan due to partial index boundaries +EXPLAIN (COSTS OFF) SELECT b FROM columnar_table WHERE b = 30000; +-- can use index scan +EXPLAIN (COSTS OFF) SELECT b FROM columnar_table WHERE b = 30001; + +-- some more rows +INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16000, 17000) i; + +DROP INDEX columnar_table_a_idx; +TRUNCATE columnar_table; + +-- pkey -- +INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16000, 16499) i; +ALTER TABLE columnar_table ADD PRIMARY KEY (a); +INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16500, 17000) i; + +BEGIN; + INSERT INTO columnar_table (a) SELECT 1; +ROLLBACK; + +-- should work +INSERT INTO columnar_table (a) SELECT 1; + +-- error out +INSERT INTO columnar_table VALUES (16100), (16101); +INSERT INTO columnar_table VALUES (16999); + +BEGIN; + REINDEX INDEX columnar_table_pkey; + -- should error even after reindex + INSERT INTO columnar_table VALUES (16999); 
+ROLLBACK;
+
+VACUUM FULL columnar_table;
+-- should error even after vacuum
+INSERT INTO columnar_table VALUES (16999);
+
+TRUNCATE columnar_table;
+INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(1, 160000) i;
+SELECT (SELECT b FROM columnar_table WHERE a = 150000)=300000;
+
+TRUNCATE columnar_table;
+ALTER TABLE columnar_table DROP CONSTRAINT columnar_table_pkey;
+
+-- hash --
+INSERT INTO columnar_table (a, b) SELECT i*2,i FROM generate_series(1, 8000) i;
+CREATE INDEX hash_idx ON columnar_table USING HASH (b);
+
+BEGIN;
+  CREATE INDEX hash_idx_fill_factor ON columnar_table USING HASH (b) WITH (fillfactor=10);
+  -- same hash index with lower fillfactor should be way bigger
+  SELECT pg_total_relation_size ('hash_idx_fill_factor') >
+         pg_total_relation_size ('hash_idx') * 5;
+ROLLBACK;
+
+BEGIN;
+  INSERT INTO columnar_table (a, b) SELECT i*3,i FROM generate_series(1, 8000) i;
+ROLLBACK;
+
+INSERT INTO columnar_table (a, b) SELECT i*4,i FROM generate_series(1, 8000) i;
+
+SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;
+
+BEGIN;
+  REINDEX TABLE columnar_table;
+  SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;
+ROLLBACK;
+
+VACUUM FULL columnar_table;
+SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;
+
+-- exclusion constraints --
+CREATE TABLE exclusion_test (c1 INT,c2 INT, c3 INT, c4 BOX,
+EXCLUDE USING btree (c1 WITH =) INCLUDE(c3,c4) WHERE (c1 < 10)) USING columnar;
+
+-- error out since "c1" is "1" for all rows to be inserted
+INSERT INTO exclusion_test SELECT 1, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;
+
+BEGIN;
+  INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;
+ROLLBACK;
+
+-- should work
+INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;
+
+INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;
+
+BEGIN;
+  -- should work thanks to "where" clause in exclusion constraint
+  INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;
+ROLLBACK;
+
+REINDEX TABLE exclusion_test;
+-- should still work after reindex
+INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;
+
+-- make sure that we respect INCLUDE syntax --
+
+CREATE TABLE include_test (a INT, b BIGINT, c BIGINT, d BIGINT) USING columnar;
+
+INSERT INTO include_test SELECT i, i, i, i FROM generate_series (1, 1000) i;
+
+CREATE UNIQUE INDEX unique_a ON include_test (a);
+
+-- cannot use index only scan
+EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;
+
+CREATE UNIQUE INDEX unique_a_include_b_c_d ON include_test (a) INCLUDE(b, c, d);
+
+-- same unique index that includes other columns should be way bigger
+SELECT pg_total_relation_size ('unique_a') * 1.5 <
+       pg_total_relation_size ('unique_a_include_b_c_d');
+
+DROP INDEX unique_a;
+
+-- should use index only scan since unique_a_include_b_c_d includes column "b" too
+EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;
+
+BEGIN;
+  SET enable_indexonlyscan = OFF;
+  -- show that we respect enable_indexonlyscan GUC
+  EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;
+ROLLBACK;
+
+-- make sure that we read the correct value for "b" when doing index only scan
+SELECT b=980 FROM include_test WHERE a = 980;
+
+-- some tests with distributed & partitioned tables --
+
+CREATE TABLE dist_part_table(
+    dist_col INT,
+    part_col TIMESTAMPTZ,
+    col1 TEXT
+) PARTITION BY RANGE (part_col);
+
+-- create an index before creating a columnar partition
+CREATE INDEX dist_part_table_btree ON dist_part_table (col1);
+
+-- columnar partition
+CREATE TABLE p0 PARTITION OF dist_part_table
+FOR VALUES FROM ('2020-01-01') TO ('2020-02-01')
+USING columnar;
+
+SELECT create_distributed_table('dist_part_table', 'dist_col');
+
+-- columnar partition
+CREATE TABLE p1 PARTITION OF dist_part_table
+FOR VALUES FROM ('2020-02-01') TO ('2020-03-01')
+USING columnar;
+
+-- row partition
+CREATE TABLE p2 PARTITION OF dist_part_table
+FOR VALUES FROM ('2020-03-01') TO ('2020-04-01');
+
+INSERT INTO dist_part_table VALUES (1, '2020-03-15', 'str1');
+
+-- insert into columnar partitions
+INSERT INTO dist_part_table VALUES (1, '2020-01-15', 'str2');
+INSERT INTO dist_part_table VALUES (1, '2020-02-15', 'str3');
+
+-- create another index after creating a columnar partition
+CREATE UNIQUE INDEX dist_part_table_unique ON dist_part_table (dist_col, part_col);
+
+-- verify that indexes are created on columnar partitions
+SELECT COUNT(*)=2 FROM pg_indexes WHERE tablename = 'p0';
+SELECT COUNT(*)=2 FROM pg_indexes WHERE tablename = 'p1';
+
+-- unsupported index types --
+
+-- gin --
+CREATE TABLE testjsonb (j JSONB) USING columnar;
+INSERT INTO testjsonb SELECT CAST('{"f1" : ' ||'"'|| i*4 ||'", ' || '"f2" : '||'"'|| i*10 ||'"}' AS JSON) FROM generate_series(1,10) i;
+CREATE INDEX jidx ON testjsonb USING GIN (j);
+INSERT INTO testjsonb SELECT CAST('{"f1" : ' ||'"'|| i*4 ||'", ' || '"f2" : '||'"'|| i*10 ||'"}' AS JSON) FROM generate_series(15,20) i;
+
+-- gist --
+CREATE TABLE gist_point_tbl(id INT4, p POINT) USING columnar;
+INSERT INTO gist_point_tbl (id, p) SELECT g, point(g*10, g*10) FROM generate_series(1, 10) g;
+CREATE INDEX gist_pointidx ON gist_point_tbl USING gist(p);
+INSERT INTO gist_point_tbl (id, p) SELECT g, point(g*10, g*10) FROM generate_series(10, 20) g;
+
+-- sp gist --
+CREATE TABLE box_temp (f1 box) USING columnar;
+INSERT INTO box_temp SELECT box(point(i, i), point(i * 2, i * 2)) FROM generate_series(1, 10) AS i;
+CREATE INDEX box_spgist ON box_temp USING spgist (f1);
+INSERT INTO box_temp SELECT box(point(i, i), point(i * 2, i * 2)) FROM generate_series(1, 10) AS i;
+
+-- brin --
+CREATE TABLE brin_summarize (value int) USING columnar;
+CREATE INDEX brin_summarize_idx ON brin_summarize USING brin (value) WITH (pages_per_range=2);
+
 SET client_min_messages TO WARNING;
 DROP SCHEMA columnar_indexes CASCADE;

From 1af50e98b3f2da5e9fe742ed433a7ea028877d69 Mon Sep 17 00:00:00 2001
From: Onur Tirtir
Date: Wed, 26 May 2021 16:45:05 +0300
Subject: [PATCH 3/7] Fix a comment in ColumnarMetapageRead

---
 src/backend/columnar/columnar_storage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/backend/columnar/columnar_storage.c b/src/backend/columnar/columnar_storage.c
index a73275249..aba79303f 100644
--- a/src/backend/columnar/columnar_storage.c
+++ b/src/backend/columnar/columnar_storage.c
@@ -615,7 +615,7 @@ ColumnarMetapageRead(Relation rel, bool force)
 	if (nblocks == 0)
 	{
 		/*
-		 * We only expect this to happen during binary ugrades. This is because,
+		 * We only expect this to happen when upgrading citus.so. This is because,
 		 * in current version of columnar, we immediately create the metapage
 		 * for columnar tables, i.e right after creating the table.
 * However in older versions, we were creating metapages lazily, i.e

From 82ea1b5dafba66c1091bb16f86aa527009d8b178 Mon Sep 17 00:00:00 2001
From: Onur Tirtir
Date: Tue, 1 Jun 2021 10:43:51 +0300
Subject: [PATCH 4/7] Don't remove all paths, keep IndexPaths

---
 src/backend/columnar/columnar_customscan.c | 59 +++++++++++++++++-----
 1 file changed, 45 insertions(+), 14 deletions(-)

diff --git a/src/backend/columnar/columnar_customscan.c b/src/backend/columnar/columnar_customscan.c
index 4866c81ad..88760c654 100644
--- a/src/backend/columnar/columnar_customscan.c
+++ b/src/backend/columnar/columnar_customscan.c
@@ -27,6 +27,7 @@
 #include "columnar/columnar_customscan.h"
 #include "columnar/columnar_metadata.h"
 #include "columnar/columnar_tableam.h"
+#include "distributed/listutils.h"

 typedef struct ColumnarScanPath
 {
@@ -50,8 +51,13 @@ typedef struct ColumnarScanState
 } ColumnarScanState;


+typedef bool (*PathPredicate)(Path *path);
+
+
 static void ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
 									   RangeTblEntry *rte);
+static void RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPredicate);
+static bool IsNotIndexPath(Path *path);
 static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel,
 									 RangeTblEntry *rte);
 static Cost ColumnarScanCost(RangeTblEntry *rte);
@@ -137,18 +143,6 @@ columnar_customscan_init()
 }


-static void
-clear_paths(RelOptInfo *rel)
-{
-	rel->pathlist = NIL;
-	rel->partial_pathlist = NIL;
-	rel->cheapest_startup_path = NULL;
-	rel->cheapest_total_path = NULL;
-	rel->cheapest_unique_path = NULL;
-	rel->cheapest_parameterized_paths = NIL;
-}
-
-
 static void
 ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
 						   RangeTblEntry *rte)
@@ -188,8 +182,13 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,

 		ereport(DEBUG1, (errmsg("pathlist hook for columnar table am")));

-		/* we propose a new path that will be the only path for scanning this relation */
-		clear_paths(rel);
+		/*
+		 * TODO: Since we don't have a proper costing model for
+		 * ColumnarCustomScan, we remove other paths to force postgres
+		 * to use ColumnarCustomScan. Note that we still keep index paths
+		 * since they might still be useful.
+		 */
+		RemovePathsByPredicate(rel, IsNotIndexPath);
 		add_path(rel, customPath);
 	}
 }
@@ -197,6 +196,38 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
 }


+/*
+ * RemovePathsByPredicate removes the paths that removePathPredicate
+ * evaluates to true from pathlist of given rel.
+ */
+static void
+RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPredicate)
+{
+	List *filteredPathList = NIL;
+
+	Path *path = NULL;
+	foreach_ptr(path, rel->pathlist)
+	{
+		if (!removePathPredicate(path))
+		{
+			filteredPathList = lappend(filteredPathList, path);
+		}
+	}
+
+	rel->pathlist = filteredPathList;
+}
+
+
+/*
+ * IsNotIndexPath returns true if the given path is not an IndexPath.
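+ *
+ * For example, the pathlist hook above uses it as
+ *
+ *   RemovePathsByPredicate(rel, IsNotIndexPath);
+ *
+ * which keeps only IndexPaths in rel->pathlist before add_path() appends
+ * the columnar custom scan path. Any other PathPredicate could be plugged
+ * in the same way; this patch only defines IsNotIndexPath.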
+ */ +static bool +IsNotIndexPath(Path *path) +{ + return !IsA(path, IndexPath); +} + + static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { From 9b4dc2f804d67a1f5b41fab9d0dfeaa4fe365739 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Tue, 1 Jun 2021 02:22:26 +0300 Subject: [PATCH 5/7] Prevent using parallel scan for columnar index builds --- src/backend/columnar/columnar_tableam.c | 46 ++++++++++++++++--- .../regress/expected/columnar_indexes.out | 9 ++++ src/test/regress/sql/columnar_indexes.sql | 7 +++ 3 files changed, 56 insertions(+), 6 deletions(-) diff --git a/src/backend/columnar/columnar_tableam.c b/src/backend/columnar/columnar_tableam.c index 944032b84..3ed5d8187 100644 --- a/src/backend/columnar/columnar_tableam.c +++ b/src/backend/columnar/columnar_tableam.c @@ -343,21 +343,33 @@ ErrorIfInvalidRowNumber(uint64 rowNumber) static Size columnar_parallelscan_estimate(Relation rel) { - elog(ERROR, "columnar_parallelscan_estimate not implemented"); + return sizeof(ParallelBlockTableScanDescData); } static Size columnar_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan) { - elog(ERROR, "columnar_parallelscan_initialize not implemented"); + ParallelBlockTableScanDesc bpscan = (ParallelBlockTableScanDesc) pscan; + + bpscan->base.phs_relid = RelationGetRelid(rel); + bpscan->phs_nblocks = RelationGetNumberOfBlocks(rel); + bpscan->base.phs_syncscan = synchronize_seqscans && + !RelationUsesLocalBuffers(rel) && + bpscan->phs_nblocks > NBuffers / 4; + SpinLockInit(&bpscan->phs_mutex); + bpscan->phs_startblock = InvalidBlockNumber; + pg_atomic_init_u64(&bpscan->phs_nallocated, 0); + + return sizeof(ParallelBlockTableScanDescData); } static void columnar_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan) { - elog(ERROR, "columnar_parallelscan_reinitialize not implemented"); + ParallelBlockTableScanDesc bpscan = (ParallelBlockTableScanDesc) pscan; + pg_atomic_write_u64(&bpscan->phs_nallocated, 0); } @@ -1101,10 +1113,21 @@ columnar_index_build_range_scan(Relation columnarRelation, if (scan) { /* - * Since we don't support parallel reads on columnar tables, we - * should have already errored out for that, but be on the safe side. + * Scan is initialized iff postgres decided to build the index using + * parallel workers. In this case, we simply return for parallel + * workers since we don't support parallel scan on columnar tables. */ - ereport(ERROR, (errmsg("parallel reads on columnar are not supported"))); + if (IsBackgroundWorker) + { + ereport(DEBUG4, (errmsg("ignoring parallel worker when building " + "index since parallel scan on columnar " + "tables is not supported"))); + return 0; + } + + ereport(NOTICE, (errmsg("falling back to serial index build since " + "parallel scan on columnar tables is not " + "supported"))); } /* @@ -1643,6 +1666,17 @@ static const TableAmRoutine columnar_am_methods = { .scan_rescan = columnar_rescan, .scan_getnextslot = columnar_getnextslot, + /* + * Postgres calls following three callbacks during index builds, if it + * decides to use parallel workers when building the index. On the other + * hand, we don't support parallel scans on columnar tables but we also + * want to fallback to serial index build. For this reason, we both skip + * parallel workers in columnar_index_build_range_scan and also provide + * basic implementations for those callbacks based on their corresponding + * implementations in heapAM. 
From 5adab2a3acd5e815ff6fdeda4c2d83560524f89c Mon Sep 17 00:00:00 2001
From: Onur Tirtir
Date: Thu, 3 Jun 2021 17:56:47 +0300
Subject: [PATCH 6/7] Report progress when building index on columnar tables

---
 src/backend/columnar/columnar_metadata.c |  43 +++++++-
 src/backend/columnar/columnar_tableam.c  | 126 +++++++++++++++++++++--
 src/include/columnar/columnar.h          |   2 +
 3 files changed, 161 insertions(+), 10 deletions(-)

diff --git a/src/backend/columnar/columnar_metadata.c b/src/backend/columnar/columnar_metadata.c
index af62227a6..0ebb533c7 100644
--- a/src/backend/columnar/columnar_metadata.c
+++ b/src/backend/columnar/columnar_metadata.c
@@ -668,6 +668,46 @@ FindStripeByRowNumber(Relation relation, uint64 rowNumber, Snapshot snapshot)
 }
 
 
+/*
+ * FindStripeWithHighestRowNumber returns StripeMetadata for the stripe that
+ * has the row with the highest rowNumber by doing a backward index scan on
+ * stripe_first_row_number_idx. If given relation is empty, then returns NULL.
+ */ +StripeMetadata * +FindStripeWithHighestRowNumber(Relation relation, Snapshot snapshot) +{ + StripeMetadata *stripeWithHighestRowNumber = NULL; + + uint64 storageId = ColumnarStorageGetStorageId(relation, false); + ScanKeyData scanKey[1]; + ScanKeyInit(&scanKey[0], Anum_columnar_stripe_storageid, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(storageId)); + + Relation columnarStripes = table_open(ColumnarStripeRelationId(), AccessShareLock); + Relation index = index_open(ColumnarStripeFirstRowNumberIndexRelationId(), + AccessShareLock); + SysScanDesc scanDescriptor = systable_beginscan_ordered(columnarStripes, index, + snapshot, 1, scanKey); + + HeapTuple heapTuple = systable_getnext_ordered(scanDescriptor, BackwardScanDirection); + if (HeapTupleIsValid(heapTuple)) + { + TupleDesc tupleDescriptor = RelationGetDescr(columnarStripes); + Datum datumArray[Natts_columnar_stripe]; + bool isNullArray[Natts_columnar_stripe]; + heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); + + stripeWithHighestRowNumber = BuildStripeMetadata(datumArray); + } + + systable_endscan_ordered(scanDescriptor); + index_close(index, AccessShareLock); + table_close(columnarStripes, AccessShareLock); + + return stripeWithHighestRowNumber; +} + + /* * ReadChunkGroupRowCounts returns an array of row counts of chunk groups for the * given stripe. @@ -876,7 +916,8 @@ ReadDataFileStripeList(uint64 storageId, Snapshot snapshot) Oid columnarStripesOid = ColumnarStripeRelationId(); Relation columnarStripes = table_open(columnarStripesOid, AccessShareLock); - Relation index = index_open(ColumnarStripePKeyIndexRelationId(), AccessShareLock); + Relation index = index_open(ColumnarStripeFirstRowNumberIndexRelationId(), + AccessShareLock); TupleDesc tupleDescriptor = RelationGetDescr(columnarStripes); SysScanDesc scanDescriptor = systable_beginscan_ordered(columnarStripes, index, diff --git a/src/backend/columnar/columnar_tableam.c b/src/backend/columnar/columnar_tableam.c index 3ed5d8187..89cb10ece 100644 --- a/src/backend/columnar/columnar_tableam.c +++ b/src/backend/columnar/columnar_tableam.c @@ -114,9 +114,15 @@ static Datum * detoast_values(TupleDesc tupleDesc, Datum *orig_values, bool *isn static ItemPointerData row_number_to_tid(uint64 rowNumber); static uint64 tid_to_row_number(ItemPointerData tid); static void ErrorIfInvalidRowNumber(uint64 rowNumber); +static void ColumnarReportTotalVirtualBlocks(Relation relation, Snapshot snapshot, + int progressArrIndex); +static BlockNumber ColumnarGetNumberOfVirtualBlocks(Relation relation, Snapshot snapshot); +static ItemPointerData ColumnarGetHighestItemPointer(Relation relation, + Snapshot snapshot); static double ColumnarReadRowsIntoIndex(TableScanDesc scan, Relation indexRelation, IndexInfo *indexInfo, + bool progress, IndexBuildCallback indexCallback, void *indexCallbackState, EState *estate, ExprState *predicate); @@ -1089,11 +1095,6 @@ columnar_index_build_range_scan(Relation columnarRelation, void *callback_state, TableScanDesc scan) { - /* - * TODO: Should this function call pgstat_progress_update_param in - * somewhere as heapam_index_build_range_scan ? - */ - if (start_blockno != 0 || numblocks != InvalidBlockNumber) { /* @@ -1183,6 +1184,12 @@ columnar_index_build_range_scan(Relation columnarRelation, snapshot = scan->rs_snapshot; } + if (progress) + { + ColumnarReportTotalVirtualBlocks(columnarRelation, snapshot, + PROGRESS_SCAN_BLOCKS_TOTAL); + } + /* * Set up execution state for predicate, if any. 
 	 * Note that this is only useful for partial indexes.
@@ -1193,10 +1200,17 @@ columnar_index_build_range_scan(Relation columnarRelation,
 	ExprState *predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
 
 	double reltuples = ColumnarReadRowsIntoIndex(scan, indexRelation, indexInfo,
-												 callback, callback_state, estate,
-												 predicate);
+												 progress, callback, callback_state,
+												 estate, predicate);
 	table_endscan(scan);
 
+	if (progress)
+	{
+		/* report the last "virtual" block as "done" */
+		ColumnarReportTotalVirtualBlocks(columnarRelation, snapshot,
+										 PROGRESS_SCAN_BLOCKS_DONE);
+	}
+
 	if (snapshotRegisteredByUs)
 	{
 		UnregisterSnapshot(snapshot);
@@ -1211,6 +1225,81 @@ columnar_index_build_range_scan(Relation columnarRelation,
 }
 
 
+/*
+ * ColumnarReportTotalVirtualBlocks reports progress for index build based on
+ * number of "virtual" blocks that given relation has.
+ * "progressArrIndex" argument determines which entry in st_progress_param
+ * array should be updated. In this case, we only expect PROGRESS_SCAN_BLOCKS_TOTAL
+ * or PROGRESS_SCAN_BLOCKS_DONE to specify whether we want to report calculated
+ * number of blocks as "done" or as "total" number of "virtual" blocks to scan.
+ */
+static void
+ColumnarReportTotalVirtualBlocks(Relation relation, Snapshot snapshot,
+								 int progressArrIndex)
+{
+	/*
+	 * Indeed, columnar tables might have gaps between row numbers, e.g.
+	 * due to aborted transactions etc. Also, ItemPointer BlockNumber's
+	 * for columnar tables don't actually correspond to actual disk blocks
+	 * as in heapAM. For this reason, we call them "virtual" blocks. At
+	 * the moment, we believe it is better to report our progress based on
+	 * this "virtual" block concept instead of doing nothing.
+	 */
+	Assert(progressArrIndex == PROGRESS_SCAN_BLOCKS_TOTAL ||
+		   progressArrIndex == PROGRESS_SCAN_BLOCKS_DONE);
+	BlockNumber nvirtualBlocks =
+		ColumnarGetNumberOfVirtualBlocks(relation, snapshot);
+	pgstat_progress_update_param(progressArrIndex, nvirtualBlocks);
+}
+
+
+/*
+ * ColumnarGetNumberOfVirtualBlocks returns total number of "virtual" blocks
+ * that given columnar table has, based on ItemPointer BlockNumber's.
+ */
+static BlockNumber
+ColumnarGetNumberOfVirtualBlocks(Relation relation, Snapshot snapshot)
+{
+	ItemPointerData highestItemPointer =
+		ColumnarGetHighestItemPointer(relation, snapshot);
+	if (!ItemPointerIsValid(&highestItemPointer))
+	{
+		/* table is empty according to our snapshot */
+		return 0;
+	}
+
+	/*
+	 * Since BlockNumber is 0-based, increment it by 1 to find the total
+	 * number of "virtual" blocks.
+	 */
+	return ItemPointerGetBlockNumber(&highestItemPointer) + 1;
+}
+
+
+/*
+ * ColumnarGetHighestItemPointer returns ItemPointerData for the tuple with
+ * highest tid for given relation.
+ * If given relation is empty, then returns invalid item pointer.
+ */
+static ItemPointerData
+ColumnarGetHighestItemPointer(Relation relation, Snapshot snapshot)
+{
+	StripeMetadata *stripeWithHighestRowNumber =
+		FindStripeWithHighestRowNumber(relation, snapshot);
+	if (stripeWithHighestRowNumber == NULL)
+	{
+		/* table is empty according to our snapshot */
+		ItemPointerData invalidItemPtr;
+		ItemPointerSetInvalid(&invalidItemPtr);
+		return invalidItemPtr;
+	}
+
+	uint64 highestRowNumber = stripeWithHighestRowNumber->firstRowNumber +
+							  stripeWithHighestRowNumber->rowCount - 1;
+	return row_number_to_tid(highestRowNumber);
+}
+
+
 /*
  * ColumnarReadRowsIntoIndex builds indexRelation tuples by reading the
  * actual relation based on given "scan" and returns number of tuples
@@ -1218,17 +1307,36 @@ columnar_index_build_range_scan(Relation columnarRelation,
  */
 static double
 ColumnarReadRowsIntoIndex(TableScanDesc scan, Relation indexRelation,
-						  IndexInfo *indexInfo, IndexBuildCallback indexCallback,
-						  void *indexCallbackState, EState *estate, ExprState *predicate)
+						  IndexInfo *indexInfo, bool progress,
+						  IndexBuildCallback indexCallback,
+						  void *indexCallbackState, EState *estate,
+						  ExprState *predicate)
 {
 	double reltuples = 0;
 
+	BlockNumber lastReportedBlockNumber = InvalidBlockNumber;
+
 	ExprContext *econtext = GetPerTupleExprContext(estate);
 	TupleTableSlot *slot = econtext->ecxt_scantuple;
 	while (columnar_getnextslot(scan, ForwardScanDirection, slot))
 	{
 		CHECK_FOR_INTERRUPTS();
 
+		BlockNumber currentBlockNumber = ItemPointerGetBlockNumber(&slot->tts_tid);
+		if (progress && lastReportedBlockNumber != currentBlockNumber)
+		{
+			/*
+			 * columnar_getnextslot guarantees that returned tuple will
+			 * always have a greater ItemPointer than the ones we fetched
+			 * before, so we directly use BlockNumber to report our progress.
+			 */
+			Assert(lastReportedBlockNumber == InvalidBlockNumber ||
+				   currentBlockNumber >= lastReportedBlockNumber);
+			pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
+										 currentBlockNumber);
+			lastReportedBlockNumber = currentBlockNumber;
+		}
+
 		MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
 		if (predicate != NULL && !ExecQual(predicate, econtext))
diff --git a/src/include/columnar/columnar.h b/src/include/columnar/columnar.h
index 8288a20aa..e4770acc2 100644
--- a/src/include/columnar/columnar.h
+++ b/src/include/columnar/columnar.h
@@ -256,6 +256,8 @@ extern StripeSkipList * ReadStripeSkipList(RelFileNode relfilenode, uint64 strip
 											uint32 chunkCount);
 extern StripeMetadata * FindStripeByRowNumber(Relation relation, uint64 rowNumber,
 											  Snapshot snapshot);
+extern StripeMetadata * FindStripeWithHighestRowNumber(Relation relation,
+													   Snapshot snapshot);
 
 extern Datum columnar_relation_storageid(PG_FUNCTION_ARGS);
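
With progress reporting in place, a second session can watch an in-flight
columnar index build through the standard progress view. A minimal sketch;
run it while CREATE INDEX executes elsewhere, and note the column subset
shown here is just for brevity.

    SELECT relid::regclass, phase, blocks_total, blocks_done
    FROM pg_stat_progress_create_index;
    -- For columnar tables, blocks_total and blocks_done count the "virtual"
    -- blocks derived from row numbers, not physical disk blocks.
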
columnar tables"))); + errmsg("clustering columnar tables using indexes is " + "not supported"))); } /* diff --git a/src/test/regress/expected/columnar_indexes.out b/src/test/regress/expected/columnar_indexes.out index 32af88c92..9edc0900d 100644 --- a/src/test/regress/expected/columnar_indexes.out +++ b/src/test/regress/expected/columnar_indexes.out @@ -192,6 +192,12 @@ ERROR: duplicate key value violates unique constraint "columnar_table_pkey" DETAIL: Key (a)=(16999) already exists. ROLLBACK; VACUUM FULL columnar_table; +-- show that we don't support clustering columnar tables using indexes +CLUSTER columnar_table USING columnar_table_pkey; +ERROR: clustering columnar tables using indexes is not supported +ALTER TABLE columnar_table CLUSTER ON columnar_table_pkey; +CLUSTER columnar_table; +ERROR: clustering columnar tables using indexes is not supported -- should error even after vacuum INSERT INTO columnar_table VALUES (16999); ERROR: duplicate key value violates unique constraint "columnar_table_pkey" diff --git a/src/test/regress/sql/columnar_indexes.sql b/src/test/regress/sql/columnar_indexes.sql index 2b48e2a37..43ad75221 100644 --- a/src/test/regress/sql/columnar_indexes.sql +++ b/src/test/regress/sql/columnar_indexes.sql @@ -124,6 +124,13 @@ BEGIN; ROLLBACK; VACUUM FULL columnar_table; + +-- show that we don't support clustering columnar tables using indexes +CLUSTER columnar_table USING columnar_table_pkey; + +ALTER TABLE columnar_table CLUSTER ON columnar_table_pkey; +CLUSTER columnar_table; + -- should error even after vacuum INSERT INTO columnar_table VALUES (16999);