From dba84214536f8de820219856a19d2e3f8f06ead6 Mon Sep 17 00:00:00 2001
From: Onur Tirtir <onurcantirtir@gmail.com>
Date: Fri, 25 Jun 2021 16:22:14 +0300
Subject: [PATCH 1/3] Refactor ColumnarScanCost into
 ColumnarPerStripeScanCost

---
 src/backend/columnar/columnar_customscan.c | 82 +++++++++++++++-------
 1 file changed, 55 insertions(+), 27 deletions(-)

diff --git a/src/backend/columnar/columnar_customscan.c b/src/backend/columnar/columnar_customscan.c
index 88760c654..e2914e509 100644
--- a/src/backend/columnar/columnar_customscan.c
+++ b/src/backend/columnar/columnar_customscan.c
@@ -60,7 +60,10 @@ static void RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPred
 static bool IsNotIndexPath(Path *path);
 static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel,
 									 RangeTblEntry *rte);
-static Cost ColumnarScanCost(RangeTblEntry *rte);
+static Cost ColumnarScanCost(Oid relationId, int numberOfColumnsRead);
+static Cost ColumnarPerStripeScanCost(Oid relationId,
+									  int numberOfColumnsRead);
+static uint64 ColumnarTableStripeCount(Oid relationId);
 static Plan * ColumnarScanPath_PlanCustomPath(PlannerInfo *root,
 											  RelOptInfo *rel,
 											  struct CustomPath *best_path,
@@ -265,7 +268,9 @@ CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 	 */
 	path->rows = rel->rows;
 	path->startup_cost = 0;
-	path->total_cost = path->startup_cost + ColumnarScanCost(rte);
+	int numberOfColumnsRead = bms_num_members(rte->selectedCols);
+	path->total_cost = path->startup_cost +
+					   ColumnarScanCost(rte->relid, numberOfColumnsRead);
 
 	return (Path *) cspath;
 }
@@ -277,42 +282,65 @@ CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  * need to be read.
  */
 static Cost
-ColumnarScanCost(RangeTblEntry *rte)
+ColumnarScanCost(Oid relationId, int numberOfColumnsRead)
 {
-	Relation rel = RelationIdGetRelation(rte->relid);
-	List *stripeList = StripesForRelfilenode(rel->rd_node);
-	RelationClose(rel);
+	return ColumnarTableStripeCount(relationId) *
+		   ColumnarPerStripeScanCost(relationId, numberOfColumnsRead);
+}
+
+
+/*
+ * ColumnarPerStripeScanCost calculates the cost to scan a single stripe
+ * of given columnar table based on number of columns that needs to be
+ * read during scan operation.
+ */
+static Cost
+ColumnarPerStripeScanCost(Oid relationId, int numberOfColumnsRead)
+{
+	Relation relation = RelationIdGetRelation(relationId);
+	List *stripeList = StripesForRelfilenode(relation->rd_node);
+	RelationClose(relation);
 
 	uint32 maxColumnCount = 0;
 	uint64 totalStripeSize = 0;
-	ListCell *stripeMetadataCell = NULL;
-	rel = NULL;
-
-	foreach(stripeMetadataCell, stripeList)
+	StripeMetadata *stripeMetadata = NULL;
+	foreach_ptr(stripeMetadata, stripeList)
 	{
-		StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell);
 		totalStripeSize += stripeMetadata->dataLength;
 		maxColumnCount = Max(maxColumnCount, stripeMetadata->columnCount);
 	}
 
+	/*
+	 * When no stripes are in the table we don't have a count in maxColumnCount. To
+	 * prevent a division by zero turning into a NaN we keep the ratio on zero.
+	 * This will result in a cost of 0 for scanning the table which is a reasonable
+	 * cost on an empty table.
+	 */
+	if (maxColumnCount == 0)
 	{
-		Bitmapset *attr_needed = rte->selectedCols;
-		double numberOfColumnsRead = bms_num_members(attr_needed);
-		double selectionRatio = 0;
-
-		/*
-		 * When no stripes are in the table we don't have a count in maxColumnCount. To
-		 * prevent a division by zero turning into a NaN we keep the ratio on zero.
-		 * This will result in a cost of 0 for scanning the table which is a reasonable
-		 * cost on an empty table.
-		 */
-		if (maxColumnCount != 0)
-		{
-			selectionRatio = numberOfColumnsRead / (double) maxColumnCount;
-		}
-		Cost scanCost = (double) totalStripeSize / BLCKSZ * selectionRatio;
-		return scanCost;
+		return 0;
 	}
+
+	double columnSelectionRatio = numberOfColumnsRead / (double) maxColumnCount;
+	Cost tableScanCost = (double) totalStripeSize / BLCKSZ * columnSelectionRatio;
+	Cost perStripeScanCost = tableScanCost / list_length(stripeList);
+	return perStripeScanCost;
+}
+
+
+/*
+ * ColumnarTableStripeCount returns the number of stripes that the columnar
+ * table with relationId has, i.e. the length of its stripe metadata list.
+ */
+static uint64
+ColumnarTableStripeCount(Oid relationId)
+{
+	Relation relation = RelationIdGetRelation(relationId);
+	List *stripeList = StripesForRelfilenode(relation->rd_node);
+	int stripeCount = list_length(stripeList);
+	RelationClose(relation);
+
+	return stripeCount;
 }
 
 

From 8adcf2096b28a4a0eb5402ab22d325e104d468e8 Mon Sep 17 00:00:00 2001
From: Onur Tirtir <onurcantirtir@gmail.com>
Date: Mon, 5 Jul 2021 17:52:56 +0300
Subject: [PATCH 2/3] Multiply ColumnarCustomScan cost by tblspace.seqpage cost

---
 src/backend/columnar/columnar_customscan.c | 27 +++++++++++++++++-----
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/src/backend/columnar/columnar_customscan.c b/src/backend/columnar/columnar_customscan.c
index e2914e509..748049740 100644
--- a/src/backend/columnar/columnar_customscan.c
+++ b/src/backend/columnar/columnar_customscan.c
@@ -23,6 +23,7 @@
 #include "optimizer/paths.h"
 #include "optimizer/restrictinfo.h"
 #include "utils/relcache.h"
+#include "utils/spccache.h"
 
 #include "columnar/columnar_customscan.h"
 #include "columnar/columnar_metadata.h"
@@ -60,8 +61,8 @@ static void RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPred
 static bool IsNotIndexPath(Path *path);
 static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel,
 									 RangeTblEntry *rte);
-static Cost ColumnarScanCost(Oid relationId, int numberOfColumnsRead);
-static Cost ColumnarPerStripeScanCost(Oid relationId,
+static Cost ColumnarScanCost(RelOptInfo *rel, Oid relationId, int numberOfColumnsRead);
+static Cost ColumnarPerStripeScanCost(RelOptInfo *rel, Oid relationId,
 									  int numberOfColumnsRead);
 static uint64 ColumnarTableStripeCount(Oid relationId);
 static Plan * ColumnarScanPath_PlanCustomPath(PlannerInfo *root,
@@ -270,7 +271,7 @@ CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 	path->startup_cost = 0;
 	int numberOfColumnsRead = bms_num_members(rte->selectedCols);
 	path->total_cost = path->startup_cost +
-					   ColumnarScanCost(rte->relid, numberOfColumnsRead);
+					   ColumnarScanCost(rel, rte->relid, numberOfColumnsRead);
 
 	return (Path *) cspath;
 }
@@ -282,10 +283,10 @@ CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
  * need to be read.
  */
 static Cost
-ColumnarScanCost(Oid relationId, int numberOfColumnsRead)
+ColumnarScanCost(RelOptInfo *rel, Oid relationId, int numberOfColumnsRead)
 {
 	return ColumnarTableStripeCount(relationId) *
-		   ColumnarPerStripeScanCost(relationId, numberOfColumnsRead);
+		   ColumnarPerStripeScanCost(rel, relationId, numberOfColumnsRead);
 }
 
 
@@ -295,7 +296,7 @@ ColumnarScanCost(Oid relationId, int numberOfColumnsRead)
  * read during scan operation.
  */
 static Cost
-ColumnarPerStripeScanCost(Oid relationId, int numberOfColumnsRead)
+ColumnarPerStripeScanCost(RelOptInfo *rel, Oid relationId, int numberOfColumnsRead)
 {
 	Relation relation = RelationIdGetRelation(relationId);
 	List *stripeList = StripesForRelfilenode(relation->rd_node);
@@ -324,6 +325,20 @@ ColumnarPerStripeScanCost(Oid relationId, int numberOfColumnsRead)
 	double columnSelectionRatio = numberOfColumnsRead / (double) maxColumnCount;
 	Cost tableScanCost = (double) totalStripeSize / BLCKSZ * columnSelectionRatio;
 	Cost perStripeScanCost = tableScanCost / list_length(stripeList);
+
+	/*
+	 * Finally, multiply the cost of reading a single stripe by seq page read
+	 * cost to make our estimation scale compatible with postgres.
+	 * Since we are calculating the cost for a single stripe here, we use seq
+	 * page cost instead of random page cost. This is because random page
+	 * access only happens when switching between columns, which is pretty
+	 * much negligible.
+	 */
+	double relSpaceSeqPageCost;
+	get_tablespace_page_costs(rel->reltablespace,
+							  NULL, &relSpaceSeqPageCost);
+	perStripeScanCost = perStripeScanCost * relSpaceSeqPageCost;
+
 	return perStripeScanCost;
 }
 

From 297f59a70eefc32131c67ecfe3c38d3cb3519ec5 Mon Sep 17 00:00:00 2001
From: Onur Tirtir <onurcantirtir@gmail.com>
Date: Thu, 24 Jun 2021 20:40:54 +0300
Subject: [PATCH 3/3] Re-cost columnar table index paths

---
 src/backend/columnar/columnar_customscan.c    | 197 +++++++++++-
 src/test/regress/columnar_schedule            |   2 +-
 src/test/regress/expected/columnar_paths.out  | 302 ++++++++++++++++++
 .../expected/columnar_test_helpers.out        |  27 ++
 src/test/regress/sql/columnar_paths.sql       | 203 ++++++++++++
 .../regress/sql/columnar_test_helpers.sql     |  29 ++
 6 files changed, 755 insertions(+), 5 deletions(-)
 create mode 100644 src/test/regress/expected/columnar_paths.out
 create mode 100644 src/test/regress/sql/columnar_paths.sql

diff --git a/src/backend/columnar/columnar_customscan.c b/src/backend/columnar/columnar_customscan.c
index 748049740..50354be46 100644
--- a/src/backend/columnar/columnar_customscan.c
+++ b/src/backend/columnar/columnar_customscan.c
@@ -14,6 +14,7 @@
 
 #include "postgres.h"
 
+#include "access/amapi.h"
 #include "access/skey.h"
 #include "nodes/extensible.h"
 #include "nodes/pg_list.h"
@@ -25,6 +26,7 @@
 #include "utils/relcache.h"
 #include "utils/spccache.h"
 
+#include "columnar/columnar.h"
 #include "columnar/columnar_customscan.h"
 #include "columnar/columnar_metadata.h"
 #include "columnar/columnar_tableam.h"
@@ -59,6 +61,14 @@ static void ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index
 									   RangeTblEntry *rte);
 static void RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPredicate);
 static bool IsNotIndexPath(Path *path);
+static void RecostColumnarIndexPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId);
+static void RecostColumnarIndexPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId,
+									IndexPath *indexPath);
+static Cost ColumnarIndexScanAddStartupCost(RelOptInfo *rel, Oid relationId,
+											IndexPath *indexPath);
+static Cost ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel,
+										  Oid relationId, IndexPath *indexPath);
+static int RelationIdGetNumberOfAttributes(Oid relationId);
 static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel,
 									 RangeTblEntry *rte);
 static Cost ColumnarScanCost(RelOptInfo *rel, Oid relationId, int numberOfColumnsRead);
@@ -187,12 +197,13 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
 			ereport(DEBUG1, (errmsg("pathlist hook for columnar table am")));
 
 			/*
-			 * TODO: Since we don't have a proper costing model for
-			 * ColumnarCustomScan, we remove other paths to force postgres
-			 * using ColumnarCustomScan. Note that we still keep index paths
-			 * since they still might be useful.
+			 * When columnar custom scan is enabled (columnar.enable_custom_scan),
+			 * we only consider ColumnarScanPath's & IndexPath's. For this reason,
+			 * we remove other paths and re-estimate IndexPath costs to make accurate
+			 * comparisons between them.
 			 */
 			RemovePathsByPredicate(rel, IsNotIndexPath);
+			RecostColumnarIndexPaths(root, rel, rte->relid);
 			add_path(rel, customPath);
 		}
 	}
@@ -232,6 +243,184 @@ IsNotIndexPath(Path *path)
 }
 
 
+/*
+ * RecostColumnarIndexPaths walks rel->pathlist and re-costs, in place, every
+ * IndexPath it finds for the columnar table with relationId.
+ */
+static void
+RecostColumnarIndexPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId)
+{
+	Path *path = NULL;
+	foreach_ptr(path, rel->pathlist)
+	{
+		/*
+		 * Since we don't provide implementations for scan_bitmap_next_block
+		 * & scan_bitmap_next_tuple, postgres already doesn't generate bitmap
+		 * index scan paths for columnar tables (see related comments in
+		 * TableAmRoutine). For this reason, only IndexPath nodes need to be
+		 * re-costed here; any other path type is left untouched.
+		 */
+		if (IsA(path, IndexPath))
+		{
+			RecostColumnarIndexPath(root, rel, relationId, (IndexPath *) path);
+		}
+	}
+}
+
+
+/*
+ * RecostColumnarIndexPath adds columnar read costs to the startup and total
+ * costs of given index path for columnar table with relationId (in place).
+ */
+static void
+RecostColumnarIndexPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId,
+						IndexPath *indexPath)
+{
+	ereport(DEBUG4, (errmsg("columnar table index scan costs estimated by "
+							"indexAM: startup cost = %.10f, total cost = "
+							"%.10f", indexPath->path.startup_cost,
+							indexPath->path.total_cost)));
+
+	/*
+	 * We estimate the cost for columnar table read during index scan. Instead
+	 * of overwriting startup & total costs, we "add" ours to the costs
+	 * estimated by indexAM, since index traversal costs matter too. The new
+	 * total is built on the new startup cost, so it includes both additions.
+	 */
+	Cost indexAMStartupCost = indexPath->path.startup_cost;
+	Cost indexAMScanCost = indexPath->path.total_cost - indexAMStartupCost;
+
+	Cost columnarIndexScanStartupCost = ColumnarIndexScanAddStartupCost(rel, relationId,
+																		indexPath);
+	Cost columnarIndexScanCost = ColumnarIndexScanAddTotalCost(root, rel, relationId,
+															   indexPath);
+
+	indexPath->path.startup_cost = indexAMStartupCost + columnarIndexScanStartupCost;
+	indexPath->path.total_cost = indexPath->path.startup_cost +
+								 indexAMScanCost + columnarIndexScanCost;
+
+	ereport(DEBUG4, (errmsg("columnar table index scan costs re-estimated "
+							"by columnarAM (including indexAM costs): "
+							"startup cost = %.10f, total cost = %.10f",
+							indexPath->path.startup_cost,
+							indexPath->path.total_cost)));
+}
+
+
+/*
+ * ColumnarIndexScanAddStartupCost returns the additional startup cost of an
+ * index scan on columnar table with relationId; indexPath is not consulted.
+ */
+static Cost
+ColumnarIndexScanAddStartupCost(RelOptInfo *rel, Oid relationId, IndexPath *indexPath)
+{
+	int numberOfColumnsRead = RelationIdGetNumberOfAttributes(relationId);
+
+	/* we would at least read one stripe, costed as if all columns are read */
+	return ColumnarPerStripeScanCost(rel, relationId, numberOfColumnsRead);
+}
+
+
+/*
+ * ColumnarIndexScanAddTotalCost returns additional cost estimated for index
+ * scan described by IndexPath for columnar table with relationId (0 if empty).
+ */
+static Cost
+ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel,
+							  Oid relationId, IndexPath *indexPath)
+{
+	int numberOfColumnsRead = RelationIdGetNumberOfAttributes(relationId);
+	Cost perStripeCost = ColumnarPerStripeScanCost(rel, relationId, numberOfColumnsRead);
+
+	/*
+	 * We don't need to pass correct loop count to amcostestimate since we
+	 * will only use index correlation & index selectivity, and loop count
+	 * doesn't have any effect on those two.
+	 */
+	double fakeLoopCount = 1;
+	Cost fakeIndexStartupCost;
+	Cost fakeIndexTotalCost;
+	double fakeIndexPages;
+	Selectivity indexSelectivity;
+	double indexCorrelation;
+	amcostestimate_function amcostestimate = indexPath->indexinfo->amcostestimate;
+	amcostestimate(root, indexPath, fakeLoopCount, &fakeIndexStartupCost,
+				   &fakeIndexTotalCost, &indexSelectivity,
+				   &indexCorrelation, &fakeIndexPages);
+
+	Relation relation = RelationIdGetRelation(relationId);
+	uint64 rowCount = ColumnarTableRowCount(relation);
+	RelationClose(relation);
+	double estimatedRows = rowCount * indexSelectivity;
+
+	/*
+	 * In the worst case (i.e no correlation between the column & the index),
+	 * we need to read a different stripe for each row.
+	 */
+	double maxStripeReadCount = estimatedRows;
+
+	/*
+	 * In the best case (i.e the column is fully correlated with the index),
+	 * we wouldn't read the same stripe again and again thanks to locality.
+	 * An empty table gets 0 here, since 0 / 0 would make the cost NaN.
+	 */
+	uint64 stripeCount = ColumnarTableStripeCount(relationId);
+	double minStripeReadCount = (rowCount == 0 || stripeCount == 0) ? 0 :
+								estimatedRows / (rowCount / (double) stripeCount);
+
+	/*
+	 * While being close to 0 means low correlation, being close to -1 or +1
+	 * means high correlation. For index scans on columnar tables, it doesn't
+	 * matter if the column and the index are "correlated" (+1) or
+	 * "anti-correlated" (-1) since both help us avoiding from reading the
+	 * same stripe again and again.
+	 */
+	double absIndexCorrelation = Abs(indexCorrelation);
+
+	/*
+	 * To estimate the number of stripes that we need to read, we do linear
+	 * interpolation between minStripeReadCount & maxStripeReadCount. To do
+	 * that, we use complement to 1 of absolute correlation, where being
+	 * close to 0 means high correlation and being close to 1 means low
+	 * correlation.
+	 * In practice, we only want to do an index scan when absIndexCorrelation
+	 * is 1 (or extremely close to it), or when the absolute number of tuples
+	 * returned is very small. Other cases will have a prohibitive cost.
+	 */
+	double complementIndexCorrelation = 1 - absIndexCorrelation;
+	double estimatedStripeReadCount =
+		minStripeReadCount + complementIndexCorrelation * (maxStripeReadCount -
+														   minStripeReadCount);
+
+	Cost scanCost = perStripeCost * estimatedStripeReadCount;
+
+	ereport(DEBUG4, (errmsg("re-costing index scan for columnar table: "
+							"selectivity = %.10f, complement abs "
+							"correlation = %.10f, per stripe cost = %.10f, "
+							"estimated stripe read count = %.10f, "
+							"total additional cost = %.10f",
+							indexSelectivity, complementIndexCorrelation,
+							perStripeCost, estimatedStripeReadCount,
+							scanCost)));
+
+	return scanCost;
+}
+
+
+/*
+ * RelationIdGetNumberOfAttributes returns relation->rd_att->natts for the
+ * relation with relationId; the relation is opened and closed internally.
+ */
+static int
+RelationIdGetNumberOfAttributes(Oid relationId)
+{
+	Relation relation = RelationIdGetRelation(relationId);
+	int nattrs = relation->rd_att->natts;
+	RelationClose(relation);
+	return nattrs;
+}
+
+
 static Path *
 CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 {
diff --git a/src/test/regress/columnar_schedule b/src/test/regress/columnar_schedule
index 218b04c4a..84a106c81 100644
--- a/src/test/regress/columnar_schedule
+++ b/src/test/regress/columnar_schedule
@@ -9,7 +9,7 @@ test: columnar_analyze
 test: columnar_data_types
 test: columnar_drop
 test: columnar_indexes
-test: columnar_fallback_scan
+test: columnar_fallback_scan columnar_paths
 test: columnar_partitioning
 test: columnar_permissions
 test: columnar_empty
diff --git a/src/test/regress/expected/columnar_paths.out b/src/test/regress/expected/columnar_paths.out
new file mode 100644
index 000000000..cf5a86a64
--- /dev/null
+++ b/src/test/regress/expected/columnar_paths.out
@@ -0,0 +1,302 @@
+CREATE SCHEMA columnar_paths;
+SET search_path TO columnar_paths;
+CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar;
+INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i;
+CREATE INDEX full_correlated_btree ON full_correlated (a);
+ANALYZE full_correlated;
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_correlated WHERE a=200;
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_correlated WHERE a<0;
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_correlated WHERE a>10 AND a<20;
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_correlated WHERE a>1000000;
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a>900000;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_correlated WHERE a<1000;
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a,b FROM full_correlated WHERE a<3000;
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a<9000;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+BEGIN;
+  TRUNCATE full_correlated;
+  INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i;
+  -- Since we have much smaller number of rows, selectivity of below
+  -- query should be much higher. So we would choose columnar custom scan.
+  SELECT columnar_test_helpers.uses_custom_scan (
+  $$
+  SELECT a FROM full_correlated WHERE a=200;
+  $$
+  );
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+-- same filter used in above, but choosing multiple columns would increase
+-- custom scan cost, so we would prefer index scan this time
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a,b,c,d FROM full_correlated WHERE a<9000;
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+-- again same filter used in above, but we would choose custom scan this
+-- time since it would read three less columns from disk
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT c FROM full_correlated WHERE a<10000;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a>200;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a=0 OR a=5;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+DROP INDEX full_correlated_btree;
+CREATE INDEX full_correlated_hash ON full_correlated USING hash(a);
+ANALYZE full_correlated;
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a<10;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a>1 AND a<10;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a=0 OR a=5;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a=1000;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a,c FROM full_correlated WHERE a=1000;
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+CREATE TABLE full_anti_correlated (a int, b text) USING columnar;
+INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i;
+CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b);
+ANALYZE full_anti_correlated;
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_anti_correlated WHERE b='600';
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a,b FROM full_anti_correlated WHERE b='600';
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+DROP INDEX full_anti_correlated_hash;
+CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b);
+ANALYZE full_anti_correlated;
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_anti_correlated WHERE a>6500 AND a<7000 AND b<'10000';
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000 AND b='24';
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+CREATE TABLE no_correlation (a int, b text) USING columnar;
+INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i;
+CREATE INDEX no_correlation_btree ON no_correlation (a);
+ANALYZE no_correlation;
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM no_correlation WHERE a < 2;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM no_correlation WHERE a = 200;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SET client_min_messages TO WARNING;
+DROP SCHEMA columnar_paths CASCADE;
diff --git a/src/test/regress/expected/columnar_test_helpers.out b/src/test/regress/expected/columnar_test_helpers.out
index 2f7cad23b..6b1421de8 100644
--- a/src/test/regress/expected/columnar_test_helpers.out
+++ b/src/test/regress/expected/columnar_test_helpers.out
@@ -77,3 +77,30 @@ CREATE FUNCTION top_memory_context_usage()
 	RETURNS BIGINT AS $$
 		SELECT TopMemoryContext FROM columnar_test_helpers.columnar_store_memory_stats();
 	$$ LANGUAGE SQL VOLATILE;
+CREATE OR REPLACE FUNCTION uses_index_scan(command text)
+RETURNS BOOLEAN AS $$
+DECLARE
+  query_plan text;
+BEGIN
+  FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
+    IF query_plan ILIKE '%Index Only Scan using%' OR
+       query_plan ILIKE '%Index Scan using%'
+    THEN
+        RETURN true;
+    END IF;
+  END LOOP;
+  RETURN false;
+END; $$ language plpgsql;
+CREATE OR REPLACE FUNCTION uses_custom_scan(command text)
+RETURNS BOOLEAN AS $$
+DECLARE
+  query_plan text;
+BEGIN
+  FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
+    IF query_plan ILIKE '%Custom Scan (ColumnarScan)%'
+    THEN
+        RETURN true;
+    END IF;
+  END LOOP;
+  RETURN false;
+END; $$ language plpgsql;
diff --git a/src/test/regress/sql/columnar_paths.sql b/src/test/regress/sql/columnar_paths.sql
new file mode 100644
index 000000000..be128ba9b
--- /dev/null
+++ b/src/test/regress/sql/columnar_paths.sql
@@ -0,0 +1,203 @@
+CREATE SCHEMA columnar_paths;
+SET search_path TO columnar_paths;
+
+CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar;
+INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i;
+CREATE INDEX full_correlated_btree ON full_correlated (a);
+ANALYZE full_correlated;
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_correlated WHERE a=200;
+$$
+);
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_correlated WHERE a<0;
+$$
+);
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_correlated WHERE a>10 AND a<20;
+$$
+);
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_correlated WHERE a>1000000;
+$$
+);
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a>900000;
+$$
+);
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_correlated WHERE a<1000;
+$$
+);
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a,b FROM full_correlated WHERE a<3000;
+$$
+);
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a<9000;
+$$
+);
+
+BEGIN;
+  TRUNCATE full_correlated;
+  INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i;
+
+  -- Since we have much smaller number of rows, selectivity of below
+  -- query should be much higher. So we would choose columnar custom scan.
+  SELECT columnar_test_helpers.uses_custom_scan (
+  $$
+  SELECT a FROM full_correlated WHERE a=200;
+  $$
+  );
+ROLLBACK;
+
+-- same filter used in above, but choosing multiple columns would increase
+-- custom scan cost, so we would prefer index scan this time
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a,b,c,d FROM full_correlated WHERE a<9000;
+$$
+);
+
+-- again same filter used in above, but we would choose custom scan this
+-- time since it would read three less columns from disk
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT c FROM full_correlated WHERE a<10000;
+$$
+);
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a>200;
+$$
+);
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a=0 OR a=5;
+$$
+);
+
+DROP INDEX full_correlated_btree;
+
+CREATE INDEX full_correlated_hash ON full_correlated USING hash(a);
+ANALYZE full_correlated;
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a<10;
+$$
+);
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a>1 AND a<10;
+$$
+);
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a=0 OR a=5;
+$$
+);
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a=1000;
+$$
+);
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a,c FROM full_correlated WHERE a=1000;
+$$
+);
+
+CREATE TABLE full_anti_correlated (a int, b text) USING columnar;
+INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i;
+CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b);
+ANALYZE full_anti_correlated;
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_anti_correlated WHERE b='600';
+$$
+);
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a,b FROM full_anti_correlated WHERE b='600';
+$$
+);
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
+$$
+);
+
+DROP INDEX full_anti_correlated_hash;
+
+CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b);
+ANALYZE full_anti_correlated;
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_anti_correlated WHERE a>6500 AND a<7000 AND b<'10000';
+$$
+);
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000;
+$$
+);
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000 AND b='24';
+$$
+);
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
+$$
+);
+
+CREATE TABLE no_correlation (a int, b text) USING columnar;
+INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i;
+CREATE INDEX no_correlation_btree ON no_correlation (a);
+ANALYZE no_correlation;
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM no_correlation WHERE a < 2;
+$$
+);
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM no_correlation WHERE a = 200;
+$$
+);
+
+SET client_min_messages TO WARNING;
+DROP SCHEMA columnar_paths CASCADE;
diff --git a/src/test/regress/sql/columnar_test_helpers.sql b/src/test/regress/sql/columnar_test_helpers.sql
index 08438a75f..07b6230ab 100644
--- a/src/test/regress/sql/columnar_test_helpers.sql
+++ b/src/test/regress/sql/columnar_test_helpers.sql
@@ -84,3 +84,32 @@ CREATE FUNCTION top_memory_context_usage()
 	RETURNS BIGINT AS $$
 		SELECT TopMemoryContext FROM columnar_test_helpers.columnar_store_memory_stats();
 	$$ LANGUAGE SQL VOLATILE;
+
+CREATE OR REPLACE FUNCTION uses_index_scan(command text)
+RETURNS BOOLEAN AS $$
+DECLARE
+  query_plan text;
+BEGIN
+  FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
+    IF query_plan ILIKE '%Index Only Scan using%' OR
+       query_plan ILIKE '%Index Scan using%'
+    THEN
+        RETURN true;
+    END IF;
+  END LOOP;
+  RETURN false;
+END; $$ language plpgsql;
+
+CREATE OR REPLACE FUNCTION uses_custom_scan(command text)
+RETURNS BOOLEAN AS $$
+DECLARE
+  query_plan text;
+BEGIN
+  FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
+    IF query_plan ILIKE '%Custom Scan (ColumnarScan)%'
+    THEN
+        RETURN true;
+    END IF;
+  END LOOP;
+  RETURN false;
+END; $$ language plpgsql;