Re-cost columnar table index paths

2021-06-24 20:40:54 +03:00 · 2021-06-24 20:40:54 +03:00 · 297f59a70e
parent 8adcf2096b
commit 297f59a70e
6 changed files with 755 additions and 5 deletions
--- a/src/backend/columnar/columnar_customscan.c
+++ b/src/backend/columnar/columnar_customscan.c
@ -14,6 +14,7 @@
 #include "postgres.h"
 #include "access/amapi.h"
 #include "access/skey.h"
 #include "nodes/extensible.h"
 #include "nodes/pg_list.h"
@ -25,6 +26,7 @@
 #include "utils/relcache.h"
 #include "utils/spccache.h"
 #include "columnar/columnar.h"
 #include "columnar/columnar_customscan.h"
 #include "columnar/columnar_metadata.h"
 #include "columnar/columnar_tableam.h"
@ -59,6 +61,14 @@ static void ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index
 									   RangeTblEntry *rte);
 static void RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPredicate);
 static bool IsNotIndexPath(Path *path);
 static void RecostColumnarIndexPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId);
 static void RecostColumnarIndexPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId,
 									IndexPath *indexPath);
 static Cost ColumnarIndexScanAddStartupCost(RelOptInfo *rel, Oid relationId,
 											IndexPath *indexPath);
 static Cost ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel,
 										  Oid relationId, IndexPath *indexPath);
 static int RelationIdGetNumberOfAttributes(Oid relationId);
 static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel,
 									 RangeTblEntry *rte);
 static Cost ColumnarScanCost(RelOptInfo *rel, Oid relationId, int numberOfColumnsRead);
@ -187,12 +197,13 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
 			ereport(DEBUG1, (errmsg("pathlist hook for columnar table am")));
 			/*
-			 * TODO: Since we don't have a proper costing model for
+			 * When columnar custom scan is enabled (columnar.enable_custom_scan),
-			 * ColumnarCustomScan, we remove other paths to force postgres
+			 * we only consider ColumnarScanPath's & IndexPath's. For this reason,
-			 * using ColumnarCustomScan. Note that we still keep index paths
+			 * we remove other paths and re-estimate IndexPath costs to make accurate
-			 * since they still might be useful.
+			 * comparisons between them.
 			 */
 			RemovePathsByPredicate(rel, IsNotIndexPath);
 			RecostColumnarIndexPaths(root, rel, rte->relid);
 			add_path(rel, customPath);
 		}
 	}
@ -232,6 +243,184 @@ IsNotIndexPath(Path *path)
 }
 /*
 * RecostColumnarIndexPaths re-costs index paths of given RelOptInfo for
 * columnar table with relationId.
 */
 static void
 RecostColumnarIndexPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId)
 {
 	Path *path = NULL;
 	foreach_ptr(path, rel->pathlist)
 	{
 		/*
 		 * Since we don't provide implementations for scan_bitmap_next_block
 		 * & scan_bitmap_next_tuple, postgres doesn't generate bitmap index
 		 * scan paths for columnar tables already (see related comments in
 		 * TableAmRoutine). For this reason, we only consider IndexPath's
 		 * here.
 		 */
 		if (IsA(path, IndexPath))
 		{
 			RecostColumnarIndexPath(root, rel, relationId, (IndexPath *) path);
 		}
 	}
 }
 /*
 * RecostColumnarIndexPath re-costs given index path for columnar table with
 * relationId.
 */
 static void
 RecostColumnarIndexPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId,
 						IndexPath *indexPath)
 {
 	ereport(DEBUG4, (errmsg("columnar table index scan costs estimated by "
 							"indexAM: startup cost = %.10f, total cost = "
 							"%.10f", indexPath->path.startup_cost,
 							indexPath->path.total_cost)));
 	/*
 	 * We estimate the cost for columnar table read during index scan. Also,
 	 * instead of overwriting startup & total costs, we "add" ours to the
 	 * costs estimated by indexAM since we should consider index traversal
 	 * related costs too.
 	 */
 	Cost indexAMStartupCost = indexPath->path.startup_cost;
 	Cost indexAMScanCost = indexPath->path.total_cost - indexAMStartupCost;
 	Cost columnarIndexScanStartupCost = ColumnarIndexScanAddStartupCost(rel, relationId,
 																		indexPath);
 	Cost columnarIndexScanCost = ColumnarIndexScanAddTotalCost(root, rel, relationId,
 															   indexPath);
 	indexPath->path.startup_cost = indexAMStartupCost + columnarIndexScanStartupCost;
 	indexPath->path.total_cost = indexPath->path.startup_cost +
 								 indexAMScanCost + columnarIndexScanCost;
 	ereport(DEBUG4, (errmsg("columnar table index scan costs re-estimated "
 							"by columnarAM (including indexAM costs): "
 							"startup cost = %.10f, total cost = %.10f",
 							indexPath->path.startup_cost,
 							indexPath->path.total_cost)));
 }
 /*
 * ColumnarIndexScanAddStartupCost returns additional startup cost estimated
 * for index scan described by IndexPath for columnar table with relationId.
 */
 static Cost
 ColumnarIndexScanAddStartupCost(RelOptInfo *rel, Oid relationId, IndexPath *indexPath)
 {
 	int numberOfColumnsRead = RelationIdGetNumberOfAttributes(relationId);
 	/* we would at least read one stripe */
 	return ColumnarPerStripeScanCost(rel, relationId, numberOfColumnsRead);
 }
 /*
 * ColumnarIndexScanAddTotalCost returns additional cost estimated for
 * index scan described by IndexPath for columnar table with relationId.
 */
 static Cost
 ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel,
 							  Oid relationId, IndexPath *indexPath)
 {
 	int numberOfColumnsRead = RelationIdGetNumberOfAttributes(relationId);
 	Cost perStripeCost = ColumnarPerStripeScanCost(rel, relationId, numberOfColumnsRead);
 	/*
 	 * We don't need to pass correct loop count to amcostestimate since we
 	 * will only use index correlation & index selectivity, and loop count
 	 * doesn't have any effect on those two.
 	 */
 	double fakeLoopCount = 1;
 	Cost fakeIndexStartupCost;
 	Cost fakeIndexTotalCost;
 	double fakeIndexPages;
 	Selectivity indexSelectivity;
 	double indexCorrelation;
 	amcostestimate_function amcostestimate = indexPath->indexinfo->amcostestimate;
 	amcostestimate(root, indexPath, fakeLoopCount, &fakeIndexStartupCost,
 				   &fakeIndexTotalCost, &indexSelectivity,
 				   &indexCorrelation, &fakeIndexPages);
 	Relation relation = RelationIdGetRelation(relationId);
 	uint64 rowCount = ColumnarTableRowCount(relation);
 	RelationClose(relation);
 	double estimatedRows = rowCount * indexSelectivity;
 	/*
 	 * In the worst case (i.e no correlation between the column & the index),
 	 * we need to read a different stripe for each row.
 	 */
 	double maxStripeReadCount = estimatedRows;
 	/*
 	 * In the best case (i.e the column is fully correlated with the index),
 	 * we wouldn't read the same stripe again and again thanks
 	 * to locality.
 	 */
 	double avgStripeRowCount =
 		rowCount / (double) ColumnarTableStripeCount(relationId);
 	double minStripeReadCount = estimatedRows / avgStripeRowCount;
 	/*
 	 * While being close to 0 means low correlation, being close to -1 or +1
 	 * means high correlation. For index scans on columnar tables, it doesn't
 	 * matter if the column and the index are "correlated" (+1) or
 	 * "anti-correlated" (-1) since both help us avoiding from reading the
 	 * same stripe again and again.
 	 */
 	double absIndexCorrelation = Abs(indexCorrelation);
 	/*
 	 * To estimate the number of stripes that we need to read, we do linear
 	 * interpolation between minStripeReadCount & maxStripeReadCount. To do
 	 * that, we use complement to 1 of absolute correlation, where being
 	 * close to 0 means high correlation and being close to 1 means low
 	 * correlation.
 	 * In practice, we only want to do an index scan when absIndexCorrelation
 	 * is 1 (or extremely close to it), or when the absolute number of tuples
 	 * returned is very small. Other cases will have a prohibitive cost.
 	 */
 	double complementIndexCorrelation = 1 - absIndexCorrelation;
 	double estimatedStripeReadCount =
 		minStripeReadCount + complementIndexCorrelation * (maxStripeReadCount -
 														   minStripeReadCount);
 	Cost scanCost = perStripeCost * estimatedStripeReadCount;
 	ereport(DEBUG4, (errmsg("re-costing index scan for columnar table: "
 							"selectivity = %.10f, complement abs "
 							"correlation = %.10f, per stripe cost = %.10f, "
 							"estimated stripe read count = %.10f, "
 							"total additional cost = %.10f",
 							indexSelectivity, complementIndexCorrelation,
 							perStripeCost, estimatedStripeReadCount,
 							scanCost)));
 	return scanCost;
 }
 /*
 * RelationIdGetNumberOfAttributes returns number of attributes that relation
 * with relationId has.
 */
 static int
 RelationIdGetNumberOfAttributes(Oid relationId)
 {
 	Relation relation = RelationIdGetRelation(relationId);
 	int nattrs = relation->rd_att->natts;
 	RelationClose(relation);
 	return nattrs;
 }
 static Path *
 CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 {
--- a/src/test/regress/columnar_schedule
+++ b/src/test/regress/columnar_schedule
@ -9,7 +9,7 @@ test: columnar_analyze
 test: columnar_data_types
 test: columnar_drop
 test: columnar_indexes
-test: columnar_fallback_scan
+test: columnar_fallback_scan columnar_paths
 test: columnar_partitioning
 test: columnar_permissions
 test: columnar_empty
--- a/src/test/regress/expected/columnar_paths.out
+++ b/src/test/regress/expected/columnar_paths.out
@ -0,0 +1,302 @@
 CREATE SCHEMA columnar_paths;
 SET search_path TO columnar_paths;
 CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar;
 INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i;
 CREATE INDEX full_correlated_btree ON full_correlated (a);
 ANALYZE full_correlated;
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a FROM full_correlated WHERE a=200;
 $$
 );
 uses_index_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a FROM full_correlated WHERE a<0;
 $$
 );
 uses_index_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a FROM full_correlated WHERE a>10 AND a<20;
 $$
 );
 uses_index_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a FROM full_correlated WHERE a>1000000;
 $$
 );
 uses_index_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_correlated WHERE a>900000;
 $$
 );
 uses_custom_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a FROM full_correlated WHERE a<1000;
 $$
 );
 uses_index_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a,b FROM full_correlated WHERE a<3000;
 $$
 );
 uses_index_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_correlated WHERE a<9000;
 $$
 );
 uses_custom_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 BEGIN;
  TRUNCATE full_correlated;
  INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i;
  -- Since we have much smaller number of rows, selectivity of below
  -- query should be much higher. So we would choose columnar custom scan.
  SELECT columnar_test_helpers.uses_custom_scan (
  $$
  SELECT a FROM full_correlated WHERE a=200;
  $$
  );
 uses_custom_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 ROLLBACK;
 -- same filter used in above, but choosing multiple columns would increase
 -- custom scan cost, so we would prefer index scan this time
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a,b,c,d FROM full_correlated WHERE a<9000;
 $$
 );
 uses_index_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 -- again same filter used in above, but we would choose custom scan this
 -- time since it would read three less columns from disk
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT c FROM full_correlated WHERE a<10000;
 $$
 );
 uses_custom_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_correlated WHERE a>200;
 $$
 );
 uses_custom_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_correlated WHERE a=0 OR a=5;
 $$
 );
 uses_custom_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 DROP INDEX full_correlated_btree;
 CREATE INDEX full_correlated_hash ON full_correlated USING hash(a);
 ANALYZE full_correlated;
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_correlated WHERE a<10;
 $$
 );
 uses_custom_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_correlated WHERE a>1 AND a<10;
 $$
 );
 uses_custom_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_correlated WHERE a=0 OR a=5;
 $$
 );
 uses_custom_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_correlated WHERE a=1000;
 $$
 );
 uses_custom_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a,c FROM full_correlated WHERE a=1000;
 $$
 );
 uses_index_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 CREATE TABLE full_anti_correlated (a int, b text) USING columnar;
 INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i;
 CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b);
 ANALYZE full_anti_correlated;
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a FROM full_anti_correlated WHERE b='600';
 $$
 );
 uses_index_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a,b FROM full_anti_correlated WHERE b='600';
 $$
 );
 uses_index_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
 $$
 );
 uses_custom_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 DROP INDEX full_anti_correlated_hash;
 CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b);
 ANALYZE full_anti_correlated;
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a FROM full_anti_correlated WHERE a>6500 AND a<7000 AND b<'10000';
 $$
 );
 uses_index_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000;
 $$
 );
 uses_custom_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000 AND b='24';
 $$
 );
 uses_index_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
 $$
 );
 uses_custom_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 CREATE TABLE no_correlation (a int, b text) USING columnar;
 INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i;
 CREATE INDEX no_correlation_btree ON no_correlation (a);
 ANALYZE no_correlation;
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM no_correlation WHERE a < 2;
 $$
 );
 uses_custom_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM no_correlation WHERE a = 200;
 $$
 );
 uses_custom_scan
 ---------------------------------------------------------------------
 t
 (1 row)
 SET client_min_messages TO WARNING;
 DROP SCHEMA columnar_paths CASCADE;
--- a/src/test/regress/expected/columnar_test_helpers.out
+++ b/src/test/regress/expected/columnar_test_helpers.out
@ -77,3 +77,30 @@ CREATE FUNCTION top_memory_context_usage()
 	RETURNS BIGINT AS $$
 		SELECT TopMemoryContext FROM columnar_test_helpers.columnar_store_memory_stats();
 	$$ LANGUAGE SQL VOLATILE;
 CREATE OR REPLACE FUNCTION uses_index_scan(command text)
 RETURNS BOOLEAN AS $$
 DECLARE
  query_plan text;
 BEGIN
  FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
    IF query_plan ILIKE '%Index Only Scan using%' OR
       query_plan ILIKE '%Index Scan using%'
    THEN
        RETURN true;
    END IF;
  END LOOP;
  RETURN false;
 END; $$ language plpgsql;
 CREATE OR REPLACE FUNCTION uses_custom_scan(command text)
 RETURNS BOOLEAN AS $$
 DECLARE
  query_plan text;
 BEGIN
  FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
    IF query_plan ILIKE '%Custom Scan (ColumnarScan)%'
    THEN
        RETURN true;
    END IF;
  END LOOP;
  RETURN false;
 END; $$ language plpgsql;
--- a/src/test/regress/sql/columnar_paths.sql
+++ b/src/test/regress/sql/columnar_paths.sql
@ -0,0 +1,203 @@
 CREATE SCHEMA columnar_paths;
 SET search_path TO columnar_paths;
 CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar;
 INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i;
 CREATE INDEX full_correlated_btree ON full_correlated (a);
 ANALYZE full_correlated;
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a FROM full_correlated WHERE a=200;
 $$
 );
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a FROM full_correlated WHERE a<0;
 $$
 );
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a FROM full_correlated WHERE a>10 AND a<20;
 $$
 );
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a FROM full_correlated WHERE a>1000000;
 $$
 );
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_correlated WHERE a>900000;
 $$
 );
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a FROM full_correlated WHERE a<1000;
 $$
 );
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a,b FROM full_correlated WHERE a<3000;
 $$
 );
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_correlated WHERE a<9000;
 $$
 );
 BEGIN;
  TRUNCATE full_correlated;
  INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i;
  -- Since we have much smaller number of rows, selectivity of below
  -- query should be much higher. So we would choose columnar custom scan.
  SELECT columnar_test_helpers.uses_custom_scan (
  $$
  SELECT a FROM full_correlated WHERE a=200;
  $$
  );
 ROLLBACK;
 -- same filter used in above, but choosing multiple columns would increase
 -- custom scan cost, so we would prefer index scan this time
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a,b,c,d FROM full_correlated WHERE a<9000;
 $$
 );
 -- again same filter used in above, but we would choose custom scan this
 -- time since it would read three less columns from disk
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT c FROM full_correlated WHERE a<10000;
 $$
 );
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_correlated WHERE a>200;
 $$
 );
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_correlated WHERE a=0 OR a=5;
 $$
 );
 DROP INDEX full_correlated_btree;
 CREATE INDEX full_correlated_hash ON full_correlated USING hash(a);
 ANALYZE full_correlated;
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_correlated WHERE a<10;
 $$
 );
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_correlated WHERE a>1 AND a<10;
 $$
 );
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_correlated WHERE a=0 OR a=5;
 $$
 );
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_correlated WHERE a=1000;
 $$
 );
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a,c FROM full_correlated WHERE a=1000;
 $$
 );
 CREATE TABLE full_anti_correlated (a int, b text) USING columnar;
 INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i;
 CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b);
 ANALYZE full_anti_correlated;
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a FROM full_anti_correlated WHERE b='600';
 $$
 );
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a,b FROM full_anti_correlated WHERE b='600';
 $$
 );
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
 $$
 );
 DROP INDEX full_anti_correlated_hash;
 CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b);
 ANALYZE full_anti_correlated;
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a FROM full_anti_correlated WHERE a>6500 AND a<7000 AND b<'10000';
 $$
 );
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000;
 $$
 );
 SELECT columnar_test_helpers.uses_index_scan (
 $$
 SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000 AND b='24';
 $$
 );
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
 $$
 );
 CREATE TABLE no_correlation (a int, b text) USING columnar;
 INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i;
 CREATE INDEX no_correlation_btree ON no_correlation (a);
 ANALYZE no_correlation;
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM no_correlation WHERE a < 2;
 $$
 );
 SELECT columnar_test_helpers.uses_custom_scan (
 $$
 SELECT a FROM no_correlation WHERE a = 200;
 $$
 );
 SET client_min_messages TO WARNING;
 DROP SCHEMA columnar_paths CASCADE;
--- a/src/test/regress/sql/columnar_test_helpers.sql
+++ b/src/test/regress/sql/columnar_test_helpers.sql
@ -84,3 +84,32 @@ CREATE FUNCTION top_memory_context_usage()
 	RETURNS BIGINT AS $$
 		SELECT TopMemoryContext FROM columnar_test_helpers.columnar_store_memory_stats();
 	$$ LANGUAGE SQL VOLATILE;
 CREATE OR REPLACE FUNCTION uses_index_scan(command text)
 RETURNS BOOLEAN AS $$
 DECLARE
  query_plan text;
 BEGIN
  FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
    IF query_plan ILIKE '%Index Only Scan using%' OR
       query_plan ILIKE '%Index Scan using%'
    THEN
        RETURN true;
    END IF;
  END LOOP;
  RETURN false;
 END; $$ language plpgsql;
 CREATE OR REPLACE FUNCTION uses_custom_scan(command text)
 RETURNS BOOLEAN AS $$
 DECLARE
  query_plan text;
 BEGIN
  FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
    IF query_plan ILIKE '%Custom Scan (ColumnarScan)%'
    THEN
        RETURN true;
    END IF;
  END LOOP;
  RETURN false;
 END; $$ language plpgsql;