Re-cost SeqPaths as well for columnar tables

pull/5140/head
Onur Tirtir 2021-07-26 14:54:51 +03:00
parent 453ac40725
commit 93ebbb0607
8 changed files with 600 additions and 3 deletions

View File

@ -62,6 +62,8 @@ static void ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index
RangeTblEntry *rte);
static void RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPredicate);
static bool IsNotIndexPath(Path *path);
static Path * CreateColumnarSeqScanPath(PlannerInfo *root, RelOptInfo *rel,
Oid relationId);
static void RecostColumnarPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId);
static void RecostColumnarIndexPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId,
IndexPath *indexPath);
@ -69,6 +71,7 @@ static Cost ColumnarIndexScanAddStartupCost(RelOptInfo *rel, Oid relationId,
IndexPath *indexPath);
static Cost ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel,
Oid relationId, IndexPath *indexPath);
static void RecostColumnarSeqPath(RelOptInfo *rel, Oid relationId, Path *path);
static int RelationIdGetNumberOfAttributes(Oid relationId);
static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
@ -191,6 +194,22 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
/* columnar doesn't support parallel paths */
rel->partial_pathlist = NIL;
/*
* There are cases where IndexPath is normally preferable to
* SeqPath for heapAM but not for columnarAM. In such cases, an
* IndexPath could wrongly dominate a SeqPath based on the costs
* estimated by postgres earlier. For this reason, here we manually
* create a SeqPath, estimate its cost based on columnarAM and append
* it to the pathlist.
*
* Before doing that, we first re-cost all the existing paths so that
* add_path makes correct cost comparisons when appending our SeqPath.
*/
RecostColumnarPaths(root, rel, rte->relid);
Path *seqPath = CreateColumnarSeqScanPath(root, rel, rte->relid);
add_path(rel, seqPath);
if (EnableColumnarCustomScan)
{
Path *customPath = CreateColumnarScanPath(root, rel, rte);
@ -211,7 +230,6 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
* SeqPath thinking that its cost would be equal to ColumnarCustomScan.
*/
RemovePathsByPredicate(rel, IsNotIndexPath);
RecostColumnarPaths(root, rel, rte->relid);
add_path(rel, customPath);
}
}
@ -251,6 +269,23 @@ IsNotIndexPath(Path *path)
}
/*
* CreateColumnarSeqScanPath returns Path for sequential scan on columnar
* table with relationId.
*/
static Path *
CreateColumnarSeqScanPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId)
{
/* columnar doesn't support parallel scan */
int parallelWorkers = 0;
Relids requiredOuter = rel->lateral_relids;
Path *path = create_seqscan_path(root, rel, requiredOuter, parallelWorkers);
RecostColumnarSeqPath(rel, relationId, path);
return path;
}
/*
* RecostColumnarPaths re-costs paths of given RelOptInfo for
* columnar table with relationId.
@ -272,6 +307,10 @@ RecostColumnarPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId)
*/
RecostColumnarIndexPath(root, rel, relationId, (IndexPath *) path);
}
else if (path->pathtype == T_SeqScan)
{
RecostColumnarSeqPath(rel, relationId, path);
}
}
}
@ -421,6 +460,33 @@ ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel,
}
/*
* RecostColumnarSeqPath re-costs given seq path for columnar table with
* relationId.
*/
static void
RecostColumnarSeqPath(RelOptInfo *rel, Oid relationId, Path *path)
{
if (!enable_seqscan)
{
/* costs are already set to disable_cost, don't adjust them */
return;
}
path->startup_cost = 0;
/*
* Seq scan doesn't support projection pushdown, so we will read all the
* columns.
* Also note that seq scan doesn't support chunk group filtering either,
* but our costing model doesn't consider chunk group filtering anyway.
*/
int numberOfColumnsRead = RelationIdGetNumberOfAttributes(relationId);
path->total_cost = path->startup_cost +
ColumnarScanCost(rel, relationId, numberOfColumnsRead);
}
/*
* RelationIdGetNumberOfAttributes returns number of attributes that relation
* with relationId has.
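
The planner-level effect of this change can be sketched in plain SQL. The following is only an illustration (the table, index, and data are made up for this note and are not part of the diff); with columnar.enable_custom_scan off, the re-costed SeqPath now competes against the IndexPath using columnar-aware costs, so a plain Seq Scan can win where an index scan previously dominated:

-- illustrative sketch only; the chosen plan still depends on table size and statistics
CREATE TABLE example_columnar (a int, b text) USING columnar;
INSERT INTO example_columnar SELECT i, i::text FROM generate_series(1, 1000000) i;
CREATE INDEX example_columnar_btree ON example_columnar (a);
ANALYZE example_columnar;
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
EXPLAIN (COSTS OFF) SELECT a FROM example_columnar WHERE a = 0 OR a = 5;
ROLLBACK;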

View File

@ -105,9 +105,9 @@ EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent;
Workers Planned: 4
-> Partial Aggregate
-> Parallel Append
-> Seq Scan on p3 parent_4
-> Seq Scan on p0 parent_1
-> Seq Scan on p1 parent_2
-> Seq Scan on p3 parent_4
-> Parallel Seq Scan on p2 parent_3
(9 rows)
@ -123,6 +123,45 @@ SET min_parallel_table_scan_size TO DEFAULT;
SET parallel_tuple_cost TO DEFAULT;
SET max_parallel_workers TO DEFAULT;
SET max_parallel_workers_per_gather TO DEFAULT;
CREATE INDEX parent_btree ON parent (n);
ANALYZE parent;
-- will use columnar custom scan on columnar partitions but index
-- scan on heap partition
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
WHERE ts > '2020-02-20' AND n < 5;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
-> Append
-> Custom Scan (ColumnarScan) on p1 parent_1
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
-> Index Scan using p2_n_idx on p2 parent_2
Index Cond: (n < '5'::numeric)
Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone)
-> Custom Scan (ColumnarScan) on p3 parent_3
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
(9 rows)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
-- now that we disabled columnar custom scan, will use seq scan on columnar
-- partitions since index scan is more expensive than seq scan as well
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
WHERE ts > '2020-02-20' AND n < 5;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
-> Append
-> Seq Scan on p1 parent_1
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
-> Index Scan using p2_n_idx on p2 parent_2
Index Cond: (n < '5'::numeric)
Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone)
-> Seq Scan on p3 parent_3
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
(9 rows)
ROLLBACK;
DROP TABLE parent;
--
-- Test inheritance

View File

@ -105,9 +105,9 @@ EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent;
Workers Planned: 4
-> Partial Aggregate
-> Parallel Append
-> Seq Scan on p3
-> Seq Scan on p0
-> Seq Scan on p1
-> Seq Scan on p3
-> Parallel Seq Scan on p2
(9 rows)
@ -123,6 +123,45 @@ SET min_parallel_table_scan_size TO DEFAULT;
SET parallel_tuple_cost TO DEFAULT;
SET max_parallel_workers TO DEFAULT;
SET max_parallel_workers_per_gather TO DEFAULT;
CREATE INDEX parent_btree ON parent (n);
ANALYZE parent;
-- will use columnar custom scan on columnar partitions but index
-- scan on heap partition
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
WHERE ts > '2020-02-20' AND n < 5;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
-> Append
-> Custom Scan (ColumnarScan) on p1
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
-> Index Scan using p2_n_idx on p2
Index Cond: (n < '5'::numeric)
Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone)
-> Custom Scan (ColumnarScan) on p3
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
(9 rows)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
-- now that we disabled columnar custom scan, will use seq scan on columnar
-- partitions since index scan is more expensive than seq scan as well
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
WHERE ts > '2020-02-20' AND n < 5;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
-> Append
-> Seq Scan on p1
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
-> Index Scan using p2_n_idx on p2
Index Cond: (n < '5'::numeric)
Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone)
-> Seq Scan on p3
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
(9 rows)
ROLLBACK;
DROP TABLE parent;
--
-- Test inheritance

View File

@ -54,6 +54,19 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_correlated WHERE a>900000;
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_correlated WHERE a<1000;
@ -84,6 +97,19 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_correlated WHERE a<9000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
BEGIN;
TRUNCATE full_correlated;
INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i;
@ -99,6 +125,17 @@ BEGIN;
t
(1 row)
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_correlated WHERE a=200;
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
-- same filter as used above, but choosing multiple columns would increase
-- custom scan cost, so we would prefer index scan this time
@ -112,6 +149,19 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a,b,c,d FROM full_correlated WHERE a<9000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
-- again the same filter as used above, but we would choose custom scan this
-- time since it would read three fewer columns from disk
SELECT columnar_test_helpers.uses_custom_scan (
@ -124,6 +174,19 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT c FROM full_correlated WHERE a<10000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT a FROM full_correlated WHERE a>200;
@ -144,6 +207,162 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_correlated WHERE a=0 OR a=5;
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
--
-- some tests with joins / subqueries etc.
--
CREATE TABLE heap_table (a int, b text, c int, d int);
INSERT INTO heap_table SELECT i, i::text, (i+1000)*7, (i+900)*5 FROM generate_series(1, 1000000) i;
CREATE INDEX heap_table_btree ON heap_table (a);
ANALYZE heap_table;
EXPLAIN (COSTS OFF)
WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1)
SELECT SUM(ht_1.a), MIN(ct_1.c)
FROM heap_table AS ht_1
LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d
LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c
JOIN cte ON cte.d=ht_1.a
WHERE ct_1.a < 3000;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
CTE cte
-> Custom Scan (ColumnarScan) on full_correlated
Filter: (a > 1)
-> Nested Loop Left Join
-> Hash Join
Hash Cond: (cte.d = ht_1.a)
-> CTE Scan on cte
-> Hash
-> Nested Loop
-> Index Scan using full_correlated_btree on full_correlated ct_1
Index Cond: (a < 3000)
-> Index Only Scan using heap_table_btree on heap_table ht_1
Index Cond: (a = ct_1.d)
-> Index Only Scan using heap_table_btree on heap_table ht_2
Index Cond: (a = ct_1.c)
(16 rows)
-- same query but columnar custom scan is disabled
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
EXPLAIN (COSTS OFF)
WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1)
SELECT SUM(ht_1.a), MIN(ct_1.c)
FROM heap_table AS ht_1
LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d
LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c
JOIN cte ON cte.d=ht_1.a
WHERE ct_1.a < 3000;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
CTE cte
-> Seq Scan on full_correlated
Filter: (a > 1)
-> Nested Loop Left Join
-> Hash Join
Hash Cond: (cte.d = ht_1.a)
-> CTE Scan on cte
-> Hash
-> Nested Loop
-> Index Scan using full_correlated_btree on full_correlated ct_1
Index Cond: (a < 3000)
-> Index Only Scan using heap_table_btree on heap_table ht_1
Index Cond: (a = ct_1.d)
-> Index Only Scan using heap_table_btree on heap_table ht_2
Index Cond: (a = ct_1.c)
(16 rows)
ROLLBACK;
-- use custom scan
EXPLAIN (COSTS OFF) WITH w AS (SELECT * FROM full_correlated)
SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
WHERE w2.a = 123;
QUERY PLAN
---------------------------------------------------------------------
Merge Join
Merge Cond: (w2.d = w1.a)
CTE w
-> Custom Scan (ColumnarScan) on full_correlated
-> Sort
Sort Key: w2.d
-> CTE Scan on w w2
Filter: (a = 123)
-> Materialize
-> Sort
Sort Key: w1.a
-> CTE Scan on w w1
(12 rows)
-- use index
EXPLAIN (COSTS OFF) WITH w AS NOT MATERIALIZED (SELECT * FROM full_correlated)
SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
WHERE w2.a = 123;
QUERY PLAN
---------------------------------------------------------------------
Nested Loop
-> Index Scan using full_correlated_btree on full_correlated full_correlated_1
Index Cond: (a = 123)
-> Index Scan using full_correlated_btree on full_correlated
Index Cond: (a = full_correlated_1.d)
(5 rows)
EXPLAIN (COSTS OFF) SELECT sub_1.b, sub_2.a, sub_3.avg
FROM
(SELECT b FROM full_correlated WHERE (a > 2) GROUP BY b HAVING count(DISTINCT a) > 0 ORDER BY 1 DESC LIMIT 5) AS sub_1,
(SELECT a FROM full_correlated WHERE (a > 10) GROUP BY a HAVING count(DISTINCT a) >= 1 ORDER BY 1 DESC LIMIT 3) AS sub_2,
(SELECT avg(a) AS AVG FROM full_correlated WHERE (a > 2) GROUP BY a HAVING sum(a) > 10 ORDER BY (sum(d) - avg(a) - COALESCE(array_upper(ARRAY[max(a)],1) * 5, 0)) DESC LIMIT 3) AS sub_3
WHERE sub_2.a < sub_1.b::integer
ORDER BY 3 DESC, 2 DESC, 1 DESC
LIMIT 100;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Sort
Sort Key: sub_3.avg DESC, full_correlated_1.a DESC, full_correlated.b DESC
-> Nested Loop
-> Nested Loop
Join Filter: (full_correlated_1.a < (full_correlated.b)::integer)
-> Limit
-> GroupAggregate
Group Key: full_correlated.b
Filter: (count(DISTINCT full_correlated.a) > 0)
-> Sort
Sort Key: full_correlated.b DESC
-> Custom Scan (ColumnarScan) on full_correlated
Filter: (a > 2)
-> Materialize
-> Limit
-> GroupAggregate
Group Key: full_correlated_1.a
Filter: (count(DISTINCT full_correlated_1.a) >= 1)
-> Index Only Scan Backward using full_correlated_btree on full_correlated full_correlated_1
Index Cond: (a > 10)
-> Materialize
-> Subquery Scan on sub_3
-> Limit
-> Sort
Sort Key: ((((sum(full_correlated_2.d))::numeric - avg(full_correlated_2.a)) - (COALESCE((array_upper(ARRAY[max(full_correlated_2.a)], 1) * 5), 0))::numeric)) DESC
-> GroupAggregate
Group Key: full_correlated_2.a
Filter: (sum(full_correlated_2.a) > 10)
-> Index Scan using full_correlated_btree on full_correlated full_correlated_2
Index Cond: (a > 2)
(31 rows)
DROP INDEX full_correlated_btree;
CREATE INDEX full_correlated_hash ON full_correlated USING hash(a);
ANALYZE full_correlated;
@ -197,6 +416,19 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a,c FROM full_correlated WHERE a=1000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
CREATE TABLE full_anti_correlated (a int, b text) USING columnar;
INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i;
CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b);
@ -231,6 +463,19 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
DROP INDEX full_anti_correlated_hash;
CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b);
ANALYZE full_anti_correlated;
@ -274,6 +519,19 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
CREATE TABLE no_correlation (a int, b text) USING columnar;
INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i;
CREATE INDEX no_correlation_btree ON no_correlation (a);
@ -298,5 +556,18 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM no_correlation WHERE a = 200;
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
SET client_min_messages TO WARNING;
DROP SCHEMA columnar_paths CASCADE;

View File

@ -104,3 +104,16 @@ BEGIN
END LOOP;
RETURN false;
END; $$ language plpgsql;
CREATE OR REPLACE FUNCTION uses_seq_scan(command text)
RETURNS BOOLEAN AS $$
DECLARE
query_plan text;
BEGIN
FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
IF query_plan ILIKE '%Seq Scan on %'
THEN
RETURN true;
END IF;
END LOOP;
RETURN false;
END; $$ language plpgsql;

View File

@ -52,6 +52,23 @@ SET parallel_tuple_cost TO DEFAULT;
SET max_parallel_workers TO DEFAULT;
SET max_parallel_workers_per_gather TO DEFAULT;
CREATE INDEX parent_btree ON parent (n);
ANALYZE parent;
-- will use columnar custom scan on columnar partitions but index
-- scan on heap partition
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
WHERE ts > '2020-02-20' AND n < 5;
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
-- now that we disabled columnar custom scan, will use seq scan on columnar
-- partitions since index scan is more expensive than seq scan as well
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
WHERE ts > '2020-02-20' AND n < 5;
ROLLBACK;
DROP TABLE parent;
--

View File

@ -36,6 +36,15 @@ SELECT a FROM full_correlated WHERE a>900000;
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_correlated WHERE a>900000;
$$
);
ROLLBACK;
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_correlated WHERE a<1000;
@ -54,6 +63,15 @@ SELECT a FROM full_correlated WHERE a<9000;
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_correlated WHERE a<9000;
$$
);
ROLLBACK;
BEGIN;
TRUNCATE full_correlated;
INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i;
@ -65,6 +83,13 @@ BEGIN;
SELECT a FROM full_correlated WHERE a=200;
$$
);
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_correlated WHERE a=200;
$$
);
ROLLBACK;
-- same filter as used above, but choosing multiple columns would increase
@ -75,6 +100,15 @@ SELECT a,b,c,d FROM full_correlated WHERE a<9000;
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a,b,c,d FROM full_correlated WHERE a<9000;
$$
);
ROLLBACK;
-- again the same filter as used above, but we would choose custom scan this
-- time since it would read three fewer columns from disk
SELECT columnar_test_helpers.uses_custom_scan (
@ -83,6 +117,15 @@ SELECT c FROM full_correlated WHERE a<10000;
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT c FROM full_correlated WHERE a<10000;
$$
);
ROLLBACK;
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT a FROM full_correlated WHERE a>200;
@ -95,6 +138,65 @@ SELECT a FROM full_correlated WHERE a=0 OR a=5;
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_correlated WHERE a=0 OR a=5;
$$
);
ROLLBACK;
--
-- some tests with joins / subqueries etc.
--
CREATE TABLE heap_table (a int, b text, c int, d int);
INSERT INTO heap_table SELECT i, i::text, (i+1000)*7, (i+900)*5 FROM generate_series(1, 1000000) i;
CREATE INDEX heap_table_btree ON heap_table (a);
ANALYZE heap_table;
EXPLAIN (COSTS OFF)
WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1)
SELECT SUM(ht_1.a), MIN(ct_1.c)
FROM heap_table AS ht_1
LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d
LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c
JOIN cte ON cte.d=ht_1.a
WHERE ct_1.a < 3000;
-- same query but columnar custom scan is disabled
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
EXPLAIN (COSTS OFF)
WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1)
SELECT SUM(ht_1.a), MIN(ct_1.c)
FROM heap_table AS ht_1
LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d
LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c
JOIN cte ON cte.d=ht_1.a
WHERE ct_1.a < 3000;
ROLLBACK;
-- use custom scan
EXPLAIN (COSTS OFF) WITH w AS (SELECT * FROM full_correlated)
SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
WHERE w2.a = 123;
-- use index
EXPLAIN (COSTS OFF) WITH w AS NOT MATERIALIZED (SELECT * FROM full_correlated)
SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
WHERE w2.a = 123;
EXPLAIN (COSTS OFF) SELECT sub_1.b, sub_2.a, sub_3.avg
FROM
(SELECT b FROM full_correlated WHERE (a > 2) GROUP BY b HAVING count(DISTINCT a) > 0 ORDER BY 1 DESC LIMIT 5) AS sub_1,
(SELECT a FROM full_correlated WHERE (a > 10) GROUP BY a HAVING count(DISTINCT a) >= 1 ORDER BY 1 DESC LIMIT 3) AS sub_2,
(SELECT avg(a) AS AVG FROM full_correlated WHERE (a > 2) GROUP BY a HAVING sum(a) > 10 ORDER BY (sum(d) - avg(a) - COALESCE(array_upper(ARRAY[max(a)],1) * 5, 0)) DESC LIMIT 3) AS sub_3
WHERE sub_2.a < sub_1.b::integer
ORDER BY 3 DESC, 2 DESC, 1 DESC
LIMIT 100;
DROP INDEX full_correlated_btree;
CREATE INDEX full_correlated_hash ON full_correlated USING hash(a);
@ -130,6 +232,15 @@ SELECT a,c FROM full_correlated WHERE a=1000;
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a,c FROM full_correlated WHERE a=1000;
$$
);
ROLLBACK;
CREATE TABLE full_anti_correlated (a int, b text) USING columnar;
INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i;
CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b);
@ -153,6 +264,15 @@ SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
$$
);
ROLLBACK;
DROP INDEX full_anti_correlated_hash;
CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b);
@ -182,6 +302,15 @@ SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
$$
);
ROLLBACK;
CREATE TABLE no_correlation (a int, b text) USING columnar;
INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i;
CREATE INDEX no_correlation_btree ON no_correlation (a);
@ -199,5 +328,14 @@ SELECT a FROM no_correlation WHERE a = 200;
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM no_correlation WHERE a = 200;
$$
);
ROLLBACK;
SET client_min_messages TO WARNING;
DROP SCHEMA columnar_paths CASCADE;

View File

@ -113,3 +113,17 @@ BEGIN
END LOOP;
RETURN false;
END; $$ language plpgsql;
CREATE OR REPLACE FUNCTION uses_seq_scan(command text)
RETURNS BOOLEAN AS $$
DECLARE
query_plan text;
BEGIN
FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
IF query_plan ILIKE '%Seq Scan on %'
THEN
RETURN true;
END IF;
END LOOP;
RETURN false;
END; $$ language plpgsql;