PG17 regress sanity (#7653): Fix test diffs in columnar schedule

Add an alternative file for columnar_paths. The diff is because of PG17 commit: f7816aec23 which enabled the planner to use statistics to estimate the cardinality of CTE scans. Add a helper function for columnnar_chunk_filtering. The diff is due to: https://github.com/postgres/postgres/commit/fd0398fc which changed how subquery outputs appear in expressions.
2024-11-25 14:41:07 +00:00 · 2024-11-25 14:41:07 +00:00 · 1d11d3d0c7
parent 5f479d5f47
commit 1d11d3d0c7
8 changed files with 697 additions and 15 deletions
--- a/src/test/regress/expected/columnar_chunk_filtering.out
+++ b/src/test/regress/expected/columnar_chunk_filtering.out
@ -977,6 +977,7 @@ DETAIL:  unparameterized; 1 clauses pushed down
 (1 row)

 SET hash_mem_multiplier = 1.0;
+SELECT public.explain_with_pg16_subplan_format($Q$
 EXPLAIN (analyze on, costs off, timing off, summary off)
 SELECT sum(a) FROM pushdown_test where
 (
@ -989,13 +990,18 @@ SELECT sum(a) FROM pushdown_test where
 )
 or
 (a > 200000-2010);
+$Q$) as "QUERY PLAN";
 NOTICE:  columnar planner: adding CustomScan path for pushdown_test
 DETAIL:  unparameterized; 0 clauses pushed down
+CONTEXT:  PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement
 NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
 HINT:  Var must only reference this rel, and Expr must not reference this rel
+CONTEXT:  PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement
 NOTICE:  columnar planner: cannot push down clause: must not contain a subplan
+CONTEXT:  PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement
 NOTICE:  columnar planner: adding CustomScan path for pushdown_test
 DETAIL:  unparameterized; 1 clauses pushed down
+CONTEXT:  PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement
                                                                  QUERY PLAN
 ---------------------------------------------------------------------
 Aggregate (actual rows=1 loops=1)
@ -1092,14 +1098,14 @@ BEGIN;
 END;
 EXPLAIN (analyze on, costs off, timing off, summary off)
 SELECT id FROM pushdown_test WHERE country IN ('USA', 'BR', 'ZW');
-                                QUERY PLAN
+                               QUERY PLAN
 ---------------------------------------------------------------------
 Custom Scan (ColumnarScan) on pushdown_test (actual rows=3 loops=1)
-    Filter: (country = ANY ('{USA,BR,ZW}'::text[]))
-    Rows Removed by Filter: 1
-    Columnar Projected Columns: id, country
-    Columnar Chunk Group Filters: (country = ANY ('{USA,BR,ZW}'::text[]))
-    Columnar Chunk Groups Removed by Filter: 2
+   Filter: (country = ANY ('{USA,BR,ZW}'::text[]))
+   Rows Removed by Filter: 1
+   Columnar Projected Columns: id, country
+   Columnar Chunk Group Filters: (country = ANY ('{USA,BR,ZW}'::text[]))
+   Columnar Chunk Groups Removed by Filter: 2
 (6 rows)

 SELECT id FROM pushdown_test WHERE country IN ('USA', 'BR', 'ZW');
--- a/src/test/regress/expected/columnar_chunk_filtering_0.out
+++ b/src/test/regress/expected/columnar_chunk_filtering_0.out
@ -977,6 +977,7 @@ DETAIL:  unparameterized; 1 clauses pushed down
 (1 row)

 SET hash_mem_multiplier = 1.0;
+SELECT public.explain_with_pg16_subplan_format($Q$
 EXPLAIN (analyze on, costs off, timing off, summary off)
 SELECT sum(a) FROM pushdown_test where
 (
@ -989,13 +990,18 @@ SELECT sum(a) FROM pushdown_test where
 )
 or
 (a > 200000-2010);
+$Q$) as "QUERY PLAN";
 NOTICE:  columnar planner: adding CustomScan path for pushdown_test
 DETAIL:  unparameterized; 0 clauses pushed down
+CONTEXT:  PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement
 NOTICE:  columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
 HINT:  Var must only reference this rel, and Expr must not reference this rel
+CONTEXT:  PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement
 NOTICE:  columnar planner: cannot push down clause: must not contain a subplan
+CONTEXT:  PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement
 NOTICE:  columnar planner: adding CustomScan path for pushdown_test
 DETAIL:  unparameterized; 1 clauses pushed down
+CONTEXT:  PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement
                                                                  QUERY PLAN
 ---------------------------------------------------------------------
 Aggregate (actual rows=1 loops=1)
--- a/src/test/regress/expected/columnar_paths.out
+++ b/src/test/regress/expected/columnar_paths.out
@ -1,5 +1,10 @@
 CREATE SCHEMA columnar_paths;
 SET search_path TO columnar_paths;
+-- columnar_paths has an alternative test output file because PG17 improved
+-- the optimizer's ability to use statistics to estimate the size of a CTE
+-- scan.
+-- The relevant PG commit is:
+-- https://github.com/postgres/postgres/commit/f7816aec23eed1dc1da5f9a53cb6507d30b7f0a2
 CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar;
 INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i;
 CREATE INDEX full_correlated_btree ON full_correlated (a);
@ -296,20 +301,16 @@ SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
 WHERE w2.a = 123;
                      QUERY PLAN
 ---------------------------------------------------------------------
- Merge Join
-   Merge Cond: (w2.d = w1.a)
+ Hash Join
+   Hash Cond: (w1.a = w2.d)
   CTE w
     ->  Custom Scan (ColumnarScan) on full_correlated
           Columnar Projected Columns: a, b, c, d
-   ->  Sort
-         Sort Key: w2.d
+   ->  CTE Scan on w w1
+   ->  Hash
         ->  CTE Scan on w w2
               Filter: (a = 123)
-   ->  Materialize
-         ->  Sort
-               Sort Key: w1.a
-               ->  CTE Scan on w w1
-(13 rows)
+(9 rows)

 -- use index
 EXPLAIN (COSTS OFF) WITH w AS NOT MATERIALIZED (SELECT * FROM full_correlated)
--- a/src/test/regress/expected/columnar_paths_0.out
+++ b/src/test/regress/expected/columnar_paths_0.out
@ -0,0 +1,620 @@
+CREATE SCHEMA columnar_paths;
+SET search_path TO columnar_paths;
+-- columnar_paths has an alternative test output file because PG17 improved
+-- the optimizer's ability to use statistics to estimate the size of a CTE
+-- scan.
+-- The relevant PG commit is:
+-- https://github.com/postgres/postgres/commit/f7816aec23eed1dc1da5f9a53cb6507d30b7f0a2
+CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar;
+INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i;
+CREATE INDEX full_correlated_btree ON full_correlated (a);
+ANALYZE full_correlated;
+-- Prevent qual pushdown from competing with index scans.
+SET columnar.enable_qual_pushdown = false;
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_correlated WHERE a=200;
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_correlated WHERE a<0;
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_correlated WHERE a>10 AND a<20;
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_correlated WHERE a>1000000;
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a>900000;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+BEGIN;
+  SET LOCAL columnar.enable_custom_scan TO 'OFF';
+  SELECT columnar_test_helpers.uses_seq_scan (
+  $$
+  SELECT a FROM full_correlated WHERE a>900000;
+  $$
+  );
+ uses_seq_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_correlated WHERE a<1000;
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a,b FROM full_correlated WHERE a<3000;
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a<9000;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+BEGIN;
+  SET LOCAL columnar.enable_custom_scan TO 'OFF';
+  SELECT columnar_test_helpers.uses_index_scan (
+  $$
+  SELECT a FROM full_correlated WHERE a<9000;
+  $$
+  );
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+BEGIN;
+  TRUNCATE full_correlated;
+  INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i;
+  -- Since we have much smaller number of rows, selectivity of below
+  -- query should be much higher. So we would choose columnar custom scan.
+  SELECT columnar_test_helpers.uses_custom_scan (
+  $$
+  SELECT a FROM full_correlated WHERE a=200;
+  $$
+  );
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+  SET LOCAL columnar.enable_custom_scan TO 'OFF';
+  SELECT columnar_test_helpers.uses_seq_scan (
+  $$
+  SELECT a FROM full_correlated WHERE a=200;
+  $$
+  );
+ uses_seq_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+-- same filter used in above, but choosing multiple columns would increase
+-- custom scan cost, so we would prefer index scan this time
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a,b,c,d FROM full_correlated WHERE a<9000;
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+BEGIN;
+  SET LOCAL columnar.enable_custom_scan TO 'OFF';
+  SELECT columnar_test_helpers.uses_index_scan (
+  $$
+  SELECT a,b,c,d FROM full_correlated WHERE a<9000;
+  $$
+  );
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+-- again same filter used in above, but we would choose custom scan this
+-- time since it would read three less columns from disk
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT c FROM full_correlated WHERE a<10000;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+BEGIN;
+  SET LOCAL columnar.enable_custom_scan TO 'OFF';
+  SELECT columnar_test_helpers.uses_index_scan (
+  $$
+  SELECT c FROM full_correlated WHERE a<10000;
+  $$
+  );
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a>200;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a=0 OR a=5;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+BEGIN;
+  SET LOCAL columnar.enable_custom_scan TO 'OFF';
+  SELECT columnar_test_helpers.uses_seq_scan (
+  $$
+  SELECT a FROM full_correlated WHERE a=0 OR a=5;
+  $$
+  );
+ uses_seq_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+--
+-- some tests with joins / subqueries etc.
+--
+CREATE TABLE heap_table (a int, b text, c int, d int);
+INSERT INTO heap_table SELECT i, i::text, (i+1000)*7, (i+900)*5 FROM generate_series(1, 1000000) i;
+CREATE INDEX heap_table_btree ON heap_table (a);
+ANALYZE heap_table;
+EXPLAIN (COSTS OFF)
+WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1)
+SELECT SUM(ht_1.a), MIN(ct_1.c)
+FROM heap_table AS ht_1
+LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d
+LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c
+JOIN cte ON cte.d=ht_1.a
+WHERE ct_1.a < 3000;
+                                          QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate
+   CTE cte
+     ->  Custom Scan (ColumnarScan) on full_correlated
+           Filter: (a > 1)
+           Columnar Projected Columns: a, d
+   ->  Nested Loop Left Join
+         ->  Hash Join
+               Hash Cond: (cte.d = ht_1.a)
+               ->  CTE Scan on cte
+               ->  Hash
+                     ->  Nested Loop
+                           ->  Index Scan using full_correlated_btree on full_correlated ct_1
+                                 Index Cond: (a < 3000)
+                           ->  Index Only Scan using heap_table_btree on heap_table ht_1
+                                 Index Cond: (a = ct_1.d)
+         ->  Index Only Scan using heap_table_btree on heap_table ht_2
+               Index Cond: (a = ct_1.c)
+(17 rows)
+
+-- same query but columnar custom scan is disabled
+BEGIN;
+  SET LOCAL columnar.enable_custom_scan TO 'OFF';
+  EXPLAIN (COSTS OFF)
+  WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1)
+  SELECT SUM(ht_1.a), MIN(ct_1.c)
+  FROM heap_table AS ht_1
+  LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d
+  LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c
+  JOIN cte ON cte.d=ht_1.a
+  WHERE ct_1.a < 3000;
+                                          QUERY PLAN
+---------------------------------------------------------------------
+ Aggregate
+   CTE cte
+     ->  Seq Scan on full_correlated
+           Filter: (a > 1)
+   ->  Nested Loop Left Join
+         ->  Hash Join
+               Hash Cond: (cte.d = ht_1.a)
+               ->  CTE Scan on cte
+               ->  Hash
+                     ->  Nested Loop
+                           ->  Index Scan using full_correlated_btree on full_correlated ct_1
+                                 Index Cond: (a < 3000)
+                           ->  Index Only Scan using heap_table_btree on heap_table ht_1
+                                 Index Cond: (a = ct_1.d)
+         ->  Index Only Scan using heap_table_btree on heap_table ht_2
+               Index Cond: (a = ct_1.c)
+(16 rows)
+
+ROLLBACK;
+-- use custom scan
+EXPLAIN (COSTS OFF) WITH w AS (SELECT * FROM full_correlated)
+SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
+WHERE w2.a = 123;
+                      QUERY PLAN
+---------------------------------------------------------------------
+ Merge Join
+   Merge Cond: (w2.d = w1.a)
+   CTE w
+     ->  Custom Scan (ColumnarScan) on full_correlated
+           Columnar Projected Columns: a, b, c, d
+   ->  Sort
+         Sort Key: w2.d
+         ->  CTE Scan on w w2
+               Filter: (a = 123)
+   ->  Materialize
+         ->  Sort
+               Sort Key: w1.a
+               ->  CTE Scan on w w1
+(13 rows)
+
+-- use index
+EXPLAIN (COSTS OFF) WITH w AS NOT MATERIALIZED (SELECT * FROM full_correlated)
+SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
+WHERE w2.a = 123;
+                                    QUERY PLAN
+---------------------------------------------------------------------
+ Nested Loop
+   ->  Index Scan using full_correlated_btree on full_correlated full_correlated_1
+         Index Cond: (a = 123)
+   ->  Index Scan using full_correlated_btree on full_correlated
+         Index Cond: (a = full_correlated_1.d)
+(5 rows)
+
+EXPLAIN (COSTS OFF) SELECT sub_1.b, sub_2.a, sub_3.avg
+FROM
+  (SELECT b FROM full_correlated WHERE (a > 2) GROUP BY b ORDER BY 1 DESC LIMIT 5) AS sub_1,
+  (SELECT a FROM full_correlated WHERE (a > 10) GROUP BY a HAVING count(DISTINCT a) >= 1 ORDER BY 1 DESC LIMIT 3) AS sub_2,
+  (SELECT avg(a) AS AVG FROM full_correlated WHERE (a > 2) GROUP BY a HAVING sum(a) > 10 ORDER BY (sum(d) - avg(a) - COALESCE(array_upper(ARRAY[max(a)],1) * 5, 0)) DESC LIMIT 3) AS sub_3
+WHERE sub_2.a < sub_1.b::integer
+ORDER BY 3 DESC, 2 DESC, 1 DESC
+LIMIT 100;
+                                                                                                QUERY PLAN
+---------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: sub_3.avg DESC, full_correlated_1.a DESC, full_correlated.b DESC
+         ->  Nested Loop
+               ->  Nested Loop
+                     Join Filter: (full_correlated_1.a < (full_correlated.b)::integer)
+                     ->  Limit
+                           ->  Sort
+                                 Sort Key: full_correlated.b DESC
+                                 ->  HashAggregate
+                                       Group Key: full_correlated.b
+                                       ->  Custom Scan (ColumnarScan) on full_correlated
+                                             Filter: (a > 2)
+                                             Columnar Projected Columns: a, b
+                     ->  Materialize
+                           ->  Limit
+                                 ->  GroupAggregate
+                                       Group Key: full_correlated_1.a
+                                       Filter: (count(DISTINCT full_correlated_1.a) >= 1)
+                                       ->  Index Scan Backward using full_correlated_btree on full_correlated full_correlated_1
+                                             Index Cond: (a > 10)
+               ->  Materialize
+                     ->  Subquery Scan on sub_3
+                           ->  Limit
+                                 ->  Sort
+                                       Sort Key: ((((sum(full_correlated_2.d))::numeric - avg(full_correlated_2.a)) - (COALESCE((array_upper(ARRAY[max(full_correlated_2.a)], 1) * 5), 0))::numeric)) DESC
+                                       ->  GroupAggregate
+                                             Group Key: full_correlated_2.a
+                                             Filter: (sum(full_correlated_2.a) > 10)
+                                             ->  Index Scan using full_correlated_btree on full_correlated full_correlated_2
+                                                   Index Cond: (a > 2)
+(31 rows)
+
+DROP INDEX full_correlated_btree;
+CREATE INDEX full_correlated_hash ON full_correlated USING hash(a);
+ANALYZE full_correlated;
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a<10;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a>1 AND a<10;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_correlated WHERE a=0 OR a=5;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_correlated WHERE a=1000;
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a,c FROM full_correlated WHERE a=1000;
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+BEGIN;
+  SET LOCAL columnar.enable_custom_scan TO 'OFF';
+  SELECT columnar_test_helpers.uses_index_scan (
+  $$
+  SELECT a,c FROM full_correlated WHERE a=1000;
+  $$
+  );
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+CREATE TABLE full_anti_correlated (a int, b text) USING columnar;
+INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i;
+CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b);
+ANALYZE full_anti_correlated;
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_anti_correlated WHERE b='600';
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a,b FROM full_anti_correlated WHERE b='600';
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+BEGIN;
+  SET LOCAL columnar.enable_custom_scan TO 'OFF';
+  SELECT columnar_test_helpers.uses_seq_scan (
+  $$
+  SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
+  $$
+  );
+ uses_seq_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+DROP INDEX full_anti_correlated_hash;
+CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b);
+ANALYZE full_anti_correlated;
+SELECT columnar_test_helpers.uses_index_scan (
+$$
+SELECT a FROM full_anti_correlated WHERE a>6500 AND a<7000 AND b<'10000';
+$$
+);
+ uses_index_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+BEGIN;
+  SET LOCAL columnar.enable_custom_scan TO 'OFF';
+  SELECT columnar_test_helpers.uses_seq_scan (
+  $$
+  SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
+  $$
+  );
+ uses_seq_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+CREATE TABLE no_correlation (a int, b text) USING columnar;
+INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i;
+CREATE INDEX no_correlation_btree ON no_correlation (a);
+ANALYZE no_correlation;
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM no_correlation WHERE a < 2;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+SELECT columnar_test_helpers.uses_custom_scan (
+$$
+SELECT a FROM no_correlation WHERE a = 200;
+$$
+);
+ uses_custom_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+BEGIN;
+  SET LOCAL columnar.enable_custom_scan TO 'OFF';
+  SELECT columnar_test_helpers.uses_seq_scan (
+  $$
+  SELECT a FROM no_correlation WHERE a = 200;
+  $$
+  );
+ uses_seq_scan
+---------------------------------------------------------------------
+ t
+(1 row)
+
+ROLLBACK;
+SET columnar.enable_qual_pushdown TO DEFAULT;
+BEGIN;
+SET LOCAL columnar.stripe_row_limit = 2000;
+SET LOCAL columnar.chunk_group_row_limit = 1000;
+CREATE TABLE correlated(x int) using columnar;
+INSERT INTO correlated
+  SELECT g FROM generate_series(1,100000) g;
+CREATE TABLE uncorrelated(x int) using columnar;
+INSERT INTO uncorrelated
+  SELECT (g * 19) % 100000 FROM generate_series(1,100000) g;
+COMMIT;
+CREATE INDEX correlated_idx ON correlated(x);
+CREATE INDEX uncorrelated_idx ON uncorrelated(x);
+ANALYZE correlated, uncorrelated;
+-- should choose chunk group filtering; selective and correlated
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT * FROM correlated WHERE x = 78910;
+                            QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (ColumnarScan) on correlated (actual rows=1 loops=1)
+   Filter: (x = 78910)
+   Rows Removed by Filter: 999
+   Columnar Projected Columns: x
+   Columnar Chunk Group Filters: (x = 78910)
+   Columnar Chunk Groups Removed by Filter: 99
+(6 rows)
+
+SELECT * FROM correlated WHERE x = 78910;
+   x
+---------------------------------------------------------------------
+ 78910
+(1 row)
+
+-- should choose index scan; selective but uncorrelated
+EXPLAIN (analyze on, costs off, timing off, summary off)
+SELECT * FROM uncorrelated WHERE x = 78910;
+                                QUERY PLAN
+---------------------------------------------------------------------
+ Index Scan using uncorrelated_idx on uncorrelated (actual rows=1 loops=1)
+   Index Cond: (x = 78910)
+(2 rows)
+
+SELECT * FROM uncorrelated WHERE x = 78910;
+   x
+---------------------------------------------------------------------
+ 78910
+(1 row)
+
+SET client_min_messages TO WARNING;
+DROP SCHEMA columnar_paths CASCADE;
--- a/src/test/regress/expected/multi_test_helpers.out
+++ b/src/test/regress/expected/multi_test_helpers.out
@ -585,3 +585,23 @@ BEGIN
    RETURN NEXT;
  END LOOP;
 END; $$ language plpgsql;
+-- This function formats EXPLAIN output to conform to how pg <= 16 EXPLAIN
+-- shows ANY <subquery> in an expression the pg version >= 17. When 17 is
+-- the minimum supported pgversion this function can be retired. The commit
+-- that changed how ANY <subquery> exrpressions appear in EXPLAIN is:
+-- https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=fd0398fcb
+CREATE OR REPLACE FUNCTION explain_with_pg16_subplan_format(explain_command text, out query_plan text)
+RETURNS SETOF TEXT AS $$
+DECLARE
+  pgversion int = 0;
+BEGIN
+  pgversion = substring(version(), '\d+')::int ;
+  FOR query_plan IN execute explain_command LOOP
+    IF pgversion >= 17 THEN
+      IF query_plan ~ 'SubPlan \d+\).col' THEN
+    	  query_plan = regexp_replace(query_plan, '\(ANY \(\w+ = \(SubPlan (\d+)\).col1\)\)', '(SubPlan \1)', 'g');
+      END IF;
+    END IF;
+    RETURN NEXT;
+  END LOOP;
+END; $$ language plpgsql;
--- a/src/test/regress/sql/columnar_chunk_filtering.sql
+++ b/src/test/regress/sql/columnar_chunk_filtering.sql
@ -415,6 +415,7 @@ SELECT sum(a) FROM pushdown_test where (a > random() and a <= 2000) or (a > 2000
 SELECT sum(a) FROM pushdown_test where (a > random() and a <= 2000) or (a > 200000-1010);

 SET hash_mem_multiplier = 1.0;
+SELECT public.explain_with_pg16_subplan_format($Q$
 EXPLAIN (analyze on, costs off, timing off, summary off)
 SELECT sum(a) FROM pushdown_test where
 (
@ -427,6 +428,7 @@ SELECT sum(a) FROM pushdown_test where
 )
 or
 (a > 200000-2010);
+$Q$) as "QUERY PLAN";
 RESET hash_mem_multiplier;
 SELECT sum(a) FROM pushdown_test where
 (
--- a/src/test/regress/sql/columnar_paths.sql
+++ b/src/test/regress/sql/columnar_paths.sql
@ -1,6 +1,12 @@
 CREATE SCHEMA columnar_paths;
 SET search_path TO columnar_paths;

+-- columnar_paths has an alternative test output file because PG17 improved
+-- the optimizer's ability to use statistics to estimate the size of a CTE
+-- scan. 
+-- The relevant PG commit is:
+-- https://github.com/postgres/postgres/commit/f7816aec23eed1dc1da5f9a53cb6507d30b7f0a2
+
 CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar;
 INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i;
 CREATE INDEX full_correlated_btree ON full_correlated (a);
--- a/src/test/regress/sql/multi_test_helpers.sql
+++ b/src/test/regress/sql/multi_test_helpers.sql
@ -611,3 +611,24 @@ BEGIN
    RETURN NEXT;
  END LOOP;
 END; $$ language plpgsql;
+
+-- This function formats EXPLAIN output to conform to how pg <= 16 EXPLAIN 
+-- shows ANY <subquery> in an expression the pg version >= 17. When 17 is
+-- the minimum supported pgversion this function can be retired. The commit  
+-- that changed how ANY <subquery> exrpressions appear in EXPLAIN is:
+-- https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=fd0398fcb
+CREATE OR REPLACE FUNCTION explain_with_pg16_subplan_format(explain_command text, out query_plan text)
+RETURNS SETOF TEXT AS $$
+DECLARE
+  pgversion int = 0;
+BEGIN
+  pgversion = substring(version(), '\d+')::int ;
+  FOR query_plan IN execute explain_command LOOP
+    IF pgversion >= 17 THEN
+      IF query_plan ~ 'SubPlan \d+\).col' THEN
+    	  query_plan = regexp_replace(query_plan, '\(ANY \(\w+ = \(SubPlan (\d+)\).col1\)\)', '(SubPlan \1)', 'g');
+      END IF;
+    END IF;
+    RETURN NEXT;
+  END LOOP;
+END; $$ language plpgsql;