From 808626ea78b7b9db1327ed95883066cf1e0aa6c3 Mon Sep 17 00:00:00 2001 From: Colm Date: Tue, 3 Dec 2024 09:14:47 +0000 Subject: [PATCH] PG17 compatibility (#7653): Fix test diffs in columnar schedule (#7768) This PR fixes diffs in `columnar_chunk_filtering` and `columnar_paths` tests. In `columnar_chunk_filtering` an expression `(NOT (SubPlan 1))` changed to `(NOT (ANY (a = (SubPlan 1).col1)))`. This is due to [a PG17 commit](https://github.com/postgres/postgres/commit/fd0398fc) that improved how scalar subqueries (InitPlans) and ANY subqueries (SubPlans) are EXPLAINed in expressions. The fix uses a helper function which converts the PG17 format to the pre-PG17 format. It is done this way because pre-PG17 EXPLAIN does not provide enough context to convert to the PG17 format. The helper function can (and should) be retired when 17 becomes the minimum supported PG version. In `columnar_paths`, a merge join changed to a hash join. This is due to [this PG17 commit](https://github.com/postgres/postgres/commit/f7816aec23eed1dc1da5f9a53cb6507d30b7f0a2), which improved the PG optimizer's ability to estimate the size of a CTE scan. The impacted query involves a CTE scan with a point predicate `(a=123)`; before the change the CTE size was estimated to be 5000, but with the change it is correctly (given the data in the table) estimated to be 1, making hash join a more attractive join method. The fix is to have an alternative goldfile for pre-PG17. I tried to force a specific join method using the GUCs (`enable_nestloop`, `enable_hashjoin`, `enable_mergejoin`), but it was not possible to obtain a consistent plan across all supported PG versions (in some cases the join inputs switched sides). 
--- .../expected/columnar_chunk_filtering.out | 18 +- .../expected/columnar_chunk_filtering_0.out | 6 + src/test/regress/expected/columnar_paths.out | 19 +- .../regress/expected/columnar_paths_0.out | 620 ++++++++++++++++++ .../regress/expected/multi_test_helpers.out | 20 + .../regress/sql/columnar_chunk_filtering.sql | 2 + src/test/regress/sql/columnar_paths.sql | 6 + src/test/regress/sql/multi_test_helpers.sql | 21 + 8 files changed, 697 insertions(+), 15 deletions(-) create mode 100644 src/test/regress/expected/columnar_paths_0.out diff --git a/src/test/regress/expected/columnar_chunk_filtering.out b/src/test/regress/expected/columnar_chunk_filtering.out index 3acdd957d..f952eb27b 100644 --- a/src/test/regress/expected/columnar_chunk_filtering.out +++ b/src/test/regress/expected/columnar_chunk_filtering.out @@ -977,6 +977,7 @@ DETAIL: unparameterized; 1 clauses pushed down (1 row) SET hash_mem_multiplier = 1.0; +SELECT public.explain_with_pg16_subplan_format($Q$ EXPLAIN (analyze on, costs off, timing off, summary off) SELECT sum(a) FROM pushdown_test where ( @@ -989,13 +990,18 @@ SELECT sum(a) FROM pushdown_test where ) or (a > 200000-2010); +$Q$) as "QUERY PLAN"; NOTICE: columnar planner: adding CustomScan path for pushdown_test DETAIL: unparameterized; 0 clauses pushed down +CONTEXT: PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' HINT: Var must only reference this rel, and Expr must not reference this rel +CONTEXT: PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement NOTICE: columnar planner: cannot push down clause: must not contain a subplan +CONTEXT: PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement NOTICE: columnar planner: adding CustomScan path for pushdown_test DETAIL: unparameterized; 1 clauses pushed down +CONTEXT: PL/pgSQL function 
explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement QUERY PLAN --------------------------------------------------------------------- Aggregate (actual rows=1 loops=1) @@ -1092,14 +1098,14 @@ BEGIN; END; EXPLAIN (analyze on, costs off, timing off, summary off) SELECT id FROM pushdown_test WHERE country IN ('USA', 'BR', 'ZW'); - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Custom Scan (ColumnarScan) on pushdown_test (actual rows=3 loops=1) - Filter: (country = ANY ('{USA,BR,ZW}'::text[])) - Rows Removed by Filter: 1 - Columnar Projected Columns: id, country - Columnar Chunk Group Filters: (country = ANY ('{USA,BR,ZW}'::text[])) - Columnar Chunk Groups Removed by Filter: 2 + Filter: (country = ANY ('{USA,BR,ZW}'::text[])) + Rows Removed by Filter: 1 + Columnar Projected Columns: id, country + Columnar Chunk Group Filters: (country = ANY ('{USA,BR,ZW}'::text[])) + Columnar Chunk Groups Removed by Filter: 2 (6 rows) SELECT id FROM pushdown_test WHERE country IN ('USA', 'BR', 'ZW'); diff --git a/src/test/regress/expected/columnar_chunk_filtering_0.out b/src/test/regress/expected/columnar_chunk_filtering_0.out index 746f3406f..57b30b8b1 100644 --- a/src/test/regress/expected/columnar_chunk_filtering_0.out +++ b/src/test/regress/expected/columnar_chunk_filtering_0.out @@ -977,6 +977,7 @@ DETAIL: unparameterized; 1 clauses pushed down (1 row) SET hash_mem_multiplier = 1.0; +SELECT public.explain_with_pg16_subplan_format($Q$ EXPLAIN (analyze on, costs off, timing off, summary off) SELECT sum(a) FROM pushdown_test where ( @@ -989,13 +990,18 @@ SELECT sum(a) FROM pushdown_test where ) or (a > 200000-2010); +$Q$) as "QUERY PLAN"; NOTICE: columnar planner: adding CustomScan path for pushdown_test DETAIL: unparameterized; 0 clauses pushed down +CONTEXT: PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement NOTICE: columnar planner: cannot push down clause: must match 
'Var Expr' or 'Expr Var' HINT: Var must only reference this rel, and Expr must not reference this rel +CONTEXT: PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement NOTICE: columnar planner: cannot push down clause: must not contain a subplan +CONTEXT: PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement NOTICE: columnar planner: adding CustomScan path for pushdown_test DETAIL: unparameterized; 1 clauses pushed down +CONTEXT: PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement QUERY PLAN --------------------------------------------------------------------- Aggregate (actual rows=1 loops=1) diff --git a/src/test/regress/expected/columnar_paths.out b/src/test/regress/expected/columnar_paths.out index 07b91a42e..1c4bfc608 100644 --- a/src/test/regress/expected/columnar_paths.out +++ b/src/test/regress/expected/columnar_paths.out @@ -1,5 +1,10 @@ CREATE SCHEMA columnar_paths; SET search_path TO columnar_paths; +-- columnar_paths has an alternative test output file because PG17 improved +-- the optimizer's ability to use statistics to estimate the size of a CTE +-- scan. 
+-- The relevant PG commit is: +-- https://github.com/postgres/postgres/commit/f7816aec23eed1dc1da5f9a53cb6507d30b7f0a2 CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar; INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i; CREATE INDEX full_correlated_btree ON full_correlated (a); @@ -296,20 +301,16 @@ SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d WHERE w2.a = 123; QUERY PLAN --------------------------------------------------------------------- - Merge Join - Merge Cond: (w2.d = w1.a) + Hash Join + Hash Cond: (w1.a = w2.d) CTE w -> Custom Scan (ColumnarScan) on full_correlated Columnar Projected Columns: a, b, c, d - -> Sort - Sort Key: w2.d + -> CTE Scan on w w1 + -> Hash -> CTE Scan on w w2 Filter: (a = 123) - -> Materialize - -> Sort - Sort Key: w1.a - -> CTE Scan on w w1 -(13 rows) +(9 rows) -- use index EXPLAIN (COSTS OFF) WITH w AS NOT MATERIALIZED (SELECT * FROM full_correlated) diff --git a/src/test/regress/expected/columnar_paths_0.out b/src/test/regress/expected/columnar_paths_0.out new file mode 100644 index 000000000..2b7349e42 --- /dev/null +++ b/src/test/regress/expected/columnar_paths_0.out @@ -0,0 +1,620 @@ +CREATE SCHEMA columnar_paths; +SET search_path TO columnar_paths; +-- columnar_paths has an alternative test output file because PG17 improved +-- the optimizer's ability to use statistics to estimate the size of a CTE +-- scan. +-- The relevant PG commit is: +-- https://github.com/postgres/postgres/commit/f7816aec23eed1dc1da5f9a53cb6507d30b7f0a2 +CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar; +INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i; +CREATE INDEX full_correlated_btree ON full_correlated (a); +ANALYZE full_correlated; +-- Prevent qual pushdown from competing with index scans. 
+SET columnar.enable_qual_pushdown = false; +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_correlated WHERE a=200; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_correlated WHERE a<0; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_correlated WHERE a>10 AND a<20; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_correlated WHERE a>1000000; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a>900000; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a FROM full_correlated WHERE a>900000; + $$ + ); + uses_seq_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_correlated WHERE a<1000; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a,b FROM full_correlated WHERE a<3000; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a<9000; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t 
+(1 row) + +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_index_scan ( + $$ + SELECT a FROM full_correlated WHERE a<9000; + $$ + ); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +BEGIN; + TRUNCATE full_correlated; + INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i; + -- Since we have much smaller number of rows, selectivity of below + -- query should be much higher. So we would choose columnar custom scan. + SELECT columnar_test_helpers.uses_custom_scan ( + $$ + SELECT a FROM full_correlated WHERE a=200; + $$ + ); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a FROM full_correlated WHERE a=200; + $$ + ); + uses_seq_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- same filter used in above, but choosing multiple columns would increase +-- custom scan cost, so we would prefer index scan this time +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a,b,c,d FROM full_correlated WHERE a<9000; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_index_scan ( + $$ + SELECT a,b,c,d FROM full_correlated WHERE a<9000; + $$ + ); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- again same filter used in above, but we would choose custom scan this +-- time since it would read three less columns from disk +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT c FROM full_correlated WHERE a<10000; +$$ +); + uses_custom_scan 
+--------------------------------------------------------------------- + t +(1 row) + +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_index_scan ( + $$ + SELECT c FROM full_correlated WHERE a<10000; + $$ + ); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a>200; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a=0 OR a=5; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a FROM full_correlated WHERE a=0 OR a=5; + $$ + ); + uses_seq_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- +-- some tests with joins / subqueries etc. 
+-- +CREATE TABLE heap_table (a int, b text, c int, d int); +INSERT INTO heap_table SELECT i, i::text, (i+1000)*7, (i+900)*5 FROM generate_series(1, 1000000) i; +CREATE INDEX heap_table_btree ON heap_table (a); +ANALYZE heap_table; +EXPLAIN (COSTS OFF) +WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1) +SELECT SUM(ht_1.a), MIN(ct_1.c) +FROM heap_table AS ht_1 +LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d +LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c +JOIN cte ON cte.d=ht_1.a +WHERE ct_1.a < 3000; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + CTE cte + -> Custom Scan (ColumnarScan) on full_correlated + Filter: (a > 1) + Columnar Projected Columns: a, d + -> Nested Loop Left Join + -> Hash Join + Hash Cond: (cte.d = ht_1.a) + -> CTE Scan on cte + -> Hash + -> Nested Loop + -> Index Scan using full_correlated_btree on full_correlated ct_1 + Index Cond: (a < 3000) + -> Index Only Scan using heap_table_btree on heap_table ht_1 + Index Cond: (a = ct_1.d) + -> Index Only Scan using heap_table_btree on heap_table ht_2 + Index Cond: (a = ct_1.c) +(17 rows) + +-- same query but columnar custom scan is disabled +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + EXPLAIN (COSTS OFF) + WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1) + SELECT SUM(ht_1.a), MIN(ct_1.c) + FROM heap_table AS ht_1 + LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d + LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c + JOIN cte ON cte.d=ht_1.a + WHERE ct_1.a < 3000; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + CTE cte + -> Seq Scan on full_correlated + Filter: (a > 1) + -> Nested Loop Left Join + -> Hash Join + Hash Cond: (cte.d = ht_1.a) + -> CTE Scan on cte + -> Hash + -> Nested Loop + -> Index Scan using full_correlated_btree on full_correlated ct_1 + Index Cond: (a < 3000) + -> Index Only Scan using heap_table_btree on heap_table ht_1 + Index 
Cond: (a = ct_1.d) + -> Index Only Scan using heap_table_btree on heap_table ht_2 + Index Cond: (a = ct_1.c) +(16 rows) + +ROLLBACK; +-- use custom scan +EXPLAIN (COSTS OFF) WITH w AS (SELECT * FROM full_correlated) +SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d +WHERE w2.a = 123; + QUERY PLAN +--------------------------------------------------------------------- + Merge Join + Merge Cond: (w2.d = w1.a) + CTE w + -> Custom Scan (ColumnarScan) on full_correlated + Columnar Projected Columns: a, b, c, d + -> Sort + Sort Key: w2.d + -> CTE Scan on w w2 + Filter: (a = 123) + -> Materialize + -> Sort + Sort Key: w1.a + -> CTE Scan on w w1 +(13 rows) + +-- use index +EXPLAIN (COSTS OFF) WITH w AS NOT MATERIALIZED (SELECT * FROM full_correlated) +SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d +WHERE w2.a = 123; + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop + -> Index Scan using full_correlated_btree on full_correlated full_correlated_1 + Index Cond: (a = 123) + -> Index Scan using full_correlated_btree on full_correlated + Index Cond: (a = full_correlated_1.d) +(5 rows) + +EXPLAIN (COSTS OFF) SELECT sub_1.b, sub_2.a, sub_3.avg +FROM + (SELECT b FROM full_correlated WHERE (a > 2) GROUP BY b ORDER BY 1 DESC LIMIT 5) AS sub_1, + (SELECT a FROM full_correlated WHERE (a > 10) GROUP BY a HAVING count(DISTINCT a) >= 1 ORDER BY 1 DESC LIMIT 3) AS sub_2, + (SELECT avg(a) AS AVG FROM full_correlated WHERE (a > 2) GROUP BY a HAVING sum(a) > 10 ORDER BY (sum(d) - avg(a) - COALESCE(array_upper(ARRAY[max(a)],1) * 5, 0)) DESC LIMIT 3) AS sub_3 +WHERE sub_2.a < sub_1.b::integer +ORDER BY 3 DESC, 2 DESC, 1 DESC +LIMIT 100; + QUERY PLAN +--------------------------------------------------------------------- + Limit + -> Sort + Sort Key: sub_3.avg DESC, full_correlated_1.a DESC, full_correlated.b DESC + -> Nested Loop + -> Nested Loop + Join Filter: (full_correlated_1.a < (full_correlated.b)::integer) + -> Limit + -> Sort + Sort Key: 
full_correlated.b DESC + -> HashAggregate + Group Key: full_correlated.b + -> Custom Scan (ColumnarScan) on full_correlated + Filter: (a > 2) + Columnar Projected Columns: a, b + -> Materialize + -> Limit + -> GroupAggregate + Group Key: full_correlated_1.a + Filter: (count(DISTINCT full_correlated_1.a) >= 1) + -> Index Scan Backward using full_correlated_btree on full_correlated full_correlated_1 + Index Cond: (a > 10) + -> Materialize + -> Subquery Scan on sub_3 + -> Limit + -> Sort + Sort Key: ((((sum(full_correlated_2.d))::numeric - avg(full_correlated_2.a)) - (COALESCE((array_upper(ARRAY[max(full_correlated_2.a)], 1) * 5), 0))::numeric)) DESC + -> GroupAggregate + Group Key: full_correlated_2.a + Filter: (sum(full_correlated_2.a) > 10) + -> Index Scan using full_correlated_btree on full_correlated full_correlated_2 + Index Cond: (a > 2) +(31 rows) + +DROP INDEX full_correlated_btree; +CREATE INDEX full_correlated_hash ON full_correlated USING hash(a); +ANALYZE full_correlated; +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a<10; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a>1 AND a<10; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a=0 OR a=5; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_correlated WHERE a=1000; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a,c FROM full_correlated WHERE a=1000; +$$ +); + uses_index_scan 
+--------------------------------------------------------------------- + t +(1 row) + +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_index_scan ( + $$ + SELECT a,c FROM full_correlated WHERE a=1000; + $$ + ); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +CREATE TABLE full_anti_correlated (a int, b text) USING columnar; +INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i; +CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b); +ANALYZE full_anti_correlated; +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_anti_correlated WHERE b='600'; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a,b FROM full_anti_correlated WHERE b='600'; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10'; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10'; + $$ + ); + uses_seq_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +DROP INDEX full_anti_correlated_hash; +CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b); +ANALYZE full_anti_correlated; +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_anti_correlated WHERE a>6500 AND a<7000 AND b<'10000'; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT 
columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000'; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000'; + $$ + ); + uses_seq_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +CREATE TABLE no_correlation (a int, b text) USING columnar; +INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i; +CREATE INDEX no_correlation_btree ON no_correlation (a); +ANALYZE no_correlation; +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM no_correlation WHERE a < 2; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM no_correlation WHERE a = 200; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a FROM no_correlation WHERE a = 200; + $$ + ); + uses_seq_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +SET columnar.enable_qual_pushdown TO DEFAULT; +BEGIN; +SET LOCAL columnar.stripe_row_limit = 2000; +SET LOCAL columnar.chunk_group_row_limit = 1000; +CREATE TABLE correlated(x int) using columnar; +INSERT INTO correlated + SELECT g FROM generate_series(1,100000) g; +CREATE TABLE uncorrelated(x 
int) using columnar; +INSERT INTO uncorrelated + SELECT (g * 19) % 100000 FROM generate_series(1,100000) g; +COMMIT; +CREATE INDEX correlated_idx ON correlated(x); +CREATE INDEX uncorrelated_idx ON uncorrelated(x); +ANALYZE correlated, uncorrelated; +-- should choose chunk group filtering; selective and correlated +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM correlated WHERE x = 78910; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on correlated (actual rows=1 loops=1) + Filter: (x = 78910) + Rows Removed by Filter: 999 + Columnar Projected Columns: x + Columnar Chunk Group Filters: (x = 78910) + Columnar Chunk Groups Removed by Filter: 99 +(6 rows) + +SELECT * FROM correlated WHERE x = 78910; + x +--------------------------------------------------------------------- + 78910 +(1 row) + +-- should choose index scan; selective but uncorrelated +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM uncorrelated WHERE x = 78910; + QUERY PLAN +--------------------------------------------------------------------- + Index Scan using uncorrelated_idx on uncorrelated (actual rows=1 loops=1) + Index Cond: (x = 78910) +(2 rows) + +SELECT * FROM uncorrelated WHERE x = 78910; + x +--------------------------------------------------------------------- + 78910 +(1 row) + +SET client_min_messages TO WARNING; +DROP SCHEMA columnar_paths CASCADE; diff --git a/src/test/regress/expected/multi_test_helpers.out b/src/test/regress/expected/multi_test_helpers.out index 3e9de8bce..4b74070d1 100644 --- a/src/test/regress/expected/multi_test_helpers.out +++ b/src/test/regress/expected/multi_test_helpers.out @@ -698,3 +698,23 @@ BEGIN RETURN NEXT; END LOOP; END; $$ language plpgsql; +-- This function formats EXPLAIN output to conform to how pg <= 16 EXPLAIN +-- shows ANY in an expression the pg version >= 17. 
When 17 is +-- the minimum supported pgversion this function can be retired. The commit +-- that changed how ANY exrpressions appear in EXPLAIN is: +-- https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=fd0398fcb +CREATE OR REPLACE FUNCTION explain_with_pg16_subplan_format(explain_command text, out query_plan text) +RETURNS SETOF TEXT AS $$ +DECLARE + pgversion int = 0; +BEGIN + pgversion = substring(version(), '\d+')::int ; + FOR query_plan IN execute explain_command LOOP + IF pgversion >= 17 THEN + IF query_plan ~ 'SubPlan \d+\).col' THEN + query_plan = regexp_replace(query_plan, '\(ANY \(\w+ = \(SubPlan (\d+)\).col1\)\)', '(SubPlan \1)', 'g'); + END IF; + END IF; + RETURN NEXT; + END LOOP; +END; $$ language plpgsql; diff --git a/src/test/regress/sql/columnar_chunk_filtering.sql b/src/test/regress/sql/columnar_chunk_filtering.sql index d37a8d8b6..6c90e1943 100644 --- a/src/test/regress/sql/columnar_chunk_filtering.sql +++ b/src/test/regress/sql/columnar_chunk_filtering.sql @@ -415,6 +415,7 @@ SELECT sum(a) FROM pushdown_test where (a > random() and a <= 2000) or (a > 2000 SELECT sum(a) FROM pushdown_test where (a > random() and a <= 2000) or (a > 200000-1010); SET hash_mem_multiplier = 1.0; +SELECT public.explain_with_pg16_subplan_format($Q$ EXPLAIN (analyze on, costs off, timing off, summary off) SELECT sum(a) FROM pushdown_test where ( @@ -427,6 +428,7 @@ SELECT sum(a) FROM pushdown_test where ) or (a > 200000-2010); +$Q$) as "QUERY PLAN"; RESET hash_mem_multiplier; SELECT sum(a) FROM pushdown_test where ( diff --git a/src/test/regress/sql/columnar_paths.sql b/src/test/regress/sql/columnar_paths.sql index 3c92d4a21..d56443a03 100644 --- a/src/test/regress/sql/columnar_paths.sql +++ b/src/test/regress/sql/columnar_paths.sql @@ -1,6 +1,12 @@ CREATE SCHEMA columnar_paths; SET search_path TO columnar_paths; +-- columnar_paths has an alternative test output file because PG17 improved +-- the optimizer's ability to use statistics to estimate the size 
of a CTE +-- scan. +-- The relevant PG commit is: +-- https://github.com/postgres/postgres/commit/f7816aec23eed1dc1da5f9a53cb6507d30b7f0a2 + CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar; INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i; CREATE INDEX full_correlated_btree ON full_correlated (a); diff --git a/src/test/regress/sql/multi_test_helpers.sql b/src/test/regress/sql/multi_test_helpers.sql index aba3abcc2..14249c145 100644 --- a/src/test/regress/sql/multi_test_helpers.sql +++ b/src/test/regress/sql/multi_test_helpers.sql @@ -726,3 +726,24 @@ BEGIN RETURN NEXT; END LOOP; END; $$ language plpgsql; + +-- This function formats EXPLAIN output to conform to how pg <= 16 EXPLAIN +-- shows ANY in an expression the pg version >= 17. When 17 is +-- the minimum supported pgversion this function can be retired. The commit +-- that changed how ANY exrpressions appear in EXPLAIN is: +-- https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=fd0398fcb +CREATE OR REPLACE FUNCTION explain_with_pg16_subplan_format(explain_command text, out query_plan text) +RETURNS SETOF TEXT AS $$ +DECLARE + pgversion int = 0; +BEGIN + pgversion = substring(version(), '\d+')::int ; + FOR query_plan IN execute explain_command LOOP + IF pgversion >= 17 THEN + IF query_plan ~ 'SubPlan \d+\).col' THEN + query_plan = regexp_replace(query_plan, '\(ANY \(\w+ = \(SubPlan (\d+)\).col1\)\)', '(SubPlan \1)', 'g'); + END IF; + END IF; + RETURN NEXT; + END LOOP; +END; $$ language plpgsql;