PG17 compatibility (#7653): Fix test diffs in columnar schedule (#7768)

This PR fixes diffs in the `columnar_chunk_filtering` and `columnar_paths`
tests.

In `columnar_chunk_filtering`, an expression `(NOT (SubPlan 1))` changed
to `(NOT (ANY (a = (SubPlan 1).col1)))`. This is due to [a PG17
commit](https://github.com/postgres/postgres/commit/fd0398fc) that
improved how scalar subqueries (InitPlans) and ANY subqueries (SubPlans)
are displayed by EXPLAIN within expressions. The fix uses a helper
function which converts the PG17 format to the pre-PG17 format. It is
done in this direction because pre-PG17 EXPLAIN does not provide enough
context to convert to the PG17 format. The helper function can (and
should) be retired when PG17 becomes the minimum supported PG version.
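
Concretely, the conversion is a regular-expression rewrite applied to each
plan line; a minimal sketch using the same pattern as the helper
(`explain_with_pg16_subplan_format`, included in this diff):

```sql
-- rewrite the PG17 rendering of an ANY subquery back to the pre-PG17 form
SELECT regexp_replace(
         '(NOT (ANY (a = (SubPlan 1).col1)))',        -- PG17 format
         '\(ANY \(\w+ = \(SubPlan (\d+)\).col1\)\)',
         '(SubPlan \1)',                              -- pre-PG17 format
         'g');
-- result: (NOT (SubPlan 1))
```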

In `columnar_paths`, a merge join changed to a hash join. This is due to
[this PG17
commit](https://github.com/postgres/postgres/commit/f7816aec23),
which improved the PG optimizer's ability to estimate the size of a CTE
scan. The impacted query involves a CTE scan with a point predicate
`(a=123)`; before the change the CTE size was estimated to be 5000, but
with the change it is correctly (given the data in the table) estimated
to be 1, making hash join a more attractive join method. The fix is to
have an alternative goldfile for pre-PG17. I tried to force a specific
join method using the GUCs (`enable_nestloop`, `enable_hashjoin`,
`enable_mergejoin`), but it was not possible to obtain a consistent plan
across all supported PG versions (in some cases the join inputs switched
sides).
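
For the record, the pinning that was attempted looks roughly like the
sketch below (GUC names as listed above; the query is the one from
`columnar_paths`). Even with two of the three join methods disabled, the
surviving plan shape was not stable across versions:

```sql
-- sketch: disable merge and nested-loop joins so only hash join remains
SET enable_mergejoin = off;
SET enable_nestloop = off;
EXPLAIN (COSTS OFF) WITH w AS (SELECT * FROM full_correlated)
SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
WHERE w2.a = 123;
RESET enable_mergejoin;
RESET enable_nestloop;
```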
Colm 2024-12-03 09:14:47 +00:00 committed by GitHub
parent 5f479d5f47
commit 698699d89e
8 changed files with 697 additions and 15 deletions


@ -977,6 +977,7 @@ DETAIL: unparameterized; 1 clauses pushed down
(1 row)
SET hash_mem_multiplier = 1.0;
SELECT public.explain_with_pg16_subplan_format($Q$
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT sum(a) FROM pushdown_test where
(
@ -989,13 +990,18 @@ SELECT sum(a) FROM pushdown_test where
)
or
(a > 200000-2010);
$Q$) as "QUERY PLAN";
NOTICE: columnar planner: adding CustomScan path for pushdown_test
DETAIL: unparameterized; 0 clauses pushed down
CONTEXT: PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement
NOTICE: columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
HINT: Var must only reference this rel, and Expr must not reference this rel
CONTEXT: PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement
NOTICE: columnar planner: cannot push down clause: must not contain a subplan
CONTEXT: PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement
NOTICE: columnar planner: adding CustomScan path for pushdown_test
DETAIL: unparameterized; 1 clauses pushed down
CONTEXT: PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement
QUERY PLAN
---------------------------------------------------------------------
Aggregate (actual rows=1 loops=1)
@ -1092,14 +1098,14 @@ BEGIN;
END;
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT id FROM pushdown_test WHERE country IN ('USA', 'BR', 'ZW');
QUERY PLAN
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on pushdown_test (actual rows=3 loops=1)
Filter: (country = ANY ('{USA,BR,ZW}'::text[]))
Rows Removed by Filter: 1
Columnar Projected Columns: id, country
Columnar Chunk Group Filters: (country = ANY ('{USA,BR,ZW}'::text[]))
Columnar Chunk Groups Removed by Filter: 2
Filter: (country = ANY ('{USA,BR,ZW}'::text[]))
Rows Removed by Filter: 1
Columnar Projected Columns: id, country
Columnar Chunk Group Filters: (country = ANY ('{USA,BR,ZW}'::text[]))
Columnar Chunk Groups Removed by Filter: 2
(6 rows)
SELECT id FROM pushdown_test WHERE country IN ('USA', 'BR', 'ZW');


@ -977,6 +977,7 @@ DETAIL: unparameterized; 1 clauses pushed down
(1 row)
SET hash_mem_multiplier = 1.0;
SELECT public.explain_with_pg16_subplan_format($Q$
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT sum(a) FROM pushdown_test where
(
@ -989,13 +990,18 @@ SELECT sum(a) FROM pushdown_test where
)
or
(a > 200000-2010);
$Q$) as "QUERY PLAN";
NOTICE: columnar planner: adding CustomScan path for pushdown_test
DETAIL: unparameterized; 0 clauses pushed down
CONTEXT: PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement
NOTICE: columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
HINT: Var must only reference this rel, and Expr must not reference this rel
CONTEXT: PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement
NOTICE: columnar planner: cannot push down clause: must not contain a subplan
CONTEXT: PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement
NOTICE: columnar planner: adding CustomScan path for pushdown_test
DETAIL: unparameterized; 1 clauses pushed down
CONTEXT: PL/pgSQL function explain_with_pg16_subplan_format(text) line XX at FOR over EXECUTE statement
QUERY PLAN
---------------------------------------------------------------------
Aggregate (actual rows=1 loops=1)


@ -1,5 +1,10 @@
CREATE SCHEMA columnar_paths;
SET search_path TO columnar_paths;
-- columnar_paths has an alternative test output file because PG17 improved
-- the optimizer's ability to use statistics to estimate the size of a CTE
-- scan.
-- The relevant PG commit is:
-- https://github.com/postgres/postgres/commit/f7816aec23eed1dc1da5f9a53cb6507d30b7f0a2
CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar;
INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i;
CREATE INDEX full_correlated_btree ON full_correlated (a);
@ -296,20 +301,16 @@ SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
WHERE w2.a = 123;
QUERY PLAN
---------------------------------------------------------------------
Merge Join
Merge Cond: (w2.d = w1.a)
Hash Join
Hash Cond: (w1.a = w2.d)
CTE w
-> Custom Scan (ColumnarScan) on full_correlated
Columnar Projected Columns: a, b, c, d
-> Sort
Sort Key: w2.d
-> CTE Scan on w w1
-> Hash
-> CTE Scan on w w2
Filter: (a = 123)
-> Materialize
-> Sort
Sort Key: w1.a
-> CTE Scan on w w1
(13 rows)
(9 rows)
-- use index
EXPLAIN (COSTS OFF) WITH w AS NOT MATERIALIZED (SELECT * FROM full_correlated)


@ -0,0 +1,620 @@
CREATE SCHEMA columnar_paths;
SET search_path TO columnar_paths;
-- columnar_paths has an alternative test output file because PG17 improved
-- the optimizer's ability to use statistics to estimate the size of a CTE
-- scan.
-- The relevant PG commit is:
-- https://github.com/postgres/postgres/commit/f7816aec23eed1dc1da5f9a53cb6507d30b7f0a2
CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar;
INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i;
CREATE INDEX full_correlated_btree ON full_correlated (a);
ANALYZE full_correlated;
-- Prevent qual pushdown from competing with index scans.
SET columnar.enable_qual_pushdown = false;
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_correlated WHERE a=200;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_correlated WHERE a<0;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_correlated WHERE a>10 AND a<20;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_correlated WHERE a>1000000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT a FROM full_correlated WHERE a>900000;
$$
);
uses_custom_scan
---------------------------------------------------------------------
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_correlated WHERE a>900000;
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_correlated WHERE a<1000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a,b FROM full_correlated WHERE a<3000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT a FROM full_correlated WHERE a<9000;
$$
);
uses_custom_scan
---------------------------------------------------------------------
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_correlated WHERE a<9000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
BEGIN;
TRUNCATE full_correlated;
INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i;
-- Since we have a much smaller number of rows, the selectivity of the below
-- query should be much higher. So we would choose columnar custom scan.
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT a FROM full_correlated WHERE a=200;
$$
);
uses_custom_scan
---------------------------------------------------------------------
t
(1 row)
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_correlated WHERE a=200;
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
-- same filter as used above, but choosing multiple columns would increase
-- custom scan cost, so we would prefer index scan this time
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a,b,c,d FROM full_correlated WHERE a<9000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a,b,c,d FROM full_correlated WHERE a<9000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
-- again the same filter as used above, but we would choose custom scan this
-- time since it would read three fewer columns from disk
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT c FROM full_correlated WHERE a<10000;
$$
);
uses_custom_scan
---------------------------------------------------------------------
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT c FROM full_correlated WHERE a<10000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT a FROM full_correlated WHERE a>200;
$$
);
uses_custom_scan
---------------------------------------------------------------------
t
(1 row)
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT a FROM full_correlated WHERE a=0 OR a=5;
$$
);
uses_custom_scan
---------------------------------------------------------------------
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_correlated WHERE a=0 OR a=5;
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
--
-- some tests with joins / subqueries etc.
--
CREATE TABLE heap_table (a int, b text, c int, d int);
INSERT INTO heap_table SELECT i, i::text, (i+1000)*7, (i+900)*5 FROM generate_series(1, 1000000) i;
CREATE INDEX heap_table_btree ON heap_table (a);
ANALYZE heap_table;
EXPLAIN (COSTS OFF)
WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1)
SELECT SUM(ht_1.a), MIN(ct_1.c)
FROM heap_table AS ht_1
LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d
LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c
JOIN cte ON cte.d=ht_1.a
WHERE ct_1.a < 3000;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
CTE cte
-> Custom Scan (ColumnarScan) on full_correlated
Filter: (a > 1)
Columnar Projected Columns: a, d
-> Nested Loop Left Join
-> Hash Join
Hash Cond: (cte.d = ht_1.a)
-> CTE Scan on cte
-> Hash
-> Nested Loop
-> Index Scan using full_correlated_btree on full_correlated ct_1
Index Cond: (a < 3000)
-> Index Only Scan using heap_table_btree on heap_table ht_1
Index Cond: (a = ct_1.d)
-> Index Only Scan using heap_table_btree on heap_table ht_2
Index Cond: (a = ct_1.c)
(17 rows)
-- same query but columnar custom scan is disabled
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
EXPLAIN (COSTS OFF)
WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1)
SELECT SUM(ht_1.a), MIN(ct_1.c)
FROM heap_table AS ht_1
LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d
LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c
JOIN cte ON cte.d=ht_1.a
WHERE ct_1.a < 3000;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
CTE cte
-> Seq Scan on full_correlated
Filter: (a > 1)
-> Nested Loop Left Join
-> Hash Join
Hash Cond: (cte.d = ht_1.a)
-> CTE Scan on cte
-> Hash
-> Nested Loop
-> Index Scan using full_correlated_btree on full_correlated ct_1
Index Cond: (a < 3000)
-> Index Only Scan using heap_table_btree on heap_table ht_1
Index Cond: (a = ct_1.d)
-> Index Only Scan using heap_table_btree on heap_table ht_2
Index Cond: (a = ct_1.c)
(16 rows)
ROLLBACK;
-- use custom scan
EXPLAIN (COSTS OFF) WITH w AS (SELECT * FROM full_correlated)
SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
WHERE w2.a = 123;
QUERY PLAN
---------------------------------------------------------------------
Merge Join
Merge Cond: (w2.d = w1.a)
CTE w
-> Custom Scan (ColumnarScan) on full_correlated
Columnar Projected Columns: a, b, c, d
-> Sort
Sort Key: w2.d
-> CTE Scan on w w2
Filter: (a = 123)
-> Materialize
-> Sort
Sort Key: w1.a
-> CTE Scan on w w1
(13 rows)
-- use index
EXPLAIN (COSTS OFF) WITH w AS NOT MATERIALIZED (SELECT * FROM full_correlated)
SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
WHERE w2.a = 123;
QUERY PLAN
---------------------------------------------------------------------
Nested Loop
-> Index Scan using full_correlated_btree on full_correlated full_correlated_1
Index Cond: (a = 123)
-> Index Scan using full_correlated_btree on full_correlated
Index Cond: (a = full_correlated_1.d)
(5 rows)
EXPLAIN (COSTS OFF) SELECT sub_1.b, sub_2.a, sub_3.avg
FROM
(SELECT b FROM full_correlated WHERE (a > 2) GROUP BY b ORDER BY 1 DESC LIMIT 5) AS sub_1,
(SELECT a FROM full_correlated WHERE (a > 10) GROUP BY a HAVING count(DISTINCT a) >= 1 ORDER BY 1 DESC LIMIT 3) AS sub_2,
(SELECT avg(a) AS AVG FROM full_correlated WHERE (a > 2) GROUP BY a HAVING sum(a) > 10 ORDER BY (sum(d) - avg(a) - COALESCE(array_upper(ARRAY[max(a)],1) * 5, 0)) DESC LIMIT 3) AS sub_3
WHERE sub_2.a < sub_1.b::integer
ORDER BY 3 DESC, 2 DESC, 1 DESC
LIMIT 100;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Sort
Sort Key: sub_3.avg DESC, full_correlated_1.a DESC, full_correlated.b DESC
-> Nested Loop
-> Nested Loop
Join Filter: (full_correlated_1.a < (full_correlated.b)::integer)
-> Limit
-> Sort
Sort Key: full_correlated.b DESC
-> HashAggregate
Group Key: full_correlated.b
-> Custom Scan (ColumnarScan) on full_correlated
Filter: (a > 2)
Columnar Projected Columns: a, b
-> Materialize
-> Limit
-> GroupAggregate
Group Key: full_correlated_1.a
Filter: (count(DISTINCT full_correlated_1.a) >= 1)
-> Index Scan Backward using full_correlated_btree on full_correlated full_correlated_1
Index Cond: (a > 10)
-> Materialize
-> Subquery Scan on sub_3
-> Limit
-> Sort
Sort Key: ((((sum(full_correlated_2.d))::numeric - avg(full_correlated_2.a)) - (COALESCE((array_upper(ARRAY[max(full_correlated_2.a)], 1) * 5), 0))::numeric)) DESC
-> GroupAggregate
Group Key: full_correlated_2.a
Filter: (sum(full_correlated_2.a) > 10)
-> Index Scan using full_correlated_btree on full_correlated full_correlated_2
Index Cond: (a > 2)
(31 rows)
DROP INDEX full_correlated_btree;
CREATE INDEX full_correlated_hash ON full_correlated USING hash(a);
ANALYZE full_correlated;
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT a FROM full_correlated WHERE a<10;
$$
);
uses_custom_scan
---------------------------------------------------------------------
t
(1 row)
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT a FROM full_correlated WHERE a>1 AND a<10;
$$
);
uses_custom_scan
---------------------------------------------------------------------
t
(1 row)
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT a FROM full_correlated WHERE a=0 OR a=5;
$$
);
uses_custom_scan
---------------------------------------------------------------------
t
(1 row)
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_correlated WHERE a=1000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a,c FROM full_correlated WHERE a=1000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a,c FROM full_correlated WHERE a=1000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
CREATE TABLE full_anti_correlated (a int, b text) USING columnar;
INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i;
CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b);
ANALYZE full_anti_correlated;
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_anti_correlated WHERE b='600';
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a,b FROM full_anti_correlated WHERE b='600';
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
$$
);
uses_custom_scan
---------------------------------------------------------------------
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
DROP INDEX full_anti_correlated_hash;
CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b);
ANALYZE full_anti_correlated;
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_anti_correlated WHERE a>6500 AND a<7000 AND b<'10000';
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000;
$$
);
uses_custom_scan
---------------------------------------------------------------------
t
(1 row)
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
$$
);
uses_custom_scan
---------------------------------------------------------------------
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
CREATE TABLE no_correlation (a int, b text) USING columnar;
INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i;
CREATE INDEX no_correlation_btree ON no_correlation (a);
ANALYZE no_correlation;
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT a FROM no_correlation WHERE a < 2;
$$
);
uses_custom_scan
---------------------------------------------------------------------
t
(1 row)
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT a FROM no_correlation WHERE a = 200;
$$
);
uses_custom_scan
---------------------------------------------------------------------
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM no_correlation WHERE a = 200;
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
SET columnar.enable_qual_pushdown TO DEFAULT;
BEGIN;
SET LOCAL columnar.stripe_row_limit = 2000;
SET LOCAL columnar.chunk_group_row_limit = 1000;
CREATE TABLE correlated(x int) using columnar;
INSERT INTO correlated
SELECT g FROM generate_series(1,100000) g;
CREATE TABLE uncorrelated(x int) using columnar;
INSERT INTO uncorrelated
SELECT (g * 19) % 100000 FROM generate_series(1,100000) g;
COMMIT;
CREATE INDEX correlated_idx ON correlated(x);
CREATE INDEX uncorrelated_idx ON uncorrelated(x);
ANALYZE correlated, uncorrelated;
-- should choose chunk group filtering; selective and correlated
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM correlated WHERE x = 78910;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on correlated (actual rows=1 loops=1)
Filter: (x = 78910)
Rows Removed by Filter: 999
Columnar Projected Columns: x
Columnar Chunk Group Filters: (x = 78910)
Columnar Chunk Groups Removed by Filter: 99
(6 rows)
SELECT * FROM correlated WHERE x = 78910;
x
---------------------------------------------------------------------
78910
(1 row)
-- should choose index scan; selective but uncorrelated
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM uncorrelated WHERE x = 78910;
QUERY PLAN
---------------------------------------------------------------------
Index Scan using uncorrelated_idx on uncorrelated (actual rows=1 loops=1)
Index Cond: (x = 78910)
(2 rows)
SELECT * FROM uncorrelated WHERE x = 78910;
x
---------------------------------------------------------------------
78910
(1 row)
SET client_min_messages TO WARNING;
DROP SCHEMA columnar_paths CASCADE;


@ -585,3 +585,23 @@ BEGIN
RETURN NEXT;
END LOOP;
END; $$ language plpgsql;
-- This function reformats EXPLAIN output on pg version >= 17 to conform to
-- how pg <= 16 EXPLAIN shows ANY <subquery> in an expression. When 17 is
-- the minimum supported pg version this function can be retired. The commit
-- that changed how ANY <subquery> expressions appear in EXPLAIN is:
-- https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=fd0398fcb
CREATE OR REPLACE FUNCTION explain_with_pg16_subplan_format(explain_command text, out query_plan text)
RETURNS SETOF TEXT AS $$
DECLARE
pgversion int = 0;
BEGIN
pgversion = substring(version(), '\d+')::int;
FOR query_plan IN execute explain_command LOOP
IF pgversion >= 17 THEN
IF query_plan ~ 'SubPlan \d+\).col' THEN
query_plan = regexp_replace(query_plan, '\(ANY \(\w+ = \(SubPlan (\d+)\).col1\)\)', '(SubPlan \1)', 'g');
END IF;
END IF;
RETURN NEXT;
END LOOP;
END; $$ language plpgsql;
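
A note on the version check: `substring(version(), '\d+')::int` pulls the
major version out of the server banner:

```sql
SELECT version();                          -- e.g. 'PostgreSQL 17.2 on x86_64-pc-linux-gnu, ...'
SELECT substring(version(), '\d+')::int;   -- 17
-- an equivalent check without parsing the banner:
SELECT current_setting('server_version_num')::int / 10000;   -- 17
```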


@ -415,6 +415,7 @@ SELECT sum(a) FROM pushdown_test where (a > random() and a <= 2000) or (a > 2000
SELECT sum(a) FROM pushdown_test where (a > random() and a <= 2000) or (a > 200000-1010);
SET hash_mem_multiplier = 1.0;
SELECT public.explain_with_pg16_subplan_format($Q$
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT sum(a) FROM pushdown_test where
(
@ -427,6 +428,7 @@ SELECT sum(a) FROM pushdown_test where
)
or
(a > 200000-2010);
$Q$) as "QUERY PLAN";
RESET hash_mem_multiplier;
SELECT sum(a) FROM pushdown_test where
(


@ -1,6 +1,12 @@
CREATE SCHEMA columnar_paths;
SET search_path TO columnar_paths;
-- columnar_paths has an alternative test output file because PG17 improved
-- the optimizer's ability to use statistics to estimate the size of a CTE
-- scan.
-- The relevant PG commit is:
-- https://github.com/postgres/postgres/commit/f7816aec23eed1dc1da5f9a53cb6507d30b7f0a2
CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar;
INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i;
CREATE INDEX full_correlated_btree ON full_correlated (a);


@ -611,3 +611,24 @@ BEGIN
RETURN NEXT;
END LOOP;
END; $$ language plpgsql;
-- This function reformats EXPLAIN output on pg version >= 17 to conform to
-- how pg <= 16 EXPLAIN shows ANY <subquery> in an expression. When 17 is
-- the minimum supported pg version this function can be retired. The commit
-- that changed how ANY <subquery> expressions appear in EXPLAIN is:
-- https://git.postgresql.org/gitweb/?p=postgresql.git;a=commitdiff;h=fd0398fcb
CREATE OR REPLACE FUNCTION explain_with_pg16_subplan_format(explain_command text, out query_plan text)
RETURNS SETOF TEXT AS $$
DECLARE
pgversion int = 0;
BEGIN
pgversion = substring(version(), '\d+')::int;
FOR query_plan IN execute explain_command LOOP
IF pgversion >= 17 THEN
IF query_plan ~ 'SubPlan \d+\).col' THEN
query_plan = regexp_replace(query_plan, '\(ANY \(\w+ = \(SubPlan (\d+)\).col1\)\)', '(SubPlan \1)', 'g');
END IF;
END IF;
RETURN NEXT;
END LOOP;
END; $$ language plpgsql;