diff --git a/src/backend/columnar/columnar_customscan.c b/src/backend/columnar/columnar_customscan.c index fe27cd15e..5d033bc7e 100644 --- a/src/backend/columnar/columnar_customscan.c +++ b/src/backend/columnar/columnar_customscan.c @@ -62,6 +62,8 @@ static void ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index RangeTblEntry *rte); static void RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPredicate); static bool IsNotIndexPath(Path *path); +static Path * CreateColumnarSeqScanPath(PlannerInfo *root, RelOptInfo *rel, + Oid relationId); static void RecostColumnarPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId); static void RecostColumnarIndexPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId, IndexPath *indexPath); @@ -69,6 +71,7 @@ static Cost ColumnarIndexScanAddStartupCost(RelOptInfo *rel, Oid relationId, IndexPath *indexPath); static Cost ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel, Oid relationId, IndexPath *indexPath); +static void RecostColumnarSeqPath(RelOptInfo *rel, Oid relationId, Path *path); static int RelationIdGetNumberOfAttributes(Oid relationId); static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); @@ -191,6 +194,22 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, /* columnar doesn't support parallel paths */ rel->partial_pathlist = NIL; + /* + * There are cases where IndexPath is normally more preferrable over + * SeqPath for heapAM but not for columnarAM. In such cases, an + * IndexPath could wrongly dominate a SeqPath based on the costs + * estimated by postgres earlier. For this reason, here we manually + * create a SeqPath, estimate the cost based on columnarAM and append + * to pathlist. + * + * Before doing that, we first re-cost all the existing paths so that + * add_path makes correct cost comparisons when appending our SeqPath. + */ + RecostColumnarPaths(root, rel, rte->relid); + + Path *seqPath = CreateColumnarSeqScanPath(root, rel, rte->relid); + add_path(rel, seqPath); + if (EnableColumnarCustomScan) { Path *customPath = CreateColumnarScanPath(root, rel, rte); @@ -211,7 +230,6 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, * SeqPath thinking that its cost would be equal to ColumnarCustomScan. */ RemovePathsByPredicate(rel, IsNotIndexPath); - RecostColumnarPaths(root, rel, rte->relid); add_path(rel, customPath); } } @@ -251,6 +269,23 @@ IsNotIndexPath(Path *path) } +/* + * CreateColumnarSeqScanPath returns Path for sequential scan on columnar + * table with relationId. + */ +static Path * +CreateColumnarSeqScanPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId) +{ + /* columnar doesn't support parallel scan */ + int parallelWorkers = 0; + + Relids requiredOuter = rel->lateral_relids; + Path *path = create_seqscan_path(root, rel, requiredOuter, parallelWorkers); + RecostColumnarSeqPath(rel, relationId, path); + return path; +} + + /* * RecostColumnarPaths re-costs paths of given RelOptInfo for * columnar table with relationId. @@ -272,6 +307,10 @@ RecostColumnarPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId) */ RecostColumnarIndexPath(root, rel, relationId, (IndexPath *) path); } + else if (path->pathtype == T_SeqScan) + { + RecostColumnarSeqPath(rel, relationId, path); + } } } @@ -421,6 +460,33 @@ ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel, } +/* + * RecostColumnarSeqPath re-costs given seq path for columnar table with + * relationId. + */ +static void +RecostColumnarSeqPath(RelOptInfo *rel, Oid relationId, Path *path) +{ + if (!enable_seqscan) + { + /* costs are already set to disable_cost, don't adjust them */ + return; + } + + path->startup_cost = 0; + + /* + * Seq scan doesn't support projection pushdown, so we will read all the + * columns. + * Also note that seq scan doesn't support chunk group filtering too but + * our costing model already doesn't consider chunk group filtering. + */ + int numberOfColumnsRead = RelationIdGetNumberOfAttributes(relationId); + path->total_cost = path->startup_cost + + ColumnarScanCost(rel, relationId, numberOfColumnsRead); +} + + /* * RelationIdGetNumberOfAttributes returns number of attributes that relation * with relationId has. diff --git a/src/test/regress/expected/columnar_partitioning.out b/src/test/regress/expected/columnar_partitioning.out index 85d3118e3..2800d26d5 100644 --- a/src/test/regress/expected/columnar_partitioning.out +++ b/src/test/regress/expected/columnar_partitioning.out @@ -105,9 +105,9 @@ EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent; Workers Planned: 4 -> Partial Aggregate -> Parallel Append + -> Seq Scan on p3 parent_4 -> Seq Scan on p0 parent_1 -> Seq Scan on p1 parent_2 - -> Seq Scan on p3 parent_4 -> Parallel Seq Scan on p2 parent_3 (9 rows) @@ -123,6 +123,45 @@ SET min_parallel_table_scan_size TO DEFAULT; SET parallel_tuple_cost TO DEFAULT; SET max_parallel_workers TO DEFAULT; SET max_parallel_workers_per_gather TO DEFAULT; +CREATE INDEX parent_btree ON parent (n); +ANALYZE parent; +-- will use columnar custom scan on columnar partitions but index +-- scan on heap partition +EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent +WHERE ts > '2020-02-20' AND n < 5; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + -> Append + -> Custom Scan (ColumnarScan) on p1 parent_1 + Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric)) + -> Index Scan using p2_n_idx on p2 parent_2 + Index Cond: (n < '5'::numeric) + Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) + -> Custom Scan (ColumnarScan) on p3 parent_3 + Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric)) +(9 rows) + +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + -- now that we disabled columnar custom scan, will use seq scan on columnar + -- partitions since index scan is more expensive than seq scan too + EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent + WHERE ts > '2020-02-20' AND n < 5; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + -> Append + -> Seq Scan on p1 parent_1 + Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric)) + -> Index Scan using p2_n_idx on p2 parent_2 + Index Cond: (n < '5'::numeric) + Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) + -> Seq Scan on p3 parent_3 + Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric)) +(9 rows) + +ROLLBACK; DROP TABLE parent; -- -- Test inheritance diff --git a/src/test/regress/expected/columnar_partitioning_1.out b/src/test/regress/expected/columnar_partitioning_1.out index 0bf50a0c2..a21b4ae36 100644 --- a/src/test/regress/expected/columnar_partitioning_1.out +++ b/src/test/regress/expected/columnar_partitioning_1.out @@ -105,9 +105,9 @@ EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent; Workers Planned: 4 -> Partial Aggregate -> Parallel Append + -> Seq Scan on p3 -> Seq Scan on p0 -> Seq Scan on p1 - -> Seq Scan on p3 -> Parallel Seq Scan on p2 (9 rows) @@ -123,6 +123,45 @@ SET min_parallel_table_scan_size TO DEFAULT; SET parallel_tuple_cost TO DEFAULT; SET max_parallel_workers TO DEFAULT; SET max_parallel_workers_per_gather TO DEFAULT; +CREATE INDEX parent_btree ON parent (n); +ANALYZE parent; +-- will use columnar custom scan on columnar partitions but index +-- scan on heap partition +EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent +WHERE ts > '2020-02-20' AND n < 5; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + -> Append + -> Custom Scan (ColumnarScan) on p1 + Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric)) + -> Index Scan using p2_n_idx on p2 + Index Cond: (n < '5'::numeric) + Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) + -> Custom Scan (ColumnarScan) on p3 + Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric)) +(9 rows) + +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + -- now that we disabled columnar custom scan, will use seq scan on columnar + -- partitions since index scan is more expensive than seq scan too + EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent + WHERE ts > '2020-02-20' AND n < 5; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + -> Append + -> Seq Scan on p1 + Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric)) + -> Index Scan using p2_n_idx on p2 + Index Cond: (n < '5'::numeric) + Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) + -> Seq Scan on p3 + Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric)) +(9 rows) + +ROLLBACK; DROP TABLE parent; -- -- Test inheritance diff --git a/src/test/regress/expected/columnar_paths.out b/src/test/regress/expected/columnar_paths.out index cf5a86a64..24e712d16 100644 --- a/src/test/regress/expected/columnar_paths.out +++ b/src/test/regress/expected/columnar_paths.out @@ -54,6 +54,19 @@ $$ t (1 row) +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a FROM full_correlated WHERE a>900000; + $$ + ); + uses_seq_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; SELECT columnar_test_helpers.uses_index_scan ( $$ SELECT a FROM full_correlated WHERE a<1000; @@ -84,6 +97,19 @@ $$ t (1 row) +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_index_scan ( + $$ + SELECT a FROM full_correlated WHERE a<9000; + $$ + ); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; BEGIN; TRUNCATE full_correlated; INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i; @@ -99,6 +125,17 @@ BEGIN; t (1 row) + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a FROM full_correlated WHERE a=200; + $$ + ); + uses_seq_scan +--------------------------------------------------------------------- + t +(1 row) + ROLLBACK; -- same filter used in above, but choosing multiple columns would increase -- custom scan cost, so we would prefer index scan this time @@ -112,6 +149,19 @@ $$ t (1 row) +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_index_scan ( + $$ + SELECT a,b,c,d FROM full_correlated WHERE a<9000; + $$ + ); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; -- again same filter used in above, but we would choose custom scan this -- time since it would read three less columns from disk SELECT columnar_test_helpers.uses_custom_scan ( @@ -124,6 +174,19 @@ $$ t (1 row) +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_index_scan ( + $$ + SELECT c FROM full_correlated WHERE a<10000; + $$ + ); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; SELECT columnar_test_helpers.uses_custom_scan ( $$ SELECT a FROM full_correlated WHERE a>200; @@ -144,6 +207,162 @@ $$ t (1 row) +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a FROM full_correlated WHERE a=0 OR a=5; + $$ + ); + uses_seq_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- +-- some tests with joins / subqueries etc. +-- +CREATE TABLE heap_table (a int, b text, c int, d int); +INSERT INTO heap_table SELECT i, i::text, (i+1000)*7, (i+900)*5 FROM generate_series(1, 1000000) i; +CREATE INDEX heap_table_btree ON heap_table (a); +ANALYZE heap_table; +EXPLAIN (COSTS OFF) +WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1) +SELECT SUM(ht_1.a), MIN(ct_1.c) +FROM heap_table AS ht_1 +LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d +LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c +JOIN cte ON cte.d=ht_1.a +WHERE ct_1.a < 3000; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + CTE cte + -> Custom Scan (ColumnarScan) on full_correlated + Filter: (a > 1) + -> Nested Loop Left Join + -> Hash Join + Hash Cond: (cte.d = ht_1.a) + -> CTE Scan on cte + -> Hash + -> Nested Loop + -> Index Scan using full_correlated_btree on full_correlated ct_1 + Index Cond: (a < 3000) + -> Index Only Scan using heap_table_btree on heap_table ht_1 + Index Cond: (a = ct_1.d) + -> Index Only Scan using heap_table_btree on heap_table ht_2 + Index Cond: (a = ct_1.c) +(16 rows) + +-- same query but columnar custom scan is disabled +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + EXPLAIN (COSTS OFF) + WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1) + SELECT SUM(ht_1.a), MIN(ct_1.c) + FROM heap_table AS ht_1 + LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d + LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c + JOIN cte ON cte.d=ht_1.a + WHERE ct_1.a < 3000; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate + CTE cte + -> Seq Scan on full_correlated + Filter: (a > 1) + -> Nested Loop Left Join + -> Hash Join + Hash Cond: (cte.d = ht_1.a) + -> CTE Scan on cte + -> Hash + -> Nested Loop + -> Index Scan using full_correlated_btree on full_correlated ct_1 + Index Cond: (a < 3000) + -> Index Only Scan using heap_table_btree on heap_table ht_1 + Index Cond: (a = ct_1.d) + -> Index Only Scan using heap_table_btree on heap_table ht_2 + Index Cond: (a = ct_1.c) +(16 rows) + +ROLLBACK; +-- use custom scan +EXPLAIN (COSTS OFF) WITH w AS (SELECT * FROM full_correlated) +SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d +WHERE w2.a = 123; + QUERY PLAN +--------------------------------------------------------------------- + Merge Join + Merge Cond: (w2.d = w1.a) + CTE w + -> Custom Scan (ColumnarScan) on full_correlated + -> Sort + Sort Key: w2.d + -> CTE Scan on w w2 + Filter: (a = 123) + -> Materialize + -> Sort + Sort Key: w1.a + -> CTE Scan on w w1 +(12 rows) + +-- use index +EXPLAIN (COSTS OFF) WITH w AS NOT MATERIALIZED (SELECT * FROM full_correlated) +SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d +WHERE w2.a = 123; + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop + -> Index Scan using full_correlated_btree on full_correlated full_correlated_1 + Index Cond: (a = 123) + -> Index Scan using full_correlated_btree on full_correlated + Index Cond: (a = full_correlated_1.d) +(5 rows) + +EXPLAIN (COSTS OFF) SELECT sub_1.b, sub_2.a, sub_3.avg +FROM + (SELECT b FROM full_correlated WHERE (a > 2) GROUP BY b HAVING count(DISTINCT a) > 0 ORDER BY 1 DESC LIMIT 5) AS sub_1, + (SELECT a FROM full_correlated WHERE (a > 10) GROUP BY a HAVING count(DISTINCT a) >= 1 ORDER BY 1 DESC LIMIT 3) AS sub_2, + (SELECT avg(a) AS AVG FROM full_correlated WHERE (a > 2) GROUP BY a HAVING sum(a) > 10 ORDER BY (sum(d) - avg(a) - COALESCE(array_upper(ARRAY[max(a)],1) * 5, 0)) DESC LIMIT 3) AS sub_3 +WHERE sub_2.a < sub_1.b::integer +ORDER BY 3 DESC, 2 DESC, 1 DESC +LIMIT 100; + QUERY PLAN +--------------------------------------------------------------------- + Limit + -> Sort + Sort Key: sub_3.avg DESC, full_correlated_1.a DESC, full_correlated.b DESC + -> Nested Loop + -> Nested Loop + Join Filter: (full_correlated_1.a < (full_correlated.b)::integer) + -> Limit + -> GroupAggregate + Group Key: full_correlated.b + Filter: (count(DISTINCT full_correlated.a) > 0) + -> Sort + Sort Key: full_correlated.b DESC + -> Custom Scan (ColumnarScan) on full_correlated + Filter: (a > 2) + -> Materialize + -> Limit + -> GroupAggregate + Group Key: full_correlated_1.a + Filter: (count(DISTINCT full_correlated_1.a) >= 1) + -> Index Only Scan Backward using full_correlated_btree on full_correlated full_correlated_1 + Index Cond: (a > 10) + -> Materialize + -> Subquery Scan on sub_3 + -> Limit + -> Sort + Sort Key: ((((sum(full_correlated_2.d))::numeric - avg(full_correlated_2.a)) - (COALESCE((array_upper(ARRAY[max(full_correlated_2.a)], 1) * 5), 0))::numeric)) DESC + -> GroupAggregate + Group Key: full_correlated_2.a + Filter: (sum(full_correlated_2.a) > 10) + -> Index Scan using full_correlated_btree on full_correlated full_correlated_2 + Index Cond: (a > 2) +(31 rows) + DROP INDEX full_correlated_btree; CREATE INDEX full_correlated_hash ON full_correlated USING hash(a); ANALYZE full_correlated; @@ -197,6 +416,19 @@ $$ t (1 row) +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_index_scan ( + $$ + SELECT a,c FROM full_correlated WHERE a=1000; + $$ + ); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; CREATE TABLE full_anti_correlated (a int, b text) USING columnar; INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i; CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b); @@ -231,6 +463,19 @@ $$ t (1 row) +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10'; + $$ + ); + uses_seq_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; DROP INDEX full_anti_correlated_hash; CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b); ANALYZE full_anti_correlated; @@ -274,6 +519,19 @@ $$ t (1 row) +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000'; + $$ + ); + uses_seq_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; CREATE TABLE no_correlation (a int, b text) USING columnar; INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i; CREATE INDEX no_correlation_btree ON no_correlation (a); @@ -298,5 +556,18 @@ $$ t (1 row) +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a FROM no_correlation WHERE a = 200; + $$ + ); + uses_seq_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; SET client_min_messages TO WARNING; DROP SCHEMA columnar_paths CASCADE; diff --git a/src/test/regress/expected/columnar_test_helpers.out b/src/test/regress/expected/columnar_test_helpers.out index 6b1421de8..a3a8c541c 100644 --- a/src/test/regress/expected/columnar_test_helpers.out +++ b/src/test/regress/expected/columnar_test_helpers.out @@ -104,3 +104,16 @@ BEGIN END LOOP; RETURN false; END; $$ language plpgsql; +CREATE OR REPLACE FUNCTION uses_seq_scan(command text) +RETURNS BOOLEAN AS $$ +DECLARE + query_plan text; +BEGIN + FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP + IF query_plan ILIKE '%Seq Scan on %' + THEN + RETURN true; + END IF; + END LOOP; + RETURN false; +END; $$ language plpgsql; diff --git a/src/test/regress/sql/columnar_partitioning.sql b/src/test/regress/sql/columnar_partitioning.sql index 2ee7db453..e76589857 100644 --- a/src/test/regress/sql/columnar_partitioning.sql +++ b/src/test/regress/sql/columnar_partitioning.sql @@ -52,6 +52,23 @@ SET parallel_tuple_cost TO DEFAULT; SET max_parallel_workers TO DEFAULT; SET max_parallel_workers_per_gather TO DEFAULT; +CREATE INDEX parent_btree ON parent (n); +ANALYZE parent; + +-- will use columnar custom scan on columnar partitions but index +-- scan on heap partition +EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent +WHERE ts > '2020-02-20' AND n < 5; + +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + + -- now that we disabled columnar custom scan, will use seq scan on columnar + -- partitions since index scan is more expensive than seq scan too + EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent + WHERE ts > '2020-02-20' AND n < 5; +ROLLBACK; + DROP TABLE parent; -- diff --git a/src/test/regress/sql/columnar_paths.sql b/src/test/regress/sql/columnar_paths.sql index be128ba9b..e851f91f3 100644 --- a/src/test/regress/sql/columnar_paths.sql +++ b/src/test/regress/sql/columnar_paths.sql @@ -36,6 +36,15 @@ SELECT a FROM full_correlated WHERE a>900000; $$ ); +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a FROM full_correlated WHERE a>900000; + $$ + ); +ROLLBACK; + SELECT columnar_test_helpers.uses_index_scan ( $$ SELECT a FROM full_correlated WHERE a<1000; @@ -54,6 +63,15 @@ SELECT a FROM full_correlated WHERE a<9000; $$ ); +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_index_scan ( + $$ + SELECT a FROM full_correlated WHERE a<9000; + $$ + ); +ROLLBACK; + BEGIN; TRUNCATE full_correlated; INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i; @@ -65,6 +83,13 @@ BEGIN; SELECT a FROM full_correlated WHERE a=200; $$ ); + + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a FROM full_correlated WHERE a=200; + $$ + ); ROLLBACK; -- same filter used in above, but choosing multiple columns would increase @@ -75,6 +100,15 @@ SELECT a,b,c,d FROM full_correlated WHERE a<9000; $$ ); +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_index_scan ( + $$ + SELECT a,b,c,d FROM full_correlated WHERE a<9000; + $$ + ); +ROLLBACK; + -- again same filter used in above, but we would choose custom scan this -- time since it would read three less columns from disk SELECT columnar_test_helpers.uses_custom_scan ( @@ -83,6 +117,15 @@ SELECT c FROM full_correlated WHERE a<10000; $$ ); +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_index_scan ( + $$ + SELECT c FROM full_correlated WHERE a<10000; + $$ + ); +ROLLBACK; + SELECT columnar_test_helpers.uses_custom_scan ( $$ SELECT a FROM full_correlated WHERE a>200; @@ -95,6 +138,65 @@ SELECT a FROM full_correlated WHERE a=0 OR a=5; $$ ); +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a FROM full_correlated WHERE a=0 OR a=5; + $$ + ); +ROLLBACK; + +-- +-- some tests with joins / subqueries etc. +-- +CREATE TABLE heap_table (a int, b text, c int, d int); +INSERT INTO heap_table SELECT i, i::text, (i+1000)*7, (i+900)*5 FROM generate_series(1, 1000000) i; +CREATE INDEX heap_table_btree ON heap_table (a); +ANALYZE heap_table; + +EXPLAIN (COSTS OFF) +WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1) +SELECT SUM(ht_1.a), MIN(ct_1.c) +FROM heap_table AS ht_1 +LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d +LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c +JOIN cte ON cte.d=ht_1.a +WHERE ct_1.a < 3000; + +-- same query but columnar custom scan is disabled +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + + EXPLAIN (COSTS OFF) + WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1) + SELECT SUM(ht_1.a), MIN(ct_1.c) + FROM heap_table AS ht_1 + LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d + LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c + JOIN cte ON cte.d=ht_1.a + WHERE ct_1.a < 3000; +ROLLBACK; + +-- use custom scan +EXPLAIN (COSTS OFF) WITH w AS (SELECT * FROM full_correlated) +SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d +WHERE w2.a = 123; + +-- use index +EXPLAIN (COSTS OFF) WITH w AS NOT MATERIALIZED (SELECT * FROM full_correlated) +SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d +WHERE w2.a = 123; + +EXPLAIN (COSTS OFF) SELECT sub_1.b, sub_2.a, sub_3.avg +FROM + (SELECT b FROM full_correlated WHERE (a > 2) GROUP BY b HAVING count(DISTINCT a) > 0 ORDER BY 1 DESC LIMIT 5) AS sub_1, + (SELECT a FROM full_correlated WHERE (a > 10) GROUP BY a HAVING count(DISTINCT a) >= 1 ORDER BY 1 DESC LIMIT 3) AS sub_2, + (SELECT avg(a) AS AVG FROM full_correlated WHERE (a > 2) GROUP BY a HAVING sum(a) > 10 ORDER BY (sum(d) - avg(a) - COALESCE(array_upper(ARRAY[max(a)],1) * 5, 0)) DESC LIMIT 3) AS sub_3 +WHERE sub_2.a < sub_1.b::integer +ORDER BY 3 DESC, 2 DESC, 1 DESC +LIMIT 100; + DROP INDEX full_correlated_btree; CREATE INDEX full_correlated_hash ON full_correlated USING hash(a); @@ -130,6 +232,15 @@ SELECT a,c FROM full_correlated WHERE a=1000; $$ ); +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_index_scan ( + $$ + SELECT a,c FROM full_correlated WHERE a=1000; + $$ + ); +ROLLBACK; + CREATE TABLE full_anti_correlated (a int, b text) USING columnar; INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i; CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b); @@ -153,6 +264,15 @@ SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10'; $$ ); +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10'; + $$ + ); +ROLLBACK; + DROP INDEX full_anti_correlated_hash; CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b); @@ -182,6 +302,15 @@ SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000'; $$ ); +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000'; + $$ + ); +ROLLBACK; + CREATE TABLE no_correlation (a int, b text) USING columnar; INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i; CREATE INDEX no_correlation_btree ON no_correlation (a); @@ -199,5 +328,14 @@ SELECT a FROM no_correlation WHERE a = 200; $$ ); +BEGIN; + SET LOCAL columnar.enable_custom_scan TO 'OFF'; + SELECT columnar_test_helpers.uses_seq_scan ( + $$ + SELECT a FROM no_correlation WHERE a = 200; + $$ + ); +ROLLBACK; + SET client_min_messages TO WARNING; DROP SCHEMA columnar_paths CASCADE; diff --git a/src/test/regress/sql/columnar_test_helpers.sql b/src/test/regress/sql/columnar_test_helpers.sql index 07b6230ab..772a58690 100644 --- a/src/test/regress/sql/columnar_test_helpers.sql +++ b/src/test/regress/sql/columnar_test_helpers.sql @@ -113,3 +113,17 @@ BEGIN END LOOP; RETURN false; END; $$ language plpgsql; + +CREATE OR REPLACE FUNCTION uses_seq_scan(command text) +RETURNS BOOLEAN AS $$ +DECLARE + query_plan text; +BEGIN + FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP + IF query_plan ILIKE '%Seq Scan on %' + THEN + RETURN true; + END IF; + END LOOP; + RETURN false; +END; $$ language plpgsql;