Re-cost SeqPaths as well for columnar tables

pull/5140/head
Onur Tirtir 2021-07-26 14:54:51 +03:00
parent 453ac40725
commit 93ebbb0607
8 changed files with 600 additions and 3 deletions

View File

@ -62,6 +62,8 @@ static void ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index
RangeTblEntry *rte);
static void RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPredicate);
static bool IsNotIndexPath(Path *path);
static Path * CreateColumnarSeqScanPath(PlannerInfo *root, RelOptInfo *rel,
Oid relationId);
static void RecostColumnarPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId);
static void RecostColumnarIndexPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId,
IndexPath *indexPath);
@ -69,6 +71,7 @@ static Cost ColumnarIndexScanAddStartupCost(RelOptInfo *rel, Oid relationId,
IndexPath *indexPath);
static Cost ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel,
Oid relationId, IndexPath *indexPath);
static void RecostColumnarSeqPath(RelOptInfo *rel, Oid relationId, Path *path);
static int RelationIdGetNumberOfAttributes(Oid relationId);
static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
@ -191,6 +194,22 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
/* columnar doesn't support parallel paths */
rel->partial_pathlist = NIL;
/*
* There are cases where IndexPath is normally preferable to
* SeqPath for heapAM but not for columnarAM. In such cases, an
* IndexPath could wrongly dominate a SeqPath based on the costs
* estimated by postgres earlier. For this reason, here we manually
* create a SeqPath, estimate its cost based on columnarAM and append
* it to the pathlist.
*
* Before doing that, we first re-cost all the existing paths so that
* add_path makes correct cost comparisons when appending our SeqPath.
*/
RecostColumnarPaths(root, rel, rte->relid);
Path *seqPath = CreateColumnarSeqScanPath(root, rel, rte->relid);
add_path(rel, seqPath);
if (EnableColumnarCustomScan)
{
Path *customPath = CreateColumnarScanPath(root, rel, rte);
@ -211,7 +230,6 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
* SeqPath thinking that its cost would be equal to ColumnarCustomScan.
*/
RemovePathsByPredicate(rel, IsNotIndexPath);
RecostColumnarPaths(root, rel, rte->relid);
add_path(rel, customPath);
}
}
@ -251,6 +269,23 @@ IsNotIndexPath(Path *path)
}
/*
* CreateColumnarSeqScanPath returns Path for sequential scan on columnar
* table with relationId.
*/
static Path *
CreateColumnarSeqScanPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId)
{
/* columnar doesn't support parallel scan */
int parallelWorkers = 0;
Relids requiredOuter = rel->lateral_relids;
Path *path = create_seqscan_path(root, rel, requiredOuter, parallelWorkers);
RecostColumnarSeqPath(rel, relationId, path);
return path;
}
/*
* RecostColumnarPaths re-costs paths of given RelOptInfo for
* columnar table with relationId.
@ -272,6 +307,10 @@ RecostColumnarPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId)
*/
RecostColumnarIndexPath(root, rel, relationId, (IndexPath *) path);
}
else if (path->pathtype == T_SeqScan)
{
RecostColumnarSeqPath(rel, relationId, path);
}
}
}
@ -421,6 +460,33 @@ ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel,
}
/*
* RecostColumnarSeqPath re-costs given seq path for columnar table with
* relationId.
*/
static void
RecostColumnarSeqPath(RelOptInfo *rel, Oid relationId, Path *path)
{
if (!enable_seqscan)
{
/* costs are already set to disable_cost, don't adjust them */
return;
}
path->startup_cost = 0;
/*
* Seq scan doesn't support projection pushdown, so we will read all the
* columns.
* Also note that seq scan doesn't support chunk group filtering either,
* but our costing model doesn't consider chunk group filtering anyway.
*/
int numberOfColumnsRead = RelationIdGetNumberOfAttributes(relationId);
path->total_cost = path->startup_cost +
ColumnarScanCost(rel, relationId, numberOfColumnsRead);
}
/*
* RelationIdGetNumberOfAttributes returns number of attributes that relation
* with relationId has.
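
The planner-level effect of this change can be sketched in plain SQL. The following is only an illustration (the table, index, and data are made up for this note and are not part of the diff); with columnar.enable_custom_scan off, the re-costed SeqPath now competes against the IndexPath using columnar-aware costs, so a plain Seq Scan can win where an index scan previously dominated:

-- illustrative sketch only; the chosen plan still depends on table size and statistics
CREATE TABLE example_columnar (a int, b text) USING columnar;
INSERT INTO example_columnar SELECT i, i::text FROM generate_series(1, 1000000) i;
CREATE INDEX example_columnar_btree ON example_columnar (a);
ANALYZE example_columnar;
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
EXPLAIN (COSTS OFF) SELECT a FROM example_columnar WHERE a = 0 OR a = 5;
ROLLBACK;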

View File

@ -105,9 +105,9 @@ EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent;
Workers Planned: 4
-> Partial Aggregate
-> Parallel Append
-> Seq Scan on p3 parent_4
-> Seq Scan on p0 parent_1
-> Seq Scan on p1 parent_2
-> Seq Scan on p3 parent_4
-> Parallel Seq Scan on p2 parent_3
(9 rows)
@ -123,6 +123,45 @@ SET min_parallel_table_scan_size TO DEFAULT;
SET parallel_tuple_cost TO DEFAULT;
SET max_parallel_workers TO DEFAULT;
SET max_parallel_workers_per_gather TO DEFAULT;
CREATE INDEX parent_btree ON parent (n);
ANALYZE parent;
-- will use columnar custom scan on columnar partitions but index
-- scan on heap partition
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
WHERE ts > '2020-02-20' AND n < 5;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
-> Append
-> Custom Scan (ColumnarScan) on p1 parent_1
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
-> Index Scan using p2_n_idx on p2 parent_2
Index Cond: (n < '5'::numeric)
Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone)
-> Custom Scan (ColumnarScan) on p3 parent_3
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
(9 rows)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
-- now that we disabled columnar custom scan, will use seq scan on columnar
-- partitions since index scan is more expensive than seq scan as well
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
WHERE ts > '2020-02-20' AND n < 5;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
-> Append
-> Seq Scan on p1 parent_1
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
-> Index Scan using p2_n_idx on p2 parent_2
Index Cond: (n < '5'::numeric)
Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone)
-> Seq Scan on p3 parent_3
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
(9 rows)
ROLLBACK;
DROP TABLE parent;
--
-- Test inheritance

View File

@ -105,9 +105,9 @@ EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent;
Workers Planned: 4
-> Partial Aggregate
-> Parallel Append
-> Seq Scan on p3
-> Seq Scan on p0
-> Seq Scan on p1
-> Seq Scan on p3
-> Parallel Seq Scan on p2
(9 rows)
@ -123,6 +123,45 @@ SET min_parallel_table_scan_size TO DEFAULT;
SET parallel_tuple_cost TO DEFAULT;
SET max_parallel_workers TO DEFAULT;
SET max_parallel_workers_per_gather TO DEFAULT;
CREATE INDEX parent_btree ON parent (n);
ANALYZE parent;
-- will use columnar custom scan on columnar partitions but index
-- scan on heap partition
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
WHERE ts > '2020-02-20' AND n < 5;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
-> Append
-> Custom Scan (ColumnarScan) on p1
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
-> Index Scan using p2_n_idx on p2
Index Cond: (n < '5'::numeric)
Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone)
-> Custom Scan (ColumnarScan) on p3
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
(9 rows)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
-- now that we disabled columnar custom scan, will use seq scan on columnar
-- partitions since index scan is more expensive than seq scan as well
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
WHERE ts > '2020-02-20' AND n < 5;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
-> Append
-> Seq Scan on p1
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
-> Index Scan using p2_n_idx on p2
Index Cond: (n < '5'::numeric)
Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone)
-> Seq Scan on p3
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
(9 rows)
ROLLBACK;
DROP TABLE parent;
--
-- Test inheritance

View File

@ -54,6 +54,19 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_correlated WHERE a>900000;
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_correlated WHERE a<1000;
@ -84,6 +97,19 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_correlated WHERE a<9000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
BEGIN;
TRUNCATE full_correlated;
INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i;
@ -99,6 +125,17 @@ BEGIN;
t
(1 row)
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_correlated WHERE a=200;
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
-- same filter as used above, but choosing multiple columns would increase
-- custom scan cost, so we would prefer index scan this time
@ -112,6 +149,19 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a,b,c,d FROM full_correlated WHERE a<9000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
-- again the same filter as used above, but we would choose custom scan this
-- time since it would read three fewer columns from disk
SELECT columnar_test_helpers.uses_custom_scan (
@ -124,6 +174,19 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT c FROM full_correlated WHERE a<10000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT a FROM full_correlated WHERE a>200;
@ -144,6 +207,162 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_correlated WHERE a=0 OR a=5;
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
--
-- some tests with joins / subqueries etc.
--
CREATE TABLE heap_table (a int, b text, c int, d int);
INSERT INTO heap_table SELECT i, i::text, (i+1000)*7, (i+900)*5 FROM generate_series(1, 1000000) i;
CREATE INDEX heap_table_btree ON heap_table (a);
ANALYZE heap_table;
EXPLAIN (COSTS OFF)
WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1)
SELECT SUM(ht_1.a), MIN(ct_1.c)
FROM heap_table AS ht_1
LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d
LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c
JOIN cte ON cte.d=ht_1.a
WHERE ct_1.a < 3000;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
CTE cte
-> Custom Scan (ColumnarScan) on full_correlated
Filter: (a > 1)
-> Nested Loop Left Join
-> Hash Join
Hash Cond: (cte.d = ht_1.a)
-> CTE Scan on cte
-> Hash
-> Nested Loop
-> Index Scan using full_correlated_btree on full_correlated ct_1
Index Cond: (a < 3000)
-> Index Only Scan using heap_table_btree on heap_table ht_1
Index Cond: (a = ct_1.d)
-> Index Only Scan using heap_table_btree on heap_table ht_2
Index Cond: (a = ct_1.c)
(16 rows)
-- same query but columnar custom scan is disabled
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
EXPLAIN (COSTS OFF)
WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1)
SELECT SUM(ht_1.a), MIN(ct_1.c)
FROM heap_table AS ht_1
LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d
LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c
JOIN cte ON cte.d=ht_1.a
WHERE ct_1.a < 3000;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
CTE cte
-> Seq Scan on full_correlated
Filter: (a > 1)
-> Nested Loop Left Join
-> Hash Join
Hash Cond: (cte.d = ht_1.a)
-> CTE Scan on cte
-> Hash
-> Nested Loop
-> Index Scan using full_correlated_btree on full_correlated ct_1
Index Cond: (a < 3000)
-> Index Only Scan using heap_table_btree on heap_table ht_1
Index Cond: (a = ct_1.d)
-> Index Only Scan using heap_table_btree on heap_table ht_2
Index Cond: (a = ct_1.c)
(16 rows)
ROLLBACK;
-- use custom scan
EXPLAIN (COSTS OFF) WITH w AS (SELECT * FROM full_correlated)
SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
WHERE w2.a = 123;
QUERY PLAN
---------------------------------------------------------------------
Merge Join
Merge Cond: (w2.d = w1.a)
CTE w
-> Custom Scan (ColumnarScan) on full_correlated
-> Sort
Sort Key: w2.d
-> CTE Scan on w w2
Filter: (a = 123)
-> Materialize
-> Sort
Sort Key: w1.a
-> CTE Scan on w w1
(12 rows)
-- use index
EXPLAIN (COSTS OFF) WITH w AS NOT MATERIALIZED (SELECT * FROM full_correlated)
SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
WHERE w2.a = 123;
QUERY PLAN
---------------------------------------------------------------------
Nested Loop
-> Index Scan using full_correlated_btree on full_correlated full_correlated_1
Index Cond: (a = 123)
-> Index Scan using full_correlated_btree on full_correlated
Index Cond: (a = full_correlated_1.d)
(5 rows)
EXPLAIN (COSTS OFF) SELECT sub_1.b, sub_2.a, sub_3.avg
FROM
(SELECT b FROM full_correlated WHERE (a > 2) GROUP BY b HAVING count(DISTINCT a) > 0 ORDER BY 1 DESC LIMIT 5) AS sub_1,
(SELECT a FROM full_correlated WHERE (a > 10) GROUP BY a HAVING count(DISTINCT a) >= 1 ORDER BY 1 DESC LIMIT 3) AS sub_2,
(SELECT avg(a) AS AVG FROM full_correlated WHERE (a > 2) GROUP BY a HAVING sum(a) > 10 ORDER BY (sum(d) - avg(a) - COALESCE(array_upper(ARRAY[max(a)],1) * 5, 0)) DESC LIMIT 3) AS sub_3
WHERE sub_2.a < sub_1.b::integer
ORDER BY 3 DESC, 2 DESC, 1 DESC
LIMIT 100;
QUERY PLAN
---------------------------------------------------------------------
Limit
-> Sort
Sort Key: sub_3.avg DESC, full_correlated_1.a DESC, full_correlated.b DESC
-> Nested Loop
-> Nested Loop
Join Filter: (full_correlated_1.a < (full_correlated.b)::integer)
-> Limit
-> GroupAggregate
Group Key: full_correlated.b
Filter: (count(DISTINCT full_correlated.a) > 0)
-> Sort
Sort Key: full_correlated.b DESC
-> Custom Scan (ColumnarScan) on full_correlated
Filter: (a > 2)
-> Materialize
-> Limit
-> GroupAggregate
Group Key: full_correlated_1.a
Filter: (count(DISTINCT full_correlated_1.a) >= 1)
-> Index Only Scan Backward using full_correlated_btree on full_correlated full_correlated_1
Index Cond: (a > 10)
-> Materialize
-> Subquery Scan on sub_3
-> Limit
-> Sort
Sort Key: ((((sum(full_correlated_2.d))::numeric - avg(full_correlated_2.a)) - (COALESCE((array_upper(ARRAY[max(full_correlated_2.a)], 1) * 5), 0))::numeric)) DESC
-> GroupAggregate
Group Key: full_correlated_2.a
Filter: (sum(full_correlated_2.a) > 10)
-> Index Scan using full_correlated_btree on full_correlated full_correlated_2
Index Cond: (a > 2)
(31 rows)
DROP INDEX full_correlated_btree;
CREATE INDEX full_correlated_hash ON full_correlated USING hash(a);
ANALYZE full_correlated;
@ -197,6 +416,19 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a,c FROM full_correlated WHERE a=1000;
$$
);
uses_index_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
CREATE TABLE full_anti_correlated (a int, b text) USING columnar;
INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i;
CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b);
@ -231,6 +463,19 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
DROP INDEX full_anti_correlated_hash;
CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b);
ANALYZE full_anti_correlated;
@ -274,6 +519,19 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
CREATE TABLE no_correlation (a int, b text) USING columnar;
INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i;
CREATE INDEX no_correlation_btree ON no_correlation (a);
@ -298,5 +556,18 @@ $$
t
(1 row)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM no_correlation WHERE a = 200;
$$
);
uses_seq_scan
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
SET client_min_messages TO WARNING;
DROP SCHEMA columnar_paths CASCADE;

View File

@ -104,3 +104,16 @@ BEGIN
END LOOP;
RETURN false;
END; $$ language plpgsql;
CREATE OR REPLACE FUNCTION uses_seq_scan(command text)
RETURNS BOOLEAN AS $$
DECLARE
query_plan text;
BEGIN
FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
IF query_plan ILIKE '%Seq Scan on %'
THEN
RETURN true;
END IF;
END LOOP;
RETURN false;
END; $$ language plpgsql;

View File

@ -52,6 +52,23 @@ SET parallel_tuple_cost TO DEFAULT;
SET max_parallel_workers TO DEFAULT;
SET max_parallel_workers_per_gather TO DEFAULT;
CREATE INDEX parent_btree ON parent (n);
ANALYZE parent;
-- will use columnar custom scan on columnar partitions but index
-- scan on heap partition
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
WHERE ts > '2020-02-20' AND n < 5;
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
-- now that we disabled columnar custom scan, will use seq scan on columnar
-- partitions since index scan is more expensive than seq scan as well
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
WHERE ts > '2020-02-20' AND n < 5;
ROLLBACK;
DROP TABLE parent;
--

View File

@ -36,6 +36,15 @@ SELECT a FROM full_correlated WHERE a>900000;
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_correlated WHERE a>900000;
$$
);
ROLLBACK;
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_correlated WHERE a<1000;
@ -54,6 +63,15 @@ SELECT a FROM full_correlated WHERE a<9000;
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_correlated WHERE a<9000;
$$
);
ROLLBACK;
BEGIN;
TRUNCATE full_correlated;
INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i;
@ -65,6 +83,13 @@ BEGIN;
SELECT a FROM full_correlated WHERE a=200;
$$
);
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_correlated WHERE a=200;
$$
);
ROLLBACK;
-- same filter as used above, but choosing multiple columns would increase
@ -75,6 +100,15 @@ SELECT a,b,c,d FROM full_correlated WHERE a<9000;
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a,b,c,d FROM full_correlated WHERE a<9000;
$$
);
ROLLBACK;
-- again the same filter as used above, but we would choose custom scan this
-- time since it would read three fewer columns from disk
SELECT columnar_test_helpers.uses_custom_scan (
@ -83,6 +117,15 @@ SELECT c FROM full_correlated WHERE a<10000;
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT c FROM full_correlated WHERE a<10000;
$$
);
ROLLBACK;
SELECT columnar_test_helpers.uses_custom_scan (
$$
SELECT a FROM full_correlated WHERE a>200;
@ -95,6 +138,65 @@ SELECT a FROM full_correlated WHERE a=0 OR a=5;
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_correlated WHERE a=0 OR a=5;
$$
);
ROLLBACK;
--
-- some tests with joins / subqueries etc.
--
CREATE TABLE heap_table (a int, b text, c int, d int);
INSERT INTO heap_table SELECT i, i::text, (i+1000)*7, (i+900)*5 FROM generate_series(1, 1000000) i;
CREATE INDEX heap_table_btree ON heap_table (a);
ANALYZE heap_table;
EXPLAIN (COSTS OFF)
WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1)
SELECT SUM(ht_1.a), MIN(ct_1.c)
FROM heap_table AS ht_1
LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d
LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c
JOIN cte ON cte.d=ht_1.a
WHERE ct_1.a < 3000;
-- same query but columnar custom scan is disabled
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
EXPLAIN (COSTS OFF)
WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1)
SELECT SUM(ht_1.a), MIN(ct_1.c)
FROM heap_table AS ht_1
LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d
LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c
JOIN cte ON cte.d=ht_1.a
WHERE ct_1.a < 3000;
ROLLBACK;
-- use custom scan
EXPLAIN (COSTS OFF) WITH w AS (SELECT * FROM full_correlated)
SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
WHERE w2.a = 123;
-- use index
EXPLAIN (COSTS OFF) WITH w AS NOT MATERIALIZED (SELECT * FROM full_correlated)
SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
WHERE w2.a = 123;
EXPLAIN (COSTS OFF) SELECT sub_1.b, sub_2.a, sub_3.avg
FROM
(SELECT b FROM full_correlated WHERE (a > 2) GROUP BY b HAVING count(DISTINCT a) > 0 ORDER BY 1 DESC LIMIT 5) AS sub_1,
(SELECT a FROM full_correlated WHERE (a > 10) GROUP BY a HAVING count(DISTINCT a) >= 1 ORDER BY 1 DESC LIMIT 3) AS sub_2,
(SELECT avg(a) AS AVG FROM full_correlated WHERE (a > 2) GROUP BY a HAVING sum(a) > 10 ORDER BY (sum(d) - avg(a) - COALESCE(array_upper(ARRAY[max(a)],1) * 5, 0)) DESC LIMIT 3) AS sub_3
WHERE sub_2.a < sub_1.b::integer
ORDER BY 3 DESC, 2 DESC, 1 DESC
LIMIT 100;
DROP INDEX full_correlated_btree;
CREATE INDEX full_correlated_hash ON full_correlated USING hash(a);
@ -130,6 +232,15 @@ SELECT a,c FROM full_correlated WHERE a=1000;
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a,c FROM full_correlated WHERE a=1000;
$$
);
ROLLBACK;
CREATE TABLE full_anti_correlated (a int, b text) USING columnar;
INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i;
CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b);
@ -153,6 +264,15 @@ SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
$$
);
ROLLBACK;
DROP INDEX full_anti_correlated_hash;
CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b);
@ -182,6 +302,15 @@ SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
$$
);
ROLLBACK;
CREATE TABLE no_correlation (a int, b text) USING columnar;
INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i;
CREATE INDEX no_correlation_btree ON no_correlation (a);
@ -199,5 +328,14 @@ SELECT a FROM no_correlation WHERE a = 200;
$$
);
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';
SELECT columnar_test_helpers.uses_seq_scan (
$$
SELECT a FROM no_correlation WHERE a = 200;
$$
);
ROLLBACK;
SET client_min_messages TO WARNING;
DROP SCHEMA columnar_paths CASCADE;

View File

@ -113,3 +113,17 @@ BEGIN
END LOOP;
RETURN false;
END; $$ language plpgsql;
CREATE OR REPLACE FUNCTION uses_seq_scan(command text)
RETURNS BOOLEAN AS $$
DECLARE
query_plan text;
BEGIN
FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
IF query_plan ILIKE '%Seq Scan on %'
THEN
RETURN true;
END IF;
END LOOP;
RETURN false;
END; $$ language plpgsql;