mirror of https://github.com/citusdata/citus.git
Re-cost SeqPath's as well for columnar tables
parent
453ac40725
commit
93ebbb0607
|
@ -62,6 +62,8 @@ static void ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index
|
|||
RangeTblEntry *rte);
|
||||
static void RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPredicate);
|
||||
static bool IsNotIndexPath(Path *path);
|
||||
static Path * CreateColumnarSeqScanPath(PlannerInfo *root, RelOptInfo *rel,
|
||||
Oid relationId);
|
||||
static void RecostColumnarPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId);
|
||||
static void RecostColumnarIndexPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId,
|
||||
IndexPath *indexPath);
|
||||
|
@ -69,6 +71,7 @@ static Cost ColumnarIndexScanAddStartupCost(RelOptInfo *rel, Oid relationId,
|
|||
IndexPath *indexPath);
|
||||
static Cost ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel,
|
||||
Oid relationId, IndexPath *indexPath);
|
||||
static void RecostColumnarSeqPath(RelOptInfo *rel, Oid relationId, Path *path);
|
||||
static int RelationIdGetNumberOfAttributes(Oid relationId);
|
||||
static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel,
|
||||
RangeTblEntry *rte);
|
||||
|
@ -191,6 +194,22 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
|
|||
/* columnar doesn't support parallel paths */
|
||||
rel->partial_pathlist = NIL;
|
||||
|
||||
/*
|
||||
* There are cases where IndexPath is normally more preferrable over
|
||||
* SeqPath for heapAM but not for columnarAM. In such cases, an
|
||||
* IndexPath could wrongly dominate a SeqPath based on the costs
|
||||
* estimated by postgres earlier. For this reason, here we manually
|
||||
* create a SeqPath, estimate the cost based on columnarAM and append
|
||||
* to pathlist.
|
||||
*
|
||||
* Before doing that, we first re-cost all the existing paths so that
|
||||
* add_path makes correct cost comparisons when appending our SeqPath.
|
||||
*/
|
||||
RecostColumnarPaths(root, rel, rte->relid);
|
||||
|
||||
Path *seqPath = CreateColumnarSeqScanPath(root, rel, rte->relid);
|
||||
add_path(rel, seqPath);
|
||||
|
||||
if (EnableColumnarCustomScan)
|
||||
{
|
||||
Path *customPath = CreateColumnarScanPath(root, rel, rte);
|
||||
|
@ -211,7 +230,6 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
|
|||
* SeqPath thinking that its cost would be equal to ColumnarCustomScan.
|
||||
*/
|
||||
RemovePathsByPredicate(rel, IsNotIndexPath);
|
||||
RecostColumnarPaths(root, rel, rte->relid);
|
||||
add_path(rel, customPath);
|
||||
}
|
||||
}
|
||||
|
@ -251,6 +269,23 @@ IsNotIndexPath(Path *path)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* CreateColumnarSeqScanPath returns Path for sequential scan on columnar
|
||||
* table with relationId.
|
||||
*/
|
||||
static Path *
|
||||
CreateColumnarSeqScanPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId)
|
||||
{
|
||||
/* columnar doesn't support parallel scan */
|
||||
int parallelWorkers = 0;
|
||||
|
||||
Relids requiredOuter = rel->lateral_relids;
|
||||
Path *path = create_seqscan_path(root, rel, requiredOuter, parallelWorkers);
|
||||
RecostColumnarSeqPath(rel, relationId, path);
|
||||
return path;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* RecostColumnarPaths re-costs paths of given RelOptInfo for
|
||||
* columnar table with relationId.
|
||||
|
@ -272,6 +307,10 @@ RecostColumnarPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId)
|
|||
*/
|
||||
RecostColumnarIndexPath(root, rel, relationId, (IndexPath *) path);
|
||||
}
|
||||
else if (path->pathtype == T_SeqScan)
|
||||
{
|
||||
RecostColumnarSeqPath(rel, relationId, path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -421,6 +460,33 @@ ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel,
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* RecostColumnarSeqPath re-costs given seq path for columnar table with
|
||||
* relationId.
|
||||
*/
|
||||
static void
|
||||
RecostColumnarSeqPath(RelOptInfo *rel, Oid relationId, Path *path)
|
||||
{
|
||||
if (!enable_seqscan)
|
||||
{
|
||||
/* costs are already set to disable_cost, don't adjust them */
|
||||
return;
|
||||
}
|
||||
|
||||
path->startup_cost = 0;
|
||||
|
||||
/*
|
||||
* Seq scan doesn't support projection pushdown, so we will read all the
|
||||
* columns.
|
||||
* Also note that seq scan doesn't support chunk group filtering too but
|
||||
* our costing model already doesn't consider chunk group filtering.
|
||||
*/
|
||||
int numberOfColumnsRead = RelationIdGetNumberOfAttributes(relationId);
|
||||
path->total_cost = path->startup_cost +
|
||||
ColumnarScanCost(rel, relationId, numberOfColumnsRead);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* RelationIdGetNumberOfAttributes returns number of attributes that relation
|
||||
* with relationId has.
|
||||
|
|
|
@ -105,9 +105,9 @@ EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent;
|
|||
Workers Planned: 4
|
||||
-> Partial Aggregate
|
||||
-> Parallel Append
|
||||
-> Seq Scan on p3 parent_4
|
||||
-> Seq Scan on p0 parent_1
|
||||
-> Seq Scan on p1 parent_2
|
||||
-> Seq Scan on p3 parent_4
|
||||
-> Parallel Seq Scan on p2 parent_3
|
||||
(9 rows)
|
||||
|
||||
|
@ -123,6 +123,45 @@ SET min_parallel_table_scan_size TO DEFAULT;
|
|||
SET parallel_tuple_cost TO DEFAULT;
|
||||
SET max_parallel_workers TO DEFAULT;
|
||||
SET max_parallel_workers_per_gather TO DEFAULT;
|
||||
CREATE INDEX parent_btree ON parent (n);
|
||||
ANALYZE parent;
|
||||
-- will use columnar custom scan on columnar partitions but index
|
||||
-- scan on heap partition
|
||||
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
|
||||
WHERE ts > '2020-02-20' AND n < 5;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Aggregate
|
||||
-> Append
|
||||
-> Custom Scan (ColumnarScan) on p1 parent_1
|
||||
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
|
||||
-> Index Scan using p2_n_idx on p2 parent_2
|
||||
Index Cond: (n < '5'::numeric)
|
||||
Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone)
|
||||
-> Custom Scan (ColumnarScan) on p3 parent_3
|
||||
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
|
||||
(9 rows)
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
-- now that we disabled columnar custom scan, will use seq scan on columnar
|
||||
-- partitions since index scan is more expensive than seq scan too
|
||||
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
|
||||
WHERE ts > '2020-02-20' AND n < 5;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Aggregate
|
||||
-> Append
|
||||
-> Seq Scan on p1 parent_1
|
||||
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
|
||||
-> Index Scan using p2_n_idx on p2 parent_2
|
||||
Index Cond: (n < '5'::numeric)
|
||||
Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone)
|
||||
-> Seq Scan on p3 parent_3
|
||||
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
|
||||
(9 rows)
|
||||
|
||||
ROLLBACK;
|
||||
DROP TABLE parent;
|
||||
--
|
||||
-- Test inheritance
|
||||
|
|
|
@ -105,9 +105,9 @@ EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent;
|
|||
Workers Planned: 4
|
||||
-> Partial Aggregate
|
||||
-> Parallel Append
|
||||
-> Seq Scan on p3
|
||||
-> Seq Scan on p0
|
||||
-> Seq Scan on p1
|
||||
-> Seq Scan on p3
|
||||
-> Parallel Seq Scan on p2
|
||||
(9 rows)
|
||||
|
||||
|
@ -123,6 +123,45 @@ SET min_parallel_table_scan_size TO DEFAULT;
|
|||
SET parallel_tuple_cost TO DEFAULT;
|
||||
SET max_parallel_workers TO DEFAULT;
|
||||
SET max_parallel_workers_per_gather TO DEFAULT;
|
||||
CREATE INDEX parent_btree ON parent (n);
|
||||
ANALYZE parent;
|
||||
-- will use columnar custom scan on columnar partitions but index
|
||||
-- scan on heap partition
|
||||
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
|
||||
WHERE ts > '2020-02-20' AND n < 5;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Aggregate
|
||||
-> Append
|
||||
-> Custom Scan (ColumnarScan) on p1
|
||||
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
|
||||
-> Index Scan using p2_n_idx on p2
|
||||
Index Cond: (n < '5'::numeric)
|
||||
Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone)
|
||||
-> Custom Scan (ColumnarScan) on p3
|
||||
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
|
||||
(9 rows)
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
-- now that we disabled columnar custom scan, will use seq scan on columnar
|
||||
-- partitions since index scan is more expensive than seq scan too
|
||||
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
|
||||
WHERE ts > '2020-02-20' AND n < 5;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Aggregate
|
||||
-> Append
|
||||
-> Seq Scan on p1
|
||||
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
|
||||
-> Index Scan using p2_n_idx on p2
|
||||
Index Cond: (n < '5'::numeric)
|
||||
Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone)
|
||||
-> Seq Scan on p3
|
||||
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
|
||||
(9 rows)
|
||||
|
||||
ROLLBACK;
|
||||
DROP TABLE parent;
|
||||
--
|
||||
-- Test inheritance
|
||||
|
|
|
@ -54,6 +54,19 @@ $$
|
|||
t
|
||||
(1 row)
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_seq_scan (
|
||||
$$
|
||||
SELECT a FROM full_correlated WHERE a>900000;
|
||||
$$
|
||||
);
|
||||
uses_seq_scan
|
||||
---------------------------------------------------------------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
ROLLBACK;
|
||||
SELECT columnar_test_helpers.uses_index_scan (
|
||||
$$
|
||||
SELECT a FROM full_correlated WHERE a<1000;
|
||||
|
@ -84,6 +97,19 @@ $$
|
|||
t
|
||||
(1 row)
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_index_scan (
|
||||
$$
|
||||
SELECT a FROM full_correlated WHERE a<9000;
|
||||
$$
|
||||
);
|
||||
uses_index_scan
|
||||
---------------------------------------------------------------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
ROLLBACK;
|
||||
BEGIN;
|
||||
TRUNCATE full_correlated;
|
||||
INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i;
|
||||
|
@ -99,6 +125,17 @@ BEGIN;
|
|||
t
|
||||
(1 row)
|
||||
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_seq_scan (
|
||||
$$
|
||||
SELECT a FROM full_correlated WHERE a=200;
|
||||
$$
|
||||
);
|
||||
uses_seq_scan
|
||||
---------------------------------------------------------------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
ROLLBACK;
|
||||
-- same filter used in above, but choosing multiple columns would increase
|
||||
-- custom scan cost, so we would prefer index scan this time
|
||||
|
@ -112,6 +149,19 @@ $$
|
|||
t
|
||||
(1 row)
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_index_scan (
|
||||
$$
|
||||
SELECT a,b,c,d FROM full_correlated WHERE a<9000;
|
||||
$$
|
||||
);
|
||||
uses_index_scan
|
||||
---------------------------------------------------------------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
ROLLBACK;
|
||||
-- again same filter used in above, but we would choose custom scan this
|
||||
-- time since it would read three less columns from disk
|
||||
SELECT columnar_test_helpers.uses_custom_scan (
|
||||
|
@ -124,6 +174,19 @@ $$
|
|||
t
|
||||
(1 row)
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_index_scan (
|
||||
$$
|
||||
SELECT c FROM full_correlated WHERE a<10000;
|
||||
$$
|
||||
);
|
||||
uses_index_scan
|
||||
---------------------------------------------------------------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
ROLLBACK;
|
||||
SELECT columnar_test_helpers.uses_custom_scan (
|
||||
$$
|
||||
SELECT a FROM full_correlated WHERE a>200;
|
||||
|
@ -144,6 +207,162 @@ $$
|
|||
t
|
||||
(1 row)
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_seq_scan (
|
||||
$$
|
||||
SELECT a FROM full_correlated WHERE a=0 OR a=5;
|
||||
$$
|
||||
);
|
||||
uses_seq_scan
|
||||
---------------------------------------------------------------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
ROLLBACK;
|
||||
--
|
||||
-- some tests with joins / subqueries etc.
|
||||
--
|
||||
CREATE TABLE heap_table (a int, b text, c int, d int);
|
||||
INSERT INTO heap_table SELECT i, i::text, (i+1000)*7, (i+900)*5 FROM generate_series(1, 1000000) i;
|
||||
CREATE INDEX heap_table_btree ON heap_table (a);
|
||||
ANALYZE heap_table;
|
||||
EXPLAIN (COSTS OFF)
|
||||
WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1)
|
||||
SELECT SUM(ht_1.a), MIN(ct_1.c)
|
||||
FROM heap_table AS ht_1
|
||||
LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d
|
||||
LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c
|
||||
JOIN cte ON cte.d=ht_1.a
|
||||
WHERE ct_1.a < 3000;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Aggregate
|
||||
CTE cte
|
||||
-> Custom Scan (ColumnarScan) on full_correlated
|
||||
Filter: (a > 1)
|
||||
-> Nested Loop Left Join
|
||||
-> Hash Join
|
||||
Hash Cond: (cte.d = ht_1.a)
|
||||
-> CTE Scan on cte
|
||||
-> Hash
|
||||
-> Nested Loop
|
||||
-> Index Scan using full_correlated_btree on full_correlated ct_1
|
||||
Index Cond: (a < 3000)
|
||||
-> Index Only Scan using heap_table_btree on heap_table ht_1
|
||||
Index Cond: (a = ct_1.d)
|
||||
-> Index Only Scan using heap_table_btree on heap_table ht_2
|
||||
Index Cond: (a = ct_1.c)
|
||||
(16 rows)
|
||||
|
||||
-- same query but columnar custom scan is disabled
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
EXPLAIN (COSTS OFF)
|
||||
WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1)
|
||||
SELECT SUM(ht_1.a), MIN(ct_1.c)
|
||||
FROM heap_table AS ht_1
|
||||
LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d
|
||||
LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c
|
||||
JOIN cte ON cte.d=ht_1.a
|
||||
WHERE ct_1.a < 3000;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Aggregate
|
||||
CTE cte
|
||||
-> Seq Scan on full_correlated
|
||||
Filter: (a > 1)
|
||||
-> Nested Loop Left Join
|
||||
-> Hash Join
|
||||
Hash Cond: (cte.d = ht_1.a)
|
||||
-> CTE Scan on cte
|
||||
-> Hash
|
||||
-> Nested Loop
|
||||
-> Index Scan using full_correlated_btree on full_correlated ct_1
|
||||
Index Cond: (a < 3000)
|
||||
-> Index Only Scan using heap_table_btree on heap_table ht_1
|
||||
Index Cond: (a = ct_1.d)
|
||||
-> Index Only Scan using heap_table_btree on heap_table ht_2
|
||||
Index Cond: (a = ct_1.c)
|
||||
(16 rows)
|
||||
|
||||
ROLLBACK;
|
||||
-- use custom scan
|
||||
EXPLAIN (COSTS OFF) WITH w AS (SELECT * FROM full_correlated)
|
||||
SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
|
||||
WHERE w2.a = 123;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Merge Join
|
||||
Merge Cond: (w2.d = w1.a)
|
||||
CTE w
|
||||
-> Custom Scan (ColumnarScan) on full_correlated
|
||||
-> Sort
|
||||
Sort Key: w2.d
|
||||
-> CTE Scan on w w2
|
||||
Filter: (a = 123)
|
||||
-> Materialize
|
||||
-> Sort
|
||||
Sort Key: w1.a
|
||||
-> CTE Scan on w w1
|
||||
(12 rows)
|
||||
|
||||
-- use index
|
||||
EXPLAIN (COSTS OFF) WITH w AS NOT MATERIALIZED (SELECT * FROM full_correlated)
|
||||
SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
|
||||
WHERE w2.a = 123;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Nested Loop
|
||||
-> Index Scan using full_correlated_btree on full_correlated full_correlated_1
|
||||
Index Cond: (a = 123)
|
||||
-> Index Scan using full_correlated_btree on full_correlated
|
||||
Index Cond: (a = full_correlated_1.d)
|
||||
(5 rows)
|
||||
|
||||
EXPLAIN (COSTS OFF) SELECT sub_1.b, sub_2.a, sub_3.avg
|
||||
FROM
|
||||
(SELECT b FROM full_correlated WHERE (a > 2) GROUP BY b HAVING count(DISTINCT a) > 0 ORDER BY 1 DESC LIMIT 5) AS sub_1,
|
||||
(SELECT a FROM full_correlated WHERE (a > 10) GROUP BY a HAVING count(DISTINCT a) >= 1 ORDER BY 1 DESC LIMIT 3) AS sub_2,
|
||||
(SELECT avg(a) AS AVG FROM full_correlated WHERE (a > 2) GROUP BY a HAVING sum(a) > 10 ORDER BY (sum(d) - avg(a) - COALESCE(array_upper(ARRAY[max(a)],1) * 5, 0)) DESC LIMIT 3) AS sub_3
|
||||
WHERE sub_2.a < sub_1.b::integer
|
||||
ORDER BY 3 DESC, 2 DESC, 1 DESC
|
||||
LIMIT 100;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Limit
|
||||
-> Sort
|
||||
Sort Key: sub_3.avg DESC, full_correlated_1.a DESC, full_correlated.b DESC
|
||||
-> Nested Loop
|
||||
-> Nested Loop
|
||||
Join Filter: (full_correlated_1.a < (full_correlated.b)::integer)
|
||||
-> Limit
|
||||
-> GroupAggregate
|
||||
Group Key: full_correlated.b
|
||||
Filter: (count(DISTINCT full_correlated.a) > 0)
|
||||
-> Sort
|
||||
Sort Key: full_correlated.b DESC
|
||||
-> Custom Scan (ColumnarScan) on full_correlated
|
||||
Filter: (a > 2)
|
||||
-> Materialize
|
||||
-> Limit
|
||||
-> GroupAggregate
|
||||
Group Key: full_correlated_1.a
|
||||
Filter: (count(DISTINCT full_correlated_1.a) >= 1)
|
||||
-> Index Only Scan Backward using full_correlated_btree on full_correlated full_correlated_1
|
||||
Index Cond: (a > 10)
|
||||
-> Materialize
|
||||
-> Subquery Scan on sub_3
|
||||
-> Limit
|
||||
-> Sort
|
||||
Sort Key: ((((sum(full_correlated_2.d))::numeric - avg(full_correlated_2.a)) - (COALESCE((array_upper(ARRAY[max(full_correlated_2.a)], 1) * 5), 0))::numeric)) DESC
|
||||
-> GroupAggregate
|
||||
Group Key: full_correlated_2.a
|
||||
Filter: (sum(full_correlated_2.a) > 10)
|
||||
-> Index Scan using full_correlated_btree on full_correlated full_correlated_2
|
||||
Index Cond: (a > 2)
|
||||
(31 rows)
|
||||
|
||||
DROP INDEX full_correlated_btree;
|
||||
CREATE INDEX full_correlated_hash ON full_correlated USING hash(a);
|
||||
ANALYZE full_correlated;
|
||||
|
@ -197,6 +416,19 @@ $$
|
|||
t
|
||||
(1 row)
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_index_scan (
|
||||
$$
|
||||
SELECT a,c FROM full_correlated WHERE a=1000;
|
||||
$$
|
||||
);
|
||||
uses_index_scan
|
||||
---------------------------------------------------------------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
ROLLBACK;
|
||||
CREATE TABLE full_anti_correlated (a int, b text) USING columnar;
|
||||
INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i;
|
||||
CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b);
|
||||
|
@ -231,6 +463,19 @@ $$
|
|||
t
|
||||
(1 row)
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_seq_scan (
|
||||
$$
|
||||
SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
|
||||
$$
|
||||
);
|
||||
uses_seq_scan
|
||||
---------------------------------------------------------------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
ROLLBACK;
|
||||
DROP INDEX full_anti_correlated_hash;
|
||||
CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b);
|
||||
ANALYZE full_anti_correlated;
|
||||
|
@ -274,6 +519,19 @@ $$
|
|||
t
|
||||
(1 row)
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_seq_scan (
|
||||
$$
|
||||
SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
|
||||
$$
|
||||
);
|
||||
uses_seq_scan
|
||||
---------------------------------------------------------------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
ROLLBACK;
|
||||
CREATE TABLE no_correlation (a int, b text) USING columnar;
|
||||
INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i;
|
||||
CREATE INDEX no_correlation_btree ON no_correlation (a);
|
||||
|
@ -298,5 +556,18 @@ $$
|
|||
t
|
||||
(1 row)
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_seq_scan (
|
||||
$$
|
||||
SELECT a FROM no_correlation WHERE a = 200;
|
||||
$$
|
||||
);
|
||||
uses_seq_scan
|
||||
---------------------------------------------------------------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
ROLLBACK;
|
||||
SET client_min_messages TO WARNING;
|
||||
DROP SCHEMA columnar_paths CASCADE;
|
||||
|
|
|
@ -104,3 +104,16 @@ BEGIN
|
|||
END LOOP;
|
||||
RETURN false;
|
||||
END; $$ language plpgsql;
|
||||
CREATE OR REPLACE FUNCTION uses_seq_scan(command text)
|
||||
RETURNS BOOLEAN AS $$
|
||||
DECLARE
|
||||
query_plan text;
|
||||
BEGIN
|
||||
FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
|
||||
IF query_plan ILIKE '%Seq Scan on %'
|
||||
THEN
|
||||
RETURN true;
|
||||
END IF;
|
||||
END LOOP;
|
||||
RETURN false;
|
||||
END; $$ language plpgsql;
|
||||
|
|
|
@ -52,6 +52,23 @@ SET parallel_tuple_cost TO DEFAULT;
|
|||
SET max_parallel_workers TO DEFAULT;
|
||||
SET max_parallel_workers_per_gather TO DEFAULT;
|
||||
|
||||
CREATE INDEX parent_btree ON parent (n);
|
||||
ANALYZE parent;
|
||||
|
||||
-- will use columnar custom scan on columnar partitions but index
|
||||
-- scan on heap partition
|
||||
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
|
||||
WHERE ts > '2020-02-20' AND n < 5;
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
|
||||
-- now that we disabled columnar custom scan, will use seq scan on columnar
|
||||
-- partitions since index scan is more expensive than seq scan too
|
||||
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
|
||||
WHERE ts > '2020-02-20' AND n < 5;
|
||||
ROLLBACK;
|
||||
|
||||
DROP TABLE parent;
|
||||
|
||||
--
|
||||
|
|
|
@ -36,6 +36,15 @@ SELECT a FROM full_correlated WHERE a>900000;
|
|||
$$
|
||||
);
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_seq_scan (
|
||||
$$
|
||||
SELECT a FROM full_correlated WHERE a>900000;
|
||||
$$
|
||||
);
|
||||
ROLLBACK;
|
||||
|
||||
SELECT columnar_test_helpers.uses_index_scan (
|
||||
$$
|
||||
SELECT a FROM full_correlated WHERE a<1000;
|
||||
|
@ -54,6 +63,15 @@ SELECT a FROM full_correlated WHERE a<9000;
|
|||
$$
|
||||
);
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_index_scan (
|
||||
$$
|
||||
SELECT a FROM full_correlated WHERE a<9000;
|
||||
$$
|
||||
);
|
||||
ROLLBACK;
|
||||
|
||||
BEGIN;
|
||||
TRUNCATE full_correlated;
|
||||
INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i;
|
||||
|
@ -65,6 +83,13 @@ BEGIN;
|
|||
SELECT a FROM full_correlated WHERE a=200;
|
||||
$$
|
||||
);
|
||||
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_seq_scan (
|
||||
$$
|
||||
SELECT a FROM full_correlated WHERE a=200;
|
||||
$$
|
||||
);
|
||||
ROLLBACK;
|
||||
|
||||
-- same filter used in above, but choosing multiple columns would increase
|
||||
|
@ -75,6 +100,15 @@ SELECT a,b,c,d FROM full_correlated WHERE a<9000;
|
|||
$$
|
||||
);
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_index_scan (
|
||||
$$
|
||||
SELECT a,b,c,d FROM full_correlated WHERE a<9000;
|
||||
$$
|
||||
);
|
||||
ROLLBACK;
|
||||
|
||||
-- again same filter used in above, but we would choose custom scan this
|
||||
-- time since it would read three less columns from disk
|
||||
SELECT columnar_test_helpers.uses_custom_scan (
|
||||
|
@ -83,6 +117,15 @@ SELECT c FROM full_correlated WHERE a<10000;
|
|||
$$
|
||||
);
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_index_scan (
|
||||
$$
|
||||
SELECT c FROM full_correlated WHERE a<10000;
|
||||
$$
|
||||
);
|
||||
ROLLBACK;
|
||||
|
||||
SELECT columnar_test_helpers.uses_custom_scan (
|
||||
$$
|
||||
SELECT a FROM full_correlated WHERE a>200;
|
||||
|
@ -95,6 +138,65 @@ SELECT a FROM full_correlated WHERE a=0 OR a=5;
|
|||
$$
|
||||
);
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_seq_scan (
|
||||
$$
|
||||
SELECT a FROM full_correlated WHERE a=0 OR a=5;
|
||||
$$
|
||||
);
|
||||
ROLLBACK;
|
||||
|
||||
--
|
||||
-- some tests with joins / subqueries etc.
|
||||
--
|
||||
CREATE TABLE heap_table (a int, b text, c int, d int);
|
||||
INSERT INTO heap_table SELECT i, i::text, (i+1000)*7, (i+900)*5 FROM generate_series(1, 1000000) i;
|
||||
CREATE INDEX heap_table_btree ON heap_table (a);
|
||||
ANALYZE heap_table;
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1)
|
||||
SELECT SUM(ht_1.a), MIN(ct_1.c)
|
||||
FROM heap_table AS ht_1
|
||||
LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d
|
||||
LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c
|
||||
JOIN cte ON cte.d=ht_1.a
|
||||
WHERE ct_1.a < 3000;
|
||||
|
||||
-- same query but columnar custom scan is disabled
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
WITH cte AS MATERIALIZED (SELECT d FROM full_correlated WHERE a > 1)
|
||||
SELECT SUM(ht_1.a), MIN(ct_1.c)
|
||||
FROM heap_table AS ht_1
|
||||
LEFT JOIN full_correlated AS ct_1 ON ht_1.a=ct_1.d
|
||||
LEFT JOIN heap_table AS ht_2 ON ht_2.a=ct_1.c
|
||||
JOIN cte ON cte.d=ht_1.a
|
||||
WHERE ct_1.a < 3000;
|
||||
ROLLBACK;
|
||||
|
||||
-- use custom scan
|
||||
EXPLAIN (COSTS OFF) WITH w AS (SELECT * FROM full_correlated)
|
||||
SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
|
||||
WHERE w2.a = 123;
|
||||
|
||||
-- use index
|
||||
EXPLAIN (COSTS OFF) WITH w AS NOT MATERIALIZED (SELECT * FROM full_correlated)
|
||||
SELECT * FROM w AS w1 JOIN w AS w2 ON w1.a = w2.d
|
||||
WHERE w2.a = 123;
|
||||
|
||||
EXPLAIN (COSTS OFF) SELECT sub_1.b, sub_2.a, sub_3.avg
|
||||
FROM
|
||||
(SELECT b FROM full_correlated WHERE (a > 2) GROUP BY b HAVING count(DISTINCT a) > 0 ORDER BY 1 DESC LIMIT 5) AS sub_1,
|
||||
(SELECT a FROM full_correlated WHERE (a > 10) GROUP BY a HAVING count(DISTINCT a) >= 1 ORDER BY 1 DESC LIMIT 3) AS sub_2,
|
||||
(SELECT avg(a) AS AVG FROM full_correlated WHERE (a > 2) GROUP BY a HAVING sum(a) > 10 ORDER BY (sum(d) - avg(a) - COALESCE(array_upper(ARRAY[max(a)],1) * 5, 0)) DESC LIMIT 3) AS sub_3
|
||||
WHERE sub_2.a < sub_1.b::integer
|
||||
ORDER BY 3 DESC, 2 DESC, 1 DESC
|
||||
LIMIT 100;
|
||||
|
||||
DROP INDEX full_correlated_btree;
|
||||
|
||||
CREATE INDEX full_correlated_hash ON full_correlated USING hash(a);
|
||||
|
@ -130,6 +232,15 @@ SELECT a,c FROM full_correlated WHERE a=1000;
|
|||
$$
|
||||
);
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_index_scan (
|
||||
$$
|
||||
SELECT a,c FROM full_correlated WHERE a=1000;
|
||||
$$
|
||||
);
|
||||
ROLLBACK;
|
||||
|
||||
CREATE TABLE full_anti_correlated (a int, b text) USING columnar;
|
||||
INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i;
|
||||
CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b);
|
||||
|
@ -153,6 +264,15 @@ SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
|
|||
$$
|
||||
);
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_seq_scan (
|
||||
$$
|
||||
SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
|
||||
$$
|
||||
);
|
||||
ROLLBACK;
|
||||
|
||||
DROP INDEX full_anti_correlated_hash;
|
||||
|
||||
CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b);
|
||||
|
@ -182,6 +302,15 @@ SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
|
|||
$$
|
||||
);
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_seq_scan (
|
||||
$$
|
||||
SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
|
||||
$$
|
||||
);
|
||||
ROLLBACK;
|
||||
|
||||
CREATE TABLE no_correlation (a int, b text) USING columnar;
|
||||
INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i;
|
||||
CREATE INDEX no_correlation_btree ON no_correlation (a);
|
||||
|
@ -199,5 +328,14 @@ SELECT a FROM no_correlation WHERE a = 200;
|
|||
$$
|
||||
);
|
||||
|
||||
BEGIN;
|
||||
SET LOCAL columnar.enable_custom_scan TO 'OFF';
|
||||
SELECT columnar_test_helpers.uses_seq_scan (
|
||||
$$
|
||||
SELECT a FROM no_correlation WHERE a = 200;
|
||||
$$
|
||||
);
|
||||
ROLLBACK;
|
||||
|
||||
SET client_min_messages TO WARNING;
|
||||
DROP SCHEMA columnar_paths CASCADE;
|
||||
|
|
|
@ -113,3 +113,17 @@ BEGIN
|
|||
END LOOP;
|
||||
RETURN false;
|
||||
END; $$ language plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION uses_seq_scan(command text)
|
||||
RETURNS BOOLEAN AS $$
|
||||
DECLARE
|
||||
query_plan text;
|
||||
BEGIN
|
||||
FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
|
||||
IF query_plan ILIKE '%Seq Scan on %'
|
||||
THEN
|
||||
RETURN true;
|
||||
END IF;
|
||||
END LOOP;
|
||||
RETURN false;
|
||||
END; $$ language plpgsql;
|
||||
|
|
Loading…
Reference in New Issue