mirror of https://github.com/citusdata/citus.git
Re-cost columnar table index paths
parent
8adcf2096b
commit
297f59a70e
|
@ -14,6 +14,7 @@
|
||||||
|
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include "access/amapi.h"
|
||||||
#include "access/skey.h"
|
#include "access/skey.h"
|
||||||
#include "nodes/extensible.h"
|
#include "nodes/extensible.h"
|
||||||
#include "nodes/pg_list.h"
|
#include "nodes/pg_list.h"
|
||||||
|
@ -25,6 +26,7 @@
|
||||||
#include "utils/relcache.h"
|
#include "utils/relcache.h"
|
||||||
#include "utils/spccache.h"
|
#include "utils/spccache.h"
|
||||||
|
|
||||||
|
#include "columnar/columnar.h"
|
||||||
#include "columnar/columnar_customscan.h"
|
#include "columnar/columnar_customscan.h"
|
||||||
#include "columnar/columnar_metadata.h"
|
#include "columnar/columnar_metadata.h"
|
||||||
#include "columnar/columnar_tableam.h"
|
#include "columnar/columnar_tableam.h"
|
||||||
|
@ -59,6 +61,14 @@ static void ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index
|
||||||
RangeTblEntry *rte);
|
RangeTblEntry *rte);
|
||||||
static void RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPredicate);
|
static void RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPredicate);
|
||||||
static bool IsNotIndexPath(Path *path);
|
static bool IsNotIndexPath(Path *path);
|
||||||
|
static void RecostColumnarIndexPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId);
|
||||||
|
static void RecostColumnarIndexPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId,
|
||||||
|
IndexPath *indexPath);
|
||||||
|
static Cost ColumnarIndexScanAddStartupCost(RelOptInfo *rel, Oid relationId,
|
||||||
|
IndexPath *indexPath);
|
||||||
|
static Cost ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel,
|
||||||
|
Oid relationId, IndexPath *indexPath);
|
||||||
|
static int RelationIdGetNumberOfAttributes(Oid relationId);
|
||||||
static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel,
|
static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel,
|
||||||
RangeTblEntry *rte);
|
RangeTblEntry *rte);
|
||||||
static Cost ColumnarScanCost(RelOptInfo *rel, Oid relationId, int numberOfColumnsRead);
|
static Cost ColumnarScanCost(RelOptInfo *rel, Oid relationId, int numberOfColumnsRead);
|
||||||
|
@ -187,12 +197,13 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
|
||||||
ereport(DEBUG1, (errmsg("pathlist hook for columnar table am")));
|
ereport(DEBUG1, (errmsg("pathlist hook for columnar table am")));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* TODO: Since we don't have a proper costing model for
|
* When columnar custom scan is enabled (columnar.enable_custom_scan),
|
||||||
* ColumnarCustomScan, we remove other paths to force postgres
|
* we only consider ColumnarScanPath's & IndexPath's. For this reason,
|
||||||
* using ColumnarCustomScan. Note that we still keep index paths
|
* we remove other paths and re-estimate IndexPath costs to make accurate
|
||||||
* since they still might be useful.
|
* comparisons between them.
|
||||||
*/
|
*/
|
||||||
RemovePathsByPredicate(rel, IsNotIndexPath);
|
RemovePathsByPredicate(rel, IsNotIndexPath);
|
||||||
|
RecostColumnarIndexPaths(root, rel, rte->relid);
|
||||||
add_path(rel, customPath);
|
add_path(rel, customPath);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -232,6 +243,184 @@ IsNotIndexPath(Path *path)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* RecostColumnarIndexPaths re-costs index paths of given RelOptInfo for
|
||||||
|
* columnar table with relationId.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
RecostColumnarIndexPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId)
|
||||||
|
{
|
||||||
|
Path *path = NULL;
|
||||||
|
foreach_ptr(path, rel->pathlist)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Since we don't provide implementations for scan_bitmap_next_block
|
||||||
|
* & scan_bitmap_next_tuple, postgres doesn't generate bitmap index
|
||||||
|
* scan paths for columnar tables already (see related comments in
|
||||||
|
* TableAmRoutine). For this reason, we only consider IndexPath's
|
||||||
|
* here.
|
||||||
|
*/
|
||||||
|
if (IsA(path, IndexPath))
|
||||||
|
{
|
||||||
|
RecostColumnarIndexPath(root, rel, relationId, (IndexPath *) path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* RecostColumnarIndexPath re-costs given index path for columnar table with
|
||||||
|
* relationId.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
RecostColumnarIndexPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId,
|
||||||
|
IndexPath *indexPath)
|
||||||
|
{
|
||||||
|
ereport(DEBUG4, (errmsg("columnar table index scan costs estimated by "
|
||||||
|
"indexAM: startup cost = %.10f, total cost = "
|
||||||
|
"%.10f", indexPath->path.startup_cost,
|
||||||
|
indexPath->path.total_cost)));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We estimate the cost for columnar table read during index scan. Also,
|
||||||
|
* instead of overwriting startup & total costs, we "add" ours to the
|
||||||
|
* costs estimated by indexAM since we should consider index traversal
|
||||||
|
* related costs too.
|
||||||
|
*/
|
||||||
|
Cost indexAMStartupCost = indexPath->path.startup_cost;
|
||||||
|
Cost indexAMScanCost = indexPath->path.total_cost - indexAMStartupCost;
|
||||||
|
|
||||||
|
Cost columnarIndexScanStartupCost = ColumnarIndexScanAddStartupCost(rel, relationId,
|
||||||
|
indexPath);
|
||||||
|
Cost columnarIndexScanCost = ColumnarIndexScanAddTotalCost(root, rel, relationId,
|
||||||
|
indexPath);
|
||||||
|
|
||||||
|
indexPath->path.startup_cost = indexAMStartupCost + columnarIndexScanStartupCost;
|
||||||
|
indexPath->path.total_cost = indexPath->path.startup_cost +
|
||||||
|
indexAMScanCost + columnarIndexScanCost;
|
||||||
|
|
||||||
|
ereport(DEBUG4, (errmsg("columnar table index scan costs re-estimated "
|
||||||
|
"by columnarAM (including indexAM costs): "
|
||||||
|
"startup cost = %.10f, total cost = %.10f",
|
||||||
|
indexPath->path.startup_cost,
|
||||||
|
indexPath->path.total_cost)));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ColumnarIndexScanAddStartupCost returns additional startup cost estimated
|
||||||
|
* for index scan described by IndexPath for columnar table with relationId.
|
||||||
|
*/
|
||||||
|
static Cost
|
||||||
|
ColumnarIndexScanAddStartupCost(RelOptInfo *rel, Oid relationId, IndexPath *indexPath)
|
||||||
|
{
|
||||||
|
int numberOfColumnsRead = RelationIdGetNumberOfAttributes(relationId);
|
||||||
|
|
||||||
|
/* we would at least read one stripe */
|
||||||
|
return ColumnarPerStripeScanCost(rel, relationId, numberOfColumnsRead);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ColumnarIndexScanAddTotalCost returns additional cost estimated for
|
||||||
|
* index scan described by IndexPath for columnar table with relationId.
|
||||||
|
*/
|
||||||
|
static Cost
|
||||||
|
ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel,
|
||||||
|
Oid relationId, IndexPath *indexPath)
|
||||||
|
{
|
||||||
|
int numberOfColumnsRead = RelationIdGetNumberOfAttributes(relationId);
|
||||||
|
Cost perStripeCost = ColumnarPerStripeScanCost(rel, relationId, numberOfColumnsRead);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We don't need to pass correct loop count to amcostestimate since we
|
||||||
|
* will only use index correlation & index selectivity, and loop count
|
||||||
|
* doesn't have any effect on those two.
|
||||||
|
*/
|
||||||
|
double fakeLoopCount = 1;
|
||||||
|
Cost fakeIndexStartupCost;
|
||||||
|
Cost fakeIndexTotalCost;
|
||||||
|
double fakeIndexPages;
|
||||||
|
Selectivity indexSelectivity;
|
||||||
|
double indexCorrelation;
|
||||||
|
amcostestimate_function amcostestimate = indexPath->indexinfo->amcostestimate;
|
||||||
|
amcostestimate(root, indexPath, fakeLoopCount, &fakeIndexStartupCost,
|
||||||
|
&fakeIndexTotalCost, &indexSelectivity,
|
||||||
|
&indexCorrelation, &fakeIndexPages);
|
||||||
|
|
||||||
|
Relation relation = RelationIdGetRelation(relationId);
|
||||||
|
uint64 rowCount = ColumnarTableRowCount(relation);
|
||||||
|
RelationClose(relation);
|
||||||
|
double estimatedRows = rowCount * indexSelectivity;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In the worst case (i.e no correlation between the column & the index),
|
||||||
|
* we need to read a different stripe for each row.
|
||||||
|
*/
|
||||||
|
double maxStripeReadCount = estimatedRows;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In the best case (i.e the column is fully correlated with the index),
|
||||||
|
* we wouldn't read the same stripe again and again thanks
|
||||||
|
* to locality.
|
||||||
|
*/
|
||||||
|
double avgStripeRowCount =
|
||||||
|
rowCount / (double) ColumnarTableStripeCount(relationId);
|
||||||
|
double minStripeReadCount = estimatedRows / avgStripeRowCount;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* While being close to 0 means low correlation, being close to -1 or +1
|
||||||
|
* means high correlation. For index scans on columnar tables, it doesn't
|
||||||
|
* matter if the column and the index are "correlated" (+1) or
|
||||||
|
* "anti-correlated" (-1) since both help us avoiding from reading the
|
||||||
|
* same stripe again and again.
|
||||||
|
*/
|
||||||
|
double absIndexCorrelation = Abs(indexCorrelation);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* To estimate the number of stripes that we need to read, we do linear
|
||||||
|
* interpolation between minStripeReadCount & maxStripeReadCount. To do
|
||||||
|
* that, we use complement to 1 of absolute correlation, where being
|
||||||
|
* close to 0 means high correlation and being close to 1 means low
|
||||||
|
* correlation.
|
||||||
|
* In practice, we only want to do an index scan when absIndexCorrelation
|
||||||
|
* is 1 (or extremely close to it), or when the absolute number of tuples
|
||||||
|
* returned is very small. Other cases will have a prohibitive cost.
|
||||||
|
*/
|
||||||
|
double complementIndexCorrelation = 1 - absIndexCorrelation;
|
||||||
|
double estimatedStripeReadCount =
|
||||||
|
minStripeReadCount + complementIndexCorrelation * (maxStripeReadCount -
|
||||||
|
minStripeReadCount);
|
||||||
|
|
||||||
|
Cost scanCost = perStripeCost * estimatedStripeReadCount;
|
||||||
|
|
||||||
|
ereport(DEBUG4, (errmsg("re-costing index scan for columnar table: "
|
||||||
|
"selectivity = %.10f, complement abs "
|
||||||
|
"correlation = %.10f, per stripe cost = %.10f, "
|
||||||
|
"estimated stripe read count = %.10f, "
|
||||||
|
"total additional cost = %.10f",
|
||||||
|
indexSelectivity, complementIndexCorrelation,
|
||||||
|
perStripeCost, estimatedStripeReadCount,
|
||||||
|
scanCost)));
|
||||||
|
|
||||||
|
return scanCost;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* RelationIdGetNumberOfAttributes returns number of attributes that relation
|
||||||
|
* with relationId has.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
RelationIdGetNumberOfAttributes(Oid relationId)
|
||||||
|
{
|
||||||
|
Relation relation = RelationIdGetRelation(relationId);
|
||||||
|
int nattrs = relation->rd_att->natts;
|
||||||
|
RelationClose(relation);
|
||||||
|
return nattrs;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static Path *
|
static Path *
|
||||||
CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
|
CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
|
||||||
{
|
{
|
||||||
|
|
|
@ -9,7 +9,7 @@ test: columnar_analyze
|
||||||
test: columnar_data_types
|
test: columnar_data_types
|
||||||
test: columnar_drop
|
test: columnar_drop
|
||||||
test: columnar_indexes
|
test: columnar_indexes
|
||||||
test: columnar_fallback_scan
|
test: columnar_fallback_scan columnar_paths
|
||||||
test: columnar_partitioning
|
test: columnar_partitioning
|
||||||
test: columnar_permissions
|
test: columnar_permissions
|
||||||
test: columnar_empty
|
test: columnar_empty
|
||||||
|
|
|
@ -0,0 +1,302 @@
|
||||||
|
CREATE SCHEMA columnar_paths;
|
||||||
|
SET search_path TO columnar_paths;
|
||||||
|
CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar;
|
||||||
|
INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i;
|
||||||
|
CREATE INDEX full_correlated_btree ON full_correlated (a);
|
||||||
|
ANALYZE full_correlated;
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a=200;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_index_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a<0;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_index_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a>10 AND a<20;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_index_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a>1000000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_index_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a>900000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_custom_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a<1000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_index_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a,b FROM full_correlated WHERE a<3000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_index_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a<9000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_custom_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
BEGIN;
|
||||||
|
TRUNCATE full_correlated;
|
||||||
|
INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i;
|
||||||
|
-- Since we have much smaller number of rows, selectivity of below
|
||||||
|
-- query should be much higher. So we would choose columnar custom scan.
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a=200;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_custom_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
ROLLBACK;
|
||||||
|
-- same filter used in above, but choosing multiple columns would increase
|
||||||
|
-- custom scan cost, so we would prefer index scan this time
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a,b,c,d FROM full_correlated WHERE a<9000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_index_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- again same filter used in above, but we would choose custom scan this
|
||||||
|
-- time since it would read three less columns from disk
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT c FROM full_correlated WHERE a<10000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_custom_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a>200;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_custom_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a=0 OR a=5;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_custom_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
DROP INDEX full_correlated_btree;
|
||||||
|
CREATE INDEX full_correlated_hash ON full_correlated USING hash(a);
|
||||||
|
ANALYZE full_correlated;
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a<10;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_custom_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a>1 AND a<10;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_custom_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a=0 OR a=5;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_custom_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a=1000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_custom_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a,c FROM full_correlated WHERE a=1000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_index_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
CREATE TABLE full_anti_correlated (a int, b text) USING columnar;
|
||||||
|
INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i;
|
||||||
|
CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b);
|
||||||
|
ANALYZE full_anti_correlated;
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_anti_correlated WHERE b='600';
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_index_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a,b FROM full_anti_correlated WHERE b='600';
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_index_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_custom_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
DROP INDEX full_anti_correlated_hash;
|
||||||
|
CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b);
|
||||||
|
ANALYZE full_anti_correlated;
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_anti_correlated WHERE a>6500 AND a<7000 AND b<'10000';
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_index_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_custom_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000 AND b='24';
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_index_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_custom_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
CREATE TABLE no_correlation (a int, b text) USING columnar;
|
||||||
|
INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i;
|
||||||
|
CREATE INDEX no_correlation_btree ON no_correlation (a);
|
||||||
|
ANALYZE no_correlation;
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM no_correlation WHERE a < 2;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_custom_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM no_correlation WHERE a = 200;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
uses_custom_scan
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SET client_min_messages TO WARNING;
|
||||||
|
DROP SCHEMA columnar_paths CASCADE;
|
|
@ -77,3 +77,30 @@ CREATE FUNCTION top_memory_context_usage()
|
||||||
RETURNS BIGINT AS $$
|
RETURNS BIGINT AS $$
|
||||||
SELECT TopMemoryContext FROM columnar_test_helpers.columnar_store_memory_stats();
|
SELECT TopMemoryContext FROM columnar_test_helpers.columnar_store_memory_stats();
|
||||||
$$ LANGUAGE SQL VOLATILE;
|
$$ LANGUAGE SQL VOLATILE;
|
||||||
|
CREATE OR REPLACE FUNCTION uses_index_scan(command text)
|
||||||
|
RETURNS BOOLEAN AS $$
|
||||||
|
DECLARE
|
||||||
|
query_plan text;
|
||||||
|
BEGIN
|
||||||
|
FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
|
||||||
|
IF query_plan ILIKE '%Index Only Scan using%' OR
|
||||||
|
query_plan ILIKE '%Index Scan using%'
|
||||||
|
THEN
|
||||||
|
RETURN true;
|
||||||
|
END IF;
|
||||||
|
END LOOP;
|
||||||
|
RETURN false;
|
||||||
|
END; $$ language plpgsql;
|
||||||
|
CREATE OR REPLACE FUNCTION uses_custom_scan(command text)
|
||||||
|
RETURNS BOOLEAN AS $$
|
||||||
|
DECLARE
|
||||||
|
query_plan text;
|
||||||
|
BEGIN
|
||||||
|
FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
|
||||||
|
IF query_plan ILIKE '%Custom Scan (ColumnarScan)%'
|
||||||
|
THEN
|
||||||
|
RETURN true;
|
||||||
|
END IF;
|
||||||
|
END LOOP;
|
||||||
|
RETURN false;
|
||||||
|
END; $$ language plpgsql;
|
||||||
|
|
|
@ -0,0 +1,203 @@
|
||||||
|
CREATE SCHEMA columnar_paths;
|
||||||
|
SET search_path TO columnar_paths;
|
||||||
|
|
||||||
|
CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar;
|
||||||
|
INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i;
|
||||||
|
CREATE INDEX full_correlated_btree ON full_correlated (a);
|
||||||
|
ANALYZE full_correlated;
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a=200;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a<0;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a>10 AND a<20;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a>1000000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a>900000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a<1000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a,b FROM full_correlated WHERE a<3000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a<9000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
BEGIN;
|
||||||
|
TRUNCATE full_correlated;
|
||||||
|
INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i;
|
||||||
|
|
||||||
|
-- Since we have much smaller number of rows, selectivity of below
|
||||||
|
-- query should be much higher. So we would choose columnar custom scan.
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a=200;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
ROLLBACK;
|
||||||
|
|
||||||
|
-- same filter used in above, but choosing multiple columns would increase
|
||||||
|
-- custom scan cost, so we would prefer index scan this time
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a,b,c,d FROM full_correlated WHERE a<9000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
-- again same filter used in above, but we would choose custom scan this
|
||||||
|
-- time since it would read three less columns from disk
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT c FROM full_correlated WHERE a<10000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a>200;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a=0 OR a=5;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
DROP INDEX full_correlated_btree;
|
||||||
|
|
||||||
|
CREATE INDEX full_correlated_hash ON full_correlated USING hash(a);
|
||||||
|
ANALYZE full_correlated;
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a<10;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a>1 AND a<10;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a=0 OR a=5;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_correlated WHERE a=1000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a,c FROM full_correlated WHERE a=1000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE full_anti_correlated (a int, b text) USING columnar;
|
||||||
|
INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i;
|
||||||
|
CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b);
|
||||||
|
ANALYZE full_anti_correlated;
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_anti_correlated WHERE b='600';
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a,b FROM full_anti_correlated WHERE b='600';
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10';
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
DROP INDEX full_anti_correlated_hash;
|
||||||
|
|
||||||
|
CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b);
|
||||||
|
ANALYZE full_anti_correlated;
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_anti_correlated WHERE a>6500 AND a<7000 AND b<'10000';
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_index_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000 AND b='24';
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000';
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE no_correlation (a int, b text) USING columnar;
|
||||||
|
INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i;
|
||||||
|
CREATE INDEX no_correlation_btree ON no_correlation (a);
|
||||||
|
ANALYZE no_correlation;
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM no_correlation WHERE a < 2;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT columnar_test_helpers.uses_custom_scan (
|
||||||
|
$$
|
||||||
|
SELECT a FROM no_correlation WHERE a = 200;
|
||||||
|
$$
|
||||||
|
);
|
||||||
|
|
||||||
|
SET client_min_messages TO WARNING;
|
||||||
|
DROP SCHEMA columnar_paths CASCADE;
|
|
@ -84,3 +84,32 @@ CREATE FUNCTION top_memory_context_usage()
|
||||||
RETURNS BIGINT AS $$
|
RETURNS BIGINT AS $$
|
||||||
SELECT TopMemoryContext FROM columnar_test_helpers.columnar_store_memory_stats();
|
SELECT TopMemoryContext FROM columnar_test_helpers.columnar_store_memory_stats();
|
||||||
$$ LANGUAGE SQL VOLATILE;
|
$$ LANGUAGE SQL VOLATILE;
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION uses_index_scan(command text)
|
||||||
|
RETURNS BOOLEAN AS $$
|
||||||
|
DECLARE
|
||||||
|
query_plan text;
|
||||||
|
BEGIN
|
||||||
|
FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
|
||||||
|
IF query_plan ILIKE '%Index Only Scan using%' OR
|
||||||
|
query_plan ILIKE '%Index Scan using%'
|
||||||
|
THEN
|
||||||
|
RETURN true;
|
||||||
|
END IF;
|
||||||
|
END LOOP;
|
||||||
|
RETURN false;
|
||||||
|
END; $$ language plpgsql;
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION uses_custom_scan(command text)
|
||||||
|
RETURNS BOOLEAN AS $$
|
||||||
|
DECLARE
|
||||||
|
query_plan text;
|
||||||
|
BEGIN
|
||||||
|
FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP
|
||||||
|
IF query_plan ILIKE '%Custom Scan (ColumnarScan)%'
|
||||||
|
THEN
|
||||||
|
RETURN true;
|
||||||
|
END IF;
|
||||||
|
END LOOP;
|
||||||
|
RETURN false;
|
||||||
|
END; $$ language plpgsql;
|
||||||
|
|
Loading…
Reference in New Issue