From 297f59a70eefc32131c67ecfe3c38d3cb3519ec5 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Thu, 24 Jun 2021 20:40:54 +0300 Subject: [PATCH] Re-cost columnar table index paths --- src/backend/columnar/columnar_customscan.c | 197 +++++++++++- src/test/regress/columnar_schedule | 2 +- src/test/regress/expected/columnar_paths.out | 302 ++++++++++++++++++ .../expected/columnar_test_helpers.out | 27 ++ src/test/regress/sql/columnar_paths.sql | 203 ++++++++++++ .../regress/sql/columnar_test_helpers.sql | 29 ++ 6 files changed, 755 insertions(+), 5 deletions(-) create mode 100644 src/test/regress/expected/columnar_paths.out create mode 100644 src/test/regress/sql/columnar_paths.sql diff --git a/src/backend/columnar/columnar_customscan.c b/src/backend/columnar/columnar_customscan.c index 748049740..50354be46 100644 --- a/src/backend/columnar/columnar_customscan.c +++ b/src/backend/columnar/columnar_customscan.c @@ -14,6 +14,7 @@ #include "postgres.h" +#include "access/amapi.h" #include "access/skey.h" #include "nodes/extensible.h" #include "nodes/pg_list.h" @@ -25,6 +26,7 @@ #include "utils/relcache.h" #include "utils/spccache.h" +#include "columnar/columnar.h" #include "columnar/columnar_customscan.h" #include "columnar/columnar_metadata.h" #include "columnar/columnar_tableam.h" @@ -59,6 +61,14 @@ static void ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index RangeTblEntry *rte); static void RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPredicate); static bool IsNotIndexPath(Path *path); +static void RecostColumnarIndexPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId); +static void RecostColumnarIndexPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId, + IndexPath *indexPath); +static Cost ColumnarIndexScanAddStartupCost(RelOptInfo *rel, Oid relationId, + IndexPath *indexPath); +static Cost ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel, + Oid relationId, IndexPath *indexPath); +static int RelationIdGetNumberOfAttributes(Oid relationId); static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); static Cost ColumnarScanCost(RelOptInfo *rel, Oid relationId, int numberOfColumnsRead); @@ -187,12 +197,13 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, ereport(DEBUG1, (errmsg("pathlist hook for columnar table am"))); /* - * TODO: Since we don't have a proper costing model for - * ColumnarCustomScan, we remove other paths to force postgres - * using ColumnarCustomScan. Note that we still keep index paths - * since they still might be useful. + * When columnar custom scan is enabled (columnar.enable_custom_scan), + * we only consider ColumnarScanPath's & IndexPath's. For this reason, + * we remove other paths and re-estimate IndexPath costs to make accurate + * comparisons between them. */ RemovePathsByPredicate(rel, IsNotIndexPath); + RecostColumnarIndexPaths(root, rel, rte->relid); add_path(rel, customPath); } } @@ -232,6 +243,184 @@ IsNotIndexPath(Path *path) } +/* + * RecostColumnarIndexPaths re-costs index paths of given RelOptInfo for + * columnar table with relationId. + */ +static void +RecostColumnarIndexPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId) +{ + Path *path = NULL; + foreach_ptr(path, rel->pathlist) + { + /* + * Since we don't provide implementations for scan_bitmap_next_block + * & scan_bitmap_next_tuple, postgres doesn't generate bitmap index + * scan paths for columnar tables already (see related comments in + * TableAmRoutine). 
For this reason, we only consider IndexPath's
+		 * here.
+		 */
+		if (IsA(path, IndexPath))
+		{
+			RecostColumnarIndexPath(root, rel, relationId, (IndexPath *) path);
+		}
+	}
+}
+
+
+/*
+ * RecostColumnarIndexPath re-costs the given index path for the columnar
+ * table with relationId.
+ */
+static void
+RecostColumnarIndexPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId,
+						IndexPath *indexPath)
+{
+	ereport(DEBUG4, (errmsg("columnar table index scan costs estimated by "
+							"indexAM: startup cost = %.10f, total cost = "
+							"%.10f", indexPath->path.startup_cost,
+							indexPath->path.total_cost)));
+
+	/*
+	 * We estimate the cost of reading the columnar table during the index
+	 * scan. Also, instead of overwriting the startup & total costs, we "add"
+	 * ours to the costs estimated by the indexAM since we should take the
+	 * index traversal costs into account too.
+	 */
+	Cost indexAMStartupCost = indexPath->path.startup_cost;
+	Cost indexAMScanCost = indexPath->path.total_cost - indexAMStartupCost;
+
+	Cost columnarIndexScanStartupCost = ColumnarIndexScanAddStartupCost(rel, relationId,
+																		indexPath);
+	Cost columnarIndexScanCost = ColumnarIndexScanAddTotalCost(root, rel, relationId,
+															   indexPath);
+
+	indexPath->path.startup_cost = indexAMStartupCost + columnarIndexScanStartupCost;
+	indexPath->path.total_cost = indexPath->path.startup_cost +
+								 indexAMScanCost + columnarIndexScanCost;
+
+	ereport(DEBUG4, (errmsg("columnar table index scan costs re-estimated "
+							"by columnarAM (including indexAM costs): "
+							"startup cost = %.10f, total cost = %.10f",
+							indexPath->path.startup_cost,
+							indexPath->path.total_cost)));
+}
+
+
+/*
+ * ColumnarIndexScanAddStartupCost returns the additional startup cost
+ * estimated for the index scan described by indexPath on the columnar table
+ * with relationId.
+ */
+static Cost
+ColumnarIndexScanAddStartupCost(RelOptInfo *rel, Oid relationId, IndexPath *indexPath)
+{
+	int numberOfColumnsRead = RelationIdGetNumberOfAttributes(relationId);
+
+	/* we would read at least one stripe */
+	return ColumnarPerStripeScanCost(rel, relationId, numberOfColumnsRead);
+}
+
+
+/*
+ * ColumnarIndexScanAddTotalCost returns the additional scan cost estimated
+ * for the index scan described by indexPath on the columnar table with
+ * relationId.
+ */
+static Cost
+ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel,
+							  Oid relationId, IndexPath *indexPath)
+{
+	int numberOfColumnsRead = RelationIdGetNumberOfAttributes(relationId);
+	Cost perStripeCost = ColumnarPerStripeScanCost(rel, relationId, numberOfColumnsRead);
+
+	/*
+	 * We don't need to pass the correct loop count to amcostestimate since
+	 * we only use the index correlation & index selectivity, and the loop
+	 * count has no effect on those two.
+	 */
+	double fakeLoopCount = 1;
+	Cost fakeIndexStartupCost;
+	Cost fakeIndexTotalCost;
+	double fakeIndexPages;
+	Selectivity indexSelectivity;
+	double indexCorrelation;
+	amcostestimate_function amcostestimate = indexPath->indexinfo->amcostestimate;
+	amcostestimate(root, indexPath, fakeLoopCount, &fakeIndexStartupCost,
+				   &fakeIndexTotalCost, &indexSelectivity,
+				   &indexCorrelation, &fakeIndexPages);
+
+	Relation relation = RelationIdGetRelation(relationId);
+	uint64 rowCount = ColumnarTableRowCount(relation);
+	RelationClose(relation);
+	double estimatedRows = rowCount * indexSelectivity;
+
+	/*
+	 * In the worst case (i.e. no correlation between the column & the index),
+	 * we need to read a different stripe for each row.
+	 */
+	double maxStripeReadCount = estimatedRows;
+
+	/*
+	 * In the best case (i.e. the column is fully correlated with the index),
+	 * we wouldn't read the same stripe again and again, thanks to locality.
+	 */
+	double avgStripeRowCount =
+		rowCount / (double) ColumnarTableStripeCount(relationId);
+	double minStripeReadCount = estimatedRows / avgStripeRowCount;
+
+	/*
+	 * Being close to 0 means low correlation, while being close to -1 or +1
+	 * means high correlation. For index scans on columnar tables, it doesn't
+	 * matter whether the column and the index are "correlated" (+1) or
+	 * "anti-correlated" (-1) since both help us avoid reading the same
+	 * stripe again and again.
+	 */
+	double absIndexCorrelation = Abs(indexCorrelation);
+
+	/*
+	 * To estimate the number of stripes that we need to read, we do linear
+	 * interpolation between minStripeReadCount & maxStripeReadCount. For
+	 * that, we use the complement to 1 of the absolute correlation, where
+	 * being close to 0 means high correlation and being close to 1 means
+	 * low correlation.
+	 * In practice, we only want to do an index scan when absIndexCorrelation
+	 * is 1 (or extremely close to it), or when the absolute number of tuples
+	 * returned is very small. Other cases will have a prohibitive cost.
+	 */
+	double complementIndexCorrelation = 1 - absIndexCorrelation;
+	double estimatedStripeReadCount =
+		minStripeReadCount + complementIndexCorrelation * (maxStripeReadCount -
+														   minStripeReadCount);
+
+	Cost scanCost = perStripeCost * estimatedStripeReadCount;
+
+	ereport(DEBUG4, (errmsg("re-costing index scan for columnar table: "
+							"selectivity = %.10f, complement abs "
+							"correlation = %.10f, per stripe cost = %.10f, "
+							"estimated stripe read count = %.10f, "
+							"total additional cost = %.10f",
+							indexSelectivity, complementIndexCorrelation,
+							perStripeCost, estimatedStripeReadCount,
+							scanCost)));
+
+	return scanCost;
+}
+
+
+/*
+ * RelationIdGetNumberOfAttributes returns the number of attributes that the
+ * relation with relationId has.
+ */ +static int +RelationIdGetNumberOfAttributes(Oid relationId) +{ + Relation relation = RelationIdGetRelation(relationId); + int nattrs = relation->rd_att->natts; + RelationClose(relation); + return nattrs; +} + + static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { diff --git a/src/test/regress/columnar_schedule b/src/test/regress/columnar_schedule index 218b04c4a..84a106c81 100644 --- a/src/test/regress/columnar_schedule +++ b/src/test/regress/columnar_schedule @@ -9,7 +9,7 @@ test: columnar_analyze test: columnar_data_types test: columnar_drop test: columnar_indexes -test: columnar_fallback_scan +test: columnar_fallback_scan columnar_paths test: columnar_partitioning test: columnar_permissions test: columnar_empty diff --git a/src/test/regress/expected/columnar_paths.out b/src/test/regress/expected/columnar_paths.out new file mode 100644 index 000000000..cf5a86a64 --- /dev/null +++ b/src/test/regress/expected/columnar_paths.out @@ -0,0 +1,302 @@ +CREATE SCHEMA columnar_paths; +SET search_path TO columnar_paths; +CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar; +INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i; +CREATE INDEX full_correlated_btree ON full_correlated (a); +ANALYZE full_correlated; +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_correlated WHERE a=200; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_correlated WHERE a<0; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_correlated WHERE a>10 AND a<20; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_correlated WHERE a>1000000; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a>900000; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_correlated WHERE a<1000; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a,b FROM full_correlated WHERE a<3000; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a<9000; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +BEGIN; + TRUNCATE full_correlated; + INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i; + -- Since we have much smaller number of rows, selectivity of below + -- query should be much higher. So we would choose columnar custom scan. 
+ SELECT columnar_test_helpers.uses_custom_scan ( + $$ + SELECT a FROM full_correlated WHERE a=200; + $$ + ); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +ROLLBACK; +-- same filter used in above, but choosing multiple columns would increase +-- custom scan cost, so we would prefer index scan this time +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a,b,c,d FROM full_correlated WHERE a<9000; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +-- again same filter used in above, but we would choose custom scan this +-- time since it would read three less columns from disk +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT c FROM full_correlated WHERE a<10000; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a>200; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a=0 OR a=5; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +DROP INDEX full_correlated_btree; +CREATE INDEX full_correlated_hash ON full_correlated USING hash(a); +ANALYZE full_correlated; +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a<10; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a>1 AND a<10; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a=0 OR a=5; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a=1000; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a,c FROM full_correlated WHERE a=1000; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +CREATE TABLE full_anti_correlated (a int, b text) USING columnar; +INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i; +CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b); +ANALYZE full_anti_correlated; +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_anti_correlated WHERE b='600'; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a,b FROM full_anti_correlated WHERE b='600'; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10'; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +DROP INDEX full_anti_correlated_hash; +CREATE INDEX full_anti_correlated_btree ON 
full_anti_correlated (a,b); +ANALYZE full_anti_correlated; +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_anti_correlated WHERE a>6500 AND a<7000 AND b<'10000'; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000 AND b='24'; +$$ +); + uses_index_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000'; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +CREATE TABLE no_correlation (a int, b text) USING columnar; +INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i; +CREATE INDEX no_correlation_btree ON no_correlation (a); +ANALYZE no_correlation; +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM no_correlation WHERE a < 2; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM no_correlation WHERE a = 200; +$$ +); + uses_custom_scan +--------------------------------------------------------------------- + t +(1 row) + +SET client_min_messages TO WARNING; +DROP SCHEMA columnar_paths CASCADE; diff --git a/src/test/regress/expected/columnar_test_helpers.out b/src/test/regress/expected/columnar_test_helpers.out index 2f7cad23b..6b1421de8 100644 --- a/src/test/regress/expected/columnar_test_helpers.out +++ b/src/test/regress/expected/columnar_test_helpers.out @@ -77,3 +77,30 @@ CREATE FUNCTION top_memory_context_usage() RETURNS BIGINT AS $$ SELECT TopMemoryContext FROM columnar_test_helpers.columnar_store_memory_stats(); $$ LANGUAGE SQL VOLATILE; +CREATE OR REPLACE FUNCTION uses_index_scan(command text) +RETURNS BOOLEAN AS $$ +DECLARE + query_plan text; +BEGIN + FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP + IF query_plan ILIKE '%Index Only Scan using%' OR + query_plan ILIKE '%Index Scan using%' + THEN + RETURN true; + END IF; + END LOOP; + RETURN false; +END; $$ language plpgsql; +CREATE OR REPLACE FUNCTION uses_custom_scan(command text) +RETURNS BOOLEAN AS $$ +DECLARE + query_plan text; +BEGIN + FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP + IF query_plan ILIKE '%Custom Scan (ColumnarScan)%' + THEN + RETURN true; + END IF; + END LOOP; + RETURN false; +END; $$ language plpgsql; diff --git a/src/test/regress/sql/columnar_paths.sql b/src/test/regress/sql/columnar_paths.sql new file mode 100644 index 000000000..be128ba9b --- /dev/null +++ b/src/test/regress/sql/columnar_paths.sql @@ -0,0 +1,203 @@ +CREATE SCHEMA columnar_paths; +SET search_path TO columnar_paths; + +CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar; +INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i; +CREATE INDEX full_correlated_btree ON full_correlated (a); +ANALYZE full_correlated; + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_correlated WHERE a=200; +$$ +); + +SELECT columnar_test_helpers.uses_index_scan ( +$$ 
+SELECT a FROM full_correlated WHERE a<0; +$$ +); + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_correlated WHERE a>10 AND a<20; +$$ +); + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_correlated WHERE a>1000000; +$$ +); + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a>900000; +$$ +); + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_correlated WHERE a<1000; +$$ +); + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a,b FROM full_correlated WHERE a<3000; +$$ +); + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a<9000; +$$ +); + +BEGIN; + TRUNCATE full_correlated; + INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000) i; + + -- Since we have much smaller number of rows, selectivity of below + -- query should be much higher. So we would choose columnar custom scan. + SELECT columnar_test_helpers.uses_custom_scan ( + $$ + SELECT a FROM full_correlated WHERE a=200; + $$ + ); +ROLLBACK; + +-- same filter used in above, but choosing multiple columns would increase +-- custom scan cost, so we would prefer index scan this time +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a,b,c,d FROM full_correlated WHERE a<9000; +$$ +); + +-- again same filter used in above, but we would choose custom scan this +-- time since it would read three less columns from disk +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT c FROM full_correlated WHERE a<10000; +$$ +); + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a>200; +$$ +); + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a=0 OR a=5; +$$ +); + +DROP INDEX full_correlated_btree; + +CREATE INDEX full_correlated_hash ON full_correlated USING hash(a); +ANALYZE full_correlated; + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a<10; +$$ +); + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a>1 AND a<10; +$$ +); + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a=0 OR a=5; +$$ +); + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_correlated WHERE a=1000; +$$ +); + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a,c FROM full_correlated WHERE a=1000; +$$ +); + +CREATE TABLE full_anti_correlated (a int, b text) USING columnar; +INSERT INTO full_anti_correlated SELECT i, i::text FROM generate_series(1, 500000) i; +CREATE INDEX full_anti_correlated_hash ON full_anti_correlated USING hash(b); +ANALYZE full_anti_correlated; + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_anti_correlated WHERE b='600'; +$$ +); + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a,b FROM full_anti_correlated WHERE b='600'; +$$ +); + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a,b FROM full_anti_correlated WHERE b='600' OR b='10'; +$$ +); + +DROP INDEX full_anti_correlated_hash; + +CREATE INDEX full_anti_correlated_btree ON full_anti_correlated (a,b); +ANALYZE full_anti_correlated; + +SELECT columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_anti_correlated WHERE a>6500 AND a<7000 AND b<'10000'; +$$ +); + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000; +$$ +); + +SELECT 
columnar_test_helpers.uses_index_scan ( +$$ +SELECT a FROM full_anti_correlated WHERE a>2000 AND a<7000 AND b='24'; +$$ +); + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM full_anti_correlated WHERE a<7000 AND b<'10000'; +$$ +); + +CREATE TABLE no_correlation (a int, b text) USING columnar; +INSERT INTO no_correlation SELECT random()*5000, (random()*5000)::int::text FROM generate_series(1, 500000) i; +CREATE INDEX no_correlation_btree ON no_correlation (a); +ANALYZE no_correlation; + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM no_correlation WHERE a < 2; +$$ +); + +SELECT columnar_test_helpers.uses_custom_scan ( +$$ +SELECT a FROM no_correlation WHERE a = 200; +$$ +); + +SET client_min_messages TO WARNING; +DROP SCHEMA columnar_paths CASCADE; diff --git a/src/test/regress/sql/columnar_test_helpers.sql b/src/test/regress/sql/columnar_test_helpers.sql index 08438a75f..07b6230ab 100644 --- a/src/test/regress/sql/columnar_test_helpers.sql +++ b/src/test/regress/sql/columnar_test_helpers.sql @@ -84,3 +84,32 @@ CREATE FUNCTION top_memory_context_usage() RETURNS BIGINT AS $$ SELECT TopMemoryContext FROM columnar_test_helpers.columnar_store_memory_stats(); $$ LANGUAGE SQL VOLATILE; + +CREATE OR REPLACE FUNCTION uses_index_scan(command text) +RETURNS BOOLEAN AS $$ +DECLARE + query_plan text; +BEGIN + FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP + IF query_plan ILIKE '%Index Only Scan using%' OR + query_plan ILIKE '%Index Scan using%' + THEN + RETURN true; + END IF; + END LOOP; + RETURN false; +END; $$ language plpgsql; + +CREATE OR REPLACE FUNCTION uses_custom_scan(command text) +RETURNS BOOLEAN AS $$ +DECLARE + query_plan text; +BEGIN + FOR query_plan IN EXECUTE 'EXPLAIN' || command LOOP + IF query_plan ILIKE '%Custom Scan (ColumnarScan)%' + THEN + RETURN true; + END IF; + END LOOP; + RETURN false; +END; $$ language plpgsql;
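
Reviewer note (illustration only, not part of the patch): the new cost model boils down to a linear interpolation between a best-case and a worst-case stripe read count, weighted by the index correlation reported by amcostestimate(). The standalone C sketch below reproduces that arithmetic with made-up inputs; the constants (row count, stripe count, selectivity, correlation, per-stripe cost) are hypothetical stand-ins for the values the patch obtains from ColumnarTableRowCount(), ColumnarTableStripeCount(), amcostestimate() and ColumnarPerStripeScanCost().

/*
 * recost_sketch.c -- standalone sketch of the arithmetic performed by
 * ColumnarIndexScanAddTotalCost, using hypothetical inputs.
 * Build with: cc -o recost_sketch recost_sketch.c -lm
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	/* hypothetical statistics; the patch reads these from the catalog/indexAM */
	double rowCount = 1000000;        /* ColumnarTableRowCount() */
	double stripeCount = 7;           /* ColumnarTableStripeCount() */
	double indexSelectivity = 0.001;  /* amcostestimate() */
	double indexCorrelation = 0.2;    /* amcostestimate() */
	double perStripeCost = 100.0;     /* ColumnarPerStripeScanCost() */

	double estimatedRows = rowCount * indexSelectivity;

	/* worst case: every matching row lives in a different stripe */
	double maxStripeReadCount = estimatedRows;

	/* best case: matching rows are clustered, so each stripe is read once */
	double avgStripeRowCount = rowCount / stripeCount;
	double minStripeReadCount = estimatedRows / avgStripeRowCount;

	/* only the strength of the correlation matters, not its sign */
	double absIndexCorrelation = fabs(indexCorrelation);

	/* linear interpolation between the best and worst case */
	double complementIndexCorrelation = 1 - absIndexCorrelation;
	double estimatedStripeReadCount =
		minStripeReadCount + complementIndexCorrelation *
		(maxStripeReadCount - minStripeReadCount);

	double scanCost = perStripeCost * estimatedStripeReadCount;

	printf("estimated rows = %.2f, stripes read = %.2f, added scan cost = %.2f\n",
		   estimatedRows, estimatedStripeReadCount, scanCost);
	return 0;
}

With these example numbers, 1,000 matching rows at correlation 0.2 translate into roughly 800 stripe reads and about 80,000 units of additional cost, which is why the comment in ColumnarIndexScanAddTotalCost notes that an index scan only pays off when the absolute correlation is (nearly) 1 or the number of returned tuples is very small.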