Generate parameterized paths for columnar scans. (#5172)

Allow ColumnarScans to push down join quals by generating
parameterized paths. This significantly expands the utility of chunk
group filtering, making a ColumnarScan behave similarly to an index
when it's on the inner side of a nested loop join.
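
For example, in the regression tests added below, a join qual
(r1.id1 = id) is evaluated inside the ColumnarScan itself, where it
also prunes chunk groups (excerpted from a plan further down):

-> Custom Scan (ColumnarScan) on coltest (actual rows=1 loops=4)
Filter: ((x1 > 15000) AND (r1.id1 = id) AND ((x1)::text > '000000'::text))
Rows Removed by Filter: 999
Columnar Chunk Group Filters: ((x1 > 15000) AND (r1.id1 = id))
Columnar Chunk Groups Removed by Filter: 19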

Also, evaluate all parameters at beginscan/rescan time, which makes
qual pushdown work for external parameters as well.
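
For instance, the tests below exercise this with an external parameter:

prepare foo (int) as select x from columnar_prepared_stmt where x = $1;
execute foo(3);

Chunk group filtering keeps working even after PostgreSQL switches to
a generic plan on repeated executions, because $1 is now evaluated at
beginscan/rescan rather than at plan time.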

Fixes #4488.
jeff-davis 2021-09-02 22:22:48 -07:00 committed by GitHub
parent e41854f590
commit 4718b6bcdf
15 changed files with 2340 additions and 137 deletions

File diff suppressed because it is too large


@@ -467,7 +467,7 @@ HasUnreadStripe(ColumnarReadState *readState)
* the beginning again
*/
void
-ColumnarRescan(ColumnarReadState *readState)
+ColumnarRescan(ColumnarReadState *readState, List *scanQual)
{
MemoryContext oldContext = MemoryContextSwitchTo(readState->scanContext);
@@ -477,6 +477,7 @@ ColumnarRescan(ColumnarReadState *readState)
readState->snapshot);
readState->chunkGroupsFiltered = 0;
+readState->whereClauseList = copyObject(scanQual);
MemoryContextSwitchTo(oldContext);
}


@@ -330,9 +330,13 @@ columnar_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
bool allow_strat, bool allow_sync, bool allow_pagemode)
{
ColumnarScanDesc scan = (ColumnarScanDesc) sscan;
+/* XXX: hack to pass in new quals that aren't actually scan keys */
+List *scanQual = (List *) key;
if (scan->cs_readState != NULL)
{
-ColumnarRescan(scan->cs_readState);
+ColumnarRescan(scan->cs_readState, scanQual);
}
}
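
(The CustomScan caller side is in the large suppressed diff. A minimal
sketch of how the hack above is driven on rescan, assuming hypothetical
names for everything except table_rescan() and the List-as-ScanKey
cast, which come from this commit:)

static void
ColumnarScan_ReScanCustomScan(CustomScanState *node)
{
	/* hypothetical helper: re-evaluate the pushed-down quals with the
	 * current parameter values */
	List *scanQual = EvaluateScanQuals(node);
	TableScanDesc scanDesc = node->ss.ss_currentScanDesc;

	if (scanDesc != NULL)
	{
		/* the qual list travels disguised as a ScanKey; see the
		 * XXX comment in columnar_rescan() above */
		table_rescan(scanDesc, (ScanKey) scanQual);
	}
}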


@@ -218,7 +218,7 @@ extern ColumnarReadState * ColumnarBeginRead(Relation relation,
bool snapshotRegisteredByUs);
extern bool ColumnarReadNextRow(ColumnarReadState *state, Datum *columnValues,
bool *columnNulls, uint64 *rowNumber);
-extern void ColumnarRescan(ColumnarReadState *readState);
+extern void ColumnarRescan(ColumnarReadState *readState, List *scanQual);
extern bool ColumnarReadRowByRowNumber(ColumnarReadState *readState,
uint64 rowNumber, Datum *columnValues,
bool *columnNulls);


@@ -23,6 +23,7 @@ $$
RETURN result;
END;
$$ LANGUAGE PLPGSQL;
+set columnar.qual_pushdown_correlation = 0.0;
-- Create and load data
-- chunk_group_row_limit '1000', stripe_row_limit '2000'
set columnar.stripe_row_limit = 2000;
@@ -127,9 +128,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off)
Custom Scan (ColumnarScan) on simple_chunk_filtering (actual rows=111111 loops=1)
Filter: (i > 123456)
Rows Removed by Filter: 3457
-Columnar Chunk Groups Removed by Filter: 12
Columnar Projected Columns: i
-(5 rows)
+Columnar Chunk Group Filters: (i > 123456)
+Columnar Chunk Groups Removed by Filter: 12
+(6 rows)
SET columnar.enable_qual_pushdown = false;
EXPLAIN (analyze on, costs off, timing off, summary off)
@@ -139,9 +141,8 @@ EXPLAIN (analyze on, costs off, timing off, summary off)
Custom Scan (ColumnarScan) on simple_chunk_filtering (actual rows=111111 loops=1)
Filter: (i > 123456)
Rows Removed by Filter: 123457
-Columnar Chunk Groups Removed by Filter: 0
Columnar Projected Columns: i
-(5 rows)
+(4 rows)
SET columnar.enable_qual_pushdown TO DEFAULT;
-- https://github.com/citusdata/citus/issues/4555
@@ -155,9 +156,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off)
Custom Scan (ColumnarScan) on simple_chunk_filtering (actual rows=20000 loops=1)
Filter: (i > 180000)
Rows Removed by Filter: 1
-Columnar Chunk Groups Removed by Filter: 18
Columnar Projected Columns: i
-(5 rows)
+Columnar Chunk Group Filters: (i > 180000)
+Columnar Chunk Groups Removed by Filter: 18
+(6 rows)
DROP TABLE simple_chunk_filtering;
CREATE TABLE multi_column_chunk_filtering(a int, b int) USING columnar;
@@ -170,9 +172,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off)
-> Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=184567 loops=1)
Filter: (a > 50000)
Rows Removed by Filter: 1
-Columnar Chunk Groups Removed by Filter: 5
Columnar Projected Columns: a
-(6 rows)
+Columnar Chunk Group Filters: (a > 50000)
+Columnar Chunk Groups Removed by Filter: 5
+(7 rows)
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT count(*) FROM multi_column_chunk_filtering WHERE a > 50000 AND b > 50000;
@@ -182,9 +185,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off)
-> Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=184567 loops=1)
Filter: ((a > 50000) AND (b > 50000))
Rows Removed by Filter: 1
-Columnar Chunk Groups Removed by Filter: 5
Columnar Projected Columns: a, b
-(6 rows)
+Columnar Chunk Group Filters: ((a > 50000) AND (b > 50000))
+Columnar Chunk Groups Removed by Filter: 5
+(7 rows)
-- make next tests faster
TRUNCATE multi_column_chunk_filtering;
@@ -195,9 +199,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off)
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=0 loops=1)
Filter: ((a > 50000) AND (b > 50000))
-Columnar Chunk Groups Removed by Filter: 1
Columnar Projected Columns: a, b
-(4 rows)
+Columnar Chunk Group Filters: ((a > 50000) AND (b > 50000))
+Columnar Chunk Groups Removed by Filter: 1
+(5 rows)
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT b, a FROM multi_column_chunk_filtering WHERE b > 50000;
@@ -206,9 +211,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off)
Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=0 loops=1)
Filter: (b > 50000)
Rows Removed by Filter: 6
-Columnar Chunk Groups Removed by Filter: 0
Columnar Projected Columns: a, b
-(5 rows)
+Columnar Chunk Group Filters: (b > 50000)
+Columnar Chunk Groups Removed by Filter: 0
+(6 rows)
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT FROM multi_column_chunk_filtering WHERE a > 50000;
@@ -216,18 +222,18 @@ EXPLAIN (analyze on, costs off, timing off, summary off)
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=0 loops=1)
Filter: (a > 50000)
-Columnar Chunk Groups Removed by Filter: 1
Columnar Projected Columns: a
-(4 rows)
+Columnar Chunk Group Filters: (a > 50000)
+Columnar Chunk Groups Removed by Filter: 1
+(5 rows)
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT FROM multi_column_chunk_filtering;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=6 loops=1)
-Columnar Chunk Groups Removed by Filter: 0
Columnar Projected Columns: <columnar optimized out all columns>
-(3 rows)
+(2 rows)
BEGIN;
ALTER TABLE multi_column_chunk_filtering DROP COLUMN a;
@@ -237,9 +243,8 @@ BEGIN;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=6 loops=1)
-Columnar Chunk Groups Removed by Filter: 0
Columnar Projected Columns: <columnar optimized out all columns>
-(3 rows)
+(2 rows)
ROLLBACK;
CREATE TABLE another_columnar_table(x int, y int) USING columnar;
@@ -252,10 +257,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off)
-> Custom Scan (ColumnarScan) on another_columnar_table (actual rows=4 loops=1)
Filter: (x > 1)
Rows Removed by Filter: 2
-Columnar Chunk Groups Removed by Filter: 0
Columnar Projected Columns: x, y
--> Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=6 loops=4)
+Columnar Chunk Group Filters: (x > 1)
+Columnar Chunk Groups Removed by Filter: 0
+-> Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=6 loops=4)
Columnar Projected Columns: a
(9 rows)
@@ -306,3 +311,438 @@ select filtered_row_count('select count(*) from part_table where id > 75000');
(1 row)
drop table part_table;
-- test join parameterization
set columnar.stripe_row_limit = 2000;
set columnar.chunk_group_row_limit = 1000;
create table r1(id1 int, n1 int); -- row
create table r2(id2 int, n2 int); -- row
create table r3(id3 int, n3 int); -- row
create table r4(id4 int, n4 int); -- row
create table r5(id5 int, n5 int); -- row
create table r6(id6 int, n6 int); -- row
create table r7(id7 int, n7 int); -- row
create table coltest(id int, x1 int, x2 int, x3 int) using columnar;
create table coltest_part(id int, x1 int, x2 int, x3 int)
partition by range (id);
create table coltest_part0
partition of coltest_part for values from (0) to (10000)
using columnar;
create table coltest_part1
partition of coltest_part for values from (10000) to (20000); -- row
set columnar.stripe_row_limit to default;
set columnar.chunk_group_row_limit to default;
insert into r1 values(1234, 12350);
insert into r1 values(4567, 45000);
insert into r1 values(9101, 176000);
insert into r1 values(14202, 7);
insert into r1 values(18942, 189430);
insert into r2 values(1234, 123502);
insert into r2 values(4567, 450002);
insert into r2 values(9101, 1760002);
insert into r2 values(14202, 72);
insert into r2 values(18942, 1894302);
insert into r3 values(1234, 1235075);
insert into r3 values(4567, 4500075);
insert into r3 values(9101, 17600075);
insert into r3 values(14202, 775);
insert into r3 values(18942, 18943075);
insert into r4 values(1234, -1);
insert into r5 values(1234, -1);
insert into r6 values(1234, -1);
insert into r7 values(1234, -1);
insert into coltest
select g, g*10, g*100, g*1000 from generate_series(0, 19999) g;
insert into coltest_part
select g, g*10, g*100, g*1000 from generate_series(0, 19999) g;
ANALYZE r1, r2, r3, coltest, coltest_part;
-- force nested loop
set enable_mergejoin=false;
set enable_hashjoin=false;
set enable_material=false;
-- test different kinds of expressions
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM r1, coltest WHERE
id1 = id AND x1 > 15000 AND x1::text > '000000' AND n1 % 10 = 0;
QUERY PLAN
---------------------------------------------------------------------
Nested Loop (actual rows=3 loops=1)
-> Seq Scan on r1 (actual rows=4 loops=1)
Filter: ((n1 % 10) = 0)
Rows Removed by Filter: 1
-> Custom Scan (ColumnarScan) on coltest (actual rows=1 loops=4)
Filter: ((x1 > 15000) AND (r1.id1 = id) AND ((x1)::text > '000000'::text))
Rows Removed by Filter: 999
Columnar Projected Columns: id, x1, x2, x3
Columnar Chunk Group Filters: ((x1 > 15000) AND (r1.id1 = id))
Columnar Chunk Groups Removed by Filter: 19
(10 rows)
SELECT * FROM r1, coltest WHERE
id1 = id AND x1 > 15000 AND x1::text > '000000' AND n1 % 10 = 0;
id1 | n1 | id | x1 | x2 | x3
---------------------------------------------------------------------
4567 | 45000 | 4567 | 45670 | 456700 | 4567000
9101 | 176000 | 9101 | 91010 | 910100 | 9101000
18942 | 189430 | 18942 | 189420 | 1894200 | 18942000
(3 rows)
-- test equivalence classes
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM r1, r2, r3, r4, r5, r6, r7, coltest WHERE
id = id1 AND id1 = id2 AND id2 = id3 AND id3 = id4 AND
id4 = id5 AND id5 = id6 AND id6 = id7;
QUERY PLAN
---------------------------------------------------------------------
Nested Loop (actual rows=1 loops=1)
Join Filter: (coltest.id = r7.id7)
-> Nested Loop (actual rows=1 loops=1)
Join Filter: (coltest.id = r6.id6)
-> Nested Loop (actual rows=1 loops=1)
Join Filter: (coltest.id = r5.id5)
-> Nested Loop (actual rows=1 loops=1)
Join Filter: (coltest.id = r4.id4)
Rows Removed by Join Filter: 4
-> Nested Loop (actual rows=5 loops=1)
-> Nested Loop (actual rows=5 loops=1)
Join Filter: (r1.id1 = r3.id3)
Rows Removed by Join Filter: 20
-> Nested Loop (actual rows=5 loops=1)
Join Filter: (r1.id1 = r2.id2)
Rows Removed by Join Filter: 20
-> Seq Scan on r1 (actual rows=5 loops=1)
-> Seq Scan on r2 (actual rows=5 loops=5)
-> Seq Scan on r3 (actual rows=5 loops=5)
-> Custom Scan (ColumnarScan) on coltest (actual rows=1 loops=5)
Filter: (r1.id1 = id)
Rows Removed by Filter: 999
Columnar Projected Columns: id, x1, x2, x3
Columnar Chunk Group Filters: (r1.id1 = id)
Columnar Chunk Groups Removed by Filter: 19
-> Seq Scan on r4 (actual rows=1 loops=5)
-> Seq Scan on r5 (actual rows=1 loops=1)
-> Seq Scan on r6 (actual rows=1 loops=1)
-> Seq Scan on r7 (actual rows=1 loops=1)
(29 rows)
SELECT * FROM r1, r2, r3, r4, r5, r6, r7, coltest WHERE
id = id1 AND id1 = id2 AND id2 = id3 AND id3 = id4 AND
id4 = id5 AND id5 = id6 AND id6 = id7;
id1 | n1 | id2 | n2 | id3 | n3 | id4 | n4 | id5 | n5 | id6 | n6 | id7 | n7 | id | x1 | x2 | x3
---------------------------------------------------------------------
1234 | 12350 | 1234 | 123502 | 1234 | 1235075 | 1234 | -1 | 1234 | -1 | 1234 | -1 | 1234 | -1 | 1234 | 12340 | 123400 | 1234000
(1 row)
-- test path generation with different thresholds
set columnar.planner_debug_level = 'notice';
set columnar.max_custom_scan_paths to 10;
EXPLAIN (costs off, timing off, summary off)
SELECT * FROM coltest c1, coltest c2, coltest c3, coltest c4 WHERE
c1.id = c2.id and c1.id = c3.id and c1.id = c4.id;
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: unparameterized; 0 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: parameterized by rels {c2}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: parameterized by rels {c2, c3}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: parameterized by rels {c2, c3, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: parameterized by rels {c2, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: parameterized by rels {c3}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: parameterized by rels {c3, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: parameterized by rels {c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: unparameterized; 0 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: parameterized by rels {c1}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: parameterized by rels {c1, c3}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: parameterized by rels {c1, c3, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: parameterized by rels {c1, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: parameterized by rels {c3}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: parameterized by rels {c3, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: parameterized by rels {c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: unparameterized; 0 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: parameterized by rels {c1}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: parameterized by rels {c1, c2}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: parameterized by rels {c1, c2, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: parameterized by rels {c1, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: parameterized by rels {c2}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: parameterized by rels {c2, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: parameterized by rels {c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: unparameterized; 0 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: parameterized by rels {c1}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: parameterized by rels {c1, c2}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: parameterized by rels {c1, c2, c3}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: parameterized by rels {c1, c3}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: parameterized by rels {c2}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: parameterized by rels {c2, c3}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: parameterized by rels {c3}; 1 clauses pushed down
QUERY PLAN
---------------------------------------------------------------------
Nested Loop
-> Nested Loop
-> Nested Loop
-> Custom Scan (ColumnarScan) on coltest c1
Columnar Projected Columns: id, x1, x2, x3
-> Custom Scan (ColumnarScan) on coltest c2
Filter: (c1.id = id)
Columnar Projected Columns: id, x1, x2, x3
Columnar Chunk Group Filters: (c1.id = id)
-> Custom Scan (ColumnarScan) on coltest c3
Filter: (c1.id = id)
Columnar Projected Columns: id, x1, x2, x3
Columnar Chunk Group Filters: (c1.id = id)
-> Custom Scan (ColumnarScan) on coltest c4
Filter: (c1.id = id)
Columnar Projected Columns: id, x1, x2, x3
Columnar Chunk Group Filters: (c1.id = id)
(17 rows)
set columnar.max_custom_scan_paths to 2;
EXPLAIN (costs off, timing off, summary off)
SELECT * FROM coltest c1, coltest c2, coltest c3, coltest c4 WHERE
c1.id = c2.id and c1.id = c3.id and c1.id = c4.id;
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: unparameterized; 0 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: unparameterized; 0 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: unparameterized; 0 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: unparameterized; 0 clauses pushed down
QUERY PLAN
---------------------------------------------------------------------
Nested Loop
Join Filter: (c1.id = c4.id)
-> Nested Loop
Join Filter: (c1.id = c3.id)
-> Nested Loop
Join Filter: (c1.id = c2.id)
-> Custom Scan (ColumnarScan) on coltest c1
Columnar Projected Columns: id, x1, x2, x3
-> Custom Scan (ColumnarScan) on coltest c2
Columnar Projected Columns: id, x1, x2, x3
-> Custom Scan (ColumnarScan) on coltest c3
Columnar Projected Columns: id, x1, x2, x3
-> Custom Scan (ColumnarScan) on coltest c4
Columnar Projected Columns: id, x1, x2, x3
(14 rows)
set columnar.max_custom_scan_paths to default;
set columnar.planner_debug_level to default;
-- test more complex parameterization
set columnar.planner_debug_level = 'notice';
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM r1, r2, r3, coltest WHERE
id1 = id2 AND id2 = id3 AND id3 = id AND
n1 > x1 AND n2 > x2 AND n3 > x3;
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: unparameterized; 0 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: parameterized by rels {r1}; 2 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: parameterized by rels {r1, r2}; 3 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: parameterized by rels {r1, r2, r3}; 4 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: parameterized by rels {r1, r3}; 3 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: parameterized by rels {r2}; 2 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: parameterized by rels {r2, r3}; 3 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: parameterized by rels {r3}; 2 clauses pushed down
QUERY PLAN
---------------------------------------------------------------------
Nested Loop (actual rows=3 loops=1)
Join Filter: ((r3.n3 > coltest.x3) AND (r1.id1 = r3.id3))
Rows Removed by Join Filter: 12
-> Nested Loop (actual rows=3 loops=1)
Join Filter: ((r2.n2 > coltest.x2) AND (r1.id1 = r2.id2))
Rows Removed by Join Filter: 12
-> Nested Loop (actual rows=3 loops=1)
-> Seq Scan on r1 (actual rows=5 loops=1)
-> Custom Scan (ColumnarScan) on coltest (actual rows=1 loops=5)
Filter: ((r1.n1 > x1) AND (r1.id1 = id))
Rows Removed by Filter: 799
Columnar Projected Columns: id, x1, x2, x3
Columnar Chunk Group Filters: ((r1.n1 > x1) AND (r1.id1 = id))
Columnar Chunk Groups Removed by Filter: 19
-> Seq Scan on r2 (actual rows=5 loops=3)
-> Seq Scan on r3 (actual rows=5 loops=3)
(16 rows)
set columnar.planner_debug_level to default;
SELECT * FROM r1, r2, r3, coltest WHERE
id1 = id2 AND id2 = id3 AND id3 = id AND
n1 > x1 AND n2 > x2 AND n3 > x3;
id1 | n1 | id2 | n2 | id3 | n3 | id | x1 | x2 | x3
---------------------------------------------------------------------
1234 | 12350 | 1234 | 123502 | 1234 | 1235075 | 1234 | 12340 | 123400 | 1234000
9101 | 176000 | 9101 | 1760002 | 9101 | 17600075 | 9101 | 91010 | 910100 | 9101000
18942 | 189430 | 18942 | 1894302 | 18942 | 18943075 | 18942 | 189420 | 1894200 | 18942000
(3 rows)
-- test partitioning parameterization
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM r1, coltest_part WHERE
id1 = id AND n1 > x1;
QUERY PLAN
---------------------------------------------------------------------
Nested Loop (actual rows=3 loops=1)
-> Seq Scan on r1 (actual rows=5 loops=1)
-> Append (actual rows=1 loops=5)
-> Custom Scan (ColumnarScan) on coltest_part0 coltest_part_1 (actual rows=1 loops=3)
Filter: ((r1.n1 > x1) AND (r1.id1 = id))
Rows Removed by Filter: 999
Columnar Projected Columns: id, x1, x2, x3
Columnar Chunk Group Filters: ((r1.n1 > x1) AND (r1.id1 = id))
Columnar Chunk Groups Removed by Filter: 9
-> Seq Scan on coltest_part1 coltest_part_2 (actual rows=0 loops=2)
Filter: ((r1.n1 > x1) AND (r1.id1 = id))
Rows Removed by Filter: 10000
(12 rows)
SELECT * FROM r1, coltest_part WHERE
id1 = id AND n1 > x1;
id1 | n1 | id | x1 | x2 | x3
---------------------------------------------------------------------
1234 | 12350 | 1234 | 12340 | 123400 | 1234000
9101 | 176000 | 9101 | 91010 | 910100 | 9101000
18942 | 189430 | 18942 | 189420 | 1894200 | 18942000
(3 rows)
set enable_mergejoin to default;
set enable_hashjoin to default;
set enable_material to default;
set columnar.planner_debug_level = 'notice';
alter table coltest add column x5 int default (random()*20000)::int;
analyze coltest;
-- test that expressions on whole-row references are not pushed down
select * from coltest where coltest = (1,1,1,1);
NOTICE: columnar planner: cannot push down clause: var is whole-row reference
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: unparameterized; 0 clauses pushed down
id | x1 | x2 | x3 | x5
---------------------------------------------------------------------
(0 rows)
-- test that expressions on uncorrelated attributes are not pushed down
set columnar.qual_pushdown_correlation to default;
select * from coltest where x5 = 23484;
NOTICE: columnar planner: cannot push down clause: var attribute 5 is uncorrelated
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: unparameterized; 0 clauses pushed down
id | x1 | x2 | x3 | x5
---------------------------------------------------------------------
(0 rows)
-- test that expressions on volatile functions are not pushed down
create function vol() returns int language plpgsql as $$
BEGIN
RETURN 1;
END;
$$;
select * from coltest where x3 = vol();
NOTICE: columnar planner: cannot push down clause: expr contains volatile functions
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: unparameterized; 0 clauses pushed down
id | x1 | x2 | x3 | x5
---------------------------------------------------------------------
(0 rows)
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM coltest c1 WHERE ceil(x1) > 4222;
NOTICE: columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
HINT: Var must only reference this rel, and Expr must not reference this rel
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: unparameterized; 0 clauses pushed down
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on coltest c1 (actual rows=19577 loops=1)
Filter: (ceil((x1)::double precision) > '4222'::double precision)
Rows Removed by Filter: 423
Columnar Projected Columns: id, x1, x2, x3, x5
(4 rows)
set columnar.planner_debug_level to default;
--
-- https://github.com/citusdata/citus/issues/4488
--
create table columnar_prepared_stmt (x int, y int) using columnar;
insert into columnar_prepared_stmt select s, s from generate_series(1,5000000) s;
prepare foo (int) as select x from columnar_prepared_stmt where x = $1;
execute foo(3);
x
---------------------------------------------------------------------
3
(1 row)
execute foo(3);
x
---------------------------------------------------------------------
3
(1 row)
execute foo(3);
x
---------------------------------------------------------------------
3
(1 row)
execute foo(3);
x
---------------------------------------------------------------------
3
(1 row)
select filtered_row_count('execute foo(3)');
filtered_row_count
---------------------------------------------------------------------
9999
(1 row)
select filtered_row_count('execute foo(3)');
filtered_row_count
---------------------------------------------------------------------
9999
(1 row)
select filtered_row_count('execute foo(3)');
filtered_row_count
---------------------------------------------------------------------
9999
(1 row)
select filtered_row_count('execute foo(3)');
filtered_row_count
---------------------------------------------------------------------
9999
(1 row)
drop table columnar_prepared_stmt;


@@ -0,0 +1,748 @@
--
-- Test chunk filtering in columnar using min/max values in stripe skip lists.
--
--
-- filtered_row_count returns number of rows filtered by the WHERE clause.
-- If chunks get filtered by columnar, less rows are passed to WHERE
-- clause, so this function should return a lower number.
--
CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS
$$
DECLARE
result bigint;
rec text;
BEGIN
result := 0;
FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP
IF rec ~ '^\s+Rows Removed by Filter' then
result := regexp_replace(rec, '[^0-9]*', '', 'g');
END IF;
END LOOP;
RETURN result;
END;
$$ LANGUAGE PLPGSQL;
set columnar.qual_pushdown_correlation = 0.0;
-- Create and load data
-- chunk_group_row_limit '1000', stripe_row_limit '2000'
set columnar.stripe_row_limit = 2000;
set columnar.chunk_group_row_limit = 1000;
CREATE TABLE test_chunk_filtering (a int)
USING columnar;
INSERT INTO test_chunk_filtering SELECT generate_series(1,10000);
-- Verify that filtered_row_count is less than 1000 for the following queries
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering');
filtered_row_count
---------------------------------------------------------------------
0
(1 row)
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a < 200');
filtered_row_count
---------------------------------------------------------------------
801
(1 row)
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a > 200');
filtered_row_count
---------------------------------------------------------------------
200
(1 row)
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a < 9900');
filtered_row_count
---------------------------------------------------------------------
101
(1 row)
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a > 9900');
filtered_row_count
---------------------------------------------------------------------
900
(1 row)
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a < 0');
filtered_row_count
---------------------------------------------------------------------
0
(1 row)
-- Verify that filtered_row_count is less than 2000 for the following queries
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a BETWEEN 1 AND 10');
filtered_row_count
---------------------------------------------------------------------
990
(1 row)
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a BETWEEN 990 AND 2010');
filtered_row_count
---------------------------------------------------------------------
1979
(1 row)
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a BETWEEN -10 AND 0');
filtered_row_count
---------------------------------------------------------------------
0
(1 row)
-- Load data for second time and verify that filtered_row_count is exactly twice as before
INSERT INTO test_chunk_filtering SELECT generate_series(1,10000);
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a < 200');
filtered_row_count
---------------------------------------------------------------------
1602
(1 row)
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a < 0');
filtered_row_count
---------------------------------------------------------------------
0
(1 row)
SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a BETWEEN 990 AND 2010');
filtered_row_count
---------------------------------------------------------------------
3958
(1 row)
set columnar.stripe_row_limit to default;
set columnar.chunk_group_row_limit to default;
-- Verify that we are fine with collations which use a different alphabet order
CREATE TABLE collation_chunk_filtering_test(A text collate "da_DK")
USING columnar;
COPY collation_chunk_filtering_test FROM STDIN;
SELECT * FROM collation_chunk_filtering_test WHERE A > 'B';
a
---------------------------------------------------------------------
Å
(1 row)
CREATE TABLE simple_chunk_filtering(i int) USING COLUMNAR;
INSERT INTO simple_chunk_filtering SELECT generate_series(0,234567);
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM simple_chunk_filtering WHERE i > 123456;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on simple_chunk_filtering (actual rows=111111 loops=1)
Filter: (i > 123456)
Rows Removed by Filter: 3457
Columnar Projected Columns: i
Columnar Chunk Group Filters: (i > 123456)
Columnar Chunk Groups Removed by Filter: 12
(6 rows)
SET columnar.enable_qual_pushdown = false;
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM simple_chunk_filtering WHERE i > 123456;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on simple_chunk_filtering (actual rows=111111 loops=1)
Filter: (i > 123456)
Rows Removed by Filter: 123457
Columnar Projected Columns: i
(4 rows)
SET columnar.enable_qual_pushdown TO DEFAULT;
-- https://github.com/citusdata/citus/issues/4555
TRUNCATE simple_chunk_filtering;
INSERT INTO simple_chunk_filtering SELECT generate_series(0,200000);
COPY (SELECT * FROM simple_chunk_filtering WHERE i > 180000) TO '/dev/null';
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM simple_chunk_filtering WHERE i > 180000;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on simple_chunk_filtering (actual rows=20000 loops=1)
Filter: (i > 180000)
Rows Removed by Filter: 1
Columnar Projected Columns: i
Columnar Chunk Group Filters: (i > 180000)
Columnar Chunk Groups Removed by Filter: 18
(6 rows)
DROP TABLE simple_chunk_filtering;
CREATE TABLE multi_column_chunk_filtering(a int, b int) USING columnar;
INSERT INTO multi_column_chunk_filtering SELECT i,i+1 FROM generate_series(0,234567) i;
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT count(*) FROM multi_column_chunk_filtering WHERE a > 50000;
QUERY PLAN
---------------------------------------------------------------------
Aggregate (actual rows=1 loops=1)
-> Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=184567 loops=1)
Filter: (a > 50000)
Rows Removed by Filter: 1
Columnar Projected Columns: a
Columnar Chunk Group Filters: (a > 50000)
Columnar Chunk Groups Removed by Filter: 5
(7 rows)
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT count(*) FROM multi_column_chunk_filtering WHERE a > 50000 AND b > 50000;
QUERY PLAN
---------------------------------------------------------------------
Aggregate (actual rows=1 loops=1)
-> Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=184567 loops=1)
Filter: ((a > 50000) AND (b > 50000))
Rows Removed by Filter: 1
Columnar Projected Columns: a, b
Columnar Chunk Group Filters: ((a > 50000) AND (b > 50000))
Columnar Chunk Groups Removed by Filter: 5
(7 rows)
-- make next tests faster
TRUNCATE multi_column_chunk_filtering;
INSERT INTO multi_column_chunk_filtering SELECT generate_series(0,5);
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT b FROM multi_column_chunk_filtering WHERE a > 50000 AND b > 50000;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=0 loops=1)
Filter: ((a > 50000) AND (b > 50000))
Columnar Projected Columns: a, b
Columnar Chunk Group Filters: ((a > 50000) AND (b > 50000))
Columnar Chunk Groups Removed by Filter: 1
(5 rows)
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT b, a FROM multi_column_chunk_filtering WHERE b > 50000;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=0 loops=1)
Filter: (b > 50000)
Rows Removed by Filter: 6
Columnar Projected Columns: a, b
Columnar Chunk Group Filters: (b > 50000)
Columnar Chunk Groups Removed by Filter: 0
(6 rows)
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT FROM multi_column_chunk_filtering WHERE a > 50000;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=0 loops=1)
Filter: (a > 50000)
Columnar Projected Columns: a
Columnar Chunk Group Filters: (a > 50000)
Columnar Chunk Groups Removed by Filter: 1
(5 rows)
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT FROM multi_column_chunk_filtering;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=6 loops=1)
Columnar Projected Columns: <columnar optimized out all columns>
(2 rows)
BEGIN;
ALTER TABLE multi_column_chunk_filtering DROP COLUMN a;
ALTER TABLE multi_column_chunk_filtering DROP COLUMN b;
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM multi_column_chunk_filtering;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=6 loops=1)
Columnar Projected Columns: <columnar optimized out all columns>
(2 rows)
ROLLBACK;
CREATE TABLE another_columnar_table(x int, y int) USING columnar;
INSERT INTO another_columnar_table SELECT generate_series(0,5);
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT a, y FROM multi_column_chunk_filtering, another_columnar_table WHERE x > 1;
QUERY PLAN
---------------------------------------------------------------------
Nested Loop (actual rows=24 loops=1)
-> Custom Scan (ColumnarScan) on another_columnar_table (actual rows=4 loops=1)
Filter: (x > 1)
Rows Removed by Filter: 2
Columnar Projected Columns: x, y
Columnar Chunk Group Filters: (x > 1)
Columnar Chunk Groups Removed by Filter: 0
-> Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=6 loops=4)
Columnar Projected Columns: a
(9 rows)
EXPLAIN (costs off, timing off, summary off)
SELECT y, * FROM another_columnar_table;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on another_columnar_table
Columnar Projected Columns: x, y
(2 rows)
EXPLAIN (costs off, timing off, summary off)
SELECT *, x FROM another_columnar_table;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on another_columnar_table
Columnar Projected Columns: x, y
(2 rows)
EXPLAIN (costs off, timing off, summary off)
SELECT y, another_columnar_table FROM another_columnar_table;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on another_columnar_table
Columnar Projected Columns: x, y
(2 rows)
EXPLAIN (costs off, timing off, summary off)
SELECT another_columnar_table, x FROM another_columnar_table;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on another_columnar_table
Columnar Projected Columns: x, y
(2 rows)
DROP TABLE multi_column_chunk_filtering, another_columnar_table;
--
-- https://github.com/citusdata/citus/issues/4780
--
create table part_table (id int) partition by range (id);
create table part_1_row partition of part_table for values from (150000) to (160000);
create table part_2_columnar partition of part_table for values from (0) to (150000) using columnar;
insert into part_table select generate_series(1,159999);
select filtered_row_count('select count(*) from part_table where id > 75000');
filtered_row_count
---------------------------------------------------------------------
5000
(1 row)
drop table part_table;
-- test join parameterization
set columnar.stripe_row_limit = 2000;
set columnar.chunk_group_row_limit = 1000;
create table r1(id1 int, n1 int); -- row
create table r2(id2 int, n2 int); -- row
create table r3(id3 int, n3 int); -- row
create table r4(id4 int, n4 int); -- row
create table r5(id5 int, n5 int); -- row
create table r6(id6 int, n6 int); -- row
create table r7(id7 int, n7 int); -- row
create table coltest(id int, x1 int, x2 int, x3 int) using columnar;
create table coltest_part(id int, x1 int, x2 int, x3 int)
partition by range (id);
create table coltest_part0
partition of coltest_part for values from (0) to (10000)
using columnar;
create table coltest_part1
partition of coltest_part for values from (10000) to (20000); -- row
set columnar.stripe_row_limit to default;
set columnar.chunk_group_row_limit to default;
insert into r1 values(1234, 12350);
insert into r1 values(4567, 45000);
insert into r1 values(9101, 176000);
insert into r1 values(14202, 7);
insert into r1 values(18942, 189430);
insert into r2 values(1234, 123502);
insert into r2 values(4567, 450002);
insert into r2 values(9101, 1760002);
insert into r2 values(14202, 72);
insert into r2 values(18942, 1894302);
insert into r3 values(1234, 1235075);
insert into r3 values(4567, 4500075);
insert into r3 values(9101, 17600075);
insert into r3 values(14202, 775);
insert into r3 values(18942, 18943075);
insert into r4 values(1234, -1);
insert into r5 values(1234, -1);
insert into r6 values(1234, -1);
insert into r7 values(1234, -1);
insert into coltest
select g, g*10, g*100, g*1000 from generate_series(0, 19999) g;
insert into coltest_part
select g, g*10, g*100, g*1000 from generate_series(0, 19999) g;
ANALYZE r1, r2, r3, coltest, coltest_part;
-- force nested loop
set enable_mergejoin=false;
set enable_hashjoin=false;
set enable_material=false;
-- test different kinds of expressions
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM r1, coltest WHERE
id1 = id AND x1 > 15000 AND x1::text > '000000' AND n1 % 10 = 0;
QUERY PLAN
---------------------------------------------------------------------
Nested Loop (actual rows=3 loops=1)
-> Seq Scan on r1 (actual rows=4 loops=1)
Filter: ((n1 % 10) = 0)
Rows Removed by Filter: 1
-> Custom Scan (ColumnarScan) on coltest (actual rows=1 loops=4)
Filter: ((x1 > 15000) AND (r1.id1 = id) AND ((x1)::text > '000000'::text))
Rows Removed by Filter: 999
Columnar Projected Columns: id, x1, x2, x3
Columnar Chunk Group Filters: ((x1 > 15000) AND (r1.id1 = id))
Columnar Chunk Groups Removed by Filter: 19
(10 rows)
SELECT * FROM r1, coltest WHERE
id1 = id AND x1 > 15000 AND x1::text > '000000' AND n1 % 10 = 0;
id1 | n1 | id | x1 | x2 | x3
---------------------------------------------------------------------
4567 | 45000 | 4567 | 45670 | 456700 | 4567000
9101 | 176000 | 9101 | 91010 | 910100 | 9101000
18942 | 189430 | 18942 | 189420 | 1894200 | 18942000
(3 rows)
-- test equivalence classes
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM r1, r2, r3, r4, r5, r6, r7, coltest WHERE
id = id1 AND id1 = id2 AND id2 = id3 AND id3 = id4 AND
id4 = id5 AND id5 = id6 AND id6 = id7;
QUERY PLAN
---------------------------------------------------------------------
Nested Loop (actual rows=1 loops=1)
Join Filter: (coltest.id = r7.id7)
-> Nested Loop (actual rows=1 loops=1)
Join Filter: (coltest.id = r6.id6)
-> Nested Loop (actual rows=1 loops=1)
Join Filter: (coltest.id = r5.id5)
-> Nested Loop (actual rows=1 loops=1)
Join Filter: (coltest.id = r4.id4)
Rows Removed by Join Filter: 4
-> Nested Loop (actual rows=5 loops=1)
-> Nested Loop (actual rows=5 loops=1)
Join Filter: (r1.id1 = r3.id3)
Rows Removed by Join Filter: 20
-> Nested Loop (actual rows=5 loops=1)
Join Filter: (r1.id1 = r2.id2)
Rows Removed by Join Filter: 20
-> Seq Scan on r1 (actual rows=5 loops=1)
-> Seq Scan on r2 (actual rows=5 loops=5)
-> Seq Scan on r3 (actual rows=5 loops=5)
-> Custom Scan (ColumnarScan) on coltest (actual rows=1 loops=5)
Filter: (r1.id1 = id)
Rows Removed by Filter: 999
Columnar Projected Columns: id, x1, x2, x3
Columnar Chunk Group Filters: (r1.id1 = id)
Columnar Chunk Groups Removed by Filter: 19
-> Seq Scan on r4 (actual rows=1 loops=5)
-> Seq Scan on r5 (actual rows=1 loops=1)
-> Seq Scan on r6 (actual rows=1 loops=1)
-> Seq Scan on r7 (actual rows=1 loops=1)
(29 rows)
SELECT * FROM r1, r2, r3, r4, r5, r6, r7, coltest WHERE
id = id1 AND id1 = id2 AND id2 = id3 AND id3 = id4 AND
id4 = id5 AND id5 = id6 AND id6 = id7;
id1 | n1 | id2 | n2 | id3 | n3 | id4 | n4 | id5 | n5 | id6 | n6 | id7 | n7 | id | x1 | x2 | x3
---------------------------------------------------------------------
1234 | 12350 | 1234 | 123502 | 1234 | 1235075 | 1234 | -1 | 1234 | -1 | 1234 | -1 | 1234 | -1 | 1234 | 12340 | 123400 | 1234000
(1 row)
-- test path generation with different thresholds
set columnar.planner_debug_level = 'notice';
set columnar.max_custom_scan_paths to 10;
EXPLAIN (costs off, timing off, summary off)
SELECT * FROM coltest c1, coltest c2, coltest c3, coltest c4 WHERE
c1.id = c2.id and c1.id = c3.id and c1.id = c4.id;
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: unparameterized; 0 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: parameterized by rels {c2}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: parameterized by rels {c2, c3}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: parameterized by rels {c2, c3, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: parameterized by rels {c2, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: parameterized by rels {c3}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: parameterized by rels {c3, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: parameterized by rels {c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: unparameterized; 0 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: parameterized by rels {c1}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: parameterized by rels {c1, c3}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: parameterized by rels {c1, c3, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: parameterized by rels {c1, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: parameterized by rels {c3}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: parameterized by rels {c3, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: parameterized by rels {c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: unparameterized; 0 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: parameterized by rels {c1}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: parameterized by rels {c1, c2}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: parameterized by rels {c1, c2, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: parameterized by rels {c1, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: parameterized by rels {c2}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: parameterized by rels {c2, c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: parameterized by rels {c4}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: unparameterized; 0 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: parameterized by rels {c1}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: parameterized by rels {c1, c2}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: parameterized by rels {c1, c2, c3}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: parameterized by rels {c1, c3}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: parameterized by rels {c2}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: parameterized by rels {c2, c3}; 1 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: parameterized by rels {c3}; 1 clauses pushed down
QUERY PLAN
---------------------------------------------------------------------
Nested Loop
-> Nested Loop
-> Nested Loop
-> Custom Scan (ColumnarScan) on coltest c1
Columnar Projected Columns: id, x1, x2, x3
-> Custom Scan (ColumnarScan) on coltest c2
Filter: (c1.id = id)
Columnar Projected Columns: id, x1, x2, x3
Columnar Chunk Group Filters: (c1.id = id)
-> Custom Scan (ColumnarScan) on coltest c3
Filter: (c1.id = id)
Columnar Projected Columns: id, x1, x2, x3
Columnar Chunk Group Filters: (c1.id = id)
-> Custom Scan (ColumnarScan) on coltest c4
Filter: (c1.id = id)
Columnar Projected Columns: id, x1, x2, x3
Columnar Chunk Group Filters: (c1.id = id)
(17 rows)
set columnar.max_custom_scan_paths to 2;
EXPLAIN (costs off, timing off, summary off)
SELECT * FROM coltest c1, coltest c2, coltest c3, coltest c4 WHERE
c1.id = c2.id and c1.id = c3.id and c1.id = c4.id;
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: unparameterized; 0 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c2
DETAIL: unparameterized; 0 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c3
DETAIL: unparameterized; 0 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for c4
DETAIL: unparameterized; 0 clauses pushed down
QUERY PLAN
---------------------------------------------------------------------
Nested Loop
Join Filter: (c1.id = c4.id)
-> Nested Loop
Join Filter: (c1.id = c3.id)
-> Nested Loop
Join Filter: (c1.id = c2.id)
-> Custom Scan (ColumnarScan) on coltest c1
Columnar Projected Columns: id, x1, x2, x3
-> Custom Scan (ColumnarScan) on coltest c2
Columnar Projected Columns: id, x1, x2, x3
-> Custom Scan (ColumnarScan) on coltest c3
Columnar Projected Columns: id, x1, x2, x3
-> Custom Scan (ColumnarScan) on coltest c4
Columnar Projected Columns: id, x1, x2, x3
(14 rows)
set columnar.max_custom_scan_paths to default;
set columnar.planner_debug_level to default;
-- test more complex parameterization
set columnar.planner_debug_level = 'notice';
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM r1, r2, r3, coltest WHERE
id1 = id2 AND id2 = id3 AND id3 = id AND
n1 > x1 AND n2 > x2 AND n3 > x3;
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: unparameterized; 0 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: parameterized by rels {r1}; 2 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: parameterized by rels {r1, r2}; 3 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: parameterized by rels {r1, r2, r3}; 4 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: parameterized by rels {r1, r3}; 3 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: parameterized by rels {r2}; 2 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: parameterized by rels {r2, r3}; 3 clauses pushed down
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: parameterized by rels {r3}; 2 clauses pushed down
QUERY PLAN
---------------------------------------------------------------------
Nested Loop (actual rows=3 loops=1)
Join Filter: ((r3.n3 > coltest.x3) AND (r1.id1 = r3.id3))
Rows Removed by Join Filter: 12
-> Nested Loop (actual rows=3 loops=1)
Join Filter: ((r2.n2 > coltest.x2) AND (r1.id1 = r2.id2))
Rows Removed by Join Filter: 12
-> Nested Loop (actual rows=3 loops=1)
-> Seq Scan on r1 (actual rows=5 loops=1)
-> Custom Scan (ColumnarScan) on coltest (actual rows=1 loops=5)
Filter: ((r1.n1 > x1) AND (r1.id1 = id))
Rows Removed by Filter: 799
Columnar Projected Columns: id, x1, x2, x3
Columnar Chunk Group Filters: ((r1.n1 > x1) AND (r1.id1 = id))
Columnar Chunk Groups Removed by Filter: 19
-> Seq Scan on r2 (actual rows=5 loops=3)
-> Seq Scan on r3 (actual rows=5 loops=3)
(16 rows)
set columnar.planner_debug_level to default;
SELECT * FROM r1, r2, r3, coltest WHERE
id1 = id2 AND id2 = id3 AND id3 = id AND
n1 > x1 AND n2 > x2 AND n3 > x3;
id1 | n1 | id2 | n2 | id3 | n3 | id | x1 | x2 | x3
---------------------------------------------------------------------
1234 | 12350 | 1234 | 123502 | 1234 | 1235075 | 1234 | 12340 | 123400 | 1234000
9101 | 176000 | 9101 | 1760002 | 9101 | 17600075 | 9101 | 91010 | 910100 | 9101000
18942 | 189430 | 18942 | 1894302 | 18942 | 18943075 | 18942 | 189420 | 1894200 | 18942000
(3 rows)
-- test partitioning parameterization
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM r1, coltest_part WHERE
id1 = id AND n1 > x1;
QUERY PLAN
---------------------------------------------------------------------
Nested Loop (actual rows=3 loops=1)
-> Seq Scan on r1 (actual rows=5 loops=1)
-> Append (actual rows=1 loops=5)
-> Custom Scan (ColumnarScan) on coltest_part0 (actual rows=1 loops=3)
Filter: ((r1.n1 > x1) AND (r1.id1 = id))
Rows Removed by Filter: 999
Columnar Projected Columns: id, x1, x2, x3
Columnar Chunk Group Filters: ((r1.n1 > x1) AND (r1.id1 = id))
Columnar Chunk Groups Removed by Filter: 9
-> Seq Scan on coltest_part1 (actual rows=0 loops=2)
Filter: ((r1.n1 > x1) AND (r1.id1 = id))
Rows Removed by Filter: 10000
(12 rows)
SELECT * FROM r1, coltest_part WHERE
id1 = id AND n1 > x1;
id1 | n1 | id | x1 | x2 | x3
---------------------------------------------------------------------
1234 | 12350 | 1234 | 12340 | 123400 | 1234000
9101 | 176000 | 9101 | 91010 | 910100 | 9101000
18942 | 189430 | 18942 | 189420 | 1894200 | 18942000
(3 rows)
set enable_mergejoin to default;
set enable_hashjoin to default;
set enable_material to default;
set columnar.planner_debug_level = 'notice';
alter table coltest add column x5 int default (random()*20000)::int;
analyze coltest;
-- test that expressions on whole-row references are not pushed down
select * from coltest where coltest = (1,1,1,1);
NOTICE: columnar planner: cannot push down clause: var is whole-row reference
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: unparameterized; 0 clauses pushed down
id | x1 | x2 | x3 | x5
---------------------------------------------------------------------
(0 rows)
-- test that expressions on uncorrelated attributes are not pushed down
set columnar.qual_pushdown_correlation to default;
select * from coltest where x5 = 23484;
NOTICE: columnar planner: cannot push down clause: var attribute 5 is uncorrelated
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: unparameterized; 0 clauses pushed down
id | x1 | x2 | x3 | x5
---------------------------------------------------------------------
(0 rows)
-- test that expressions on volatile functions are not pushed down
create function vol() returns int language plpgsql as $$
BEGIN
RETURN 1;
END;
$$;
select * from coltest where x3 = vol();
NOTICE: columnar planner: cannot push down clause: expr contains volatile functions
NOTICE: columnar planner: adding CustomScan path for coltest
DETAIL: unparameterized; 0 clauses pushed down
id | x1 | x2 | x3 | x5
---------------------------------------------------------------------
(0 rows)
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM coltest c1 WHERE ceil(x1) > 4222;
NOTICE: columnar planner: cannot push down clause: must match 'Var <op> Expr' or 'Expr <op> Var'
HINT: Var must only reference this rel, and Expr must not reference this rel
NOTICE: columnar planner: adding CustomScan path for c1
DETAIL: unparameterized; 0 clauses pushed down
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on coltest c1 (actual rows=19577 loops=1)
Filter: (ceil((x1)::double precision) > '4222'::double precision)
Rows Removed by Filter: 423
Columnar Projected Columns: id, x1, x2, x3, x5
(4 rows)
set columnar.planner_debug_level to default;
--
-- https://github.com/citusdata/citus/issues/4488
--
create table columnar_prepared_stmt (x int, y int) using columnar;
insert into columnar_prepared_stmt select s, s from generate_series(1,5000000) s;
prepare foo (int) as select x from columnar_prepared_stmt where x = $1;
execute foo(3);
x
---------------------------------------------------------------------
3
(1 row)
execute foo(3);
x
---------------------------------------------------------------------
3
(1 row)
execute foo(3);
x
---------------------------------------------------------------------
3
(1 row)
execute foo(3);
x
---------------------------------------------------------------------
3
(1 row)
select filtered_row_count('execute foo(3)');
filtered_row_count
---------------------------------------------------------------------
9999
(1 row)
select filtered_row_count('execute foo(3)');
filtered_row_count
---------------------------------------------------------------------
9999
(1 row)
select filtered_row_count('execute foo(3)');
filtered_row_count
---------------------------------------------------------------------
9999
(1 row)
select filtered_row_count('execute foo(3)');
filtered_row_count
---------------------------------------------------------------------
9999
(1 row)
drop table columnar_prepared_stmt;


@@ -10,9 +10,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off) SELECT * FROM test_curs
Custom Scan (ColumnarScan) on test_cursor (actual rows=101 loops=1)
Filter: (a = 25)
Rows Removed by Filter: 9899
-Columnar Chunk Groups Removed by Filter: 1
Columnar Projected Columns: a, b
-(5 rows)
+Columnar Chunk Group Filters: (a = 25)
+Columnar Chunk Groups Removed by Filter: 1
+(6 rows)
BEGIN;
DECLARE a_25 SCROLL CURSOR
@@ -112,9 +113,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off) SELECT * FROM test_curs
Custom Scan (ColumnarScan) on test_cursor (actual rows=7575 loops=1)
Filter: (a > 25)
Rows Removed by Filter: 2626
-Columnar Chunk Groups Removed by Filter: 0
Columnar Projected Columns: a, b
-(5 rows)
+Columnar Chunk Group Filters: (a > 25)
+Columnar Chunk Groups Removed by Filter: 0
+(6 rows)
BEGIN;
DECLARE a_25 SCROLL CURSOR


@@ -23,7 +23,7 @@ SELECT count(*)
FROM users
JOIN things ON (users.id = things.user_id)
WHERE things.id > 299990;
-QUERY PLAN
+QUERY PLAN
---------------------------------------------------------------------
Aggregate
-> Nested Loop
@@ -31,9 +31,10 @@ WHERE things.id > 299990;
-> Custom Scan (ColumnarScan) on things
Filter: (id > 299990)
Columnar Projected Columns: id, user_id
+Columnar Chunk Group Filters: (id > 299990)
-> Custom Scan (ColumnarScan) on users
Columnar Projected Columns: id
-(8 rows)
+(9 rows)
EXPLAIN (COSTS OFF)
SELECT u1.id, u2.id, COUNT(u2.*)
@ -41,7 +42,7 @@ FROM users u1
JOIN users u2 ON (u1.id::text = u2.name)
WHERE u2.id > 299990
GROUP BY u1.id, u2.id;
QUERY PLAN
---------------------------------------------------------------------
HashAggregate
Group Key: u1.id, u2.id
@ -50,9 +51,10 @@ GROUP BY u1.id, u2.id;
-> Custom Scan (ColumnarScan) on users u2
Filter: (id > 299990)
Columnar Projected Columns: id, name
Columnar Chunk Group Filters: (id > 299990)
-> Custom Scan (ColumnarScan) on users u1
Columnar Projected Columns: id
(9 rows)
(10 rows)
SET client_min_messages TO warning;
DROP SCHEMA am_columnar_join CASCADE;

View File

@ -132,20 +132,22 @@ ANALYZE parent;
-- scan on heap partition
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
WHERE ts > '2020-02-20' AND n < 5;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
-> Append
-> Custom Scan (ColumnarScan) on p1 parent_1
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
Columnar Projected Columns: ts, i, n
Columnar Chunk Group Filters: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
-> Index Scan using p2_n_idx on p2 parent_2
Index Cond: (n < '5'::numeric)
Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone)
-> Custom Scan (ColumnarScan) on p3 parent_3
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
Columnar Projected Columns: ts, i, n
(11 rows)
Columnar Chunk Group Filters: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
(13 rows)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';

View File

@ -132,20 +132,22 @@ ANALYZE parent;
-- scan on heap partition
EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent
WHERE ts > '2020-02-20' AND n < 5;
QUERY PLAN
---------------------------------------------------------------------
Aggregate
-> Append
-> Custom Scan (ColumnarScan) on p1
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
Columnar Projected Columns: ts, i, n
Columnar Chunk Group Filters: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
-> Index Scan using p2_n_idx on p2
Index Cond: (n < '5'::numeric)
Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone)
-> Custom Scan (ColumnarScan) on p3
Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
Columnar Projected Columns: ts, i, n
(11 rows)
Columnar Chunk Group Filters: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric))
(13 rows)
BEGIN;
SET LOCAL columnar.enable_custom_scan TO 'OFF';

View File

@ -4,6 +4,8 @@ CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar;
INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i;
CREATE INDEX full_correlated_btree ON full_correlated (a);
ANALYZE full_correlated;
-- Prevent qual pushdown from competing with index scans.
SET columnar.enable_qual_pushdown = false;
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_correlated WHERE a=200;
@ -572,5 +574,54 @@ BEGIN;
(1 row)
ROLLBACK;
SET columnar.enable_qual_pushdown TO DEFAULT;
BEGIN;
SET LOCAL columnar.stripe_row_limit = 2000;
SET LOCAL columnar.chunk_group_row_limit = 1000;
CREATE TABLE correlated(x int) using columnar;
INSERT INTO correlated
SELECT g FROM generate_series(1,100000) g;
CREATE TABLE uncorrelated(x int) using columnar;
INSERT INTO uncorrelated
SELECT (g * 19) % 100000 FROM generate_series(1,100000) g;
COMMIT;
CREATE INDEX correlated_idx ON correlated(x);
CREATE INDEX uncorrelated_idx ON uncorrelated(x);
ANALYZE correlated, uncorrelated;
-- should choose chunk group filtering; selective and correlated
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM correlated WHERE x = 78910;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on correlated (actual rows=1 loops=1)
Filter: (x = 78910)
Rows Removed by Filter: 999
Columnar Projected Columns: x
Columnar Chunk Group Filters: (x = 78910)
Columnar Chunk Groups Removed by Filter: 99
(6 rows)
SELECT * FROM correlated WHERE x = 78910;
x
---------------------------------------------------------------------
78910
(1 row)
-- should choose index scan; selective but uncorrelated
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM uncorrelated WHERE x = 78910;
QUERY PLAN
---------------------------------------------------------------------
Index Only Scan using uncorrelated_idx on uncorrelated (actual rows=1 loops=1)
Index Cond: (x = 78910)
Heap Fetches: 1
(3 rows)
SELECT * FROM uncorrelated WHERE x = 78910;
x
---------------------------------------------------------------------
78910
(1 row)
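The opposite plan choices come from physical correlation: qual pushdown is only credited when the column's correlation clears the columnar.qual_pushdown_correlation cutoff, so the sequentially loaded correlated.x qualifies while the scrambled uncorrelated.x does not. The statistic itself can be inspected in stock PostgreSQL:
SELECT tablename, attname, correlation
FROM pg_stats
WHERE tablename IN ('correlated', 'uncorrelated') AND attname = 'x';
-- expect correlation near 1.0 for correlated.x and near 0 for uncorrelated.x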
SET client_min_messages TO WARNING;
DROP SCHEMA columnar_paths CASCADE;

View File

@ -139,12 +139,13 @@ explain (costs off, summary off) select * from
-> Custom Scan (ColumnarScan) on int8_tbl_columnar a
Columnar Projected Columns: q1, q2
-> Nested Loop
-> Custom Scan (ColumnarScan) on int8_tbl_columnar c
Columnar Projected Columns: q1
-> Custom Scan (ColumnarScan) on int8_tbl_columnar b
Filter: (a.q2 = q1)
Columnar Projected Columns: q1
-> Custom Scan (ColumnarScan) on int8_tbl_columnar c
Columnar Projected Columns: q1
(9 rows)
Columnar Chunk Group Filters: (a.q2 = q1)
(10 rows)
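The reordered join above is the parameterized path at work: the qual a.q2 = q1 attaches to the inner ColumnarScan as both a row Filter and a chunk group filter, so each outer row's q2 is bound as an executor parameter and re-applied on rescan. A minimal standalone sketch of the same shape (outer_vals is a hypothetical helper table; the expected plan lines are illustrative):
create table outer_vals(v int8);
insert into outer_vals values (123), (456);
set enable_hashjoin = off; set enable_mergejoin = off; set enable_material = off;
explain (costs off)
  select * from outer_vals o, int8_tbl_columnar c where o.v = c.q1;
-- expect a Nested Loop whose inner ColumnarScan shows
--   Filter: (o.v = q1) and Columnar Chunk Group Filters: (o.v = q1)
reset enable_hashjoin; reset enable_mergejoin; reset enable_material;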
explain (costs off, summary off)
SELECT COUNT(*) FROM INT8_TBL_columnar t1 JOIN

View File

@ -260,7 +260,7 @@ EXPLAIN (ANALYZE true, COSTS off, TIMING off, SUMMARY off) EXECUTE p1(20);
(2 rows)
SELECT * FROM t ORDER BY a;
a | b
---------------------------------------------------------------------
1 | 2
2 | 4
@ -327,12 +327,13 @@ SELECT * FROM t ORDER BY a;
-- SELECT with 0 params
PREPARE p3 AS SELECT * FROM t WHERE a = 8;
EXPLAIN (COSTS OFF) EXECUTE p3;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on t
Filter: (a = 8)
Columnar Projected Columns: a, b
(3 rows)
Columnar Chunk Group Filters: (a = 8)
(4 rows)
EXECUTE p3;
a | b
@ -347,9 +348,10 @@ EXPLAIN (ANALYZE true, COSTS off, TIMING off, SUMMARY off) EXECUTE p3;
Custom Scan (ColumnarScan) on t (actual rows=2 loops=1)
Filter: (a = 8)
Rows Removed by Filter: 2
Columnar Chunk Groups Removed by Filter: 8
Columnar Projected Columns: a, b
(5 rows)
Columnar Chunk Group Filters: (a = 8)
Columnar Chunk Groups Removed by Filter: 8
(6 rows)
SELECT * FROM t ORDER BY a;
a | b
@ -382,12 +384,13 @@ SELECT * FROM t ORDER BY a;
-- SELECT with 1 param
PREPARE p5(int) AS SELECT * FROM t WHERE a = $1;
EXPLAIN (COSTS OFF) EXECUTE p5(16);
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on t
Filter: (a = 16)
Columnar Projected Columns: a, b
(3 rows)
Columnar Chunk Group Filters: (a = 16)
(4 rows)
EXECUTE p5(16);
a | b
@ -400,9 +403,10 @@ EXPLAIN (ANALYZE true, COSTS off, TIMING off, SUMMARY off) EXECUTE p5(9);
Custom Scan (ColumnarScan) on t (actual rows=2 loops=1)
Filter: (a = 9)
Rows Removed by Filter: 2
Columnar Chunk Groups Removed by Filter: 8
Columnar Projected Columns: a, b
(5 rows)
Columnar Chunk Group Filters: (a = 9)
Columnar Chunk Groups Removed by Filter: 8
(6 rows)
SELECT * FROM t ORDER BY a;
a | b
@ -435,12 +439,13 @@ SELECT * FROM t ORDER BY a;
-- SELECT with >1 params
PREPARE p6(int, int) AS SELECT * FROM t WHERE a = $1+1 AND b = $2+1;
EXPLAIN (COSTS OFF) EXECUTE p6(30, 40);
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (ColumnarScan) on t
Filter: ((a = 31) AND (b = 41))
Columnar Projected Columns: a, b
(3 rows)
Columnar Chunk Group Filters: ((a = 31) AND (b = 41))
(4 rows)
EXECUTE p6(30, 40);
a | b
@ -454,9 +459,10 @@ EXPLAIN (ANALYZE true, COSTS off, TIMING off, SUMMARY off) EXECUTE p6(50, 60);
Custom Scan (ColumnarScan) on t (actual rows=1 loops=1)
Filter: ((a = 51) AND (b = 61))
Rows Removed by Filter: 3
Columnar Chunk Groups Removed by Filter: 9
Columnar Projected Columns: a, b
(5 rows)
Columnar Chunk Group Filters: ((a = 51) AND (b = 61))
Columnar Chunk Groups Removed by Filter: 9
(6 rows)
SELECT * FROM t ORDER BY a;
a | b

View File

@ -26,6 +26,7 @@ $$
END;
$$ LANGUAGE PLPGSQL;
set columnar.qual_pushdown_correlation = 0.0;
-- Create and load data
-- chunk_group_row_limit '1000', stripe_row_limit '2000'
@ -152,3 +153,166 @@ create table part_2_columnar partition of part_table for values from (0) to (150
insert into part_table select generate_series(1,159999);
select filtered_row_count('select count(*) from part_table where id > 75000');
drop table part_table;
-- test join parameterization
set columnar.stripe_row_limit = 2000;
set columnar.chunk_group_row_limit = 1000;
create table r1(id1 int, n1 int); -- row
create table r2(id2 int, n2 int); -- row
create table r3(id3 int, n3 int); -- row
create table r4(id4 int, n4 int); -- row
create table r5(id5 int, n5 int); -- row
create table r6(id6 int, n6 int); -- row
create table r7(id7 int, n7 int); -- row
create table coltest(id int, x1 int, x2 int, x3 int) using columnar;
create table coltest_part(id int, x1 int, x2 int, x3 int)
partition by range (id);
create table coltest_part0
partition of coltest_part for values from (0) to (10000)
using columnar;
create table coltest_part1
partition of coltest_part for values from (10000) to (20000); -- row
set columnar.stripe_row_limit to default;
set columnar.chunk_group_row_limit to default;
insert into r1 values(1234, 12350);
insert into r1 values(4567, 45000);
insert into r1 values(9101, 176000);
insert into r1 values(14202, 7);
insert into r1 values(18942, 189430);
insert into r2 values(1234, 123502);
insert into r2 values(4567, 450002);
insert into r2 values(9101, 1760002);
insert into r2 values(14202, 72);
insert into r2 values(18942, 1894302);
insert into r3 values(1234, 1235075);
insert into r3 values(4567, 4500075);
insert into r3 values(9101, 17600075);
insert into r3 values(14202, 775);
insert into r3 values(18942, 18943075);
insert into r4 values(1234, -1);
insert into r5 values(1234, -1);
insert into r6 values(1234, -1);
insert into r7 values(1234, -1);
insert into coltest
select g, g*10, g*100, g*1000 from generate_series(0, 19999) g;
insert into coltest_part
select g, g*10, g*100, g*1000 from generate_series(0, 19999) g;
ANALYZE r1, r2, r3, coltest, coltest_part;
-- force nested loop
set enable_mergejoin=false;
set enable_hashjoin=false;
set enable_material=false;
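-- (with merge join, hash join and materialization disabled, the planner must use
-- a nested loop, whose inner side is where a parameterized ColumnarScan pays off)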
-- test different kinds of expressions
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM r1, coltest WHERE
id1 = id AND x1 > 15000 AND x1::text > '000000' AND n1 % 10 = 0;
SELECT * FROM r1, coltest WHERE
id1 = id AND x1 > 15000 AND x1::text > '000000' AND n1 % 10 = 0;
-- test equivalence classes
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM r1, r2, r3, r4, r5, r6, r7, coltest WHERE
id = id1 AND id1 = id2 AND id2 = id3 AND id3 = id4 AND
id4 = id5 AND id5 = id6 AND id6 = id7;
SELECT * FROM r1, r2, r3, r4, r5, r6, r7, coltest WHERE
id = id1 AND id1 = id2 AND id2 = id3 AND id3 = id4 AND
id4 = id5 AND id5 = id6 AND id6 = id7;
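-- (the equivalence class id = id1 = ... = id7 lets the planner parameterize the
-- columnar scan by any one of the seven outer rels, not only the clause written
-- against id)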
-- test path generation with different thresholds
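-- (columnar.max_custom_scan_paths bounds how many CustomScan paths are generated
-- for a single relation, so lowering it prunes the candidate parameterizations)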
set columnar.planner_debug_level = 'notice';
set columnar.max_custom_scan_paths to 10;
EXPLAIN (costs off, timing off, summary off)
SELECT * FROM coltest c1, coltest c2, coltest c3, coltest c4 WHERE
c1.id = c2.id and c1.id = c3.id and c1.id = c4.id;
set columnar.max_custom_scan_paths to 2;
EXPLAIN (costs off, timing off, summary off)
SELECT * FROM coltest c1, coltest c2, coltest c3, coltest c4 WHERE
c1.id = c2.id and c1.id = c3.id and c1.id = c4.id;
set columnar.max_custom_scan_paths to default;
set columnar.planner_debug_level to default;
-- test more complex parameterization
set columnar.planner_debug_level = 'notice';
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM r1, r2, r3, coltest WHERE
id1 = id2 AND id2 = id3 AND id3 = id AND
n1 > x1 AND n2 > x2 AND n3 > x3;
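-- (each qual references exactly one other rel, so all three can be pushed into a
-- single scan parameterized by r1, r2 and r3 at once)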
set columnar.planner_debug_level to default;
SELECT * FROM r1, r2, r3, coltest WHERE
id1 = id2 AND id2 = id3 AND id3 = id AND
n1 > x1 AND n2 > x2 AND n3 > x3;
-- test partitioning parameterization
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM r1, coltest_part WHERE
id1 = id AND n1 > x1;
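-- (under the partitioned parent, only the columnar partition can take a
-- parameterized ColumnarScan; the row partition keeps its ordinary scan)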
SELECT * FROM r1, coltest_part WHERE
id1 = id AND n1 > x1;
set enable_mergejoin to default;
set enable_hashjoin to default;
set enable_material to default;
set columnar.planner_debug_level = 'notice';
alter table coltest add column x5 int default (random()*20000)::int;
analyze coltest;
-- test that expressions on whole-row references are not pushed down
select * from coltest where coltest = (1,1,1,1);
-- test that expressions on uncorrelated attributes are not pushed down
set columnar.qual_pushdown_correlation to default;
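-- (x5 was filled from random(), so its correlation is ~0 and falls below the
-- default columnar.qual_pushdown_correlation cutoff)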
select * from coltest where x5 = 23484;
-- test that expressions on volatile functions are not pushed down
create function vol() returns int language plpgsql as $$
BEGIN
RETURN 1;
END;
$$;
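-- (plpgsql functions are VOLATILE by default, so vol() counts as volatile even
-- though it always returns 1)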
select * from coltest where x3 = vol();
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM coltest c1 WHERE ceil(x1) > 4222;
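-- (ceil(x1) wraps the Var in a function call, so the clause fails the
-- 'Var <op> Expr' / 'Expr <op> Var' shape check and is applied only as a row filter)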
set columnar.planner_debug_level to default;
--
-- https://github.com/citusdata/citus/issues/4488
--
create table columnar_prepared_stmt (x int, y int) using columnar;
insert into columnar_prepared_stmt select s, s from generate_series(1,5000000) s;
prepare foo (int) as select x from columnar_prepared_stmt where x = $1;
execute foo(3);
execute foo(3);
execute foo(3);
execute foo(3);
select filtered_row_count('execute foo(3)');
select filtered_row_count('execute foo(3)');
select filtered_row_count('execute foo(3)');
select filtered_row_count('execute foo(3)');
drop table columnar_prepared_stmt;

View File

@ -6,6 +6,9 @@ INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i
CREATE INDEX full_correlated_btree ON full_correlated (a);
ANALYZE full_correlated;
-- Prevent qual pushdown from competing with index scans.
SET columnar.enable_qual_pushdown = false;
SELECT columnar_test_helpers.uses_index_scan (
$$
SELECT a FROM full_correlated WHERE a=200;
@ -337,5 +340,35 @@ BEGIN;
);
ROLLBACK;
SET columnar.enable_qual_pushdown TO DEFAULT;
BEGIN;
SET LOCAL columnar.stripe_row_limit = 2000;
SET LOCAL columnar.chunk_group_row_limit = 1000;
CREATE TABLE correlated(x int) using columnar;
INSERT INTO correlated
SELECT g FROM generate_series(1,100000) g;
CREATE TABLE uncorrelated(x int) using columnar;
INSERT INTO uncorrelated
SELECT (g * 19) % 100000 FROM generate_series(1,100000) g;
COMMIT;
CREATE INDEX correlated_idx ON correlated(x);
CREATE INDEX uncorrelated_idx ON uncorrelated(x);
ANALYZE correlated, uncorrelated;
-- should choose chunk group filtering; selective and correlated
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM correlated WHERE x = 78910;
SELECT * FROM correlated WHERE x = 78910;
-- should choose index scan; selective but uncorrelated
EXPLAIN (analyze on, costs off, timing off, summary off)
SELECT * FROM uncorrelated WHERE x = 78910;
SELECT * FROM uncorrelated WHERE x = 78910;
SET client_min_messages TO WARNING;
DROP SCHEMA columnar_paths CASCADE;