mirror of https://github.com/citusdata/citus.git
389 lines
15 KiB
Plaintext
389 lines
15 KiB
Plaintext
--
|
|
-- Testing indexes on columnar tables.
|
|
--
|
|
CREATE SCHEMA columnar_indexes;
|
|
SET search_path tO columnar_indexes, public;
|
|
--
|
|
-- create index with the concurrent option. We should
|
|
-- error out during index creation.
|
|
-- https://github.com/citusdata/citus/issues/4599
|
|
--
|
|
create table t(a int, b int) using columnar;
|
|
create index CONCURRENTLY t_idx on t(a, b);
|
|
ERROR: concurrent index commands are not supported for columnar tables
|
|
\d t
|
|
Table "columnar_indexes.t"
|
|
Column | Type | Collation | Nullable | Default
|
|
---------------------------------------------------------------------
|
|
a | integer | | |
|
|
b | integer | | |
|
|
|
|
explain insert into t values (1, 2);
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Insert on t (cost=0.00..0.01 rows=1 width=8)
|
|
-> Result (cost=0.00..0.01 rows=1 width=8)
|
|
(2 rows)
|
|
|
|
insert into t values (1, 2);
|
|
SELECT * FROM t;
|
|
a | b
|
|
---------------------------------------------------------------------
|
|
1 | 2
|
|
(1 row)
|
|
|
|
create index t_idx on t(a, b);
|
|
\d t
|
|
Table "columnar_indexes.t"
|
|
Column | Type | Collation | Nullable | Default
|
|
---------------------------------------------------------------------
|
|
a | integer | | |
|
|
b | integer | | |
|
|
Indexes:
|
|
"t_idx" btree (a, b)
|
|
|
|
explain insert into t values (1, 2);
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Insert on t (cost=0.00..0.01 rows=1 width=8)
|
|
-> Result (cost=0.00..0.01 rows=1 width=8)
|
|
(2 rows)
|
|
|
|
insert into t values (3, 4);
|
|
SELECT * FROM t;
|
|
a | b
|
|
---------------------------------------------------------------------
|
|
1 | 2
|
|
3 | 4
|
|
(2 rows)
|
|
|
|
-- make sure that we test index scan
|
|
set columnar.enable_custom_scan to 'off';
|
|
set enable_seqscan to off;
|
|
CREATE table columnar_table (a INT, b int) USING columnar;
|
|
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(0, 16000) i;
|
|
-- unique --
|
|
BEGIN;
|
|
INSERT INTO columnar_table VALUES (100000000);
|
|
SAVEPOINT s1;
|
|
-- errors out due to unflushed data in upper transaction
|
|
CREATE UNIQUE INDEX ON columnar_table (a);
|
|
ERROR: cannot read from table when there is unflushed data in upper transactions
|
|
ROLLBACK;
|
|
CREATE UNIQUE INDEX ON columnar_table (a);
|
|
BEGIN;
|
|
INSERT INTO columnar_table VALUES (16050);
|
|
SAVEPOINT s1;
|
|
-- index scan errors out due to unflushed data in upper transaction
|
|
SELECT a FROM columnar_table WHERE a = 16050;
|
|
ERROR: cannot read from index when there is unflushed data in upper transactions
|
|
ROLLBACK;
|
|
EXPLAIN (COSTS OFF) SELECT * FROM columnar_table WHERE a=6456;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Index Scan using columnar_table_a_idx on columnar_table
|
|
Index Cond: (a = 6456)
|
|
(2 rows)
|
|
|
|
EXPLAIN (COSTS OFF) SELECT a FROM columnar_table WHERE a=6456;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Index Only Scan using columnar_table_a_idx on columnar_table
|
|
Index Cond: (a = 6456)
|
|
(2 rows)
|
|
|
|
SELECT (SELECT a FROM columnar_table WHERE a=6456 limit 1)=6456;
|
|
?column?
|
|
---------------------------------------------------------------------
|
|
t
|
|
(1 row)
|
|
|
|
SELECT (SELECT b FROM columnar_table WHERE a=6456 limit 1)=6456*2;
|
|
?column?
|
|
---------------------------------------------------------------------
|
|
t
|
|
(1 row)
|
|
|
|
-- even if a=16050 doesn't exist, we try to insert it twice so this should error out
|
|
INSERT INTO columnar_table VALUES (16050), (16050);
|
|
ERROR: duplicate key value violates unique constraint "columnar_table_a_idx"
|
|
DETAIL: Key (a)=(16050) already exists.
|
|
-- should work
|
|
INSERT INTO columnar_table VALUES (16050);
|
|
-- check edge cases around stripe boundaries, error out
|
|
INSERT INTO columnar_table VALUES (16050);
|
|
ERROR: duplicate key value violates unique constraint "columnar_table_a_idx"
|
|
DETAIL: Key (a)=(16050) already exists.
|
|
INSERT INTO columnar_table VALUES (15999);
|
|
ERROR: duplicate key value violates unique constraint "columnar_table_a_idx"
|
|
DETAIL: Key (a)=(15999) already exists.
|
|
DROP INDEX columnar_table_a_idx;
|
|
CREATE TABLE partial_unique_idx_test (a INT, b INT) USING columnar;
|
|
CREATE UNIQUE INDEX ON partial_unique_idx_test (a)
|
|
WHERE b > 500;
|
|
-- should work since b <= 500 and our partial index doesn't check this interval
|
|
INSERT INTO partial_unique_idx_test VALUES (1, 2), (1, 2);
|
|
-- should work since our partial index wouldn't cover the tuples that we inserted above
|
|
INSERT INTO partial_unique_idx_test VALUES (1, 800);
|
|
INSERT INTO partial_unique_idx_test VALUES (4, 600);
|
|
-- should error out due to (4, 600)
|
|
INSERT INTO partial_unique_idx_test VALUES (4, 700);
|
|
ERROR: duplicate key value violates unique constraint "partial_unique_idx_test_a_idx"
|
|
DETAIL: Key (a)=(4) already exists.
|
|
-- btree --
|
|
CREATE INDEX ON columnar_table (a);
|
|
SELECT (SELECT SUM(b) FROM columnar_table WHERE a>700 and a<965)=439560;
|
|
?column?
|
|
---------------------------------------------------------------------
|
|
t
|
|
(1 row)
|
|
|
|
CREATE INDEX ON columnar_table (b)
|
|
WHERE (b > 30000 AND b < 33000);
|
|
-- partial index should be way smaller than the non-partial index
|
|
SELECT pg_total_relation_size('columnar_table_b_idx') * 5 <
|
|
pg_total_relation_size('columnar_table_a_idx');
|
|
?column?
|
|
---------------------------------------------------------------------
|
|
t
|
|
(1 row)
|
|
|
|
-- can't use index scan due to partial index boundaries
|
|
EXPLAIN (COSTS OFF) SELECT b FROM columnar_table WHERE b = 30000;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Seq Scan on columnar_table
|
|
Filter: (b = 30000)
|
|
(2 rows)
|
|
|
|
-- can use index scan
|
|
EXPLAIN (COSTS OFF) SELECT b FROM columnar_table WHERE b = 30001;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Index Only Scan using columnar_table_b_idx on columnar_table
|
|
Index Cond: (b = 30001)
|
|
(2 rows)
|
|
|
|
-- some more rows
|
|
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16000, 17000) i;
|
|
DROP INDEX columnar_table_a_idx;
|
|
TRUNCATE columnar_table;
|
|
-- pkey --
|
|
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16000, 16499) i;
|
|
ALTER TABLE columnar_table ADD PRIMARY KEY (a);
|
|
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16500, 17000) i;
|
|
BEGIN;
|
|
INSERT INTO columnar_table (a) SELECT 1;
|
|
ROLLBACK;
|
|
-- should work
|
|
INSERT INTO columnar_table (a) SELECT 1;
|
|
-- error out
|
|
INSERT INTO columnar_table VALUES (16100), (16101);
|
|
ERROR: duplicate key value violates unique constraint "columnar_table_pkey"
|
|
DETAIL: Key (a)=(16100) already exists.
|
|
INSERT INTO columnar_table VALUES (16999);
|
|
ERROR: duplicate key value violates unique constraint "columnar_table_pkey"
|
|
DETAIL: Key (a)=(16999) already exists.
|
|
BEGIN;
|
|
REINDEX INDEX columnar_table_pkey;
|
|
-- should error even after reindex
|
|
INSERT INTO columnar_table VALUES (16999);
|
|
ERROR: duplicate key value violates unique constraint "columnar_table_pkey"
|
|
DETAIL: Key (a)=(16999) already exists.
|
|
ROLLBACK;
|
|
VACUUM FULL columnar_table;
|
|
-- should error even after vacuum
|
|
INSERT INTO columnar_table VALUES (16999);
|
|
ERROR: duplicate key value violates unique constraint "columnar_table_pkey"
|
|
DETAIL: Key (a)=(16999) already exists.
|
|
TRUNCATE columnar_table;
|
|
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(1, 160000) i;
|
|
SELECT (SELECT b FROM columnar_table WHERE a = 150000)=300000;
|
|
?column?
|
|
---------------------------------------------------------------------
|
|
t
|
|
(1 row)
|
|
|
|
TRUNCATE columnar_table;
|
|
ALTER TABLE columnar_table DROP CONSTRAINT columnar_table_pkey;
|
|
-- hash --
|
|
INSERT INTO columnar_table (a, b) SELECT i*2,i FROM generate_series(1, 8000) i;
|
|
CREATE INDEX hash_idx ON columnar_table USING HASH (b);
|
|
BEGIN;
|
|
CREATE INDEX hash_idx_fill_factor ON columnar_table USING HASH (b) WITH (fillfactor=10);
|
|
-- same hash index with lower fillfactor should be way bigger
|
|
SELECT pg_total_relation_size ('hash_idx_fill_factor') >
|
|
pg_total_relation_size ('hash_idx') * 5;
|
|
?column?
|
|
---------------------------------------------------------------------
|
|
t
|
|
(1 row)
|
|
|
|
ROLLBACK;
|
|
BEGIN;
|
|
INSERT INTO columnar_table (a, b) SELECT i*3,i FROM generate_series(1, 8000) i;
|
|
ROLLBACK;
|
|
INSERT INTO columnar_table (a, b) SELECT i*4,i FROM generate_series(1, 8000) i;
|
|
SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;
|
|
?column?
|
|
---------------------------------------------------------------------
|
|
t
|
|
(1 row)
|
|
|
|
BEGIN;
|
|
REINDEX TABLE columnar_table;
|
|
SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;
|
|
?column?
|
|
---------------------------------------------------------------------
|
|
t
|
|
(1 row)
|
|
|
|
ROLLBACK;
|
|
VACUUM FULL columnar_table;
|
|
SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;
|
|
?column?
|
|
---------------------------------------------------------------------
|
|
t
|
|
(1 row)
|
|
|
|
-- exclusion constraints --
|
|
CREATE TABLE exclusion_test (c1 INT,c2 INT, c3 INT, c4 BOX,
|
|
EXCLUDE USING btree (c1 WITH =) INCLUDE(c3,c4) WHERE (c1 < 10)) USING columnar;
|
|
-- error out since "c1" is "1" for all rows to be inserted
|
|
INSERT INTO exclusion_test SELECT 1, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;
|
|
ERROR: conflicting key value violates exclusion constraint "exclusion_test_c1_c3_c4_excl"
|
|
DETAIL: Key (c1)=(1) conflicts with existing key (c1)=(1).
|
|
BEGIN;
|
|
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;
|
|
ROLLBACK;
|
|
-- should work
|
|
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;
|
|
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;
|
|
BEGIN;
|
|
-- should work thanks to "where" clause in exclusion constraint
|
|
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;
|
|
ROLLBACK;
|
|
REINDEX TABLE exclusion_test;
|
|
-- should still work after reindex
|
|
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;
|
|
-- make sure that we respect INCLUDE syntax --
|
|
CREATE TABLE include_test (a INT, b BIGINT, c BIGINT, d BIGINT) USING columnar;
|
|
INSERT INTO include_test SELECT i, i, i, i FROM generate_series (1, 1000) i;
|
|
CREATE UNIQUE INDEX unique_a ON include_test (a);
|
|
-- cannot use index only scan
|
|
EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Index Scan using unique_a on include_test
|
|
Index Cond: (a = 500)
|
|
(2 rows)
|
|
|
|
CREATE UNIQUE INDEX unique_a_include_b_c_d ON include_test (a) INCLUDE(b, c, d);
|
|
-- same unique index that includes other columns should be way bigger
|
|
SELECT pg_total_relation_size ('unique_a') * 1.5 <
|
|
pg_total_relation_size ('unique_a_include_b_c_d');
|
|
?column?
|
|
---------------------------------------------------------------------
|
|
t
|
|
(1 row)
|
|
|
|
DROP INDEX unique_a;
|
|
-- should use index only scan since unique_a_include_b_c_d includes column "b" too
|
|
EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Index Only Scan using unique_a_include_b_c_d on include_test
|
|
Index Cond: (a = 500)
|
|
(2 rows)
|
|
|
|
BEGIN;
|
|
SET enable_indexonlyscan = OFF;
|
|
-- show that we respect enable_indexonlyscan GUC
|
|
EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Index Scan using unique_a_include_b_c_d on include_test
|
|
Index Cond: (a = 500)
|
|
(2 rows)
|
|
|
|
ROLLBACK;
|
|
-- make sure that we read the correct value for "b" when doing index only scan
|
|
SELECT b=980 FROM include_test WHERE a = 980;
|
|
?column?
|
|
---------------------------------------------------------------------
|
|
t
|
|
(1 row)
|
|
|
|
-- some tests with distributed & partitioned tables --
|
|
CREATE TABLE dist_part_table(
|
|
dist_col INT,
|
|
part_col TIMESTAMPTZ,
|
|
col1 TEXT
|
|
) PARTITION BY RANGE (part_col);
|
|
-- create an index before creating a columnar partition
|
|
CREATE INDEX dist_part_table_btree ON dist_part_table (col1);
|
|
-- columnar partition
|
|
CREATE TABLE p0 PARTITION OF dist_part_table
|
|
FOR VALUES FROM ('2020-01-01') TO ('2020-02-01')
|
|
USING columnar;
|
|
SELECT create_distributed_table('dist_part_table', 'dist_col');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
-- columnar partition
|
|
CREATE TABLE p1 PARTITION OF dist_part_table
|
|
FOR VALUES FROM ('2020-02-01') TO ('2020-03-01')
|
|
USING columnar;
|
|
-- row partition
|
|
CREATE TABLE p2 PARTITION OF dist_part_table
|
|
FOR VALUES FROM ('2020-03-01') TO ('2020-04-01');
|
|
-- insert into the row partition (p2). dist_part_table has exactly three
|
|
-- columns (dist_col, part_col, col1), so name them and supply three values;
|
|
-- a fourth value makes the INSERT fail before any partition is touched.
|
|
INSERT INTO dist_part_table (dist_col, part_col, col1) VALUES (1, '2020-03-15', 'str1');
|
|
-- insert into columnar partitions (p0 and p1) so that the index created
|
|
-- below is built over existing columnar data
|
|
INSERT INTO dist_part_table (dist_col, part_col, col1) VALUES (1, '2020-01-15', 'str2');
|
|
INSERT INTO dist_part_table (dist_col, part_col, col1) VALUES (1, '2020-02-15', 'str3');
|
|
-- create another index after creating a columnar partition
|
|
CREATE UNIQUE INDEX dist_part_table_unique ON dist_part_table (dist_col, part_col);
|
|
-- verify that indexes are created on columnar partitions
|
|
SELECT COUNT(*)=2 FROM pg_indexes WHERE tablename = 'p0';
|
|
?column?
|
|
---------------------------------------------------------------------
|
|
t
|
|
(1 row)
|
|
|
|
SELECT COUNT(*)=2 FROM pg_indexes WHERE tablename = 'p1';
|
|
?column?
|
|
---------------------------------------------------------------------
|
|
t
|
|
(1 row)
|
|
|
|
-- unsupported index types --
|
|
-- gin --
|
|
CREATE TABLE testjsonb (j JSONB) USING columnar;
|
|
INSERT INTO testjsonb SELECT CAST('{"f1" : ' ||'"'|| i*4 ||'", ' || '"f2" : '||'"'|| i*10 ||'"}' AS JSON) FROM generate_series(1,10) i;
|
|
CREATE INDEX jidx ON testjsonb USING GIN (j);
|
|
ERROR: only btree and hash indexes are supported on columnar tables
|
|
INSERT INTO testjsonb SELECT CAST('{"f1" : ' ||'"'|| i*4 ||'", ' || '"f2" : '||'"'|| i*10 ||'"}' AS JSON) FROM generate_series(15,20) i;
|
|
-- gist --
|
|
CREATE TABLE gist_point_tbl(id INT4, p POINT) USING columnar;
|
|
INSERT INTO gist_point_tbl (id, p) SELECT g, point(g*10, g*10) FROM generate_series(1, 10) g;
|
|
CREATE INDEX gist_pointidx ON gist_point_tbl USING gist(p);
|
|
ERROR: only btree and hash indexes are supported on columnar tables
|
|
INSERT INTO gist_point_tbl (id, p) SELECT g, point(g*10, g*10) FROM generate_series(10, 20) g;
|
|
-- sp gist --
|
|
CREATE TABLE box_temp (f1 box) USING columnar;
|
|
INSERT INTO box_temp SELECT box(point(i, i), point(i * 2, i * 2)) FROM generate_series(1, 10) AS i;
|
|
CREATE INDEX box_spgist ON box_temp USING spgist (f1);
|
|
ERROR: only btree and hash indexes are supported on columnar tables
|
|
INSERT INTO box_temp SELECT box(point(i, i), point(i * 2, i * 2)) FROM generate_series(1, 10) AS i;
|
|
-- brin --
|
|
CREATE TABLE brin_summarize (value int) USING columnar;
|
|
CREATE INDEX brin_summarize_idx ON brin_summarize USING brin (value) WITH (pages_per_range=2);
|
|
ERROR: only btree and hash indexes are supported on columnar tables
|
|
SET client_min_messages TO WARNING;
|
|
DROP SCHEMA columnar_indexes CASCADE;
|