citus/src/test/regress/expected/columnar_indexes.out

389 lines
15 KiB
Plaintext

--
-- Testing indexes on on columnar tables.
--
CREATE SCHEMA columnar_indexes;
SET search_path tO columnar_indexes, public;
--
-- create index with the concurrent option. We should
-- error out during index creation.
-- https://github.com/citusdata/citus/issues/4599
--
create table t(a int, b int) using columnar;
create index CONCURRENTLY t_idx on t(a, b);
ERROR: concurrent index commands are not supported for columnar tables
\d t
Table "columnar_indexes.t"
Column | Type | Collation | Nullable | Default
---------------------------------------------------------------------
a | integer | | |
b | integer | | |
explain insert into t values (1, 2);
QUERY PLAN
---------------------------------------------------------------------
Insert on t (cost=0.00..0.01 rows=1 width=8)
-> Result (cost=0.00..0.01 rows=1 width=8)
(2 rows)
insert into t values (1, 2);
SELECT * FROM t;
a | b
---------------------------------------------------------------------
1 | 2
(1 row)
create index t_idx on t(a, b);
\d t
Table "columnar_indexes.t"
Column | Type | Collation | Nullable | Default
---------------------------------------------------------------------
a | integer | | |
b | integer | | |
Indexes:
"t_idx" btree (a, b)
explain insert into t values (1, 2);
QUERY PLAN
---------------------------------------------------------------------
Insert on t (cost=0.00..0.01 rows=1 width=8)
-> Result (cost=0.00..0.01 rows=1 width=8)
(2 rows)
insert into t values (3, 4);
SELECT * FROM t;
a | b
---------------------------------------------------------------------
1 | 2
3 | 4
(2 rows)
-- make sure that we test index scan
set columnar.enable_custom_scan to 'off';
set enable_seqscan to off;
CREATE table columnar_table (a INT, b int) USING columnar;
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(0, 16000) i;
-- unique --
BEGIN;
INSERT INTO columnar_table VALUES (100000000);
SAVEPOINT s1;
-- errors out due to unflushed data in upper transaction
CREATE UNIQUE INDEX ON columnar_table (a);
ERROR: cannot read from table when there is unflushed data in upper transactions
ROLLBACK;
CREATE UNIQUE INDEX ON columnar_table (a);
BEGIN;
INSERT INTO columnar_table VALUES (16050);
SAVEPOINT s1;
-- index scan errors out due to unflushed data in upper transaction
SELECT a FROM columnar_table WHERE a = 16050;
ERROR: cannot read from index when there is unflushed data in upper transactions
ROLLBACK;
EXPLAIN (COSTS OFF) SELECT * FROM columnar_table WHERE a=6456;
QUERY PLAN
---------------------------------------------------------------------
Index Scan using columnar_table_a_idx on columnar_table
Index Cond: (a = 6456)
(2 rows)
EXPLAIN (COSTS OFF) SELECT a FROM columnar_table WHERE a=6456;
QUERY PLAN
---------------------------------------------------------------------
Index Only Scan using columnar_table_a_idx on columnar_table
Index Cond: (a = 6456)
(2 rows)
SELECT (SELECT a FROM columnar_table WHERE a=6456 limit 1)=6456;
?column?
---------------------------------------------------------------------
t
(1 row)
SELECT (SELECT b FROM columnar_table WHERE a=6456 limit 1)=6456*2;
?column?
---------------------------------------------------------------------
t
(1 row)
-- even if a=16050 doesn't exist, we try to insert it twice so this should error out
INSERT INTO columnar_table VALUES (16050), (16050);
ERROR: duplicate key value violates unique constraint "columnar_table_a_idx"
DETAIL: Key (a)=(16050) already exists.
-- should work
INSERT INTO columnar_table VALUES (16050);
-- check edge cases around stripe boundaries, error out
INSERT INTO columnar_table VALUES (16050);
ERROR: duplicate key value violates unique constraint "columnar_table_a_idx"
DETAIL: Key (a)=(16050) already exists.
INSERT INTO columnar_table VALUES (15999);
ERROR: duplicate key value violates unique constraint "columnar_table_a_idx"
DETAIL: Key (a)=(15999) already exists.
DROP INDEX columnar_table_a_idx;
CREATE TABLE partial_unique_idx_test (a INT, b INT) USING columnar;
CREATE UNIQUE INDEX ON partial_unique_idx_test (a)
WHERE b > 500;
-- should work since b =< 500 and our partial index doesn't check this interval
INSERT INTO partial_unique_idx_test VALUES (1, 2), (1, 2);
-- should work since our partial index wouldn't cover the tuples that we inserted above
INSERT INTO partial_unique_idx_test VALUES (1, 800);
INSERT INTO partial_unique_idx_test VALUES (4, 600);
-- should error out due to (4, 600)
INSERT INTO partial_unique_idx_test VALUES (4, 700);
ERROR: duplicate key value violates unique constraint "partial_unique_idx_test_a_idx"
DETAIL: Key (a)=(4) already exists.
-- btree --
CREATE INDEX ON columnar_table (a);
SELECT (SELECT SUM(b) FROM columnar_table WHERE a>700 and a<965)=439560;
?column?
---------------------------------------------------------------------
t
(1 row)
CREATE INDEX ON columnar_table (b)
WHERE (b > 30000 AND b < 33000);
-- partial index should be way smaller than the non-partial index
SELECT pg_total_relation_size('columnar_table_b_idx') * 5 <
pg_total_relation_size('columnar_table_a_idx');
?column?
---------------------------------------------------------------------
t
(1 row)
-- can't use index scan due to partial index boundaries
EXPLAIN (COSTS OFF) SELECT b FROM columnar_table WHERE b = 30000;
QUERY PLAN
---------------------------------------------------------------------
Seq Scan on columnar_table
Filter: (b = 30000)
(2 rows)
-- can use index scan
EXPLAIN (COSTS OFF) SELECT b FROM columnar_table WHERE b = 30001;
QUERY PLAN
---------------------------------------------------------------------
Index Only Scan using columnar_table_b_idx on columnar_table
Index Cond: (b = 30001)
(2 rows)
-- some more rows
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16000, 17000) i;
DROP INDEX columnar_table_a_idx;
TRUNCATE columnar_table;
-- pkey --
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16000, 16499) i;
ALTER TABLE columnar_table ADD PRIMARY KEY (a);
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16500, 17000) i;
BEGIN;
INSERT INTO columnar_table (a) SELECT 1;
ROLLBACK;
-- should work
INSERT INTO columnar_table (a) SELECT 1;
-- error out
INSERT INTO columnar_table VALUES (16100), (16101);
ERROR: duplicate key value violates unique constraint "columnar_table_pkey"
DETAIL: Key (a)=(16100) already exists.
INSERT INTO columnar_table VALUES (16999);
ERROR: duplicate key value violates unique constraint "columnar_table_pkey"
DETAIL: Key (a)=(16999) already exists.
BEGIN;
REINDEX INDEX columnar_table_pkey;
-- should error even after reindex
INSERT INTO columnar_table VALUES (16999);
ERROR: duplicate key value violates unique constraint "columnar_table_pkey"
DETAIL: Key (a)=(16999) already exists.
ROLLBACK;
VACUUM FULL columnar_table;
-- should error even after vacuum
INSERT INTO columnar_table VALUES (16999);
ERROR: duplicate key value violates unique constraint "columnar_table_pkey"
DETAIL: Key (a)=(16999) already exists.
TRUNCATE columnar_table;
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(1, 160000) i;
SELECT (SELECT b FROM columnar_table WHERE a = 150000)=300000;
?column?
---------------------------------------------------------------------
t
(1 row)
TRUNCATE columnar_table;
ALTER TABLE columnar_table DROP CONSTRAINT columnar_table_pkey;
-- hash --
INSERT INTO columnar_table (a, b) SELECT i*2,i FROM generate_series(1, 8000) i;
CREATE INDEX hash_idx ON columnar_table USING HASH (b);
BEGIN;
CREATE INDEX hash_idx_fill_factor ON columnar_table USING HASH (b) WITH (fillfactor=10);
-- same hash index with lower fillfactor should be way bigger
SELECT pg_total_relation_size ('hash_idx_fill_factor') >
pg_total_relation_size ('hash_idx') * 5;
?column?
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
BEGIN;
INSERT INTO columnar_table (a, b) SELECT i*3,i FROM generate_series(1, 8000) i;
ROLLBACK;
INSERT INTO columnar_table (a, b) SELECT i*4,i FROM generate_series(1, 8000) i;
SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;
?column?
---------------------------------------------------------------------
t
(1 row)
BEGIN;
REINDEX TABLE columnar_table;
SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;
?column?
---------------------------------------------------------------------
t
(1 row)
ROLLBACK;
VACUUM FULL columnar_table;
SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;
?column?
---------------------------------------------------------------------
t
(1 row)
-- exclusion contraints --
CREATE TABLE exclusion_test (c1 INT,c2 INT, c3 INT, c4 BOX,
EXCLUDE USING btree (c1 WITH =) INCLUDE(c3,c4) WHERE (c1 < 10)) USING columnar;
-- error out since "c1" is "1" for all rows to be inserted
INSERT INTO exclusion_test SELECT 1, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;
ERROR: conflicting key value violates exclusion constraint "exclusion_test_c1_c3_c4_excl"
DETAIL: Key (c1)=(1) conflicts with existing key (c1)=(1).
BEGIN;
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;
ROLLBACK;
-- should work
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;
BEGIN;
-- should work thanks to "where" clause in exclusion constraint
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;
ROLLBACK;
REINDEX TABLE exclusion_test;
-- should still work after reindex
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;
-- make sure that we respect INCLUDE syntax --
CREATE TABLE include_test (a INT, b BIGINT, c BIGINT, d BIGINT) USING columnar;
INSERT INTO include_test SELECT i, i, i, i FROM generate_series (1, 1000) i;
CREATE UNIQUE INDEX unique_a ON include_test (a);
-- cannot use index only scan
EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;
QUERY PLAN
---------------------------------------------------------------------
Index Scan using unique_a on include_test
Index Cond: (a = 500)
(2 rows)
CREATE UNIQUE INDEX unique_a_include_b_c_d ON include_test (a) INCLUDE(b, c, d);
-- same unique index that includes other columns should be way bigger
SELECT pg_total_relation_size ('unique_a') * 1.5 <
pg_total_relation_size ('unique_a_include_b_c_d');
?column?
---------------------------------------------------------------------
t
(1 row)
DROP INDEX unique_a;
-- should use index only scan since unique_a_include_b_c_d includes column "b" too
EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;
QUERY PLAN
---------------------------------------------------------------------
Index Only Scan using unique_a_include_b_c_d on include_test
Index Cond: (a = 500)
(2 rows)
BEGIN;
SET enable_indexonlyscan = OFF;
-- show that we respect enable_indexonlyscan GUC
EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;
QUERY PLAN
---------------------------------------------------------------------
Index Scan using unique_a_include_b_c_d on include_test
Index Cond: (a = 500)
(2 rows)
ROLLBACK;
-- make sure that we read the correct value for "b" when doing index only scan
SELECT b=980 FROM include_test WHERE a = 980;
?column?
---------------------------------------------------------------------
t
(1 row)
-- some tests with distributed & partitioned tables --
CREATE TABLE dist_part_table(
dist_col INT,
part_col TIMESTAMPTZ,
col1 TEXT
) PARTITION BY RANGE (part_col);
-- create an index before creating a columnar partition
CREATE INDEX dist_part_table_btree ON dist_part_table (col1);
-- columnar partition
CREATE TABLE p0 PARTITION OF dist_part_table
FOR VALUES FROM ('2020-01-01') TO ('2020-02-01')
USING columnar;
SELECT create_distributed_table('dist_part_table', 'dist_col');
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- columnar partition
CREATE TABLE p1 PARTITION OF dist_part_table
FOR VALUES FROM ('2020-02-01') TO ('2020-03-01')
USING columnar;
-- row partition
CREATE TABLE p2 PARTITION OF dist_part_table
FOR VALUES FROM ('2020-03-01') TO ('2020-04-01');
INSERT INTO dist_part_table VALUES (1, '2020-03-15', 'str1', POINT(1, 1));
ERROR: INSERT has more expressions than target columns
-- insert into columnar partitions
INSERT INTO dist_part_table VALUES (1, '2020-01-15', 'str2', POINT(2, 2));
ERROR: INSERT has more expressions than target columns
INSERT INTO dist_part_table VALUES (1, '2020-02-15', 'str3', POINT(3, 3));
ERROR: INSERT has more expressions than target columns
-- create another index after creating a columnar partition
CREATE UNIQUE INDEX dist_part_table_unique ON dist_part_table (dist_col, part_col);
-- verify that indexes are created on columnar partitions
SELECT COUNT(*)=2 FROM pg_indexes WHERE tablename = 'p0';
?column?
---------------------------------------------------------------------
t
(1 row)
SELECT COUNT(*)=2 FROM pg_indexes WHERE tablename = 'p1';
?column?
---------------------------------------------------------------------
t
(1 row)
-- unsupported index types --
-- gin --
CREATE TABLE testjsonb (j JSONB) USING columnar;
INSERT INTO testjsonb SELECT CAST('{"f1" : ' ||'"'|| i*4 ||'", ' || '"f2" : '||'"'|| i*10 ||'"}' AS JSON) FROM generate_series(1,10) i;
CREATE INDEX jidx ON testjsonb USING GIN (j);
ERROR: only btree and hash indexes are supported on columnar tables
INSERT INTO testjsonb SELECT CAST('{"f1" : ' ||'"'|| i*4 ||'", ' || '"f2" : '||'"'|| i*10 ||'"}' AS JSON) FROM generate_series(15,20) i;
-- gist --
CREATE TABLE gist_point_tbl(id INT4, p POINT) USING columnar;
INSERT INTO gist_point_tbl (id, p) SELECT g, point(g*10, g*10) FROM generate_series(1, 10) g;
CREATE INDEX gist_pointidx ON gist_point_tbl USING gist(p);
ERROR: only btree and hash indexes are supported on columnar tables
INSERT INTO gist_point_tbl (id, p) SELECT g, point(g*10, g*10) FROM generate_series(10, 20) g;
-- sp gist --
CREATE TABLE box_temp (f1 box) USING columnar;
INSERT INTO box_temp SELECT box(point(i, i), point(i * 2, i * 2)) FROM generate_series(1, 10) AS i;
CREATE INDEX box_spgist ON box_temp USING spgist (f1);
ERROR: only btree and hash indexes are supported on columnar tables
INSERT INTO box_temp SELECT box(point(i, i), point(i * 2, i * 2)) FROM generate_series(1, 10) AS i;
-- brin --
CREATE TABLE brin_summarize (value int) USING columnar;
CREATE INDEX brin_summarize_idx ON brin_summarize USING brin (value) WITH (pages_per_range=2);
ERROR: only btree and hash indexes are supported on columnar tables
SET client_min_messages TO WARNING;
DROP SCHEMA columnar_indexes CASCADE;