--
-- Testing indexes on columnar tables.
--
CREATE SCHEMA columnar_indexes;
SET search_path TO columnar_indexes, public;

--
-- create index with the concurrent option. We should
-- error out during index creation.
-- https://github.com/citusdata/citus/issues/4599
--
create table t(a int, b int) using columnar;
create index CONCURRENTLY t_idx on t(a, b);
REINDEX INDEX CONCURRENTLY t_idx;
\d t

explain insert into t values (1, 2);
insert into t values (1, 2);
SELECT * FROM t;

explain insert into t values (3, 4);
insert into t values (3, 4);
SELECT * FROM t;

-- make sure that we test index scan
set columnar.enable_custom_scan to 'off';
set enable_seqscan to off;
set seq_page_cost TO 10000000;

CREATE TABLE columnar_table (a INT, b INT) USING columnar;
INSERT INTO columnar_table (a) VALUES (1), (1);
CREATE UNIQUE INDEX CONCURRENTLY ON columnar_table (a);

-- CONCURRENTLY should leave an invalid index behind
SELECT COUNT(*)=1 FROM pg_index
WHERE indrelid = 'columnar_table'::regclass AND indisvalid = 'false';

INSERT INTO columnar_table (a) VALUES (1), (1);
REINDEX TABLE columnar_table;

-- index is still invalid since REINDEX errored out
SELECT COUNT(*)=1 FROM pg_index
WHERE indrelid = 'columnar_table'::regclass AND indisvalid = 'false';

TRUNCATE columnar_table;
REINDEX TABLE columnar_table;

-- now it should be valid
SELECT COUNT(*)=0 FROM pg_index
WHERE indrelid = 'columnar_table'::regclass AND indisvalid = 'false';

DROP INDEX columnar_table_a_idx;

INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(0, 16000) i;

-- unique --
BEGIN;
  INSERT INTO columnar_table VALUES (100000000);
  SAVEPOINT s1;
  -- errors out due to unflushed data in upper transaction
  CREATE UNIQUE INDEX ON columnar_table (a);
ROLLBACK;

CREATE UNIQUE INDEX CONCURRENTLY ON columnar_table (a);

BEGIN;
  INSERT INTO columnar_table VALUES (16050);
  SAVEPOINT s1;
  -- index scan errors out due to unflushed data in upper transaction
  SELECT a FROM columnar_table WHERE a = 16050;
ROLLBACK;

EXPLAIN (COSTS OFF) SELECT * FROM columnar_table WHERE a=6456;
EXPLAIN (COSTS OFF) SELECT a FROM columnar_table WHERE a=6456;
SELECT (SELECT a FROM columnar_table WHERE a=6456 LIMIT 1)=6456;
SELECT (SELECT b FROM columnar_table WHERE a=6456 LIMIT 1)=6456*2;

-- even if a=16050 doesn't exist, we try to insert it twice, so this should error out
INSERT INTO columnar_table VALUES (16050), (16050);

-- should work
INSERT INTO columnar_table VALUES (16050);

-- check edge cases around stripe boundaries, error out
INSERT INTO columnar_table VALUES (16050);
INSERT INTO columnar_table VALUES (15999);

DROP INDEX columnar_table_a_idx;

CREATE TABLE partial_unique_idx_test (a INT, b INT) USING columnar;
CREATE UNIQUE INDEX ON partial_unique_idx_test (a) WHERE b > 500;

-- should work since b <= 500 and our partial index doesn't cover this interval
INSERT INTO partial_unique_idx_test VALUES (1, 2), (1, 2);

-- should work since our partial index doesn't cover the tuples that we inserted above
INSERT INTO partial_unique_idx_test VALUES (1, 800);

INSERT INTO partial_unique_idx_test VALUES (4, 600);
-- should error out due to (4, 600)
INSERT INTO partial_unique_idx_test VALUES (4, 700);

-- btree --
CREATE INDEX CONCURRENTLY ON columnar_table (a);
SELECT (SELECT SUM(b) FROM columnar_table WHERE a>700 AND a<965)=439560;

CREATE INDEX ON columnar_table (b) WHERE (b > 30000 AND b < 33000);

-- partial index should be way smaller than the non-partial index
SELECT pg_total_relation_size('columnar_table_b_idx') * 5 <
       pg_total_relation_size('columnar_table_a_idx');
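-- Illustrative sketch, not part of the original test: the size comparisons
-- above can also be eyeballed per index. pg_indexes and pg_relation_size()
-- are standard PostgreSQL catalog/function names; the filter assumes the
-- columnar_indexes schema and columnar_table created above.
SELECT indexname,
       pg_relation_size(format('%I.%I', schemaname, indexname)::regclass) AS size_bytes
FROM pg_indexes
WHERE schemaname = 'columnar_indexes' AND tablename = 'columnar_table'
ORDER BY indexname;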
-- can't use index scan due to partial index boundaries
EXPLAIN (COSTS OFF) SELECT b FROM columnar_table WHERE b = 30000;
-- can use index scan
EXPLAIN (COSTS OFF) SELECT b FROM columnar_table WHERE b = 30001;

-- some more rows
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16000, 17000) i;

DROP INDEX CONCURRENTLY columnar_table_a_idx;
TRUNCATE columnar_table;

-- pkey --
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16000, 16499) i;
ALTER TABLE columnar_table ADD PRIMARY KEY (a);
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(16500, 17000) i;

BEGIN;
  INSERT INTO columnar_table (a) SELECT 1;
ROLLBACK;

-- should work
INSERT INTO columnar_table (a) SELECT 1;

-- error out
INSERT INTO columnar_table VALUES (16100), (16101);
INSERT INTO columnar_table VALUES (16999);

BEGIN;
  REINDEX INDEX columnar_table_pkey;
  -- should error even after reindex
  INSERT INTO columnar_table VALUES (16999);
ROLLBACK;

VACUUM FULL columnar_table;

-- show that we don't support clustering columnar tables using indexes
CLUSTER columnar_table USING columnar_table_pkey;
ALTER TABLE columnar_table CLUSTER ON columnar_table_pkey;
CLUSTER columnar_table;

-- should error even after vacuum
INSERT INTO columnar_table VALUES (16999);

TRUNCATE columnar_table;
INSERT INTO columnar_table (a, b) SELECT i,i*2 FROM generate_series(1, 160000) i;
SELECT (SELECT b FROM columnar_table WHERE a = 150000)=300000;

-- Since our index is highly correlated with the relation itself, we should
-- de-serialize each chunk group only once. For this reason, if this test
-- file hangs on the queries below, it likely means that we are not properly
-- caching the last-read chunk group during index reads.
SELECT SUM(a)=312487500 FROM columnar_table WHERE a < 25000;
SELECT SUM(a)=167000 FROM columnar_table WHERE a = 16000 OR a = 151000;
SELECT SUM(a)=48000 FROM columnar_table WHERE a = 16000 OR a = 32000;

TRUNCATE columnar_table;
ALTER TABLE columnar_table DROP CONSTRAINT columnar_table_pkey;

-- hash --
INSERT INTO columnar_table (a, b) SELECT i*2,i FROM generate_series(1, 8000) i;
CREATE INDEX hash_idx ON columnar_table USING HASH (b);

BEGIN;
  CREATE INDEX hash_idx_fill_factor ON columnar_table USING HASH (b) WITH (fillfactor=10);
  -- same hash index with a lower fillfactor should be way bigger
  SELECT pg_total_relation_size('hash_idx_fill_factor') >
         pg_total_relation_size('hash_idx') * 5;
ROLLBACK;

BEGIN;
  INSERT INTO columnar_table (a, b) SELECT i*3,i FROM generate_series(1, 8000) i;
ROLLBACK;

INSERT INTO columnar_table (a, b) SELECT i*4,i FROM generate_series(1, 8000) i;

SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;

BEGIN;
  REINDEX TABLE columnar_table;
  SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;
ROLLBACK;

VACUUM FULL columnar_table;
SELECT SUM(a)=42000 FROM columnar_table WHERE b = 7000;

-- exclusion constraints --
CREATE TABLE exclusion_test (c1 INT, c2 INT, c3 INT, c4 BOX,
  EXCLUDE USING btree (c1 WITH =) INCLUDE (c3, c4) WHERE (c1 < 10)) USING columnar;

-- error out since "c1" is "1" for all rows to be inserted
INSERT INTO exclusion_test SELECT 1, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;

BEGIN;
  INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;
ROLLBACK;

-- should work
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(1,3) AS x;
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;
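-- Illustrative sketch, not from the original test: the EXCLUDE clause above
-- is enforced through a backing index and recorded in pg_constraint with
-- contype 'x'; pg_get_constraintdef() is a standard PostgreSQL function.
SELECT conname, pg_get_constraintdef(oid)
FROM pg_constraint
WHERE conrelid = 'exclusion_test'::regclass AND contype = 'x';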
BEGIN;
  -- should work thanks to the "where" clause in the exclusion constraint
  INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;
ROLLBACK;

REINDEX TABLE exclusion_test;

-- should still work after reindex
INSERT INTO exclusion_test SELECT x, 2, 3*x, BOX('4,4,4,4') FROM generate_series(10,15) AS x;

-- make sure that we respect INCLUDE syntax --
CREATE TABLE include_test (a INT, b BIGINT, c BIGINT, d BIGINT) USING columnar;
INSERT INTO include_test SELECT i, i, i, i FROM generate_series (1, 1000) i;
CREATE UNIQUE INDEX CONCURRENTLY unique_a ON include_test (a);

-- cannot use index only scan
EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;

CREATE UNIQUE INDEX unique_a_include_b_c_d ON include_test (a) INCLUDE (b, c, d);

-- same unique index that includes other columns should be way bigger
SELECT pg_total_relation_size('unique_a') * 1.5 <
       pg_total_relation_size('unique_a_include_b_c_d');

DROP INDEX unique_a;

-- should use index only scan since unique_a_include_b_c_d includes column "b" too
EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;

BEGIN;
  SET enable_indexonlyscan = OFF;
  -- show that we respect the enable_indexonlyscan GUC
  EXPLAIN (COSTS OFF) SELECT b FROM include_test WHERE a = 500;
ROLLBACK;

-- make sure that we read the correct value for "b" when doing index only scan
SELECT b=980 FROM include_test WHERE a = 980;

-- some tests with distributed & partitioned tables --
CREATE TABLE dist_part_table(
  dist_col INT,
  part_col TIMESTAMPTZ,
  col1 TEXT,
  col2 POINT
) PARTITION BY RANGE (part_col);

-- create an index before creating a columnar partition
CREATE INDEX dist_part_table_btree ON dist_part_table (col1);

-- columnar partition
CREATE TABLE p0 PARTITION OF dist_part_table
FOR VALUES FROM ('2020-01-01') TO ('2020-02-01')
USING columnar;

SELECT create_distributed_table('dist_part_table', 'dist_col');

-- columnar partition
CREATE TABLE p1 PARTITION OF dist_part_table
FOR VALUES FROM ('2020-02-01') TO ('2020-03-01')
USING columnar;

-- row partition
CREATE TABLE p2 PARTITION OF dist_part_table
FOR VALUES FROM ('2020-03-01') TO ('2020-04-01');

INSERT INTO dist_part_table VALUES (1, '2020-03-15', 'str1', POINT(1, 1));

-- insert into columnar partitions
INSERT INTO dist_part_table VALUES (1, '2020-01-15', 'str2', POINT(2, 2));
INSERT INTO dist_part_table VALUES (1, '2020-02-15', 'str3', POINT(3, 3));

-- create another index after creating a columnar partition
CREATE UNIQUE INDEX dist_part_table_unique ON dist_part_table (dist_col, part_col);

-- verify that indexes are created on columnar partitions
SELECT COUNT(*)=2 FROM pg_indexes WHERE tablename = 'p0';
SELECT COUNT(*)=2 FROM pg_indexes WHERE tablename = 'p1';

-- unsupported index types --

-- gin --
CREATE TABLE testjsonb (j JSONB) USING columnar;
INSERT INTO testjsonb SELECT CAST('{"f1" : ' ||'"'|| i*4 ||'", ' || '"f2" : '||'"'|| i*10 ||'"}' AS JSONB) FROM generate_series(1,10) i;
CREATE INDEX jidx ON testjsonb USING GIN (j);
INSERT INTO testjsonb SELECT CAST('{"f1" : ' ||'"'|| i*4 ||'", ' || '"f2" : '||'"'|| i*10 ||'"}' AS JSONB) FROM generate_series(15,20) i;

-- gist --
CREATE TABLE gist_point_tbl(id INT4, p POINT) USING columnar;
INSERT INTO gist_point_tbl (id, p) SELECT g, point(g*10, g*10) FROM generate_series(1, 10) g;
CREATE INDEX gist_pointidx ON gist_point_tbl USING gist(p);
INSERT INTO gist_point_tbl (id, p) SELECT g, point(g*10, g*10) FROM generate_series(10, 20) g;
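-- Illustrative sketch, not from the original test: the index AMs exercised in
-- this file (btree, hash, gin, gist, spgist, brin) are listed in the standard
-- pg_am catalog with amtype 'i'; the columnar table AM itself shows up there
-- with amtype 't'.
SELECT amname, amtype FROM pg_am ORDER BY amname;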
-- sp gist --
CREATE TABLE box_temp (f1 BOX) USING columnar;
INSERT INTO box_temp SELECT box(point(i, i), point(i * 2, i * 2)) FROM generate_series(1, 10) AS i;
CREATE INDEX CONCURRENTLY box_spgist ON box_temp USING spgist (f1);

-- CONCURRENTLY should not leave an invalid index behind
SELECT COUNT(*)=0 FROM pg_index
WHERE indrelid = 'box_temp'::regclass AND indisvalid = 'false';

INSERT INTO box_temp SELECT box(point(i, i), point(i * 2, i * 2)) FROM generate_series(1, 10) AS i;

-- brin --
CREATE TABLE brin_summarize (value INT) USING columnar;
CREATE INDEX brin_summarize_idx ON brin_summarize USING brin (value) WITH (pages_per_range=2);

-- Show that we safely fall back to a serial index build.
CREATE TABLE parallel_scan_test(a INT) USING columnar WITH (parallel_workers = 2);
INSERT INTO parallel_scan_test SELECT i FROM generate_series(1,10) i;
CREATE INDEX ON parallel_scan_test (a);
VACUUM FULL parallel_scan_test;
REINDEX TABLE parallel_scan_test;
CREATE INDEX CONCURRENTLY ON parallel_scan_test (a);
REINDEX TABLE CONCURRENTLY parallel_scan_test;

-- test with different data types & index AMs --
CREATE TABLE hash_text(a INT, b TEXT) USING columnar;
INSERT INTO hash_text SELECT i, (i*2)::TEXT FROM generate_series(1, 10) i;
CREATE INDEX ON hash_text USING hash (b);
SELECT b FROM hash_text WHERE b='10';

CREATE TABLE hash_int(a INT, b TEXT) USING columnar;
INSERT INTO hash_int SELECT i, (i*3)::TEXT FROM generate_series(1, 10) i;
CREATE INDEX ON hash_int USING hash (a);
SELECT b='15' FROM hash_int WHERE a=5;

CREATE TABLE mixed_data_types (
  timestamp_col TIMESTAMP,
  box_col BOX,
  circle_col CIRCLE,
  float_col FLOAT,
  uuid_col UUID,
  text_col TEXT,
  numeric_col NUMERIC,
  PRIMARY KEY(timestamp_col, text_col)
) USING columnar;

INSERT INTO mixed_data_types
SELECT
  to_timestamp(i+36000),
  box(point(i, i+90)),
  circle(point(i*2, i*3), i*100),
  (i*1.2)::float,
  uuid_in(md5((i*10)::text || (i*15)::text)::cstring),
  (i*8)::text,
  (i*42)::numeric
FROM generate_series(1, 10) i;

SELECT text_col='64' FROM mixed_data_types WHERE timestamp_col='1970-01-01 02:00:08';
SELECT uuid_col='298923c8-1900-45e9-1288-b430794814c4' FROM mixed_data_types WHERE timestamp_col='1970-01-01 02:00:01';

CREATE INDEX hash_uuid ON mixed_data_types USING hash(uuid_col);
SELECT box_col=box(point(1, 91)) AND timestamp_col='1970-01-01 02:00:01' FROM mixed_data_types
WHERE uuid_col='298923c8-1900-45e9-1288-b430794814c4';

DROP INDEX hash_uuid;

CREATE INDEX btree_multi_numeric_text_timestamp ON mixed_data_types (numeric_col, text_col, timestamp_col);
SELECT uuid_col='ab2481c9-f93d-0ed3-033a-3281d865ccb2' FROM mixed_data_types
WHERE numeric_col >= 120 AND numeric_col <= 220 AND circle_col >= circle(point(7, 7), 350) AND float_col <= 5.0;

CREATE TABLE revisit_same_cgroup(a INT, b TEXT) USING columnar;
CREATE INDEX ON revisit_same_cgroup USING HASH (b);
INSERT INTO revisit_same_cgroup SELECT random()*500, (random()*500)::INT::TEXT FROM generate_series(1, 100000) i;
SELECT sum(a)>-1 FROM revisit_same_cgroup WHERE b = '1';

SET client_min_messages TO WARNING;
DROP SCHEMA columnar_indexes CASCADE;
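-- Appendix sketch, not part of the original test: a self-contained recap of
-- the "failed CREATE INDEX CONCURRENTLY leaves an invalid index" pattern
-- checked near the top of this file. The object names are hypothetical; the
-- block creates and drops its own table in the public schema.
CREATE TABLE public.ci_invalid_sketch (a INT) USING columnar;
INSERT INTO public.ci_invalid_sketch VALUES (1), (1);
-- fails on the duplicate values, leaving an invalid index behind
CREATE UNIQUE INDEX CONCURRENTLY ci_invalid_sketch_idx ON public.ci_invalid_sketch (a);
SELECT indexrelid::regclass, indisvalid
FROM pg_index WHERE indrelid = 'public.ci_invalid_sketch'::regclass;
DROP TABLE public.ci_invalid_sketch;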