SET citus.next_shard_id TO 1250000; CREATE TABLE reference_table_test (value_1 int, value_2 float, value_3 text, value_4 timestamp); -- insert some data, and make sure that cannot be create_distributed_table INSERT INTO reference_table_test VALUES (1, 1.0, '1', '2016-12-01'); -- create the reference table SELECT create_reference_table('reference_table_test'); NOTICE: Copying data from local table... NOTICE: copying the data has completed DETAIL: The local data in the table is no longer visible, but is still on disk. HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$public.reference_table_test$$) create_reference_table --------------------------------------------------------------------- (1 row) -- see that partkey is NULL SELECT partmethod, (partkey IS NULL) as partkeyisnull, repmodel FROM pg_dist_partition WHERE logicalrelid = 'reference_table_test'::regclass; partmethod | partkeyisnull | repmodel --------------------------------------------------------------------- n | t | t (1 row) -- now see that shard min/max values are NULL SELECT shardid, (shardminvalue IS NULL) as shardminvalueisnull, (shardmaxvalue IS NULL) as shardmaxvalueisnull FROM pg_dist_shard WHERE logicalrelid = 'reference_table_test'::regclass; shardid | shardminvalueisnull | shardmaxvalueisnull --------------------------------------------------------------------- 1250000 | t | t (1 row) SELECT count(*) active_primaries FROM pg_dist_node WHERE isactive AND noderole='primary' \gset SELECT shardid, bool_and(shardstate = 1) all_placements_healthy, COUNT(distinct nodeport) = :active_primaries replicated_to_all FROM pg_dist_shard_placement WHERE shardid IN (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'reference_table_test'::regclass) GROUP BY shardid ORDER BY shardid; shardid | all_placements_healthy | replicated_to_all --------------------------------------------------------------------- 1250000 | t | t (1 row) -- check whether data was copied into 
distributed table SELECT * FROM reference_table_test; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (1 row) -- now, execute some modification queries INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02'); INSERT INTO reference_table_test VALUES (3, 3.0, '3', '2016-12-03'); INSERT INTO reference_table_test VALUES (4, 4.0, '4', '2016-12-04'); INSERT INTO reference_table_test VALUES (5, 5.0, '5', '2016-12-05'); -- most of the queries in this file are already tested on multi_router_planner.sql -- However, for the sake of completeness we need to run similar tests with -- reference tables as well -- run some queries on top of the data SELECT * FROM reference_table_test; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 3 | 3 | 3 | Sat Dec 03 00:00:00 2016 4 | 4 | 4 | Sun Dec 04 00:00:00 2016 5 | 5 | 5 | Mon Dec 05 00:00:00 2016 (5 rows) SELECT * FROM reference_table_test WHERE value_1 = 1; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (1 row) SELECT value_1, value_2 FROM reference_table_test ORDER BY 2 ASC LIMIT 3; value_1 | value_2 --------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 (3 rows) SELECT value_1, value_3 FROM reference_table_test WHERE value_2 >= 4 ORDER BY 2 LIMIT 3; value_1 | value_3 --------------------------------------------------------------------- 4 | 4 5 | 5 (2 rows) SELECT value_1, 15 * value_2 FROM reference_table_test ORDER BY 2 ASC LIMIT 2; value_1 | ?column? --------------------------------------------------------------------- 1 | 15 2 | 30 (2 rows) SELECT value_1, 15 * value_2 FROM reference_table_test ORDER BY 2 ASC LIMIT 2 OFFSET 2; value_1 | ?column? 
--------------------------------------------------------------------- 3 | 45 4 | 60 (2 rows) SELECT value_2, value_4 FROM reference_table_test WHERE value_2 = 2 OR value_2 = 3; value_2 | value_4 --------------------------------------------------------------------- 2 | Fri Dec 02 00:00:00 2016 3 | Sat Dec 03 00:00:00 2016 (2 rows) SELECT value_2, value_4 FROM reference_table_test WHERE value_2 = 2 AND value_2 = 3; value_2 | value_4 --------------------------------------------------------------------- (0 rows) SELECT value_2, value_4 FROM reference_table_test WHERE value_3 = '2' OR value_1 = 3; value_2 | value_4 --------------------------------------------------------------------- 2 | Fri Dec 02 00:00:00 2016 3 | Sat Dec 03 00:00:00 2016 (2 rows) SELECT value_2, value_4 FROM reference_table_test WHERE ( value_3 = '2' OR value_1 = 3 ) AND FALSE; value_2 | value_4 --------------------------------------------------------------------- (0 rows) SELECT * FROM reference_table_test WHERE value_2 IN ( SELECT value_3::FLOAT FROM reference_table_test ) AND value_1 < 3; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 (2 rows) SELECT value_4 FROM reference_table_test WHERE value_3 IN ( '1', '2' ); value_4 --------------------------------------------------------------------- Thu Dec 01 00:00:00 2016 Fri Dec 02 00:00:00 2016 (2 rows) SELECT date_part('day', value_4) FROM reference_table_test WHERE value_3 IN ( '5', '2' ); date_part --------------------------------------------------------------------- 2 5 (2 rows) SELECT value_4 FROM reference_table_test WHERE value_2 <= 2 AND value_2 >= 4; value_4 --------------------------------------------------------------------- (0 rows) SELECT value_4 FROM reference_table_test WHERE value_2 <= 20 AND value_2 >= 4; value_4 --------------------------------------------------------------------- Sun Dec 04 00:00:00 
2016 Mon Dec 05 00:00:00 2016 (2 rows) SELECT value_4 FROM reference_table_test WHERE value_2 >= 5 AND value_2 <= random(); value_4 --------------------------------------------------------------------- (0 rows) SELECT value_1 FROM reference_table_test WHERE value_4 BETWEEN '2016-12-01' AND '2016-12-03'; value_1 --------------------------------------------------------------------- 1 2 3 (3 rows) SELECT value_1 FROM reference_table_test WHERE FALSE; value_1 --------------------------------------------------------------------- (0 rows) SELECT value_1 FROM reference_table_test WHERE int4eq(1, 2); value_1 --------------------------------------------------------------------- (0 rows) -- rename output name and do some operations SELECT value_1 as id, value_2 * 15 as age FROM reference_table_test; id | age --------------------------------------------------------------------- 1 | 15 2 | 30 3 | 45 4 | 60 5 | 75 (5 rows) -- queries with CTEs are supported WITH some_data AS ( SELECT value_2, value_4 FROM reference_table_test WHERE value_2 >=3) SELECT * FROM some_data; value_2 | value_4 --------------------------------------------------------------------- 3 | Sat Dec 03 00:00:00 2016 4 | Sun Dec 04 00:00:00 2016 5 | Mon Dec 05 00:00:00 2016 (3 rows) -- queries with CTEs are supported even if CTE is not referenced inside query WITH some_data AS ( SELECT value_2, value_4 FROM reference_table_test WHERE value_2 >=3) SELECT * FROM reference_table_test ORDER BY 1 LIMIT 1; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (1 row) -- queries which involve functions in FROM clause are supported if it goes to a single worker. 
SELECT * FROM reference_table_test, position('om' in 'Thomas') WHERE value_1 = 1; value_1 | value_2 | value_3 | value_4 | position --------------------------------------------------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 | 3 (1 row) SELECT * FROM reference_table_test, position('om' in 'Thomas') WHERE value_1 = 1 OR value_1 = 2; value_1 | value_2 | value_3 | value_4 | position --------------------------------------------------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 | 3 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 | 3 (2 rows) -- set operations are supported SELECT * FROM ( SELECT * FROM reference_table_test WHERE value_1 = 1 UNION SELECT * FROM reference_table_test WHERE value_1 = 3 ) AS combination ORDER BY value_1; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 3 | 3 | 3 | Sat Dec 03 00:00:00 2016 (2 rows) SELECT * FROM ( SELECT * FROM reference_table_test WHERE value_1 = 1 EXCEPT SELECT * FROM reference_table_test WHERE value_1 = 3 ) AS combination ORDER BY value_1; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (1 row) SELECT * FROM ( SELECT * FROM reference_table_test WHERE value_1 = 1 INTERSECT SELECT * FROM reference_table_test WHERE value_1 = 3 ) AS combination ORDER BY value_1; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- (0 rows) -- to make the tests more interested for aggregation tests, ingest some more data INSERT INTO reference_table_test VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02'); INSERT INTO reference_table_test VALUES (3, 3.0, '3', '2016-12-03'); -- some aggregations SELECT value_4, SUM(value_2) FROM reference_table_test GROUP BY value_4 HAVING SUM(value_2) > 3 ORDER BY 1; value_4 | sum 
--------------------------------------------------------------------- Fri Dec 02 00:00:00 2016 | 4 Sat Dec 03 00:00:00 2016 | 6 Sun Dec 04 00:00:00 2016 | 4 Mon Dec 05 00:00:00 2016 | 5 (4 rows) SELECT value_4, value_3, SUM(value_2) FROM reference_table_test GROUP BY GROUPING sets ((value_4), (value_3)) ORDER BY 1, 2, 3; value_4 | value_3 | sum --------------------------------------------------------------------- Thu Dec 01 00:00:00 2016 | | 2 Fri Dec 02 00:00:00 2016 | | 4 Sat Dec 03 00:00:00 2016 | | 6 Sun Dec 04 00:00:00 2016 | | 4 Mon Dec 05 00:00:00 2016 | | 5 | 1 | 2 | 2 | 4 | 3 | 6 | 4 | 4 | 5 | 5 (10 rows) -- distinct clauses also work fine SELECT DISTINCT value_4 FROM reference_table_test ORDER BY 1; value_4 --------------------------------------------------------------------- Thu Dec 01 00:00:00 2016 Fri Dec 02 00:00:00 2016 Sat Dec 03 00:00:00 2016 Sun Dec 04 00:00:00 2016 Mon Dec 05 00:00:00 2016 (5 rows) -- window functions are also supported SELECT value_4, RANK() OVER (PARTITION BY value_1 ORDER BY value_4) FROM reference_table_test; value_4 | rank --------------------------------------------------------------------- Thu Dec 01 00:00:00 2016 | 1 Thu Dec 01 00:00:00 2016 | 1 Fri Dec 02 00:00:00 2016 | 1 Fri Dec 02 00:00:00 2016 | 1 Sat Dec 03 00:00:00 2016 | 1 Sat Dec 03 00:00:00 2016 | 1 Sun Dec 04 00:00:00 2016 | 1 Mon Dec 05 00:00:00 2016 | 1 (8 rows) -- window functions are also supported SELECT value_4, AVG(value_1) OVER (PARTITION BY value_4 ORDER BY value_4) FROM reference_table_test; value_4 | avg --------------------------------------------------------------------- Thu Dec 01 00:00:00 2016 | 1.00000000000000000000 Thu Dec 01 00:00:00 2016 | 1.00000000000000000000 Fri Dec 02 00:00:00 2016 | 2.0000000000000000 Fri Dec 02 00:00:00 2016 | 2.0000000000000000 Sat Dec 03 00:00:00 2016 | 3.0000000000000000 Sat Dec 03 00:00:00 2016 | 3.0000000000000000 Sun Dec 04 00:00:00 2016 | 4.0000000000000000 Mon Dec 05 00:00:00 2016 | 5.0000000000000000 (8 rows) 
SELECT count(DISTINCT CASE WHEN value_2 >= 3 THEN value_2 ELSE NULL END) as c FROM reference_table_test; c --------------------------------------------------------------------- 3 (1 row) SELECT value_1, count(DISTINCT CASE WHEN value_2 >= 3 THEN value_2 ELSE NULL END) as c FROM reference_table_test GROUP BY value_1 ORDER BY 1; value_1 | c --------------------------------------------------------------------- 1 | 0 2 | 0 3 | 1 4 | 1 5 | 1 (5 rows) -- selects inside a transaction works fine as well BEGIN; SELECT * FROM reference_table_test; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 3 | 3 | 3 | Sat Dec 03 00:00:00 2016 4 | 4 | 4 | Sun Dec 04 00:00:00 2016 5 | 5 | 5 | Mon Dec 05 00:00:00 2016 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 3 | 3 | 3 | Sat Dec 03 00:00:00 2016 (8 rows) SELECT * FROM reference_table_test WHERE value_1 = 1; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (2 rows) END; -- cursor queries also works fine BEGIN; DECLARE test_cursor CURSOR FOR SELECT * FROM reference_table_test WHERE value_1 = 1 OR value_1 = 2 ORDER BY value_1; FETCH test_cursor; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (1 row) FETCH ALL test_cursor; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 (3 rows) FETCH test_cursor; -- fetch one row after the last value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- (0 rows) FETCH BACKWARD test_cursor; value_1 | 
value_2 | value_3 | value_4 --------------------------------------------------------------------- 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 (1 row) END; -- table creation queries inside can be router plannable CREATE TEMP TABLE temp_reference_test as SELECT * FROM reference_table_test WHERE value_1 = 1; -- all kinds of joins are supported among reference tables -- first create two more tables CREATE TABLE reference_table_test_second (value_1 int, value_2 float, value_3 text, value_4 timestamp); SELECT create_reference_table('reference_table_test_second'); create_reference_table --------------------------------------------------------------------- (1 row) CREATE TABLE reference_table_test_third (value_1 int, value_2 float, value_3 text, value_4 timestamp); SELECT create_reference_table('reference_table_test_third'); create_reference_table --------------------------------------------------------------------- (1 row) -- ingest some data to both tables INSERT INTO reference_table_test_second VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO reference_table_test_second VALUES (2, 2.0, '2', '2016-12-02'); INSERT INTO reference_table_test_second VALUES (3, 3.0, '3', '2016-12-03'); INSERT INTO reference_table_test_third VALUES (4, 4.0, '4', '2016-12-04'); INSERT INTO reference_table_test_third VALUES (5, 5.0, '5', '2016-12-05'); -- some very basic tests SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2 WHERE t1.value_2 = t2.value_2 ORDER BY 1; value_1 --------------------------------------------------------------------- 1 2 3 (3 rows) SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_third t3 WHERE t1.value_2 = t3.value_2 ORDER BY 1; value_1 --------------------------------------------------------------------- 4 5 (2 rows) SELECT DISTINCT t2.value_1 FROM reference_table_test_second t2, reference_table_test_third t3 WHERE t2.value_2 = t3.value_2 ORDER BY 1; value_1 
--------------------------------------------------------------------- (0 rows) -- join on different columns and different data types via casts SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2 WHERE t1.value_2 = t2.value_1 ORDER BY 1; value_1 --------------------------------------------------------------------- 1 2 3 (3 rows) SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2 WHERE t1.value_2 = t2.value_3::int ORDER BY 1; value_1 --------------------------------------------------------------------- 1 2 3 (3 rows) SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2 WHERE t1.value_2 = date_part('day', t2.value_4) ORDER BY 1; value_1 --------------------------------------------------------------------- 1 2 3 (3 rows) -- ingest a common row to see more meaningful results with joins involving 3 tables INSERT INTO reference_table_test_third VALUES (3, 3.0, '3', '2016-12-03'); SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2, reference_table_test_third t3 WHERE t1.value_2 = date_part('day', t2.value_4) AND t3.value_2 = t1.value_2 ORDER BY 1; value_1 --------------------------------------------------------------------- 3 (1 row) -- same query on different columns SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2, reference_table_test_third t3 WHERE t1.value_1 = date_part('day', t2.value_4) AND t3.value_2 = t1.value_1 ORDER BY 1; value_1 --------------------------------------------------------------------- 3 (1 row) -- with the JOIN syntax SELECT DISTINCT t1.value_1 FROM reference_table_test t1 JOIN reference_table_test_second t2 USING (value_1) JOIN reference_table_test_third t3 USING (value_1) ORDER BY 1; value_1 --------------------------------------------------------------------- 3 (1 row) -- and left/right joins SELECT DISTINCT t1.value_1 FROM reference_table_test t1 LEFT JOIN 
reference_table_test_second t2 USING (value_1) LEFT JOIN reference_table_test_third t3 USING (value_1) ORDER BY 1; value_1 --------------------------------------------------------------------- 1 2 3 4 5 (5 rows) SELECT DISTINCT t1.value_1 FROM reference_table_test t1 RIGHT JOIN reference_table_test_second t2 USING (value_1) RIGHT JOIN reference_table_test_third t3 USING (value_1) ORDER BY 1; value_1 --------------------------------------------------------------------- 3 (2 rows) -- now, lets have some tests on UPSERTs and uniquness CREATE TABLE reference_table_test_fourth (value_1 int, value_2 float PRIMARY KEY, value_3 text, value_4 timestamp); SELECT create_reference_table('reference_table_test_fourth'); create_reference_table --------------------------------------------------------------------- (1 row) \set VERBOSITY terse -- insert a row INSERT INTO reference_table_test_fourth VALUES (1, 1.0, '1', '2016-12-01'); -- now get the unique key violation INSERT INTO reference_table_test_fourth VALUES (1, 1.0, '1', '2016-12-01'); ERROR: duplicate key value violates unique constraint "reference_table_test_fourth_pkey_1250003" -- now get null constraint violation due to primary key INSERT INTO reference_table_test_fourth (value_1, value_3, value_4) VALUES (1, '1.0', '2016-12-01'); ERROR: null value in column "value_2" violates not-null constraint \set VERBOSITY default -- lets run some upserts INSERT INTO reference_table_test_fourth VALUES (1, 1.0, '1', '2016-12-01') ON CONFLICT DO NOTHING RETURNING *; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- (0 rows) INSERT INTO reference_table_test_fourth VALUES (1, 1.0, '10', '2016-12-01') ON CONFLICT (value_2) DO UPDATE SET value_3 = EXCLUDED.value_3, value_2 = EXCLUDED.value_2 RETURNING *; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 1 | 1 | 10 | Thu Dec 01 00:00:00 2016 (1 row) -- update all columns 
INSERT INTO reference_table_test_fourth VALUES (1, 1.0, '10', '2016-12-01') ON CONFLICT (value_2) DO UPDATE SET value_3 = EXCLUDED.value_3 || '+10', value_2 = EXCLUDED.value_2 + 10, value_1 = EXCLUDED.value_1 + 10, value_4 = '2016-12-10' RETURNING *; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 11 | 11 | 10+10 | Sat Dec 10 00:00:00 2016 (1 row) -- finally see that shard healths are OK SELECT shardid, bool_and(shardstate = 1) all_placements_healthy, COUNT(distinct nodeport) = :active_primaries replicated_to_all FROM pg_dist_shard_placement WHERE shardid IN (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'reference_table_test_fourth'::regclass) GROUP BY shardid ORDER BY shardid; shardid | all_placements_healthy | replicated_to_all --------------------------------------------------------------------- 1250003 | t | t (1 row) -- let's not run some update/delete queries on arbitrary columns DELETE FROM reference_table_test WHERE value_1 = 1 RETURNING *; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (2 rows) DELETE FROM reference_table_test WHERE value_4 = '2016-12-05' RETURNING *; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 5 | 5 | 5 | Mon Dec 05 00:00:00 2016 (1 row) UPDATE reference_table_test SET value_2 = 15 WHERE value_2 = 2 RETURNING *; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 2 | 15 | 2 | Fri Dec 02 00:00:00 2016 2 | 15 | 2 | Fri Dec 02 00:00:00 2016 (2 rows) -- and some queries without any filters UPDATE reference_table_test SET value_2 = 15, value_1 = 45 RETURNING *; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 45 | 15 | 2 | Fri Dec 02 00:00:00 2016 45 | 15 | 2 | Fri 
Dec 02 00:00:00 2016 45 | 15 | 3 | Sat Dec 03 00:00:00 2016 45 | 15 | 3 | Sat Dec 03 00:00:00 2016 45 | 15 | 4 | Sun Dec 04 00:00:00 2016 (5 rows) DELETE FROM reference_table_test RETURNING *; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 45 | 15 | 2 | Fri Dec 02 00:00:00 2016 45 | 15 | 2 | Fri Dec 02 00:00:00 2016 45 | 15 | 3 | Sat Dec 03 00:00:00 2016 45 | 15 | 3 | Sat Dec 03 00:00:00 2016 45 | 15 | 4 | Sun Dec 04 00:00:00 2016 (5 rows) -- some tests with function evaluation and sequences CREATE TABLE reference_table_test_fifth (value_1 serial PRIMARY KEY, value_2 float, value_3 text, value_4 timestamp); SELECT create_reference_table('reference_table_test_fifth'); create_reference_table --------------------------------------------------------------------- (1 row) CREATE SEQUENCE example_ref_value_seq; -- see that sequences work as expected INSERT INTO reference_table_test_fifth (value_2) VALUES (2) RETURNING value_1, value_2; value_1 | value_2 --------------------------------------------------------------------- 1 | 2 (1 row) INSERT INTO reference_table_test_fifth (value_2) VALUES (2) RETURNING value_1, value_2; value_1 | value_2 --------------------------------------------------------------------- 2 | 2 (1 row) INSERT INTO reference_table_test_fifth (value_2, value_3) VALUES (nextval('example_ref_value_seq'), nextval('example_ref_value_seq')::text) RETURNING value_1, value_2, value_3; value_1 | value_2 | value_3 --------------------------------------------------------------------- 3 | 1 | 2 (1 row) INSERT INTO reference_table_test_fifth (value_4) VALUES (now()) RETURNING value_1, value_2, value_3; value_1 | value_2 | value_3 --------------------------------------------------------------------- 4 | | (1 row) UPDATE reference_table_test_fifth SET value_4 = now() WHERE value_1 = 1 RETURNING value_1, value_2, value_4 > '2000-01-01'; value_1 | value_2 | ?column? 
--------------------------------------------------------------------- 1 | 2 | t (1 row) -- test copying FROM / TO -- first delete all the data DELETE FROM reference_table_test; COPY reference_table_test FROM STDIN WITH CSV; COPY reference_table_test (value_2, value_3, value_4) FROM STDIN WITH CSV; COPY reference_table_test (value_3) FROM STDIN WITH CSV; COPY reference_table_test FROM STDIN WITH CSV; COPY reference_table_test TO STDOUT WITH CSV; 1,1,1,2016-01-01 00:00:00 ,2,2,2016-01-02 00:00:00 ,,3, ,,, -- INSERT INTO SELECT among reference tables DELETE FROM reference_table_test_second; INSERT INTO reference_table_test_second SELECT * FROM reference_table_test RETURNING *; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 1 | 1 | 1 | Fri Jan 01 00:00:00 2016 | 2 | 2 | Sat Jan 02 00:00:00 2016 | | 3 | | | | (4 rows) INSERT INTO reference_table_test_second (value_2) SELECT reference_table_test.value_2 FROM reference_table_test JOIN reference_table_test_second USING (value_1) RETURNING *; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- | 1 | | (1 row) SET citus.shard_count TO 6; SET citus.shard_replication_factor TO 2; CREATE TABLE colocated_table_test (value_1 int, value_2 float, value_3 text, value_4 timestamp); SELECT create_distributed_table('colocated_table_test', 'value_1'); create_distributed_table --------------------------------------------------------------------- (1 row) CREATE TABLE colocated_table_test_2 (value_1 int, value_2 float, value_3 text, value_4 timestamp); SELECT create_distributed_table('colocated_table_test_2', 'value_1'); create_distributed_table --------------------------------------------------------------------- (1 row) DELETE FROM reference_table_test; INSERT INTO reference_table_test VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02'); INSERT INTO colocated_table_test 
VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO colocated_table_test VALUES (2, 2.0, '2', '2016-12-02'); INSERT INTO colocated_table_test_2 VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO colocated_table_test_2 VALUES (2, 2.0, '2', '2016-12-02'); SET client_min_messages TO DEBUG1; SET citus.log_multi_join_order TO TRUE; SELECT reference_table_test.value_1 FROM reference_table_test, colocated_table_test WHERE colocated_table_test.value_1 = reference_table_test.value_1 ORDER BY 1; LOG: join order: [ "colocated_table_test" ][ reference join "reference_table_test" ] value_1 --------------------------------------------------------------------- 1 2 (2 rows) SELECT colocated_table_test.value_2 FROM reference_table_test, colocated_table_test WHERE colocated_table_test.value_2 = reference_table_test.value_2 ORDER BY 1; LOG: join order: [ "colocated_table_test" ][ reference join "reference_table_test" ] value_2 --------------------------------------------------------------------- 1 2 (2 rows) SELECT colocated_table_test.value_2 FROM colocated_table_test, reference_table_test WHERE reference_table_test.value_1 = colocated_table_test.value_1 ORDER BY 1; LOG: join order: [ "colocated_table_test" ][ reference join "reference_table_test" ] value_2 --------------------------------------------------------------------- 1 2 (2 rows) SET citus.enable_repartition_joins = on; SELECT colocated_table_test.value_2 FROM reference_table_test, colocated_table_test, colocated_table_test_2 WHERE colocated_table_test.value_2 = reference_table_test.value_2 ORDER BY colocated_table_test.value_2; LOG: join order: [ "colocated_table_test_2" ][ cartesian product reference join "reference_table_test" ][ dual partition join "colocated_table_test" ] value_2 --------------------------------------------------------------------- 1 1 2 2 (4 rows) RESET citus.enable_repartition_joins; SELECT colocated_table_test.value_2 FROM reference_table_test, colocated_table_test, colocated_table_test_2 WHERE 
colocated_table_test.value_1 = colocated_table_test_2.value_1 AND colocated_table_test.value_2 = reference_table_test.value_2 ORDER BY 1; LOG: join order: [ "colocated_table_test" ][ reference join "reference_table_test" ][ local partition join "colocated_table_test_2" ] value_2 --------------------------------------------------------------------- 1 2 (2 rows) SET citus.enable_repartition_joins to ON; SELECT colocated_table_test.value_2 FROM reference_table_test, colocated_table_test, colocated_table_test_2 WHERE colocated_table_test.value_2 = colocated_table_test_2.value_2 AND colocated_table_test.value_2 = reference_table_test.value_2 ORDER BY colocated_table_test.value_2; LOG: join order: [ "colocated_table_test" ][ reference join "reference_table_test" ][ dual partition join "colocated_table_test_2" ] value_2 --------------------------------------------------------------------- 1 2 (2 rows) SELECT reference_table_test.value_2 FROM reference_table_test, colocated_table_test, colocated_table_test_2 WHERE colocated_table_test.value_1 = reference_table_test.value_1 AND colocated_table_test_2.value_1 = reference_table_test.value_1 ORDER BY reference_table_test.value_2; LOG: join order: [ "colocated_table_test" ][ reference join "reference_table_test" ][ dual partition join "colocated_table_test_2" ] value_2 --------------------------------------------------------------------- 1 2 (2 rows) SET citus.log_multi_join_order TO FALSE; SET citus.shard_count TO DEFAULT; -- some INSERT .. 
SELECT queries that involve both hash distributed and reference tables -- should go via coordinator since we're inserting into a reference table where -- not all the participants are reference tables INSERT INTO reference_table_test (value_1) SELECT colocated_table_test.value_1 FROM colocated_table_test, colocated_table_test_2 WHERE colocated_table_test.value_1 = colocated_table_test_2.value_1; DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator -- should go via coordinator, same as the above INSERT INTO reference_table_test (value_1) SELECT colocated_table_test.value_1 FROM colocated_table_test, reference_table_test WHERE colocated_table_test.value_1 = reference_table_test.value_1; DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator -- safe to push down even without equality between the partition column and a column of the reference table INSERT INTO colocated_table_test (value_1, value_2) SELECT colocated_table_test_2.value_1, reference_table_test.value_2 FROM colocated_table_test_2, reference_table_test WHERE colocated_table_test_2.value_4 = reference_table_test.value_4 ORDER BY 1 RETURNING value_1, value_2; value_1 | value_2 --------------------------------------------------------------------- 1 | 1 2 | 2 (2 rows) -- similar query to the above, this time on the partition key but without equality INSERT INTO colocated_table_test (value_1, value_2) SELECT colocated_table_test_2.value_1, reference_table_test.value_2 FROM colocated_table_test_2, reference_table_test WHERE colocated_table_test_2.value_1 > reference_table_test.value_2 RETURNING value_1, value_2; value_1 | value_2 --------------------------------------------------------------------- 2 | 1 (1 row) -- partition column value comes from reference table, so it is repartitioned instead of pushed down INSERT INTO
colocated_table_test (value_1, value_2) SELECT reference_table_test.value_2, colocated_table_test_2.value_1 FROM colocated_table_test_2, reference_table_test WHERE colocated_table_test_2.value_4 = reference_table_test.value_4; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: The data type of the target table's partition column should exactly match the data type of the corresponding simple column reference in the subquery. DEBUG: performing repartitioned INSERT ... SELECT INSERT INTO colocated_table_test (value_1, value_2) SELECT reference_table_test.value_1, colocated_table_test_2.value_1 FROM colocated_table_test_2, reference_table_test WHERE colocated_table_test_2.value_4 = reference_table_test.value_4; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: The target table's partition column should correspond to a partition column in the subquery. DEBUG: performing repartitioned INSERT ...
SELECT RESET client_min_messages; -- some tests for update_distributed_table_colocation -- should error out SELECT update_distributed_table_colocation('colocated_table_test_2', colocate_with => 'reference_table_test'); ERROR: relation reference_table_test should be a hash or single shard distributed table SELECT update_distributed_table_colocation('reference_table_test', colocate_with => 'reference_table_test_fifth'); ERROR: relation reference_table_test_fifth should be a hash or single shard distributed table -- ensure that reference tables on -- different schemas work as expected CREATE SCHEMA reference_schema; -- create with schema prefix CREATE TABLE reference_schema.reference_table_test_sixth (value_1 serial PRIMARY KEY, value_2 float, value_3 text, value_4 timestamp); SELECT create_reference_table('reference_schema.reference_table_test_sixth'); create_reference_table --------------------------------------------------------------------- (1 row) SET search_path TO 'reference_schema'; -- create in the schema via search_path CREATE TABLE reference_table_test_seventh (value_1 serial PRIMARY KEY, value_2 float, value_3 text, value_4 timestamp); SELECT create_reference_table('reference_table_test_seventh'); create_reference_table --------------------------------------------------------------------- (1 row) -- ingest some data INSERT INTO reference_table_test_sixth VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO reference_table_test_seventh VALUES (1, 1.0, '1', '2016-12-01'); SET search_path TO 'public'; -- ingest some more data with schema-qualified names INSERT INTO reference_schema.reference_table_test_sixth VALUES (2, 2.0, '2', '2016-12-02'); INSERT INTO reference_schema.reference_table_test_seventh VALUES (2, 2.0, '2', '2016-12-02'); -- some basic queries SELECT value_1 FROM reference_schema.reference_table_test_sixth; value_1 --------------------------------------------------------------------- 1 2 (2 rows) SET search_path TO 'reference_schema'; SELECT reference_table_test_sixth.value_1 FROM reference_table_test_sixth,
reference_table_test_seventh WHERE reference_table_test_sixth.value_4 = reference_table_test_seventh.value_4 ORDER BY 1; value_1 --------------------------------------------------------------------- 1 2 (2 rows) -- last test across schemas SET search_path TO 'public'; SELECT reftable.value_2, colocated_table_test_2.value_1 FROM colocated_table_test_2, reference_schema.reference_table_test_sixth as reftable WHERE colocated_table_test_2.value_4 = reftable.value_4 ORDER BY 1, 2; value_2 | value_1 --------------------------------------------------------------------- 1 | 1 2 | 2 (2 rows) -- let's now test TRUNCATE and DROP TABLE -- delete all rows and ingest some data DELETE FROM reference_table_test; INSERT INTO reference_table_test VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02'); INSERT INTO reference_table_test VALUES (3, 3.0, '3', '2016-12-03'); INSERT INTO reference_table_test VALUES (4, 4.0, '4', '2016-12-04'); INSERT INTO reference_table_test VALUES (5, 5.0, '5', '2016-12-05'); SELECT count(*) FROM reference_table_test; count --------------------------------------------------------------------- 5 (1 row) -- truncate it and get the result back TRUNCATE reference_table_test; SELECT count(*) FROM reference_table_test; count --------------------------------------------------------------------- 0 (1 row) -- now try dropping one of the existing reference tables -- and check the metadata SELECT logicalrelid FROM pg_dist_partition WHERE logicalrelid::regclass::text LIKE '%reference_table_test_fifth%'; logicalrelid --------------------------------------------------------------------- reference_table_test_fifth (1 row) SELECT logicalrelid FROM pg_dist_shard WHERE logicalrelid::regclass::text LIKE '%reference_table_test_fifth%'; logicalrelid --------------------------------------------------------------------- reference_table_test_fifth (1 row) DROP TABLE reference_table_test_fifth; SELECT logicalrelid FROM
pg_dist_partition WHERE logicalrelid::regclass::text LIKE '%reference_table_test_fifth%'; logicalrelid --------------------------------------------------------------------- (0 rows) SELECT logicalrelid FROM pg_dist_shard WHERE logicalrelid::regclass::text LIKE '%reference_table_test_fifth%'; logicalrelid --------------------------------------------------------------------- (0 rows) -- now test DDL changes CREATE TABLE reference_schema.reference_table_ddl (value_1 int, value_2 float, value_3 text, value_4 timestamp); SELECT create_reference_table('reference_schema.reference_table_ddl'); create_reference_table --------------------------------------------------------------------- (1 row) -- CREATE & DROP index and check the workers CREATE INDEX reference_index_1 ON reference_schema.reference_table_ddl(value_1); CREATE INDEX reference_index_2 ON reference_schema.reference_table_ddl(value_2, value_3); -- should be able to create/drop UNIQUE index on a reference table CREATE UNIQUE INDEX reference_index_3 ON reference_schema.reference_table_ddl(value_1); -- should be able to add, alter and drop columns ALTER TABLE reference_schema.reference_table_ddl ADD COLUMN value_5 INTEGER; ALTER TABLE reference_schema.reference_table_ddl ALTER COLUMN value_5 SET DATA TYPE FLOAT; ALTER TABLE reference_schema.reference_table_ddl DROP COLUMN value_1; ALTER TABLE reference_schema.reference_table_ddl ALTER COLUMN value_2 SET DEFAULT 25.0; ALTER TABLE reference_schema.reference_table_ddl ALTER COLUMN value_3 SET NOT NULL; -- see that Citus applied all DDLs to the table SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='reference_schema.reference_table_ddl'::regclass; Column | Type | Modifiers --------------------------------------------------------------------- value_2 | double precision | default 25.0 value_3 | text | not null value_4 | timestamp without time zone | value_5 | double precision | (4 rows) SELECT "Column", "Type", "Definition" FROM index_attrs WHERE relid =
'reference_schema.reference_index_2'::regclass; Column | Type | Definition --------------------------------------------------------------------- value_2 | double precision | value_2 value_3 | text | value_3 (2 rows) -- also to the shard placements \c - - - :worker_1_port SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='reference_schema.reference_table_ddl_1250019'::regclass; Column | Type | Modifiers --------------------------------------------------------------------- value_2 | double precision | default 25.0 value_3 | text | not null value_4 | timestamp without time zone | value_5 | double precision | (4 rows) SELECT "Column", "Type", "Definition" FROM index_attrs WHERE relid = 'reference_schema.reference_index_2_1250019'::regclass; Column | Type | Definition --------------------------------------------------------------------- value_2 | double precision | value_2 value_3 | text | value_3 (2 rows) \c - - - :master_port DROP INDEX reference_schema.reference_index_2; \c - - - :worker_1_port SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='reference_schema.reference_table_ddl_1250019'::regclass; Column | Type | Modifiers --------------------------------------------------------------------- value_2 | double precision | default 25.0 value_3 | text | not null value_4 | timestamp without time zone | value_5 | double precision | (4 rows) \di reference_schema.reference_index_2* List of relations Schema | Name | Type | Owner | Table --------------------------------------------------------------------- (0 rows) \c - - - :master_port SET citus.next_shard_id TO 1255000; -- now test the renaming of the table, and back to the expected name ALTER TABLE reference_schema.reference_table_ddl RENAME TO reference_table_ddl_test; ALTER TABLE reference_schema.reference_table_ddl_test RENAME TO reference_table_ddl; -- now test reference tables against some helper UDFs that Citus provides -- cannot add shards SELECT
master_create_empty_shard('reference_schema.reference_table_ddl'); ERROR: relation "reference_schema.reference_table_ddl" is a reference table DETAIL: We currently don't support creating shards on reference tables -- get/update the statistics SELECT shardid AS a_shard_id FROM pg_dist_shard WHERE logicalrelid = 'reference_schema.reference_table_ddl'::regclass \gset SELECT master_update_shard_statistics(:a_shard_id); master_update_shard_statistics --------------------------------------------------------------------- 8192 (1 row) SELECT master_get_table_ddl_events('reference_schema.reference_table_ddl'); master_get_table_ddl_events --------------------------------------------------------------------- CREATE TABLE reference_schema.reference_table_ddl (value_2 double precision DEFAULT 25.0, value_3 text NOT NULL, value_4 timestamp without time zone, value_5 double precision) USING heap ALTER TABLE reference_schema.reference_table_ddl OWNER TO postgres (2 rows) -- some queries that are captured in functions CREATE OR REPLACE FUNCTION select_count_all() RETURNS bigint AS ' SELECT count(*) FROM reference_table_test; ' LANGUAGE SQL; CREATE OR REPLACE FUNCTION insert_into_ref_table(value_1 int, value_2 float, value_3 text, value_4 timestamp) RETURNS void AS ' INSERT INTO reference_table_test VALUES ($1, $2, $3, $4); ' LANGUAGE SQL; TRUNCATE reference_table_test; SELECT select_count_all(); select_count_all --------------------------------------------------------------------- 0 (1 row) SELECT insert_into_ref_table(1, 1.0, '1', '2016-12-01'); insert_into_ref_table --------------------------------------------------------------------- (1 row) SELECT insert_into_ref_table(2, 2.0, '2', '2016-12-02'); insert_into_ref_table --------------------------------------------------------------------- (1 row) SELECT insert_into_ref_table(3, 3.0, '3', '2016-12-03'); insert_into_ref_table --------------------------------------------------------------------- (1 row) SELECT
insert_into_ref_table(4, 4.0, '4', '2016-12-04'); insert_into_ref_table --------------------------------------------------------------------- (1 row) SELECT insert_into_ref_table(5, 5.0, '5', '2016-12-05'); insert_into_ref_table --------------------------------------------------------------------- (1 row) SELECT insert_into_ref_table(6, 6.0, '6', '2016-12-06'); insert_into_ref_table --------------------------------------------------------------------- (1 row) SELECT select_count_all(); select_count_all --------------------------------------------------------------------- 6 (1 row) TRUNCATE reference_table_test; -- some prepared queries and pl/pgsql functions PREPARE insert_into_ref_table_pr (int, float, text, timestamp) AS INSERT INTO reference_table_test VALUES ($1, $2, $3, $4); -- reference tables do not have the up-to-five execution limit that other tables have EXECUTE insert_into_ref_table_pr(1, 1.0, '1', '2016-12-01'); EXECUTE insert_into_ref_table_pr(2, 2.0, '2', '2016-12-02'); EXECUTE insert_into_ref_table_pr(3, 3.0, '3', '2016-12-03'); EXECUTE insert_into_ref_table_pr(4, 4.0, '4', '2016-12-04'); EXECUTE insert_into_ref_table_pr(5, 5.0, '5', '2016-12-05'); EXECUTE insert_into_ref_table_pr(6, 6.0, '6', '2016-12-06'); -- see the count, then truncate the table SELECT select_count_all(); select_count_all --------------------------------------------------------------------- 6 (1 row) TRUNCATE reference_table_test; -- reference tables work with composite key -- and we do not even need to create a hash function etc.
-- first create the type on all nodes CREATE TYPE reference_comp_key as (key text, value text); CREATE TABLE reference_table_composite (id int PRIMARY KEY, data reference_comp_key); SELECT create_reference_table('reference_table_composite'); create_reference_table --------------------------------------------------------------------- (1 row) -- insert and query some data INSERT INTO reference_table_composite (id, data) VALUES (1, ('key_1', 'value_1')::reference_comp_key); INSERT INTO reference_table_composite (id, data) VALUES (2, ('key_2', 'value_2')::reference_comp_key); SELECT * FROM reference_table_composite; id | data --------------------------------------------------------------------- 1 | (key_1,value_1) 2 | (key_2,value_2) (2 rows) SELECT (data).key FROM reference_table_composite; key --------------------------------------------------------------------- key_1 key_2 (2 rows) -- make sure that reference tables obey single shard transactions TRUNCATE reference_table_test; BEGIN; INSERT INTO reference_table_test VALUES (1, 1.0, '1', '2016-12-01'); SELECT * FROM reference_table_test; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (1 row) ROLLBACK; SELECT * FROM reference_table_test; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- (0 rows) -- now insert a row and commit BEGIN; INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02'); COMMIT; SELECT * FROM reference_table_test; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 (1 row) -- one basic UPDATE test BEGIN; UPDATE reference_table_test SET value_1 = 10 WHERE value_1 = 2; COMMIT; SELECT * FROM reference_table_test; value_1 | value_2 | value_3 | value_4 --------------------------------------------------------------------- 10 | 2 | 2 | Fri Dec 02
00:00:00 2016 (1 row) -- DDL+DML is allowed BEGIN; ALTER TABLE reference_table_test ADD COLUMN value_dummy INT; INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02'); ROLLBACK; -- Previous issue failed to rename reference tables in subqueries SELECT public.explain_with_pg17_initplan_format($Q$ EXPLAIN (COSTS OFF) SELECT value_1, count(*) FROM colocated_table_test GROUP BY value_1 HAVING (SELECT rt.value_2 FROM reference_table_test rt where rt.value_2 = 2) > 0 ORDER BY 1; $Q$) as "QUERY PLAN"; QUERY PLAN --------------------------------------------------------------------- Sort Sort Key: remote_scan.value_1 -> Custom Scan (Citus Adaptive) -> Distributed Subplan XXX_1 -> Custom Scan (Citus Adaptive) Task Count: 1 Tasks Shown: All -> Task Node: host=localhost port=xxxxx dbname=regression -> Seq Scan on reference_table_test_1250000 rt Filter: (value_2 = '2'::double precision) Task Count: 6 Tasks Shown: One of 6 -> Task Node: host=localhost port=xxxxx dbname=regression -> HashAggregate Group Key: colocated_table_test.value_1 InitPlan 1 -> Function Scan on read_intermediate_result intermediate_result -> Result One-Time Filter: ((InitPlan 1).col1 > '0'::double precision) -> Seq Scan on colocated_table_test_1250005 colocated_table_test (22 rows) WITH a as (SELECT rt.value_2 FROM reference_table_test rt where rt.value_2 = 2) SELECT ct.value_1, count(*) FROM colocated_table_test ct join a on ct.value_1 = a.value_2 WHERE exists (select * from a) GROUP BY 1 ORDER BY 1; value_1 | count --------------------------------------------------------------------- 2 | 5 (1 row) -- clean up tables, ...
-- Teardown: drop the sequence, tables, composite type and schema used by
-- this test file. Messages below ERROR level are suppressed while the
-- drops run, then the setting is restored.
SET client_min_messages TO ERROR;

DROP SEQUENCE example_ref_value_seq;

-- All tables go in a single statement, in the original order.
DROP TABLE
    reference_table_test,
    reference_table_test_second,
    reference_table_test_third,
    reference_table_test_fourth,
    reference_schema.reference_table_ddl,
    reference_table_composite,
    colocated_table_test,
    colocated_table_test_2;

-- The composite type is dropped after reference_table_composite, which uses it.
DROP TYPE reference_comp_key;

-- CASCADE drops whatever is still left inside the schema.
DROP SCHEMA reference_schema CASCADE;

RESET client_min_messages;