ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1250000; CREATE TABLE reference_table_test (value_1 int, value_2 float, value_3 text, value_4 timestamp); -- insert some data, and make sure that cannot be create_distributed_table INSERT INTO reference_table_test VALUES (1, 1.0, '1', '2016-12-01'); -- create the reference table SELECT create_reference_table('reference_table_test'); NOTICE: Copying data from local table... create_reference_table ------------------------ (1 row) -- see that partkey is NULL SELECT partmethod, (partkey IS NULL) as partkeyisnull, colocationid, repmodel FROM pg_dist_partition WHERE logicalrelid = 'reference_table_test'::regclass; partmethod | partkeyisnull | colocationid | repmodel ------------+---------------+--------------+---------- n | t | 1 | t (1 row) -- now see that shard min/max values are NULL SELECT shardid, (shardminvalue IS NULL) as shardminvalueisnull, (shardmaxvalue IS NULL) as shardmaxvalueisnull FROM pg_dist_shard WHERE logicalrelid = 'reference_table_test'::regclass; shardid | shardminvalueisnull | shardmaxvalueisnull ---------+---------------------+--------------------- 1250000 | t | t (1 row) SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE shardid IN (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'reference_table_test'::regclass) ORDER BY placementid; shardid | shardstate | nodename | nodeport ---------+------------+-----------+---------- 1250000 | 1 | localhost | 57637 1250000 | 1 | localhost | 57638 (2 rows) -- check whether data was copied into distributed table SELECT * FROM reference_table_test; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (1 row) -- now, execute some modification queries INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02'); INSERT INTO reference_table_test VALUES (3, 3.0, '3', '2016-12-03'); INSERT INTO reference_table_test VALUES (4, 4.0, '4', '2016-12-04'); INSERT INTO reference_table_test VALUES (5, 5.0, '5', '2016-12-05'); -- most of the queries in this file are already tested on multi_router_planner.sql -- However, for the sake of completeness we need to run similar tests with -- reference tables as well -- run some queries on top of the data SELECT * FROM reference_table_test; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 3 | 3 | 3 | Sat Dec 03 00:00:00 2016 4 | 4 | 4 | Sun Dec 04 00:00:00 2016 5 | 5 | 5 | Mon Dec 05 00:00:00 2016 (5 rows) SELECT * FROM reference_table_test WHERE value_1 = 1; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (1 row) SELECT value_1, value_2 FROM reference_table_test ORDER BY 2 ASC LIMIT 3; value_1 | value_2 ---------+--------- 1 | 1 2 | 2 3 | 3 (3 rows) SELECT value_1, value_3 FROM reference_table_test WHERE value_2 >= 4 ORDER BY 2 LIMIT 3; value_1 | value_3 ---------+--------- 4 | 4 5 | 5 (2 rows) SELECT value_1, 15 * value_2 FROM reference_table_test ORDER BY 2 ASC LIMIT 2; value_1 | ?column? ---------+---------- 1 | 15 2 | 30 (2 rows) SELECT value_1, 15 * value_2 FROM reference_table_test ORDER BY 2 ASC LIMIT 2 OFFSET 2; value_1 | ?column? 
---------+---------- 3 | 45 4 | 60 (2 rows) SELECT value_2, value_4 FROM reference_table_test WHERE value_2 = 2 OR value_2 = 3; value_2 | value_4 ---------+-------------------------- 2 | Fri Dec 02 00:00:00 2016 3 | Sat Dec 03 00:00:00 2016 (2 rows) SELECT value_2, value_4 FROM reference_table_test WHERE value_2 = 2 AND value_2 = 3; value_2 | value_4 ---------+--------- (0 rows) SELECT value_2, value_4 FROM reference_table_test WHERE value_3 = '2' OR value_1 = 3; value_2 | value_4 ---------+-------------------------- 2 | Fri Dec 02 00:00:00 2016 3 | Sat Dec 03 00:00:00 2016 (2 rows) SELECT value_2, value_4 FROM reference_table_test WHERE ( value_3 = '2' OR value_1 = 3 ) AND FALSE; value_2 | value_4 ---------+--------- (0 rows) SELECT * FROM reference_table_test WHERE value_2 IN ( SELECT value_3::FLOAT FROM reference_table_test ) AND value_1 < 3; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 (2 rows) SELECT value_4 FROM reference_table_test WHERE value_3 IN ( '1', '2' ); value_4 -------------------------- Thu Dec 01 00:00:00 2016 Fri Dec 02 00:00:00 2016 (2 rows) SELECT date_part('day', value_4) FROM reference_table_test WHERE value_3 IN ( '5', '2' ); date_part ----------- 2 5 (2 rows) SELECT value_4 FROM reference_table_test WHERE value_2 <= 2 AND value_2 >= 4; value_4 --------- (0 rows) SELECT value_4 FROM reference_table_test WHERE value_2 <= 20 AND value_2 >= 4; value_4 -------------------------- Sun Dec 04 00:00:00 2016 Mon Dec 05 00:00:00 2016 (2 rows) SELECT value_4 FROM reference_table_test WHERE value_2 >= 5 AND value_2 <= random(); value_4 --------- (0 rows) SELECT value_1 FROM reference_table_test WHERE value_4 BETWEEN '2016-12-01' AND '2016-12-03'; value_1 --------- 1 2 3 (3 rows) SELECT value_1 FROM reference_table_test WHERE FALSE; value_1 --------- (0 rows) SELECT value_1 FROM reference_table_test WHERE int4eq(1, 2); value_1 --------- (0 rows) -- rename output name and do some operations SELECT value_1 as id, value_2 * 15 as age FROM reference_table_test; id | age ----+----- 1 | 15 2 | 30 3 | 45 4 | 60 5 | 75 (5 rows) -- queries with CTEs are supported WITH some_data AS ( SELECT value_2, value_4 FROM reference_table_test WHERE value_2 >=3) SELECT * FROM some_data; value_2 | value_4 ---------+-------------------------- 3 | Sat Dec 03 00:00:00 2016 4 | Sun Dec 04 00:00:00 2016 5 | Mon Dec 05 00:00:00 2016 (3 rows) -- queries with CTEs are supported even if CTE is not referenced inside query WITH some_data AS ( SELECT value_2, value_4 FROM reference_table_test WHERE value_2 >=3) SELECT * FROM reference_table_test ORDER BY 1 LIMIT 1; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (1 row) -- queries which involve functions in FROM clause are supported if it goes to a single worker. 
SELECT * FROM reference_table_test, position('om' in 'Thomas') WHERE value_1 = 1; value_1 | value_2 | value_3 | value_4 | position ---------+---------+---------+--------------------------+---------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 | 3 (1 row) SELECT * FROM reference_table_test, position('om' in 'Thomas') WHERE value_1 = 1 OR value_1 = 2; value_1 | value_2 | value_3 | value_4 | position ---------+---------+---------+--------------------------+---------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 | 3 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 | 3 (2 rows) -- set operations are supported SELECT * FROM ( SELECT * FROM reference_table_test WHERE value_1 = 1 UNION SELECT * FROM reference_table_test WHERE value_1 = 3 ) AS combination ORDER BY value_1; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 3 | 3 | 3 | Sat Dec 03 00:00:00 2016 (2 rows) SELECT * FROM ( SELECT * FROM reference_table_test WHERE value_1 = 1 EXCEPT SELECT * FROM reference_table_test WHERE value_1 = 3 ) AS combination ORDER BY value_1; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (1 row) SELECT * FROM ( SELECT * FROM reference_table_test WHERE value_1 = 1 INTERSECT SELECT * FROM reference_table_test WHERE value_1 = 3 ) AS combination ORDER BY value_1; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+--------- (0 rows) -- to make the tests more interested for aggregation tests, ingest some more data INSERT INTO reference_table_test VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02'); INSERT INTO reference_table_test VALUES (3, 3.0, '3', '2016-12-03'); -- some aggregations SELECT value_4, SUM(value_2) FROM reference_table_test GROUP BY value_4 HAVING SUM(value_2) > 3 ORDER BY 1; value_4 | sum --------------------------+----- Fri Dec 02 00:00:00 2016 | 4 Sat Dec 03 00:00:00 2016 | 6 Sun Dec 04 00:00:00 2016 | 4 Mon Dec 05 00:00:00 2016 | 5 (4 rows) SELECT value_4, value_3, SUM(value_2) FROM reference_table_test GROUP BY GROUPING sets ((value_4), (value_3)) ORDER BY 1, 2, 3; value_4 | value_3 | sum --------------------------+---------+----- Thu Dec 01 00:00:00 2016 | | 2 Fri Dec 02 00:00:00 2016 | | 4 Sat Dec 03 00:00:00 2016 | | 6 Sun Dec 04 00:00:00 2016 | | 4 Mon Dec 05 00:00:00 2016 | | 5 | 1 | 2 | 2 | 4 | 3 | 6 | 4 | 4 | 5 | 5 (10 rows) -- distinct clauses also work fine SELECT DISTINCT value_4 FROM reference_table_test ORDER BY 1; value_4 -------------------------- Thu Dec 01 00:00:00 2016 Fri Dec 02 00:00:00 2016 Sat Dec 03 00:00:00 2016 Sun Dec 04 00:00:00 2016 Mon Dec 05 00:00:00 2016 (5 rows) -- window functions are also supported SELECT value_4, RANK() OVER (PARTITION BY value_1 ORDER BY value_4) FROM reference_table_test; value_4 | rank --------------------------+------ Thu Dec 01 00:00:00 2016 | 1 Thu Dec 01 00:00:00 2016 | 1 Fri Dec 02 00:00:00 2016 | 1 Fri Dec 02 00:00:00 2016 | 1 Sat Dec 03 00:00:00 2016 | 1 Sat Dec 03 00:00:00 2016 | 1 Sun Dec 04 00:00:00 2016 | 1 Mon Dec 05 00:00:00 2016 | 1 (8 rows) -- window functions are also supported SELECT value_4, AVG(value_1) OVER (PARTITION BY value_4 ORDER BY value_4) FROM reference_table_test; value_4 | avg --------------------------+------------------------ Thu Dec 01 00:00:00 2016 | 1.00000000000000000000 Thu Dec 01 00:00:00 2016 | 1.00000000000000000000 Fri Dec 02 00:00:00 2016 | 2.0000000000000000 Fri Dec 02 00:00:00 2016 | 
2.0000000000000000 Sat Dec 03 00:00:00 2016 | 3.0000000000000000 Sat Dec 03 00:00:00 2016 | 3.0000000000000000 Sun Dec 04 00:00:00 2016 | 4.0000000000000000 Mon Dec 05 00:00:00 2016 | 5.0000000000000000 (8 rows) SELECT count(DISTINCT CASE WHEN value_2 >= 3 THEN value_2 ELSE NULL END) as c FROM reference_table_test; c --- 3 (1 row) SELECT value_1, count(DISTINCT CASE WHEN value_2 >= 3 THEN value_2 ELSE NULL END) as c FROM reference_table_test GROUP BY value_1 ORDER BY 1; value_1 | c ---------+--- 1 | 0 2 | 0 3 | 1 4 | 1 5 | 1 (5 rows) -- selects inside a transaction works fine as well BEGIN; SELECT * FROM reference_table_test; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 3 | 3 | 3 | Sat Dec 03 00:00:00 2016 4 | 4 | 4 | Sun Dec 04 00:00:00 2016 5 | 5 | 5 | Mon Dec 05 00:00:00 2016 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 3 | 3 | 3 | Sat Dec 03 00:00:00 2016 (8 rows) SELECT * FROM reference_table_test WHERE value_1 = 1; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (2 rows) END; -- cursor queries also works fine BEGIN; DECLARE test_cursor CURSOR FOR SELECT * FROM reference_table_test WHERE value_1 = 1 OR value_1 = 2 ORDER BY value_1; FETCH test_cursor; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (1 row) FETCH ALL test_cursor; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 (3 rows) FETCH test_cursor; -- fetch one row after the last value_1 | value_2 | value_3 | value_4 ---------+---------+---------+--------- (0 rows) FETCH BACKWARD test_cursor; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 (1 row) END; -- table creation queries inside can be router plannable CREATE TEMP TABLE temp_reference_test as SELECT * FROM reference_table_test WHERE value_1 = 1; -- all kinds of joins are supported among reference tables -- first create two more tables CREATE TABLE reference_table_test_second (value_1 int, value_2 float, value_3 text, value_4 timestamp); SELECT create_reference_table('reference_table_test_second'); create_reference_table ------------------------ (1 row) CREATE TABLE reference_table_test_third (value_1 int, value_2 float, value_3 text, value_4 timestamp); SELECT create_reference_table('reference_table_test_third'); create_reference_table ------------------------ (1 row) -- ingest some data to both tables INSERT INTO reference_table_test_second VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO reference_table_test_second VALUES (2, 2.0, '2', '2016-12-02'); INSERT INTO reference_table_test_second VALUES (3, 3.0, '3', '2016-12-03'); INSERT INTO reference_table_test_third VALUES (4, 4.0, '4', '2016-12-04'); INSERT INTO reference_table_test_third VALUES (5, 5.0, '5', '2016-12-05'); -- some very basic tests SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2 WHERE t1.value_2 = t2.value_2 ORDER BY 1; value_1 --------- 1 2 3 (3 rows) SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_third t3 WHERE t1.value_2 = t3.value_2 ORDER BY 1; 
value_1 --------- 4 5 (2 rows) SELECT DISTINCT t2.value_1 FROM reference_table_test_second t2, reference_table_test_third t3 WHERE t2.value_2 = t3.value_2 ORDER BY 1; value_1 --------- (0 rows) -- join on different columns and different data types via casts SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2 WHERE t1.value_2 = t2.value_1 ORDER BY 1; value_1 --------- 1 2 3 (3 rows) SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2 WHERE t1.value_2 = t2.value_3::int ORDER BY 1; value_1 --------- 1 2 3 (3 rows) SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2 WHERE t1.value_2 = date_part('day', t2.value_4) ORDER BY 1; value_1 --------- 1 2 3 (3 rows) -- ingest a common row to see more meaningful results with joins involving 3 tables INSERT INTO reference_table_test_third VALUES (3, 3.0, '3', '2016-12-03'); SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2, reference_table_test_third t3 WHERE t1.value_2 = date_part('day', t2.value_4) AND t3.value_2 = t1.value_2 ORDER BY 1; value_1 --------- 3 (1 row) -- same query on different columns SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2, reference_table_test_third t3 WHERE t1.value_1 = date_part('day', t2.value_4) AND t3.value_2 = t1.value_1 ORDER BY 1; value_1 --------- 3 (1 row) -- with the JOIN syntax SELECT DISTINCT t1.value_1 FROM reference_table_test t1 JOIN reference_table_test_second t2 USING (value_1) JOIN reference_table_test_third t3 USING (value_1) ORDER BY 1; value_1 --------- 3 (1 row) -- and left/right joins SELECT DISTINCT t1.value_1 FROM reference_table_test t1 LEFT JOIN reference_table_test_second t2 USING (value_1) LEFT JOIN reference_table_test_third t3 USING (value_1) ORDER BY 1; value_1 --------- 1 2 3 4 5 (5 rows) SELECT DISTINCT t1.value_1 FROM reference_table_test t1 RIGHT JOIN reference_table_test_second t2 USING (value_1) RIGHT JOIN reference_table_test_third t3 USING (value_1) ORDER BY 1; value_1 --------- 3 (2 rows) -- now, lets have some tests on UPSERTs and uniquness CREATE TABLE reference_table_test_fourth (value_1 int, value_2 float PRIMARY KEY, value_3 text, value_4 timestamp); SELECT create_reference_table('reference_table_test_fourth'); create_reference_table ------------------------ (1 row) -- insert a row INSERT INTO reference_table_test_fourth VALUES (1, 1.0, '1', '2016-12-01'); -- now get the unique key violation INSERT INTO reference_table_test_fourth VALUES (1, 1.0, '1', '2016-12-01'); ERROR: duplicate key value violates unique constraint "reference_table_test_fourth_pkey_1250003" DETAIL: Key (value_2)=(1) already exists. CONTEXT: while executing command on localhost:57637 -- now get null constraint violation due to primary key INSERT INTO reference_table_test_fourth (value_1, value_3, value_4) VALUES (1, '1.0', '2016-12-01'); ERROR: null value in column "value_2" violates not-null constraint DETAIL: Failing row contains (1, null, 1.0, 2016-12-01 00:00:00). 
CONTEXT: while executing command on localhost:57637 -- lets run some upserts INSERT INTO reference_table_test_fourth VALUES (1, 1.0, '1', '2016-12-01') ON CONFLICT DO NOTHING RETURNING *; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+--------- (0 rows) INSERT INTO reference_table_test_fourth VALUES (1, 1.0, '10', '2016-12-01') ON CONFLICT (value_2) DO UPDATE SET value_3 = EXCLUDED.value_3, value_2 = EXCLUDED.value_2 RETURNING *; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 10 | Thu Dec 01 00:00:00 2016 (1 row) -- update all columns INSERT INTO reference_table_test_fourth VALUES (1, 1.0, '10', '2016-12-01') ON CONFLICT (value_2) DO UPDATE SET value_3 = EXCLUDED.value_3 || '+10', value_2 = EXCLUDED.value_2 + 10, value_1 = EXCLUDED.value_1 + 10, value_4 = '2016-12-10' RETURNING *; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 11 | 11 | 10+10 | Sat Dec 10 00:00:00 2016 (1 row) -- finally see that shard healths are OK SELECT shardid, shardstate, nodename, nodeport FROM pg_dist_shard_placement WHERE shardid IN (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'reference_table_test_fourth'::regclass) ORDER BY placementid; shardid | shardstate | nodename | nodeport ---------+------------+-----------+---------- 1250003 | 1 | localhost | 57637 1250003 | 1 | localhost | 57638 (2 rows) -- let's not run some update/delete queries on arbitrary columns DELETE FROM reference_table_test WHERE value_1 = 1 RETURNING *; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (2 rows) DELETE FROM reference_table_test WHERE value_4 = '2016-12-05' RETURNING *; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 5 | 5 | 5 | Mon Dec 05 00:00:00 2016 (1 row) UPDATE reference_table_test SET value_2 = 15 WHERE value_2 = 2 RETURNING *; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 2 | 15 | 2 | Fri Dec 02 00:00:00 2016 2 | 15 | 2 | Fri Dec 02 00:00:00 2016 (2 rows) -- and some queries without any filters UPDATE reference_table_test SET value_2 = 15, value_1 = 45 RETURNING *; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 45 | 15 | 3 | Sat Dec 03 00:00:00 2016 45 | 15 | 4 | Sun Dec 04 00:00:00 2016 45 | 15 | 3 | Sat Dec 03 00:00:00 2016 45 | 15 | 2 | Fri Dec 02 00:00:00 2016 45 | 15 | 2 | Fri Dec 02 00:00:00 2016 (5 rows) DELETE FROM reference_table_test RETURNING *; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 45 | 15 | 3 | Sat Dec 03 00:00:00 2016 45 | 15 | 4 | Sun Dec 04 00:00:00 2016 45 | 15 | 3 | Sat Dec 03 00:00:00 2016 45 | 15 | 2 | Fri Dec 02 00:00:00 2016 45 | 15 | 2 | Fri Dec 02 00:00:00 2016 (5 rows) -- some tests with function evaluation and sequences CREATE TABLE reference_table_test_fifth (value_1 serial PRIMARY KEY, value_2 float, value_3 text, value_4 timestamp); SELECT create_reference_table('reference_table_test_fifth'); create_reference_table ------------------------ (1 row) CREATE SEQUENCE example_ref_value_seq; -- see that sequences work as expected INSERT INTO reference_table_test_fifth (value_2) VALUES (2) RETURNING value_1, value_2; value_1 | value_2 ---------+--------- 1 | 2 (1 row) INSERT INTO reference_table_test_fifth (value_2) VALUES (2) RETURNING value_1, 
value_2; value_1 | value_2 ---------+--------- 2 | 2 (1 row) INSERT INTO reference_table_test_fifth (value_2, value_3) VALUES (nextval('example_ref_value_seq'), nextval('example_ref_value_seq')::text) RETURNING value_1, value_2, value_3; value_1 | value_2 | value_3 ---------+---------+--------- 3 | 1 | 2 (1 row) UPDATE reference_table_test_fifth SET value_4 = now() WHERE value_1 = 1 RETURNING value_1, value_2, value_4 > '2000-01-01'; value_1 | value_2 | ?column? ---------+---------+---------- 1 | 2 | t (1 row) -- test copying FROM / TO -- first delete all the data DELETE FROM reference_table_test; COPY reference_table_test FROM STDIN WITH CSV; COPY reference_table_test (value_2, value_3, value_4) FROM STDIN WITH CSV; COPY reference_table_test (value_3) FROM STDIN WITH CSV; COPY reference_table_test FROM STDIN WITH CSV; COPY reference_table_test TO STDOUT WITH CSV; 1,1,1,Fri Jan 01 00:00:00 2016 ,2,2,Sat Jan 02 00:00:00 2016 ,,3, ,,, -- INSERT INTO SELECT among reference tables DELETE FROM reference_table_test_second; INSERT INTO reference_table_test_second SELECT * FROM reference_table_test RETURNING *; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Fri Jan 01 00:00:00 2016 | 2 | 2 | Sat Jan 02 00:00:00 2016 | | 3 | | | | (4 rows) INSERT INTO reference_table_test_second (value_2) SELECT reference_table_test.value_2 FROM reference_table_test JOIN reference_table_test_second USING (value_1) RETURNING *; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+--------- | 1 | | (1 row) SET citus.shard_count TO 6; SET citus.shard_replication_factor TO 2; CREATE TABLE colocated_table_test (value_1 int, value_2 float, value_3 text, value_4 timestamp); SELECT create_distributed_table('colocated_table_test', 'value_1'); create_distributed_table -------------------------- (1 row) CREATE TABLE colocated_table_test_2 (value_1 int, value_2 float, value_3 text, value_4 timestamp); SELECT create_distributed_table('colocated_table_test_2', 'value_1'); create_distributed_table -------------------------- (1 row) DELETE FROM reference_table_test; INSERT INTO reference_table_test VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02'); INSERT INTO colocated_table_test VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO colocated_table_test VALUES (2, 2.0, '2', '2016-12-02'); INSERT INTO colocated_table_test_2 VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO colocated_table_test_2 VALUES (2, 2.0, '2', '2016-12-02'); SET client_min_messages TO DEBUG1; SET citus.log_multi_join_order TO TRUE; SELECT reference_table_test.value_1 FROM reference_table_test, colocated_table_test WHERE colocated_table_test.value_1 = reference_table_test.value_1; LOG: join order: [ "colocated_table_test" ][ broadcast join "reference_table_test" ] value_1 --------- 1 2 (2 rows) SELECT colocated_table_test.value_2 FROM reference_table_test, colocated_table_test WHERE colocated_table_test.value_2 = reference_table_test.value_2; LOG: join order: [ "colocated_table_test" ][ broadcast join "reference_table_test" ] value_2 --------- 1 2 (2 rows) SELECT colocated_table_test.value_2 FROM colocated_table_test, reference_table_test WHERE reference_table_test.value_1 = colocated_table_test.value_1; LOG: join order: [ "colocated_table_test" ][ broadcast join "reference_table_test" ] value_2 --------- 1 2 (2 rows) SELECT colocated_table_test.value_2 FROM reference_table_test, colocated_table_test, colocated_table_test_2 WHERE 
colocated_table_test.value_2 = reference_table_test.value_2; LOG: join order: [ "colocated_table_test" ][ broadcast join "reference_table_test" ][ cartesian product "colocated_table_test_2" ] ERROR: cannot perform distributed planning on this query DETAIL: Cartesian products are currently unsupported SELECT colocated_table_test.value_2 FROM reference_table_test, colocated_table_test, colocated_table_test_2 WHERE colocated_table_test.value_1 = colocated_table_test_2.value_1 AND colocated_table_test.value_2 = reference_table_test.value_2; LOG: join order: [ "colocated_table_test" ][ broadcast join "reference_table_test" ][ local partition join "colocated_table_test_2" ] value_2 --------- 1 2 (2 rows) SET citus.task_executor_type to "task-tracker"; SELECT colocated_table_test.value_2 FROM reference_table_test, colocated_table_test, colocated_table_test_2 WHERE colocated_table_test.value_2 = colocated_table_test_2.value_2 AND colocated_table_test.value_2 = reference_table_test.value_2; LOG: join order: [ "colocated_table_test" ][ broadcast join "reference_table_test" ][ dual partition join "colocated_table_test_2" ] value_2 --------- 1 2 (2 rows) SELECT reference_table_test.value_2 FROM reference_table_test, colocated_table_test, colocated_table_test_2 WHERE colocated_table_test.value_1 = reference_table_test.value_1 AND colocated_table_test_2.value_1 = reference_table_test.value_1; LOG: join order: [ "colocated_table_test" ][ broadcast join "reference_table_test" ][ dual partition join "colocated_table_test_2" ] value_2 --------- 1 2 (2 rows) SET citus.log_multi_join_order TO FALSE; SET citus.shard_count TO DEFAULT; SET citus.task_executor_type to "real-time"; -- some INSERT .. SELECT queries that involve both hash distributed and reference tables -- should go via coordinator since we're inserting into reference table where -- not all the participants are reference tables INSERT INTO reference_table_test (value_1) SELECT colocated_table_test.value_1 FROM colocated_table_test, colocated_table_test_2 WHERE colocated_table_test.value_1 = colocated_table_test_2.value_1; DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT DEBUG: Collecting INSERT ... SELECT results on coordinator -- should go via coordinator, same as the above INSERT INTO reference_table_test (value_1) SELECT colocated_table_test.value_1 FROM colocated_table_test, reference_table_test WHERE colocated_table_test.value_1 = reference_table_test.value_1; DEBUG: only reference tables may be queried when targeting a reference table with distributed INSERT ... SELECT DEBUG: Collecting INSERT ... 
SELECT results on coordinator -- now, insert into the hash partitioned table and use reference -- tables in the SELECT queries INSERT INTO colocated_table_test (value_1, value_2) SELECT colocated_table_test_2.value_1, reference_table_test.value_2 FROM colocated_table_test_2, reference_table_test WHERE colocated_table_test_2.value_4 = reference_table_test.value_4 RETURNING value_1, value_2; value_1 | value_2 ---------+--------- 1 | 1 2 | 2 (2 rows) -- some more complex queries (Note that there are more complex queries in multi_insert_select.sql) INSERT INTO colocated_table_test (value_1, value_2) SELECT colocated_table_test_2.value_1, reference_table_test.value_2 FROM colocated_table_test_2, reference_table_test WHERE colocated_table_test_2.value_2 = reference_table_test.value_2 RETURNING value_1, value_2; value_1 | value_2 ---------+--------- 1 | 1 2 | 2 (2 rows) -- partition column value comes from reference table, goes via coordinator INSERT INTO colocated_table_test (value_1, value_2) SELECT reference_table_test.value_2, colocated_table_test_2.value_1 FROM colocated_table_test_2, reference_table_test WHERE colocated_table_test_2.value_4 = reference_table_test.value_4; DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match DETAIL: The data type of the target table's partition column should exactly match the data type of the corresponding simple column reference in the subquery. DEBUG: Collecting INSERT ... SELECT results on coordinator INSERT INTO colocated_table_test (value_1, value_2) SELECT reference_table_test.value_1, colocated_table_test_2.value_1 FROM colocated_table_test_2, reference_table_test WHERE colocated_table_test_2.value_4 = reference_table_test.value_4; DEBUG: cannot perform distributed INSERT INTO ... SELECT becuase the partition columns in the source table and subquery do not match DETAIL: The target table's partition column should correspond to a partition column in the subquery. DEBUG: Collecting INSERT ... SELECT results on coordinator RESET client_min_messages; -- some tests for mark_tables_colocated -- should error out SELECT mark_tables_colocated('colocated_table_test_2', ARRAY['reference_table_test']); ERROR: cannot colocate tables colocated_table_test_2 and reference_table_test DETAIL: Replication models don't match for colocated_table_test_2 and reference_table_test. 
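-- as an illustrative aside, the replication models and colocation ids behind the
-- error above can be inspected directly in the metadata (output not shown here):
SELECT logicalrelid, partmethod, repmodel, colocationid
FROM pg_dist_partition
WHERE logicalrelid IN ('colocated_table_test_2'::regclass, 'reference_table_test'::regclass);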
-- should work silently
SELECT mark_tables_colocated('reference_table_test', ARRAY['reference_table_test_fifth']);
 mark_tables_colocated
-----------------------

(1 row)

-- ensure that reference tables work as expected
-- when used from different schemas
CREATE SCHEMA reference_schema;
-- create with schema prefix
CREATE TABLE reference_schema.reference_table_test_sixth (value_1 serial PRIMARY KEY, value_2 float, value_3 text, value_4 timestamp);
SELECT create_reference_table('reference_schema.reference_table_test_sixth');
 create_reference_table
------------------------

(1 row)

SET search_path TO 'reference_schema';
-- create in the schema
CREATE TABLE reference_table_test_seventh (value_1 serial PRIMARY KEY, value_2 float, value_3 text, value_4 timestamp);
SELECT create_reference_table('reference_table_test_seventh');
 create_reference_table
------------------------

(1 row)

-- ingest some data
INSERT INTO reference_table_test_sixth VALUES (1, 1.0, '1', '2016-12-01');
INSERT INTO reference_table_test_seventh VALUES (1, 1.0, '1', '2016-12-01');
SET search_path TO 'public';
-- ingest some data
INSERT INTO reference_schema.reference_table_test_sixth VALUES (2, 2.0, '2', '2016-12-02');
INSERT INTO reference_schema.reference_table_test_seventh VALUES (2, 2.0, '2', '2016-12-02');
-- some basic queries
SELECT value_1 FROM reference_schema.reference_table_test_sixth;
 value_1
---------
       1
       2
(2 rows)

SET search_path TO 'reference_schema';
SELECT reference_table_test_sixth.value_1 FROM reference_table_test_sixth, reference_table_test_seventh WHERE reference_table_test_sixth.value_4 = reference_table_test_seventh.value_4;
 value_1
---------
       1
       2
(2 rows)

-- last test with cross schemas
SET search_path TO 'public';
SELECT reftable.value_2, colocated_table_test_2.value_1 FROM colocated_table_test_2, reference_schema.reference_table_test_sixth as reftable WHERE colocated_table_test_2.value_4 = reftable.value_4;
 value_2 | value_1
---------+---------
       1 |       1
       2 |       2
(2 rows)

-- let's now test TRUNCATE and DROP TABLE
-- delete all rows and ingest some data
DELETE FROM reference_table_test;
INSERT INTO reference_table_test VALUES (1, 1.0, '1', '2016-12-01');
INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02');
INSERT INTO reference_table_test VALUES (3, 3.0, '3', '2016-12-03');
INSERT INTO reference_table_test VALUES (4, 4.0, '4', '2016-12-04');
INSERT INTO reference_table_test VALUES (5, 5.0, '5', '2016-12-05');
SELECT count(*) FROM reference_table_test;
 count
-------
     5
(1 row)

-- truncate it and check the count again
TRUNCATE reference_table_test;
SELECT count(*) FROM reference_table_test;
 count
-------
     0
(1 row)

-- now try dropping one of the existing reference tables
-- and check the metadata
SELECT logicalrelid FROM pg_dist_partition WHERE logicalrelid::regclass::text LIKE '%reference_table_test_fifth%';
        logicalrelid
----------------------------
 reference_table_test_fifth
(1 row)

SELECT logicalrelid FROM pg_dist_shard WHERE logicalrelid::regclass::text LIKE '%reference_table_test_fifth%';
        logicalrelid
----------------------------
 reference_table_test_fifth
(1 row)

DROP TABLE reference_table_test_fifth;
SELECT logicalrelid FROM pg_dist_partition WHERE logicalrelid::regclass::text LIKE '%reference_table_test_fifth%';
 logicalrelid
--------------
(0 rows)

SELECT logicalrelid FROM pg_dist_shard WHERE logicalrelid::regclass::text LIKE '%reference_table_test_fifth%';
 logicalrelid
--------------
(0 rows)

-- now test DDL changes
CREATE TABLE reference_table_ddl (value_1 int, value_2 float, value_3 text, value_4
timestamp); SELECT create_reference_table('reference_table_ddl'); create_reference_table ------------------------ (1 row) -- CREATE & DROP index and check the workers CREATE INDEX reference_index_1 ON reference_table_ddl(value_1); NOTICE: using one-phase commit for distributed DDL commands HINT: You can enable two-phase commit for extra safety with: SET citus.multi_shard_commit_protocol TO '2pc' CREATE INDEX reference_index_2 ON reference_table_ddl(value_2, value_3); -- should be able to create/drop UNIQUE index on a reference table CREATE UNIQUE INDEX reference_index_3 ON reference_table_ddl(value_1); -- should be able to add a column ALTER TABLE reference_table_ddl ADD COLUMN value_5 INTEGER; ALTER TABLE reference_table_ddl ALTER COLUMN value_5 SET DATA TYPE FLOAT; ALTER TABLE reference_table_ddl DROP COLUMN value_1; ALTER TABLE reference_table_ddl ALTER COLUMN value_2 SET DEFAULT 25.0; ALTER TABLE reference_table_ddl ALTER COLUMN value_3 SET NOT NULL; -- see that Citus applied all DDLs to the table SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.reference_table_ddl'::regclass; Column | Type | Modifiers ---------+-----------------------------+-------------- value_2 | double precision | default 25.0 value_3 | text | not null value_4 | timestamp without time zone | value_5 | double precision | (4 rows) \d reference_index_2 Index "public.reference_index_2" Column | Type | Definition ---------+------------------+------------ value_2 | double precision | value_2 value_3 | text | value_3 btree, for table "public.reference_table_ddl" -- also to the shard placements \c - - - :worker_1_port SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.reference_table_ddl_1250019'::regclass; Column | Type | Modifiers ---------+-----------------------------+-------------- value_2 | double precision | default 25.0 value_3 | text | not null value_4 | timestamp without time zone | value_5 | double precision | (4 rows) \d reference_index_2_1250019 Index "public.reference_index_2_1250019" Column | Type | Definition ---------+------------------+------------ value_2 | double precision | value_2 value_3 | text | value_3 btree, for table "public.reference_table_ddl_1250019" \c - - - :master_port DROP INDEX reference_index_2; NOTICE: using one-phase commit for distributed DDL commands HINT: You can enable two-phase commit for extra safety with: SET citus.multi_shard_commit_protocol TO '2pc' \c - - - :worker_1_port SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.reference_table_ddl_1250019'::regclass; Column | Type | Modifiers ---------+-----------------------------+-------------- value_2 | double precision | default 25.0 value_3 | text | not null value_4 | timestamp without time zone | value_5 | double precision | (4 rows) \di reference_index_2* List of relations Schema | Name | Type | Owner | Table --------+------+------+-------+------- (0 rows) \c - - - :master_port -- as we expect, renaming and setting WITH OIDS does not work for reference tables ALTER TABLE reference_table_ddl RENAME TO reference_table_ddl_test; ERROR: renaming distributed tables is currently unsupported ALTER TABLE reference_table_ddl SET WITH OIDS; ERROR: alter table command is currently unsupported DETAIL: Only ADD|DROP COLUMN, SET|DROP NOT NULL, SET|DROP DEFAULT, ADD|DROP CONSTRAINT, ATTACH|DETACH PARTITION and TYPE subcommands are supported. 
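-- as the DETAIL above suggests, ADD|DROP CONSTRAINT should also be propagated to the
-- shard placements; a minimal sketch (the constraint name is arbitrary, output not shown):
ALTER TABLE reference_table_ddl ADD CONSTRAINT reference_table_ddl_value_2_check CHECK (value_2 >= 0);
ALTER TABLE reference_table_ddl DROP CONSTRAINT reference_table_ddl_value_2_check;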
-- now test reference tables against some helper UDFs that Citus provides -- cannot delete / drop shards from a reference table SELECT master_apply_delete_command('DELETE FROM reference_table_ddl'); ERROR: cannot delete from distributed table DETAIL: Delete statements on reference tables are not supported. -- cannot add shards SELECT master_create_empty_shard('reference_table_ddl'); ERROR: relation "reference_table_ddl" is a reference table DETAIL: We currently don't support creating shards on reference tables -- master_modify_multiple_shards works, but, does it make sense to use at all? INSERT INTO reference_table_ddl (value_2, value_3) VALUES (7, 'aa'); SELECT master_modify_multiple_shards('DELETE FROM reference_table_ddl WHERE value_2 = 7'); master_modify_multiple_shards ------------------------------- 1 (1 row) INSERT INTO reference_table_ddl (value_2, value_3) VALUES (7, 'bb'); SELECT master_modify_multiple_shards('DELETE FROM reference_table_ddl'); master_modify_multiple_shards ------------------------------- 1 (1 row) -- get/update the statistics SELECT part_storage_type, part_key, part_replica_count, part_max_size, part_placement_policy FROM master_get_table_metadata('reference_table_ddl'); part_storage_type | part_key | part_replica_count | part_max_size | part_placement_policy -------------------+----------+--------------------+---------------+----------------------- t | | 2 | 307200 | 2 (1 row) SELECT shardid AS a_shard_id FROM pg_dist_shard WHERE logicalrelid = 'reference_table_ddl'::regclass \gset SELECT master_update_shard_statistics(:a_shard_id); master_update_shard_statistics -------------------------------- 16384 (1 row) CREATE TABLE append_reference_tmp_table (id INT); SELECT master_append_table_to_shard(:a_shard_id, 'append_reference_tmp_table', 'localhost', :master_port); ERROR: cannot append to shardId 1250019 DETAIL: We currently don't support appending to shards in hash-partitioned or reference tables SELECT master_get_table_ddl_events('reference_table_ddl'); master_get_table_ddl_events ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- CREATE TABLE public.reference_table_ddl (value_2 double precision DEFAULT 25.0, value_3 text NOT NULL, value_4 timestamp without time zone, value_5 double precision) (1 row) -- in reality, we wouldn't need to repair any reference table shard placements -- however, the test could be relevant for other purposes SELECT placementid AS a_placement_id FROM pg_dist_shard_placement WHERE shardid = :a_shard_id AND nodeport = :worker_1_port \gset SELECT placementid AS b_placement_id FROM pg_dist_shard_placement WHERE shardid = :a_shard_id AND nodeport = :worker_2_port \gset UPDATE pg_dist_shard_placement SET shardstate = 3 WHERE placementid = :a_placement_id; SELECT master_copy_shard_placement(:a_shard_id, 'localhost', :worker_2_port, 'localhost', :worker_1_port); master_copy_shard_placement ----------------------------- (1 row) SELECT shardid, shardstate FROM pg_dist_shard_placement WHERE placementid = :a_placement_id; shardid | shardstate ---------+------------ 1250019 | 1 (1 row) -- some queries that are captured in functions CREATE FUNCTION select_count_all() RETURNS bigint AS ' SELECT count(*) FROM reference_table_test; ' LANGUAGE SQL; CREATE FUNCTION insert_into_ref_table(value_1 int, value_2 float, value_3 text, value_4 timestamp) RETURNS void AS ' INSERT INTO reference_table_test VALUES ($1, $2, $3, $4); ' LANGUAGE SQL; 
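-- a PL/pgSQL function that touches the reference table would be planned the same way at
-- execution time; a minimal illustrative sketch (this function is not exercised below):
CREATE FUNCTION delete_from_ref_table(val int) RETURNS bigint AS $$
DECLARE
  deleted_count bigint;
BEGIN
  -- filtered DELETEs on reference tables are supported, as shown earlier in this file
  DELETE FROM reference_table_test WHERE value_1 = val;
  GET DIAGNOSTICS deleted_count = ROW_COUNT;
  RETURN deleted_count;
END;
$$ LANGUAGE plpgsql;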
TRUNCATE reference_table_test; SELECT select_count_all(); select_count_all ------------------ 0 (1 row) SELECT insert_into_ref_table(1, 1.0, '1', '2016-12-01'); insert_into_ref_table ----------------------- (1 row) SELECT insert_into_ref_table(2, 2.0, '2', '2016-12-02'); insert_into_ref_table ----------------------- (1 row) SELECT insert_into_ref_table(3, 3.0, '3', '2016-12-03'); insert_into_ref_table ----------------------- (1 row) SELECT insert_into_ref_table(4, 4.0, '4', '2016-12-04'); insert_into_ref_table ----------------------- (1 row) SELECT insert_into_ref_table(5, 5.0, '5', '2016-12-05'); insert_into_ref_table ----------------------- (1 row) SELECT insert_into_ref_table(6, 6.0, '6', '2016-12-06'); insert_into_ref_table ----------------------- (1 row) SELECT select_count_all(); select_count_all ------------------ 6 (1 row) TRUNCATE reference_table_test; -- some prepared queries and pl/pgsql functions PREPARE insert_into_ref_table_pr (int, float, text, timestamp) AS INSERT INTO reference_table_test VALUES ($1, $2, $3, $4); -- reference tables do not have up-to-five execution limit as other tables EXECUTE insert_into_ref_table_pr(1, 1.0, '1', '2016-12-01'); EXECUTE insert_into_ref_table_pr(2, 2.0, '2', '2016-12-02'); EXECUTE insert_into_ref_table_pr(3, 3.0, '3', '2016-12-03'); EXECUTE insert_into_ref_table_pr(4, 4.0, '4', '2016-12-04'); EXECUTE insert_into_ref_table_pr(5, 5.0, '5', '2016-12-05'); EXECUTE insert_into_ref_table_pr(6, 6.0, '6', '2016-12-06'); -- see the count, then truncate the table SELECT select_count_all(); select_count_all ------------------ 6 (1 row) TRUNCATE reference_table_test; -- reference tables work with composite key -- and we even do not need to create hash -- function etc. -- first create the type on all nodes CREATE TYPE reference_comp_key as (key text, value text); \c - - - :worker_1_port CREATE TYPE reference_comp_key as (key text, value text); \c - - - :worker_2_port CREATE TYPE reference_comp_key as (key text, value text); \c - - - :master_port CREATE TABLE reference_table_composite (id int PRIMARY KEY, data reference_comp_key); SELECT create_reference_table('reference_table_composite'); create_reference_table ------------------------ (1 row) -- insert and query some data INSERT INTO reference_table_composite (id, data) VALUES (1, ('key_1', 'value_1')::reference_comp_key); INSERT INTO reference_table_composite (id, data) VALUES (2, ('key_2', 'value_2')::reference_comp_key); SELECT * FROM reference_table_composite; id | data ----+----------------- 1 | (key_1,value_1) 2 | (key_2,value_2) (2 rows) SELECT (data).key FROM reference_table_composite; key ------- key_1 key_2 (2 rows) -- make sure that reference tables obeys single shard transactions TRUNCATE reference_table_test; BEGIN; INSERT INTO reference_table_test VALUES (1, 1.0, '1', '2016-12-01'); SELECT * FROM reference_table_test; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (1 row) ROLLBACK; SELECT * FROM reference_table_test; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+--------- (0 rows) -- now insert a row and commit BEGIN; INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02'); COMMIT; SELECT * FROM reference_table_test; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 (1 row) -- one basic UPDATE test BEGIN; UPDATE reference_table_test SET value_1 = 10 WHERE value_1 = 2; COMMIT; SELECT * FROM 
reference_table_test;
 value_1 | value_2 | value_3 |         value_4
---------+---------+---------+--------------------------
      10 |       2 | 2       | Fri Dec 02 00:00:00 2016
(1 row)

-- DML+master_modify_multiple_shards is allowed
BEGIN;
INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02');
SELECT master_modify_multiple_shards('DELETE FROM colocated_table_test');
 master_modify_multiple_shards
-------------------------------
                            10
(1 row)

ROLLBACK;
-- DDL+DML is allowed
BEGIN;
ALTER TABLE reference_table_test ADD COLUMN value_dummy INT;
NOTICE:  using one-phase commit for distributed DDL commands
HINT:  You can enable two-phase commit for extra safety with: SET citus.multi_shard_commit_protocol TO '2pc'
INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02');
ROLLBACK;
-- clean up tables
DROP TABLE reference_table_test, reference_table_test_second, reference_table_test_third,
           reference_table_test_fourth, reference_table_ddl, reference_table_composite;
DROP SCHEMA reference_schema CASCADE;
NOTICE:  drop cascades to 2 other objects
DETAIL:  drop cascades to table reference_schema.reference_table_test_sixth
drop cascades to table reference_schema.reference_table_test_seventh