ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1250000; \c - - - :master_port CREATE TABLE reference_table_test (value_1 int, value_2 float, value_3 text, value_4 timestamp); SELECT create_reference_table('reference_table_test'); create_reference_table ------------------------ (1 row) INSERT INTO reference_table_test VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02'); INSERT INTO reference_table_test VALUES (3, 3.0, '3', '2016-12-03'); INSERT INTO reference_table_test VALUES (4, 4.0, '4', '2016-12-04'); INSERT INTO reference_table_test VALUES (5, 5.0, '5', '2016-12-05'); \c - - - :worker_1_port -- run some queries on top of the data SELECT * FROM reference_table_test; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 3 | 3 | 3 | Sat Dec 03 00:00:00 2016 4 | 4 | 4 | Sun Dec 04 00:00:00 2016 5 | 5 | 5 | Mon Dec 05 00:00:00 2016 (5 rows) SELECT * FROM reference_table_test WHERE value_1 = 1; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (1 row) SELECT value_1, value_2 FROM reference_table_test ORDER BY 2 ASC LIMIT 3; value_1 | value_2 ---------+--------- 1 | 1 2 | 2 3 | 3 (3 rows) SELECT value_1, value_3 FROM reference_table_test WHERE value_2 >= 4 ORDER BY 2 LIMIT 3; value_1 | value_3 ---------+--------- 4 | 4 5 | 5 (2 rows) SELECT value_1, 15 * value_2 FROM reference_table_test ORDER BY 2 ASC LIMIT 2; value_1 | ?column? ---------+---------- 1 | 15 2 | 30 (2 rows) SELECT value_1, 15 * value_2 FROM reference_table_test ORDER BY 2 ASC LIMIT 2 OFFSET 2; value_1 | ?column? ---------+---------- 3 | 45 4 | 60 (2 rows) SELECT value_2, value_4 FROM reference_table_test WHERE value_2 = 2 OR value_2 = 3; value_2 | value_4 ---------+-------------------------- 2 | Fri Dec 02 00:00:00 2016 3 | Sat Dec 03 00:00:00 2016 (2 rows) SELECT value_2, value_4 FROM reference_table_test WHERE value_2 = 2 AND value_2 = 3; value_2 | value_4 ---------+--------- (0 rows) SELECT value_2, value_4 FROM reference_table_test WHERE value_3 = '2' OR value_1 = 3; value_2 | value_4 ---------+-------------------------- 2 | Fri Dec 02 00:00:00 2016 3 | Sat Dec 03 00:00:00 2016 (2 rows) SELECT value_2, value_4 FROM reference_table_test WHERE ( value_3 = '2' OR value_1 = 3 ) AND FALSE; value_2 | value_4 ---------+--------- (0 rows) SELECT * FROM reference_table_test WHERE value_2 IN ( SELECT value_3::FLOAT FROM reference_table_test ) AND value_1 < 3; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 (2 rows) SELECT value_4 FROM reference_table_test WHERE value_3 IN ( '1', '2' ); value_4 -------------------------- Thu Dec 01 00:00:00 2016 Fri Dec 02 00:00:00 2016 (2 rows) SELECT date_part('day', value_4) FROM reference_table_test WHERE value_3 IN ( '5', '2' ); date_part ----------- 2 5 (2 rows) SELECT value_4 FROM reference_table_test WHERE value_2 <= 2 AND value_2 >= 4; value_4 --------- (0 rows) SELECT value_4 FROM reference_table_test WHERE value_2 <= 20 AND value_2 >= 4; value_4 -------------------------- Sun Dec 04 00:00:00 2016 Mon Dec 05 00:00:00 2016 (2 rows) SELECT value_4 FROM reference_table_test WHERE value_2 >= 5 AND value_2 <= random(); value_4 --------- (0 rows) SELECT value_1 FROM reference_table_test WHERE value_4 BETWEEN '2016-12-01' AND '2016-12-03'; value_1 --------- 1 2 3 (3 rows) SELECT value_1 FROM reference_table_test WHERE FALSE; value_1 --------- (0 rows) SELECT value_1 FROM reference_table_test WHERE int4eq(1, 2); value_1 --------- (0 rows) -- rename output name and do some operations SELECT value_1 as id, value_2 * 15 as age FROM reference_table_test; id | age ----+----- 1 | 15 2 | 30 3 | 45 4 | 60 5 | 75 (5 rows) -- queries with CTEs are supported WITH some_data AS ( SELECT value_2, value_4 FROM reference_table_test WHERE value_2 >=3) SELECT * FROM some_data; value_2 | value_4 ---------+-------------------------- 3 | Sat Dec 03 00:00:00 2016 4 | Sun Dec 04 00:00:00 2016 5 | Mon Dec 05 00:00:00 2016 (3 rows) -- queries with CTEs are supported even if CTE is not referenced inside query WITH some_data AS ( SELECT value_2, value_4 FROM reference_table_test WHERE value_2 >=3) SELECT * FROM reference_table_test ORDER BY 1 LIMIT 1; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (1 row) -- queries which involve functions in FROM clause are supported if it goes to a single worker. SELECT * FROM reference_table_test, position('om' in 'Thomas') WHERE value_1 = 1; value_1 | value_2 | value_3 | value_4 | position ---------+---------+---------+--------------------------+---------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 | 3 (1 row) SELECT * FROM reference_table_test, position('om' in 'Thomas') WHERE value_1 = 1 OR value_1 = 2; value_1 | value_2 | value_3 | value_4 | position ---------+---------+---------+--------------------------+---------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 | 3 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 | 3 (2 rows) -- set operations are supported SELECT * FROM ( SELECT * FROM reference_table_test WHERE value_1 = 1 UNION SELECT * FROM reference_table_test WHERE value_1 = 3 ) AS combination ORDER BY value_1; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 3 | 3 | 3 | Sat Dec 03 00:00:00 2016 (2 rows) SELECT * FROM ( SELECT * FROM reference_table_test WHERE value_1 = 1 EXCEPT SELECT * FROM reference_table_test WHERE value_1 = 3 ) AS combination ORDER BY value_1; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (1 row) SELECT * FROM ( SELECT * FROM reference_table_test WHERE value_1 = 1 INTERSECT SELECT * FROM reference_table_test WHERE value_1 = 3 ) AS combination ORDER BY value_1; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+--------- (0 rows) -- to make the tests more interested for aggregation tests, ingest some more data \c - - - :master_port INSERT INTO reference_table_test VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02'); INSERT INTO reference_table_test VALUES (3, 3.0, '3', '2016-12-03'); \c - - - :worker_1_port -- some aggregations SELECT value_4, SUM(value_2) FROM reference_table_test GROUP BY value_4 HAVING SUM(value_2) > 3 ORDER BY 1; value_4 | sum --------------------------+----- Fri Dec 02 00:00:00 2016 | 4 Sat Dec 03 00:00:00 2016 | 6 Sun Dec 04 00:00:00 2016 | 4 Mon Dec 05 00:00:00 2016 | 5 (4 rows) SELECT value_4, value_3, SUM(value_2) FROM reference_table_test GROUP BY GROUPING sets ((value_4), (value_3)) ORDER BY 1, 2, 3; value_4 | value_3 | sum --------------------------+---------+----- Thu Dec 01 00:00:00 2016 | | 2 Fri Dec 02 00:00:00 2016 | | 4 Sat Dec 03 00:00:00 2016 | | 6 Sun Dec 04 00:00:00 2016 | | 4 Mon Dec 05 00:00:00 2016 | | 5 | 1 | 2 | 2 | 4 | 3 | 6 | 4 | 4 | 5 | 5 (10 rows) -- distinct clauses also work fine SELECT DISTINCT value_4 FROM reference_table_test ORDER BY 1; value_4 -------------------------- Thu Dec 01 00:00:00 2016 Fri Dec 02 00:00:00 2016 Sat Dec 03 00:00:00 2016 Sun Dec 04 00:00:00 2016 Mon Dec 05 00:00:00 2016 (5 rows) -- window functions are also supported SELECT value_4, RANK() OVER (PARTITION BY value_1 ORDER BY value_4) FROM reference_table_test; value_4 | rank --------------------------+------ Thu Dec 01 00:00:00 2016 | 1 Thu Dec 01 00:00:00 2016 | 1 Fri Dec 02 00:00:00 2016 | 1 Fri Dec 02 00:00:00 2016 | 1 Sat Dec 03 00:00:00 2016 | 1 Sat Dec 03 00:00:00 2016 | 1 Sun Dec 04 00:00:00 2016 | 1 Mon Dec 05 00:00:00 2016 | 1 (8 rows) -- window functions are also supported SELECT value_4, AVG(value_1) OVER (PARTITION BY value_4 ORDER BY value_4) FROM reference_table_test; value_4 | avg --------------------------+------------------------ Thu Dec 01 00:00:00 2016 | 1.00000000000000000000 Thu Dec 01 00:00:00 2016 | 1.00000000000000000000 Fri Dec 02 00:00:00 2016 | 2.0000000000000000 Fri Dec 02 00:00:00 2016 | 2.0000000000000000 Sat Dec 03 00:00:00 2016 | 3.0000000000000000 Sat Dec 03 00:00:00 2016 | 3.0000000000000000 Sun Dec 04 00:00:00 2016 | 4.0000000000000000 Mon Dec 05 00:00:00 2016 | 5.0000000000000000 (8 rows) SELECT count(DISTINCT CASE WHEN value_2 >= 3 THEN value_2 ELSE NULL END) as c FROM reference_table_test; c --- 3 (1 row) SELECT value_1, count(DISTINCT CASE WHEN value_2 >= 3 THEN value_2 ELSE NULL END) as c FROM reference_table_test GROUP BY value_1 ORDER BY 1; value_1 | c ---------+--- 1 | 0 2 | 0 3 | 1 4 | 1 5 | 1 (5 rows) -- selects inside a transaction works fine as well BEGIN; SELECT * FROM reference_table_test; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 3 | 3 | 3 | Sat Dec 03 00:00:00 2016 4 | 4 | 4 | Sun Dec 04 00:00:00 2016 5 | 5 | 5 | Mon Dec 05 00:00:00 2016 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 3 | 3 | 3 | Sat Dec 03 00:00:00 2016 (8 rows) SELECT * FROM reference_table_test WHERE value_1 = 1; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (2 rows) END; -- cursor queries also works fine BEGIN; DECLARE test_cursor CURSOR FOR SELECT * FROM reference_table_test WHERE value_1 = 1 OR value_1 = 2 ORDER BY value_1; FETCH test_cursor; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 (1 row) FETCH ALL test_cursor; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 1 | 1 | 1 | Thu Dec 01 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 (3 rows) FETCH test_cursor; -- fetch one row after the last value_1 | value_2 | value_3 | value_4 ---------+---------+---------+--------- (0 rows) FETCH BACKWARD test_cursor; value_1 | value_2 | value_3 | value_4 ---------+---------+---------+-------------------------- 2 | 2 | 2 | Fri Dec 02 00:00:00 2016 (1 row) END; -- table creation queries inside can be router plannable CREATE TEMP TABLE temp_reference_test as SELECT * FROM reference_table_test WHERE value_1 = 1; \c - - - :master_port -- all kinds of joins are supported among reference tables -- first create two more tables CREATE TABLE reference_table_test_second (value_1 int, value_2 float, value_3 text, value_4 timestamp); SELECT create_reference_table('reference_table_test_second'); create_reference_table ------------------------ (1 row) CREATE TABLE reference_table_test_third (value_1 int, value_2 float, value_3 text, value_4 timestamp); SELECT create_reference_table('reference_table_test_third'); create_reference_table ------------------------ (1 row) -- ingest some data to both tables INSERT INTO reference_table_test_second VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO reference_table_test_second VALUES (2, 2.0, '2', '2016-12-02'); INSERT INTO reference_table_test_second VALUES (3, 3.0, '3', '2016-12-03'); INSERT INTO reference_table_test_third VALUES (4, 4.0, '4', '2016-12-04'); INSERT INTO reference_table_test_third VALUES (5, 5.0, '5', '2016-12-05'); \c - - - :worker_2_port -- some very basic tests SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2 WHERE t1.value_2 = t2.value_2 ORDER BY 1; value_1 --------- 1 2 3 (3 rows) SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_third t3 WHERE t1.value_2 = t3.value_2 ORDER BY 1; value_1 --------- 4 5 (2 rows) SELECT DISTINCT t2.value_1 FROM reference_table_test_second t2, reference_table_test_third t3 WHERE t2.value_2 = t3.value_2 ORDER BY 1; value_1 --------- (0 rows) -- join on different columns and different data types via casts SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2 WHERE t1.value_2 = t2.value_1 ORDER BY 1; value_1 --------- 1 2 3 (3 rows) SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2 WHERE t1.value_2 = t2.value_3::int ORDER BY 1; value_1 --------- 1 2 3 (3 rows) SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2 WHERE t1.value_2 = date_part('day', t2.value_4) ORDER BY 1; value_1 --------- 1 2 3 (3 rows) -- ingest a common row to see more meaningful results with joins involving 3 tables \c - - - :master_port INSERT INTO reference_table_test_third VALUES (3, 3.0, '3', '2016-12-03'); \c - - - :worker_1_port SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2, reference_table_test_third t3 WHERE t1.value_2 = date_part('day', t2.value_4) AND t3.value_2 = t1.value_2 ORDER BY 1; value_1 --------- 3 (1 row) -- same query on different columns SELECT DISTINCT t1.value_1 FROM reference_table_test t1, reference_table_test_second t2, reference_table_test_third t3 WHERE t1.value_1 = date_part('day', t2.value_4) AND t3.value_2 = t1.value_1 ORDER BY 1; value_1 --------- 3 (1 row) -- with the JOIN syntax SELECT DISTINCT t1.value_1 FROM reference_table_test t1 JOIN reference_table_test_second t2 USING (value_1) JOIN reference_table_test_third t3 USING (value_1) ORDER BY 1; value_1 --------- 3 (1 row) -- and left/right joins SELECT DISTINCT t1.value_1 FROM reference_table_test t1 LEFT JOIN reference_table_test_second t2 USING (value_1) LEFT JOIN reference_table_test_third t3 USING (value_1) ORDER BY 1; value_1 --------- 1 2 3 4 5 (5 rows) SELECT DISTINCT t1.value_1 FROM reference_table_test t1 RIGHT JOIN reference_table_test_second t2 USING (value_1) RIGHT JOIN reference_table_test_third t3 USING (value_1) ORDER BY 1; value_1 --------- 3 (2 rows) \c - - - :master_port SET citus.shard_count TO 6; SET citus.shard_replication_factor TO 1; SET citus.replication_model TO streaming; CREATE TABLE colocated_table_test (value_1 int, value_2 float, value_3 text, value_4 timestamp); SELECT create_distributed_table('colocated_table_test', 'value_1'); create_distributed_table -------------------------- (1 row) CREATE TABLE colocated_table_test_2 (value_1 int, value_2 float, value_3 text, value_4 timestamp); SELECT create_distributed_table('colocated_table_test_2', 'value_1'); create_distributed_table -------------------------- (1 row) DELETE FROM reference_table_test; INSERT INTO reference_table_test VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO reference_table_test VALUES (2, 2.0, '2', '2016-12-02'); INSERT INTO colocated_table_test VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO colocated_table_test VALUES (2, 2.0, '2', '2016-12-02'); INSERT INTO colocated_table_test_2 VALUES (1, 1.0, '1', '2016-12-01'); INSERT INTO colocated_table_test_2 VALUES (2, 2.0, '2', '2016-12-02'); \c - - - :worker_1_port SET client_min_messages TO DEBUG1; SET citus.log_multi_join_order TO TRUE; SELECT reference_table_test.value_1 FROM reference_table_test, colocated_table_test WHERE colocated_table_test.value_1 = reference_table_test.value_1; LOG: join order: [ "colocated_table_test" ][ broadcast join "reference_table_test" ] value_1 --------- 1 2 (2 rows) SELECT colocated_table_test.value_2 FROM reference_table_test, colocated_table_test WHERE colocated_table_test.value_2 = reference_table_test.value_2; LOG: join order: [ "colocated_table_test" ][ broadcast join "reference_table_test" ] value_2 --------- 1 2 (2 rows) SELECT colocated_table_test.value_2 FROM colocated_table_test, reference_table_test WHERE reference_table_test.value_1 = colocated_table_test.value_1; LOG: join order: [ "colocated_table_test" ][ broadcast join "reference_table_test" ] value_2 --------- 1 2 (2 rows) SELECT colocated_table_test.value_2 FROM reference_table_test, colocated_table_test, colocated_table_test_2 WHERE colocated_table_test.value_2 = reference_table_test.value_2; LOG: join order: [ "colocated_table_test" ][ broadcast join "reference_table_test" ][ cartesian product "colocated_table_test_2" ] ERROR: cannot perform distributed planning on this query DETAIL: Cartesian products are currently unsupported SELECT colocated_table_test.value_2 FROM reference_table_test, colocated_table_test, colocated_table_test_2 WHERE colocated_table_test.value_1 = colocated_table_test_2.value_1 AND colocated_table_test.value_2 = reference_table_test.value_2; LOG: join order: [ "colocated_table_test" ][ broadcast join "reference_table_test" ][ local partition join "colocated_table_test_2" ] value_2 --------- 1 2 (2 rows) SET citus.task_executor_type to "task-tracker"; SELECT colocated_table_test.value_2 FROM reference_table_test, colocated_table_test, colocated_table_test_2 WHERE colocated_table_test.value_2 = colocated_table_test_2.value_2 AND colocated_table_test.value_2 = reference_table_test.value_2; LOG: join order: [ "colocated_table_test" ][ broadcast join "reference_table_test" ][ dual partition join "colocated_table_test_2" ] value_2 --------- 1 2 (2 rows) SELECT reference_table_test.value_2 FROM reference_table_test, colocated_table_test, colocated_table_test_2 WHERE colocated_table_test.value_1 = reference_table_test.value_1 AND colocated_table_test_2.value_1 = reference_table_test.value_1; LOG: join order: [ "colocated_table_test" ][ broadcast join "reference_table_test" ][ dual partition join "colocated_table_test_2" ] value_2 --------- 1 2 (2 rows) SET client_min_messages TO NOTICE; SET citus.log_multi_join_order TO FALSE; -- clean up tables \c - - - :master_port DROP TABLE reference_table_test, reference_table_test_second, reference_table_test_third;;