-- -- MULTI_VIEW -- -- This file contains test cases for view support. It verifies various -- Citus features: simple selects, aggregates, joins, outer joins -- router queries, single row inserts, multi row inserts via insert -- into select, multi row insert via copy commands. -- print whether we're using version > 10 to make version-specific tests clear SHOW server_version \gset SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; version_above_ten ------------------- f (1 row) SELECT count(*) FROM lineitem_hash_part; count ------- 12000 (1 row) SELECT count(*) FROM orders_hash_part; count ------- 2985 (1 row) -- create a view for priority orders CREATE VIEW priority_orders AS SELECT * FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM'; -- aggregate pushdown SELECT o_orderpriority, count(*) FROM priority_orders GROUP BY 1 ORDER BY 2, 1; o_orderpriority | count -----------------+------- 2-HIGH | 593 1-URGENT | 604 (2 rows) SELECT o_orderpriority, count(*) FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM' GROUP BY 1 ORDER BY 2,1; o_orderpriority | count -----------------+------- 2-HIGH | 593 1-URGENT | 604 (2 rows) -- filters SELECT o_orderpriority, count(*) as all, count(*) FILTER (WHERE o_orderstatus ='F') as fullfilled FROM priority_orders GROUP BY 1 ORDER BY 2, 1; o_orderpriority | all | fullfilled -----------------+-----+------------ 2-HIGH | 593 | 271 1-URGENT | 604 | 280 (2 rows) -- having SELECT o_orderdate, count(*) from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc; o_orderdate | count -------------+------- 08-20-1996 | 5 10-10-1994 | 4 05-05-1994 | 4 04-07-1994 | 4 03-17-1993 | 4 (5 rows) -- having with filters SELECT o_orderdate, count(*) as all, count(*) FILTER(WHERE o_orderstatus = 'F') from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc; o_orderdate | all | count -------------+-----+------- 08-20-1996 | 5 | 0 10-10-1994 | 4 | 4 05-05-1994 | 4 | 4 04-07-1994 | 4 | 4 03-17-1993 | 4 | 4 (5 rows) -- limit SELECT o_orderkey, o_totalprice from orders_hash_part order by 2 desc, 1 asc limit 5 ; o_orderkey | o_totalprice ------------+-------------- 4421 | 401055.62 10209 | 400191.77 11142 | 395039.05 14179 | 384265.43 11296 | 378166.33 (5 rows) SELECT o_orderkey, o_totalprice from priority_orders order by 2 desc, 1 asc limit 1 ; o_orderkey | o_totalprice ------------+-------------- 14179 | 384265.43 (1 row) CREATE VIEW priority_lineitem AS SELECT li.* FROM lineitem_hash_part li JOIN priority_orders ON (l_orderkey = o_orderkey); SELECT l_orderkey, count(*) FROM priority_lineitem GROUP BY 1 ORDER BY 2 DESC, 1 LIMIT 5; l_orderkey | count ------------+------- 7 | 7 225 | 7 226 | 7 322 | 7 326 | 7 (5 rows) CREATE VIEW air_shipped_lineitems AS SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR'; -- join between view and table SELECT count(*) FROM orders_hash_part join air_shipped_lineitems ON (o_orderkey = l_orderkey); count ------- 1706 (1 row) -- join between views SELECT count(*) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey); count ------- 700 (1 row) -- count distinct on partition column is supported SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey); count ------- 551 (1 row) -- count distinct on non-partition column is supported SELECT count(distinct o_orderpriority) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey); count ------- 2 (1 row) -- count distinct on partition column is supported on router queries SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey) WHERE (o_orderkey = 231); count ------- 1 (1 row) -- select distinct on router joins of views also works SELECT distinct(o_orderkey) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey) WHERE (o_orderkey = 231); o_orderkey ------------ 231 (1 row) -- left join support depends on flattening of the query -- following query fails since the inner part is kept as subquery SELECT * FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey); ERROR: cannot perform distributed planning on this query DETAIL: Subqueries in outer joins are not supported -- however, this works SELECT count(*) FROM priority_orders left join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR'; count ------- 700 (1 row) -- view at the inner side of is not supported SELECT count(*) FROM priority_orders right join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR'; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries in outer joins are not supported -- but view at the outer side is. This is essentially the same as a left join with arguments reversed. SELECT count(*) FROM lineitem_hash_part right join priority_orders ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR'; count ------- 700 (1 row) -- left join on router query is supported SELECT o_orderkey, l_linenumber FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey) WHERE o_orderkey = 2; o_orderkey | l_linenumber ------------+-------------- 2 | (1 row) -- repartition query on view join -- it passes planning, fails at execution stage SET client_min_messages TO DEBUG1; SELECT * FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey) ORDER BY o_orderkey DESC, o_custkey DESC, o_orderpriority DESC LIMIT 5; DEBUG: generating subplan 22_1 for subquery SELECT lineitem_hash_part.l_orderkey, lineitem_hash_part.l_partkey, lineitem_hash_part.l_suppkey, lineitem_hash_part.l_linenumber, lineitem_hash_part.l_quantity, lineitem_hash_part.l_extendedprice, lineitem_hash_part.l_discount, lineitem_hash_part.l_tax, lineitem_hash_part.l_returnflag, lineitem_hash_part.l_linestatus, lineitem_hash_part.l_shipdate, lineitem_hash_part.l_commitdate, lineitem_hash_part.l_receiptdate, lineitem_hash_part.l_shipinstruct, lineitem_hash_part.l_shipmode, lineitem_hash_part.l_comment FROM public.lineitem_hash_part WHERE (lineitem_hash_part.l_shipmode = 'AIR'::bpchar) DEBUG: Plan 22 query after replacing subqueries and CTEs: SELECT priority_orders.o_orderkey, priority_orders.o_custkey, priority_orders.o_orderstatus, priority_orders.o_totalprice, priority_orders.o_orderdate, priority_orders.o_orderpriority, priority_orders.o_clerk, priority_orders.o_shippriority, priority_orders.o_comment, air_shipped_lineitems.l_orderkey, air_shipped_lineitems.l_partkey, air_shipped_lineitems.l_suppkey, air_shipped_lineitems.l_linenumber, air_shipped_lineitems.l_quantity, air_shipped_lineitems.l_extendedprice, air_shipped_lineitems.l_discount, air_shipped_lineitems.l_tax, air_shipped_lineitems.l_returnflag, air_shipped_lineitems.l_linestatus, air_shipped_lineitems.l_shipdate, air_shipped_lineitems.l_commitdate, air_shipped_lineitems.l_receiptdate, air_shipped_lineitems.l_shipinstruct, air_shipped_lineitems.l_shipmode, air_shipped_lineitems.l_comment FROM ((SELECT orders_hash_part.o_orderkey, orders_hash_part.o_custkey, orders_hash_part.o_orderstatus, orders_hash_part.o_totalprice, orders_hash_part.o_orderdate, orders_hash_part.o_orderpriority, orders_hash_part.o_clerk, orders_hash_part.o_shippriority, orders_hash_part.o_comment FROM public.orders_hash_part WHERE (orders_hash_part.o_orderpriority < '3-MEDIUM'::bpchar)) priority_orders JOIN (SELECT intermediate_result.l_orderkey, intermediate_result.l_partkey, intermediate_result.l_suppkey, intermediate_result.l_linenumber, intermediate_result.l_quantity, intermediate_result.l_extendedprice, intermediate_result.l_discount, intermediate_result.l_tax, intermediate_result.l_returnflag, intermediate_result.l_linestatus, intermediate_result.l_shipdate, intermediate_result.l_commitdate, intermediate_result.l_receiptdate, intermediate_result.l_shipinstruct, intermediate_result.l_shipmode, intermediate_result.l_comment FROM read_intermediate_result('22_1'::text, 'binary'::citus_copy_format) intermediate_result(l_orderkey bigint, l_partkey integer, l_suppkey integer, l_linenumber integer, l_quantity numeric(15,2), l_extendedprice numeric(15,2), l_discount numeric(15,2), l_tax numeric(15,2), l_returnflag character(1), l_linestatus character(1), l_shipdate date, l_commitdate date, l_receiptdate date, l_shipinstruct character(25), l_shipmode character(10), l_comment character varying(44))) air_shipped_lineitems ON ((priority_orders.o_custkey = air_shipped_lineitems.l_suppkey))) ORDER BY priority_orders.o_orderkey DESC, priority_orders.o_custkey DESC, priority_orders.o_orderpriority DESC LIMIT 5 DEBUG: push down of limit count: 5 o_orderkey | o_custkey | o_orderstatus | o_totalprice | o_orderdate | o_orderpriority | o_clerk | o_shippriority | o_comment | l_orderkey | l_partkey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate | l_shipinstruct | l_shipmode | l_comment ------------+-----------+---------------+--------------+-------------+-----------------+-----------------+----------------+-------------------------------------------------------+------------+-----------+-----------+--------------+------------+-----------------+------------+-------+--------------+--------------+------------+--------------+---------------+---------------------------+------------+------------------------------------------- 14821 | 1435 | O | 322002.95 | 06-12-1998 | 2-HIGH | Clerk#000000630 | 0 | n packages are furiously ironic ideas. d | 1607 | 118923 | 1435 | 2 | 37.00 | 71851.04 | 0.05 | 0.02 | N | O | 02-27-1996 | 02-18-1996 | 03-16-1996 | NONE | AIR | alongside 14790 | 613 | O | 270163.54 | 08-21-1996 | 2-HIGH | Clerk#000000347 | 0 | p. regular deposits wake. final n | 2629 | 123076 | 613 | 2 | 31.00 | 34071.17 | 0.08 | 0.03 | N | O | 05-24-1998 | 05-26-1998 | 06-10-1998 | COLLECT COD | AIR | ate blithely bold, regular deposits. bold 14758 | 1225 | F | 37812.49 | 10-27-1993 | 2-HIGH | Clerk#000000687 | 0 | ages nag about the furio | 9156 | 176190 | 1225 | 2 | 22.00 | 27856.18 | 0.03 | 0.00 | R | F | 02-08-1994 | 04-01-1994 | 02-24-1994 | DELIVER IN PERSON | AIR | equests dete 14725 | 569 | O | 261801.45 | 06-17-1995 | 2-HIGH | Clerk#000000177 | 0 | ng asymptotes. final, ironic accounts cajole after | 14688 | 173017 | 569 | 3 | 10.00 | 10900.10 | 0.02 | 0.08 | N | O | 03-14-1997 | 04-22-1997 | 04-05-1997 | COLLECT COD | AIR | riously even packages sleep a 14657 | 370 | F | 116160.53 | 02-28-1994 | 1-URGENT | Clerk#000000756 | 0 | ly across the ironic, ironic instructions. bold ideas | 5153 | 67863 | 370 | 3 | 30.00 | 54925.80 | 0.09 | 0.01 | N | O | 11-10-1995 | 11-14-1995 | 11-16-1995 | DELIVER IN PERSON | AIR | beans sleep bl (5 rows) RESET client_min_messages; SELECT count(*) FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey); count ------- 192 (1 row) -- materialized views work -- insert into... select works with views CREATE TABLE temp_lineitem(LIKE lineitem_hash_part); SELECT create_distributed_table('temp_lineitem', 'l_orderkey', 'hash', 'lineitem_hash_part'); create_distributed_table -------------------------- (1 row) INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems; SELECT count(*) FROM temp_lineitem; count ------- 1706 (1 row) -- following is a where false query, should not be inserting anything INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems WHERE l_shipmode = 'MAIL'; SELECT count(*) FROM temp_lineitem; count ------- 1706 (1 row) -- can create and query materialized views CREATE MATERIALIZED VIEW mode_counts AS SELECT l_shipmode, count(*) FROM temp_lineitem GROUP BY l_shipmode; SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10; l_shipmode | count ------------+------- AIR | 1706 (1 row) -- materialized views are local, cannot join with distributed tables SELECT count(*) FROM mode_counts JOIN temp_lineitem USING (l_shipmode); ERROR: relation mode_counts is not distributed -- new data is not immediately reflected in the view INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems; SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10; l_shipmode | count ------------+------- AIR | 1706 (1 row) -- refresh updates the materialised view with new data REFRESH MATERIALIZED VIEW mode_counts; SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10; l_shipmode | count ------------+------- AIR | 3412 (1 row) DROP MATERIALIZED VIEW mode_counts; SET citus.task_executor_type to "task-tracker"; -- single view repartition subqueries are not supported SELECT l_suppkey, count(*) FROM (SELECT l_suppkey, l_shipdate, count(*) FROM air_shipped_lineitems GROUP BY l_suppkey, l_shipdate) supps GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries without group by clause are not supported yet -- logically same query without a view works fine SELECT l_suppkey, count(*) FROM (SELECT l_suppkey, l_shipdate, count(*) FROM lineitem_hash_part WHERE l_shipmode = 'AIR' GROUP BY l_suppkey, l_shipdate) supps GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5; l_suppkey | count -----------+------- 7680 | 4 160 | 3 1042 | 3 1318 | 3 5873 | 3 (5 rows) -- when a view is replaced by actual query it still fails SELECT l_suppkey, count(*) FROM (SELECT l_suppkey, l_shipdate, count(*) FROM (SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR') asi GROUP BY l_suppkey, l_shipdate) supps GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries without group by clause are not supported yet -- repartition query on view with single table subquery CREATE VIEW supp_count_view AS SELECT * FROM (SELECT l_suppkey, count(*) FROM lineitem_hash_part GROUP BY 1) s1; SELECT * FROM supp_count_view ORDER BY 2 DESC, 1 LIMIT 10; l_suppkey | count -----------+------- 6104 | 8 1868 | 6 5532 | 6 5849 | 6 6169 | 6 6669 | 6 6692 | 6 7703 | 6 7869 | 6 8426 | 6 (10 rows) SET citus.task_executor_type to DEFAULT; -- create a view with aggregate CREATE VIEW lineitems_by_shipping_method AS SELECT l_shipmode, count(*) as cnt FROM lineitem_hash_part GROUP BY 1; -- following will be supported via recursive planning SELECT * FROM lineitems_by_shipping_method ORDER BY 1,2 LIMIT 5; l_shipmode | cnt ------------+------ AIR | 1706 FOB | 1709 MAIL | 1739 RAIL | 1706 REG AIR | 1679 (5 rows) -- create a view with group by on partition column CREATE VIEW lineitems_by_orderkey AS SELECT l_orderkey, count(*) FROM lineitem_hash_part GROUP BY 1; -- this should work since we're able to push down this query SELECT * FROM lineitems_by_orderkey ORDER BY 2 DESC, 1 ASC LIMIT 10; l_orderkey | count ------------+------- 7 | 7 68 | 7 129 | 7 164 | 7 194 | 7 225 | 7 226 | 7 322 | 7 326 | 7 354 | 7 (10 rows) -- it would also work since it is made router plannable SELECT * FROM lineitems_by_orderkey WHERE l_orderkey = 100; l_orderkey | count ------------+------- 100 | 5 (1 row) DROP TABLE temp_lineitem CASCADE; DROP VIEW supp_count_view; DROP VIEW lineitems_by_orderkey; DROP VIEW lineitems_by_shipping_method; DROP VIEW air_shipped_lineitems; DROP VIEW priority_lineitem; DROP VIEW priority_orders; -- new tests for real time use case including views and subqueries -- create view to display recent user who has an activity after a timestamp CREATE VIEW recent_users AS SELECT user_id, max(time) as lastseen FROM users_table GROUP BY user_id HAVING max(time) > '2017-11-23 16:20:33.264457'::timestamp order by 2 DESC; SELECT * FROM recent_users; user_id | lastseen ---------+--------------------------------- 1 | Thu Nov 23 17:30:34.635085 2017 5 | Thu Nov 23 16:48:32.08896 2017 3 | Thu Nov 23 17:18:51.048758 2017 (3 rows) -- create a view for recent_events CREATE VIEW recent_events AS SELECT user_id, time FROM events_table WHERE time > '2017-11-23 16:20:33.264457'::timestamp; SELECT count(*) FROM recent_events; count ------- 6 (1 row) -- count number of events of recent_users SELECT count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id); count ------- 50 (1 row) -- count number of events of per recent users order by count SELECT ru.user_id, count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id) GROUP BY ru.user_id ORDER BY 2 DESC, 1; user_id | count ---------+------- 3 | 21 1 | 15 5 | 14 (3 rows) -- the same query with a left join however, it would still generate the same result SELECT ru.user_id, count(*) FROM recent_users ru LEFT JOIN events_table et ON (ru.user_id = et.user_id) GROUP BY ru.user_id ORDER BY 2 DESC, 1; user_id | count ---------+------- 3 | 21 1 | 15 5 | 14 (3 rows) -- query wrapped inside a subquery, it needs another top level order by SELECT * FROM (SELECT ru.user_id, count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id) GROUP BY ru.user_id ORDER BY 2 DESC, 1) s1 ORDER BY 2 DESC, 1; user_id | count ---------+------- 3 | 21 1 | 15 5 | 14 (3 rows) -- non-partition key joins are not supported inside subquery -- since the join with a table SELECT * FROM (SELECT ru.user_id, count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.event_type) GROUP BY ru.user_id ORDER BY 2 DESC, 1) s1 ORDER BY 2 DESC, 1; ERROR: bogus varno: 3 -- join between views -- recent users who has an event in recent events SELECT ru.user_id FROM recent_users ru JOIN recent_events re USING(user_id) GROUP BY ru.user_id ORDER BY ru.user_id; user_id --------- 1 3 (2 rows) -- outer join inside a subquery -- recent_events who are not done by recent users SELECT count(*) FROM ( SELECT re.*, ru.user_id AS recent_user FROM recent_events re LEFT JOIN recent_users ru USING(user_id)) reu WHERE recent_user IS NULL; count ------- 2 (1 row) -- same query with anti-join SELECT count(*) FROM recent_events re LEFT JOIN recent_users ru ON(ru.user_id = re.user_id) WHERE ru.user_id IS NULL; count ------- 2 (1 row) -- join between view and table -- users who has recent activity and they have an entry with value_1 is less than 3 SELECT ut.* FROM recent_users ru JOIN users_table ut USING (user_id) WHERE ut.value_1 < 3 ORDER BY 1,2; user_id | time | value_1 | value_2 | value_3 | value_4 ---------+---------------------------------+---------+---------+---------+--------- 1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 | 3 | Wed Nov 22 18:43:51.450263 2017 | 1 | 1 | 4 | 3 | Wed Nov 22 20:43:31.008625 2017 | 1 | 3 | 2 | 3 | Thu Nov 23 00:15:45.610845 2017 | 1 | 1 | 4 | 3 | Thu Nov 23 03:23:24.702501 2017 | 1 | 2 | 5 | 3 | Thu Nov 23 06:20:05.854857 2017 | 1 | 4 | 2 | 3 | Thu Nov 23 09:57:41.540228 2017 | 2 | 2 | 3 | 3 | Thu Nov 23 11:18:53.114408 2017 | 2 | 2 | 0 | 3 | Thu Nov 23 12:56:49.29191 2017 | 0 | 5 | 1 | 3 | Thu Nov 23 17:18:51.048758 2017 | 1 | 5 | 5 | 5 | Wed Nov 22 20:43:18.667473 2017 | 0 | 3 | 2 | 5 | Wed Nov 22 21:02:07.575129 2017 | 2 | 0 | 2 | 5 | Wed Nov 22 22:10:24.315371 2017 | 1 | 2 | 1 | 5 | Thu Nov 23 00:54:44.192608 2017 | 1 | 3 | 2 | 5 | Thu Nov 23 07:47:09.542999 2017 | 1 | 4 | 3 | 5 | Thu Nov 23 09:05:08.53142 2017 | 2 | 2 | 2 | 5 | Thu Nov 23 09:17:47.706703 2017 | 2 | 5 | 3 | 5 | Thu Nov 23 10:15:31.764558 2017 | 2 | 2 | 2 | 5 | Thu Nov 23 14:29:02.557934 2017 | 2 | 1 | 2 | 5 | Thu Nov 23 15:55:08.493462 2017 | 0 | 3 | 3 | 5 | Thu Nov 23 16:28:38.455322 2017 | 2 | 5 | 4 | (21 rows) -- determine if a recent user has done a given event type or not SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event FROM recent_users ru LEFT JOIN events_table et ON(ru.user_id = et.user_id AND et.event_type = 6) ORDER BY 2 DESC, 1; user_id | done_event ---------+------------ 1 | YES 3 | NO 5 | NO (3 rows) -- view vs table join wrapped inside a subquery SELECT * FROM (SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event FROM recent_users ru LEFT JOIN events_table et ON(ru.user_id = et.user_id AND et.event_type = 6) ) s1 ORDER BY 2 DESC, 1; user_id | done_event ---------+------------ 1 | YES 3 | NO 5 | NO (3 rows) -- event vs table non-partition-key join is not supported -- given that we cannot recursively plan tables yet SELECT * FROM (SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event FROM recent_users ru LEFT JOIN events_table et ON(ru.user_id = et.event_type) ) s1 ORDER BY 2 DESC, 1; ERROR: bogus varno: 3 -- create a select only view CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 1 and value_1 <3; CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id); SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1; user_id --------- 1 3 5 (3 rows) -- this would be supported when we implement where partition_key in (subquery) support SELECT et.user_id, et.time FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users) GROUP BY 1,2 ORDER BY 1 DESC,2 DESC LIMIT 5; user_id | time ---------+--------------------------------- 5 | Thu Nov 23 16:11:02.929469 2017 5 | Thu Nov 23 14:40:40.467511 2017 5 | Thu Nov 23 14:28:51.833214 2017 5 | Thu Nov 23 14:23:09.889786 2017 5 | Thu Nov 23 13:26:45.571108 2017 (5 rows) -- it is supported when it is a router query SELECT count(*) FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users WHERE user_id = 1); count ------- 15 (1 row) -- union between views is supported through recursive planning (SELECT user_id FROM recent_users) UNION (SELECT user_id FROM selected_users) ORDER BY 1; user_id --------- 1 2 3 4 5 6 (6 rows) -- wrapping it inside a SELECT * works SELECT * FROM ( (SELECT user_id FROM recent_users) UNION (SELECT user_id FROM selected_users) ) u WHERE user_id < 2 AND user_id > 0 ORDER BY user_id; user_id --------- 1 (1 row) -- union all also works for views SELECT * FROM ( (SELECT user_id FROM recent_users) UNION ALL (SELECT user_id FROM selected_users) ) u WHERE user_id < 2 AND user_id > 0 ORDER BY user_id; user_id --------- 1 1 (2 rows) SELECT count(*) FROM ( (SELECT user_id FROM recent_users) UNION (SELECT user_id FROM selected_users) ) u WHERE user_id < 2 AND user_id > 0; count ------- 1 (1 row) -- UNION ALL between views is supported through recursive planning SELECT count(*) FROM ( (SELECT user_id FROM recent_users) UNION ALL (SELECT user_id FROM selected_users) ) u WHERE user_id < 2 AND user_id > 0; count ------- 2 (1 row) -- expand view definitions and re-run last 2 queries SELECT count(*) FROM ( (SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table GROUP BY user_id HAVING max(time) > '2017-11-22 05:45:49.978738'::timestamp order by 2 DESC) aa ) UNION (SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 1 and value_1 < 3) bb) ) u WHERE user_id < 2 AND user_id > 0; count ------- 1 (1 row) SELECT count(*) FROM ( (SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table GROUP BY user_id HAVING max(time) > '2017-11-22 05:45:49.978738'::timestamp order by 2 DESC) aa ) UNION ALL (SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 1 and value_1 < 3) bb) ) u WHERE user_id < 2 AND user_id > 0; count ------- 2 (1 row) -- test distinct -- distinct is supported if it is on a partition key CREATE VIEW distinct_user_with_value_1_3 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 3; SELECT * FROM distinct_user_with_value_1_3 ORDER BY user_id; user_id --------- 1 2 3 4 5 6 (6 rows) -- distinct is not supported if it is on a non-partition key -- but will be supported via recursive planning CREATE VIEW distinct_value_1 AS SELECT DISTINCT value_1 FROM users_table WHERE value_2 = 3; SELECT * FROM distinct_value_1 ORDER BY 1 DESC LIMIT 5; value_1 --------- 5 4 3 2 1 (5 rows) -- CTEs are supported even if they are on views CREATE VIEW cte_view_1 AS WITH c1 AS (SELECT * FROM users_table WHERE value_1 = 3) SELECT * FROM c1 WHERE value_2 < 4; SELECT * FROM cte_view_1 ORDER BY 1,2,3,4,5 LIMIT 5; user_id | time | value_1 | value_2 | value_3 | value_4 ---------+---------------------------------+---------+---------+---------+--------- 1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 | 2 | Thu Nov 23 13:52:54.83829 2017 | 3 | 1 | 4 | 3 | Wed Nov 22 23:24:32.080584 2017 | 3 | 2 | 5 | 4 | Wed Nov 22 23:59:46.493416 2017 | 3 | 1 | 3 | 4 | Thu Nov 23 01:55:21.824618 2017 | 3 | 1 | 4 | (5 rows) -- this is single shard query and still not supported since it has view + cte -- router planner can't detect it SELECT * FROM cte_view_1 WHERE user_id = 2 ORDER BY 1,2,3,4,5; user_id | time | value_1 | value_2 | value_3 | value_4 ---------+--------------------------------+---------+---------+---------+--------- 2 | Thu Nov 23 13:52:54.83829 2017 | 3 | 1 | 4 | (1 row) -- if CTE itself prunes down to a single shard than the view is supported (router plannable) CREATE VIEW cte_view_2 AS WITH c1 AS (SELECT * FROM users_table WHERE user_id = 2) SELECT * FROM c1 WHERE value_1 = 3; SELECT * FROM cte_view_2; user_id | time | value_1 | value_2 | value_3 | value_4 ---------+---------------------------------+---------+---------+---------+--------- 2 | Thu Nov 23 00:19:14.138058 2017 | 3 | 4 | 0 | 2 | Thu Nov 23 13:52:54.83829 2017 | 3 | 1 | 4 | 2 | Wed Nov 22 18:19:49.944985 2017 | 3 | 5 | 1 | 2 | Thu Nov 23 11:41:04.042936 2017 | 3 | 4 | 1 | (4 rows) CREATE VIEW router_view AS SELECT * FROM users_table WHERE user_id = 2; -- router plannable SELECT user_id FROM router_view GROUP BY 1; user_id --------- 2 (1 row) -- join a router view SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN recent_events USING (user_id) ORDER BY 2 LIMIT 3; user_id | time ---------+--------------------------------- 2 | Thu Nov 23 17:26:14.563216 2017 (1 row) SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN (SELECT * FROM recent_events) re USING (user_id) ORDER BY 2 LIMIT 3; user_id | time ---------+--------------------------------- 2 | Thu Nov 23 17:26:14.563216 2017 (1 row) -- views with limits CREATE VIEW recent_10_users AS SELECT user_id, max(time) as lastseen FROM users_table GROUP BY user_id ORDER BY lastseen DESC LIMIT 10; -- this is not supported since it has limit in it and subquery_pushdown is not set SELECT * FROM recent_10_users; user_id | lastseen ---------+--------------------------------- 1 | Thu Nov 23 17:30:34.635085 2017 3 | Thu Nov 23 17:18:51.048758 2017 5 | Thu Nov 23 16:48:32.08896 2017 4 | Thu Nov 23 15:32:02.360969 2017 6 | Thu Nov 23 14:43:18.024104 2017 2 | Thu Nov 23 13:52:54.83829 2017 (6 rows) SET citus.subquery_pushdown to ON; -- still not supported since outer query does not have limit -- it shows a different (subquery with single relation) error message SELECT * FROM recent_10_users; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries with limit are not supported yet -- now it displays more correct error message SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id); ERROR: cannot push down this subquery DETAIL: Limit in subquery without limit in the outermost query is unsupported -- now both are supported when there is a limit on the outer most query SELECT * FROM recent_10_users ORDER BY lastseen DESC LIMIT 10; user_id | lastseen ---------+--------------------------------- 1 | Thu Nov 23 17:30:34.635085 2017 3 | Thu Nov 23 17:18:51.048758 2017 5 | Thu Nov 23 16:48:32.08896 2017 4 | Thu Nov 23 15:32:02.360969 2017 6 | Thu Nov 23 14:43:18.024104 2017 2 | Thu Nov 23 13:52:54.83829 2017 (6 rows) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; user_id | time | event_type | value_2 | value_3 | value_4 ---------+---------------------------------+------------+---------+---------+--------- 1 | Thu Nov 23 21:54:46.924477 2017 | 6 | 4 | 5 | 4 | Thu Nov 23 18:10:21.338399 2017 | 1 | 2 | 4 | 3 | Thu Nov 23 18:08:26.550729 2017 | 2 | 4 | 3 | 2 | Thu Nov 23 17:26:14.563216 2017 | 1 | 5 | 3 | 3 | Thu Nov 23 16:44:41.903713 2017 | 4 | 2 | 2 | 3 | Thu Nov 23 16:31:56.219594 2017 | 5 | 1 | 2 | 4 | Thu Nov 23 16:20:33.264457 2017 | 0 | 0 | 3 | 5 | Thu Nov 23 16:11:02.929469 2017 | 4 | 2 | 0 | 2 | Thu Nov 23 15:58:49.273421 2017 | 5 | 1 | 2 | 5 | Thu Nov 23 14:40:40.467511 2017 | 1 | 4 | 1 | (10 rows) RESET citus.subquery_pushdown; VACUUM ANALYZE users_table; -- explain tests EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------- Sort Sort Key: remote_scan.user_id -> HashAggregate Group Key: remote_scan.user_id -> Custom Scan (Citus Real-Time) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=57637 dbname=regression -> HashAggregate Group Key: users_table.user_id -> Hash Join Hash Cond: (users_table.user_id = ru.user_id) -> Seq Scan on users_table_1400000 users_table Filter: ((value_1 >= 1) AND (value_1 < 3)) -> Hash -> Subquery Scan on ru -> Sort Sort Key: (max(users_table_1."time")) DESC -> HashAggregate Group Key: users_table_1.user_id Filter: (max(users_table_1."time") > '2017-11-23 16:20:33.264457'::timestamp without time zone) -> Seq Scan on users_table_1400000 users_table_1 (23 rows) EXPLAIN (COSTS FALSE) SELECT * FROM ( (SELECT user_id FROM recent_users) UNION (SELECT user_id FROM selected_users) ) u WHERE user_id < 4 AND user_id > 1 ORDER BY user_id; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------- Sort Sort Key: remote_scan.user_id -> Custom Scan (Citus Real-Time) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=57637 dbname=regression -> Unique -> Sort Sort Key: recent_users.user_id -> Append -> Subquery Scan on recent_users -> Sort Sort Key: (max(users_table."time")) DESC -> GroupAggregate Group Key: users_table.user_id Filter: (max(users_table."time") > '2017-11-23 16:20:33.264457'::timestamp without time zone) -> Sort Sort Key: users_table.user_id -> Seq Scan on users_table_1400000 users_table Filter: ((user_id < 4) AND (user_id > 1)) -> Seq Scan on users_table_1400000 users_table_1 Filter: ((value_1 >= 1) AND (value_1 < 3) AND (user_id < 4) AND (user_id > 1)) (23 rows) EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; QUERY PLAN --------------------------------------------------------------------------------------------------------------------- Limit -> Sort Sort Key: remote_scan."time" DESC -> Custom Scan (Citus Real-Time) -> Distributed Subplan 95_1 -> Limit -> Sort Sort Key: max((max(remote_scan.lastseen))) DESC -> HashAggregate Group Key: remote_scan.user_id -> Custom Scan (Citus Real-Time) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=57637 dbname=regression -> Limit -> Sort Sort Key: (max("time")) DESC -> HashAggregate Group Key: user_id -> Seq Scan on users_table_1400000 users_table Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=57637 dbname=regression -> Limit -> Sort Sort Key: et."time" DESC -> Hash Join Hash Cond: (intermediate_result.user_id = et.user_id) -> Function Scan on read_intermediate_result intermediate_result -> Hash -> Seq Scan on events_table_1400004 et (33 rows) SET citus.subquery_pushdown to ON; EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; QUERY PLAN --------------------------------------------------------------------------------------------------------------------- Limit -> Sort Sort Key: remote_scan."time" DESC -> Custom Scan (Citus Real-Time) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=57637 dbname=regression -> Limit -> Sort Sort Key: et."time" DESC -> Hash Join Hash Cond: (et.user_id = recent_10_users.user_id) -> Seq Scan on events_table_1400004 et -> Hash -> Subquery Scan on recent_10_users -> Limit -> Sort Sort Key: (max(users_table."time")) DESC -> HashAggregate Group Key: users_table.user_id -> Seq Scan on users_table_1400000 users_table (22 rows) RESET citus.subquery_pushdown; DROP VIEW recent_10_users; DROP VIEW router_view; DROP VIEW cte_view_2; DROP VIEW cte_view_1; DROP VIEW distinct_value_1; DROP VIEW distinct_user_with_value_1_3; DROP VIEW recent_selected_users; DROP VIEW selected_users; DROP VIEW recent_events; DROP VIEW recent_users;