-- -- MULTI_VIEW -- -- This file contains test cases for view support. It verifies various -- Citus features: simple selects, aggregates, joins, outer joins -- router queries, single row inserts, multi row inserts via insert -- into select, multi row insert via copy commands. SELECT count(*) FROM lineitem_hash_part; count ------- 12000 (1 row) SELECT count(*) FROM orders_hash_part; count ------- 2984 (1 row) -- create a view for priority orders CREATE VIEW priority_orders AS SELECT * FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM'; -- aggregate pushdown SELECT o_orderpriority, count(*) FROM priority_orders GROUP BY 1 ORDER BY 2, 1; o_orderpriority | count -----------------+------- 2-HIGH | 593 1-URGENT | 603 (2 rows) SELECT o_orderpriority, count(*) FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM' GROUP BY 1 ORDER BY 2,1; o_orderpriority | count -----------------+------- 2-HIGH | 593 1-URGENT | 603 (2 rows) -- filters SELECT o_orderpriority, count(*) as all, count(*) FILTER (WHERE o_orderstatus ='F') as fullfilled FROM priority_orders GROUP BY 1 ORDER BY 2, 1; o_orderpriority | all | fullfilled -----------------+-----+------------ 2-HIGH | 593 | 271 1-URGENT | 603 | 280 (2 rows) -- having SELECT o_orderdate, count(*) from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc; o_orderdate | count -------------+------- 08-20-1996 | 5 10-10-1994 | 4 05-05-1994 | 4 04-07-1994 | 4 03-17-1993 | 4 (5 rows) -- having with filters SELECT o_orderdate, count(*) as all, count(*) FILTER(WHERE o_orderstatus = 'F') from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc; o_orderdate | all | count -------------+-----+------- 08-20-1996 | 5 | 0 10-10-1994 | 4 | 4 05-05-1994 | 4 | 4 04-07-1994 | 4 | 4 03-17-1993 | 4 | 4 (5 rows) -- limit SELECT o_orderkey, o_totalprice from orders_hash_part order by 2 desc, 1 asc limit 5 ; o_orderkey | o_totalprice ------------+-------------- 4421 | 401055.62 10209 | 400191.77 11142 | 395039.05 14179 | 384265.43 11296 | 378166.33 (5 rows) SELECT o_orderkey, o_totalprice from priority_orders order by 2 desc, 1 asc limit 1 ; o_orderkey | o_totalprice ------------+-------------- 14179 | 384265.43 (1 row) CREATE VIEW priority_lineitem AS SELECT li.* FROM lineitem_hash_part li JOIN priority_orders ON (l_orderkey = o_orderkey); SELECT l_orderkey, count(*) FROM priority_lineitem GROUP BY 1 ORDER BY 2 DESC, 1 LIMIT 5; l_orderkey | count ------------+------- 7 | 7 225 | 7 226 | 7 322 | 7 326 | 7 (5 rows) CREATE VIEW air_shipped_lineitems AS SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR'; -- join between view and table SELECT count(*) FROM orders_hash_part join air_shipped_lineitems ON (o_orderkey = l_orderkey); count ------- 1706 (1 row) -- join between views SELECT count(*) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey); count ------- 700 (1 row) -- count distinct on partition column is supported SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey); count ------- 551 (1 row) -- count distinct on non-partition column is supported SELECT count(distinct o_orderpriority) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey); count ------- 2 (1 row) -- count distinct on partition column is supported on router queries SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey) WHERE (o_orderkey = 231); count ------- 1 (1 row) -- select distinct on router joins of views also works SELECT distinct(o_orderkey) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey) WHERE (o_orderkey = 231); o_orderkey ------------ 231 (1 row) -- left join support depends on flattening of the query -- following query fails since the inner part is kept as subquery SELECT * FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey); ERROR: cannot perform distributed planning on this query DETAIL: Subqueries in outer joins are not supported -- however, this works SELECT count(*) FROM priority_orders left join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR'; count ------- 700 (1 row) -- view at the inner side of is not supported SELECT count(*) FROM priority_orders right join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR'; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries in outer joins are not supported -- but view at the outer side is. This is essentially the same as a left join with arguments reversed. SELECT count(*) FROM lineitem_hash_part right join priority_orders ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR'; count ------- 700 (1 row) -- left join on router query is supported SELECT o_orderkey, l_linenumber FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey) WHERE o_orderkey = 2; o_orderkey | l_linenumber ------------+-------------- 2 | (1 row) -- repartition query on view join -- it passes planning, fails at execution stage SELECT * FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey); ERROR: the query contains a join that requires repartitioning HINT: Set citus.enable_repartition_joins to on to enable repartitioning SET citus.task_executor_type to "task-tracker"; SELECT count(*) FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey); count ------- 192 (1 row) SET citus.task_executor_type to DEFAULT; -- materialized views work -- insert into... select works with views CREATE TABLE temp_lineitem(LIKE lineitem_hash_part); SELECT create_distributed_table('temp_lineitem', 'l_orderkey', 'hash', 'lineitem_hash_part'); create_distributed_table -------------------------- (1 row) INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems; SELECT count(*) FROM temp_lineitem; count ------- 1706 (1 row) -- following is a where false query, should not be inserting anything INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems WHERE l_shipmode = 'MAIL'; SELECT count(*) FROM temp_lineitem; count ------- 1706 (1 row) -- can create and query materialized views CREATE MATERIALIZED VIEW mode_counts AS SELECT l_shipmode, count(*) FROM temp_lineitem GROUP BY l_shipmode; SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10; l_shipmode | count ------------+------- AIR | 1706 (1 row) -- materialized views are local, cannot join with distributed tables SELECT count(*) FROM mode_counts JOIN temp_lineitem USING (l_shipmode); ERROR: relation mode_counts is not distributed -- new data is not immediately reflected in the view INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems; SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10; l_shipmode | count ------------+------- AIR | 1706 (1 row) -- refresh updates the materialised view with new data REFRESH MATERIALIZED VIEW mode_counts; SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10; l_shipmode | count ------------+------- AIR | 3412 (1 row) DROP MATERIALIZED VIEW mode_counts; SET citus.task_executor_type to "task-tracker"; -- single view repartition subqueries are not supported SELECT l_suppkey, count(*) FROM (SELECT l_suppkey, l_shipdate, count(*) FROM air_shipped_lineitems GROUP BY l_suppkey, l_shipdate) supps GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries without group by clause are not supported yet -- logically same query without a view works fine SELECT l_suppkey, count(*) FROM (SELECT l_suppkey, l_shipdate, count(*) FROM lineitem_hash_part WHERE l_shipmode = 'AIR' GROUP BY l_suppkey, l_shipdate) supps GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5; l_suppkey | count -----------+------- 7680 | 4 160 | 3 1042 | 3 1318 | 3 5873 | 3 (5 rows) -- when a view is replaced by actual query it still fails SELECT l_suppkey, count(*) FROM (SELECT l_suppkey, l_shipdate, count(*) FROM (SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR') asi GROUP BY l_suppkey, l_shipdate) supps GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries without group by clause are not supported yet -- repartition query on view with single table subquery CREATE VIEW supp_count_view AS SELECT * FROM (SELECT l_suppkey, count(*) FROM lineitem_hash_part GROUP BY 1) s1; SELECT * FROM supp_count_view ORDER BY 2 DESC, 1 LIMIT 10; l_suppkey | count -----------+------- 6104 | 8 1868 | 6 5532 | 6 5849 | 6 6169 | 6 6669 | 6 6692 | 6 7703 | 6 7869 | 6 8426 | 6 (10 rows) SET citus.task_executor_type to DEFAULT; -- create a view with aggregate CREATE VIEW lineitems_by_shipping_method AS SELECT l_shipmode, count(*) as cnt FROM lineitem_hash_part GROUP BY 1; -- following will fail due to non GROUP BY of partition key SELECT * FROM lineitems_by_shipping_method; ERROR: Unrecognized range table id 1 -- create a view with group by on partition column CREATE VIEW lineitems_by_orderkey AS SELECT l_orderkey, count(*) FROM lineitem_hash_part GROUP BY 1; -- this should work since we're able to push down this query SELECT * FROM lineitems_by_orderkey ORDER BY 2 DESC, 1 ASC LIMIT 10; l_orderkey | count ------------+------- 7 | 7 68 | 7 129 | 7 164 | 7 194 | 7 225 | 7 226 | 7 322 | 7 326 | 7 354 | 7 (10 rows) -- it would also work since it is made router plannable SELECT * FROM lineitems_by_orderkey WHERE l_orderkey = 100; l_orderkey | count ------------+------- 100 | 5 (1 row) DROP TABLE temp_lineitem CASCADE; DROP VIEW supp_count_view; DROP VIEW lineitems_by_orderkey; DROP VIEW lineitems_by_shipping_method; DROP VIEW air_shipped_lineitems; DROP VIEW priority_lineitem; DROP VIEW priority_orders; -- new tests for real time use case including views and subqueries -- create view to display recent user who has an activity after a timestamp CREATE VIEW recent_users AS SELECT user_id, max(time) as lastseen FROM users_table GROUP BY user_id HAVING max(time) > '2017-11-23 16:20:33.264457'::timestamp order by 2 DESC; SELECT * FROM recent_users; user_id | lastseen ---------+--------------------------------- 1 | Thu Nov 23 17:30:34.635085 2017 5 | Thu Nov 23 16:48:32.08896 2017 3 | Thu Nov 23 17:18:51.048758 2017 (3 rows) -- create a view for recent_events CREATE VIEW recent_events AS SELECT user_id, time FROM events_table WHERE time > '2017-11-23 16:20:33.264457'::timestamp; SELECT count(*) FROM recent_events; count ------- 6 (1 row) -- count number of events of recent_users SELECT count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id); count ------- 50 (1 row) -- count number of events of per recent users order by count SELECT ru.user_id, count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id) GROUP BY ru.user_id ORDER BY 2 DESC, 1; user_id | count ---------+------- 3 | 21 1 | 15 5 | 14 (3 rows) -- the same query with a left join however, it would still generate the same result SELECT ru.user_id, count(*) FROM recent_users ru LEFT JOIN events_table et ON (ru.user_id = et.user_id) GROUP BY ru.user_id ORDER BY 2 DESC, 1; user_id | count ---------+------- 3 | 21 1 | 15 5 | 14 (3 rows) -- query wrapped inside a subquery, it needs another top level order by SELECT * FROM (SELECT ru.user_id, count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id) GROUP BY ru.user_id ORDER BY 2 DESC, 1) s1 ORDER BY 2 DESC, 1; user_id | count ---------+------- 3 | 21 1 | 15 5 | 14 (3 rows) -- non-partition key joins are not supported inside subquery SELECT * FROM (SELECT ru.user_id, count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.event_type) GROUP BY ru.user_id ORDER BY 2 DESC, 1) s1 ORDER BY 2 DESC, 1; ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. -- join between views -- recent users who has an event in recent events SELECT ru.user_id FROM recent_users ru JOIN recent_events re USING(user_id) GROUP BY ru.user_id ORDER BY ru.user_id; user_id --------- 1 3 (2 rows) -- outer join inside a subquery -- recent_events who are not done by recent users SELECT count(*) FROM ( SELECT re.*, ru.user_id AS recent_user FROM recent_events re LEFT JOIN recent_users ru USING(user_id)) reu WHERE recent_user IS NULL; count ------- 2 (1 row) -- same query with anti-join SELECT count(*) FROM recent_events re LEFT JOIN recent_users ru ON(ru.user_id = re.user_id) WHERE ru.user_id IS NULL; count ------- 2 (1 row) -- join between view and table -- users who has recent activity and they have an entry with value_1 is less than 3 SELECT ut.* FROM recent_users ru JOIN users_table ut USING (user_id) WHERE ut.value_1 < 3 ORDER BY 1,2; user_id | time | value_1 | value_2 | value_3 | value_4 ---------+---------------------------------+---------+---------+---------+--------- 1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 | 3 | Wed Nov 22 18:43:51.450263 2017 | 1 | 1 | 4 | 3 | Wed Nov 22 20:43:31.008625 2017 | 1 | 3 | 2 | 3 | Thu Nov 23 00:15:45.610845 2017 | 1 | 1 | 4 | 3 | Thu Nov 23 03:23:24.702501 2017 | 1 | 2 | 5 | 3 | Thu Nov 23 06:20:05.854857 2017 | 1 | 4 | 2 | 3 | Thu Nov 23 09:57:41.540228 2017 | 2 | 2 | 3 | 3 | Thu Nov 23 11:18:53.114408 2017 | 2 | 2 | 0 | 3 | Thu Nov 23 12:56:49.29191 2017 | 0 | 5 | 1 | 3 | Thu Nov 23 17:18:51.048758 2017 | 1 | 5 | 5 | 5 | Wed Nov 22 20:43:18.667473 2017 | 0 | 3 | 2 | 5 | Wed Nov 22 21:02:07.575129 2017 | 2 | 0 | 2 | 5 | Wed Nov 22 22:10:24.315371 2017 | 1 | 2 | 1 | 5 | Thu Nov 23 00:54:44.192608 2017 | 1 | 3 | 2 | 5 | Thu Nov 23 07:47:09.542999 2017 | 1 | 4 | 3 | 5 | Thu Nov 23 09:05:08.53142 2017 | 2 | 2 | 2 | 5 | Thu Nov 23 09:17:47.706703 2017 | 2 | 5 | 3 | 5 | Thu Nov 23 10:15:31.764558 2017 | 2 | 2 | 2 | 5 | Thu Nov 23 14:29:02.557934 2017 | 2 | 1 | 2 | 5 | Thu Nov 23 15:55:08.493462 2017 | 0 | 3 | 3 | 5 | Thu Nov 23 16:28:38.455322 2017 | 2 | 5 | 4 | (21 rows) -- determine if a recent user has done a given event type or not SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event FROM recent_users ru LEFT JOIN events_table et ON(ru.user_id = et.user_id AND et.event_type = 6) ORDER BY 2 DESC, 1; user_id | done_event ---------+------------ 1 | YES 3 | NO 5 | NO (3 rows) -- view vs table join wrapped inside a subquery SELECT * FROM (SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event FROM recent_users ru LEFT JOIN events_table et ON(ru.user_id = et.user_id AND et.event_type = 6) ) s1 ORDER BY 2 DESC, 1; user_id | done_event ---------+------------ 1 | YES 3 | NO 5 | NO (3 rows) -- event vs table non-partition-key join is not supported SELECT * FROM (SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event FROM recent_users ru LEFT JOIN events_table et ON(ru.user_id = et.event_type) ) s1 ORDER BY 2 DESC, 1; ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. -- create a select only view CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 1 and value_1 <3; CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id); SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1; user_id --------- 1 3 5 (3 rows) -- this would be supported when we implement where partition_key in (subquery) support SELECT et.user_id, et.time FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users) GROUP BY 1,2 ORDER BY 1 DESC,2 DESC LIMIT 5; user_id | time ---------+--------------------------------- 5 | Thu Nov 23 16:11:02.929469 2017 5 | Thu Nov 23 14:40:40.467511 2017 5 | Thu Nov 23 14:28:51.833214 2017 5 | Thu Nov 23 14:23:09.889786 2017 5 | Thu Nov 23 13:26:45.571108 2017 (5 rows) -- it is supported when it is a router query SELECT count(*) FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users WHERE user_id = 1); count ------- 15 (1 row) -- expected this to work but it did not (SELECT user_id FROM recent_users) UNION (SELECT user_id FROM selected_users); ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT HINT: Consider using an equality filter on the distributed table's partition column. -- wrapping it inside a SELECT * works SELECT * FROM ( (SELECT user_id FROM recent_users) UNION (SELECT user_id FROM selected_users) ) u WHERE user_id < 2 AND user_id > 0 ORDER BY user_id; user_id --------- 1 (1 row) -- union all also works for views SELECT * FROM ( (SELECT user_id FROM recent_users) UNION ALL (SELECT user_id FROM selected_users) ) u WHERE user_id < 2 AND user_id > 0 ORDER BY user_id; user_id --------- 1 1 (2 rows) SELECT count(*) FROM ( (SELECT user_id FROM recent_users) UNION (SELECT user_id FROM selected_users) ) u WHERE user_id < 2 AND user_id > 0; count ------- 1 (1 row) -- expected this to work but it does not SELECT count(*) FROM ( (SELECT user_id FROM recent_users) UNION ALL (SELECT user_id FROM selected_users) ) u WHERE user_id < 2 AND user_id > 0; ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column -- expand view definitions and re-run last 2 queries SELECT count(*) FROM ( (SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table GROUP BY user_id HAVING max(time) > '2017-11-22 05:45:49.978738'::timestamp order by 2 DESC) aa ) UNION (SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 1 and value_1 < 3) bb) ) u WHERE user_id < 2 AND user_id > 0; count ------- 1 (1 row) SELECT count(*) FROM ( (SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table GROUP BY user_id HAVING max(time) > '2017-11-22 05:45:49.978738'::timestamp order by 2 DESC) aa ) UNION ALL (SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 1 and value_1 < 3) bb) ) u WHERE user_id < 2 AND user_id > 0; ERROR: cannot pushdown the subquery since not all subqueries in the UNION have the partition column in the same position DETAIL: Each leaf query of the UNION should return the partition column in the same position and all joins must be on the partition column -- test distinct -- distinct is supported if it is on a partition key CREATE VIEW distinct_user_with_value_1_3 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 3; SELECT * FROM distinct_user_with_value_1_3 ORDER BY user_id; user_id --------- 1 2 3 4 5 6 (6 rows) -- distinct is not supported if it is on a non-partition key CREATE VIEW distinct_value_1 AS SELECT DISTINCT value_1 FROM users_table WHERE value_2 = 3; SELECT * FROM distinct_value_1; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries without group by clause are not supported yet -- CTEs are not supported even if they are on views CREATE VIEW cte_view_1 AS WITH c1 AS (SELECT * FROM users_table WHERE value_1 = 3) SELECT * FROM c1 WHERE value_2 < 4; SELECT * FROM cte_view_1; ERROR: cannot push down this subquery DETAIL: CTEs in subqueries are currently unsupported -- this is single shard query but still not supported since it has view + cte -- router planner can't detect it SELECT * FROM cte_view_1 WHERE user_id = 2; ERROR: cannot push down this subquery DETAIL: CTEs in subqueries are currently unsupported -- if CTE itself prunes down to a single shard than the view is supported (router plannable) CREATE VIEW cte_view_2 AS WITH c1 AS (SELECT * FROM users_table WHERE user_id = 2) SELECT * FROM c1 WHERE value_1 = 3; SELECT * FROM cte_view_2; user_id | time | value_1 | value_2 | value_3 | value_4 ---------+---------------------------------+---------+---------+---------+--------- 2 | Thu Nov 23 00:19:14.138058 2017 | 3 | 4 | 0 | 2 | Thu Nov 23 13:52:54.83829 2017 | 3 | 1 | 4 | 2 | Wed Nov 22 18:19:49.944985 2017 | 3 | 5 | 1 | 2 | Thu Nov 23 11:41:04.042936 2017 | 3 | 4 | 1 | (4 rows) CREATE VIEW router_view AS SELECT * FROM users_table WHERE user_id = 2; -- router plannable SELECT user_id FROM router_view GROUP BY 1; user_id --------- 2 (1 row) -- join a router view SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN recent_events USING (user_id) ORDER BY 2 LIMIT 3; user_id | time ---------+--------------------------------- 2 | Thu Nov 23 17:26:14.563216 2017 (1 row) SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN (SELECT * FROM recent_events) re USING (user_id) ORDER BY 2 LIMIT 3; user_id | time ---------+--------------------------------- 2 | Thu Nov 23 17:26:14.563216 2017 (1 row) -- views with limits CREATE VIEW recent_10_users AS SELECT user_id, max(time) as lastseen FROM users_table GROUP BY user_id ORDER BY lastseen DESC LIMIT 10; -- this is not supported since it has limit in it and subquery_pushdown is not set SELECT * FROM recent_10_users; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries with limit are not supported yet SET citus.subquery_pushdown to ON; -- still not supported since outer query does not have limit -- it shows a different (subquery with single relation) error message SELECT * FROM recent_10_users; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries with limit are not supported yet -- now it displays more correct error message SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id); ERROR: cannot push down this subquery DETAIL: Limit in subquery without limit in the outermost query is unsupported -- now both are supported when there is a limit on the outer most query SELECT * FROM recent_10_users ORDER BY lastseen DESC LIMIT 10; user_id | lastseen ---------+--------------------------------- 1 | Thu Nov 23 17:30:34.635085 2017 3 | Thu Nov 23 17:18:51.048758 2017 5 | Thu Nov 23 16:48:32.08896 2017 4 | Thu Nov 23 15:32:02.360969 2017 6 | Thu Nov 23 14:43:18.024104 2017 2 | Thu Nov 23 13:52:54.83829 2017 (6 rows) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; user_id | time | event_type | value_2 | value_3 | value_4 ---------+---------------------------------+------------+---------+---------+--------- 1 | Thu Nov 23 21:54:46.924477 2017 | 6 | 4 | 5 | 4 | Thu Nov 23 18:10:21.338399 2017 | 1 | 2 | 4 | 3 | Thu Nov 23 18:08:26.550729 2017 | 2 | 4 | 3 | 2 | Thu Nov 23 17:26:14.563216 2017 | 1 | 5 | 3 | 3 | Thu Nov 23 16:44:41.903713 2017 | 4 | 2 | 2 | 3 | Thu Nov 23 16:31:56.219594 2017 | 5 | 1 | 2 | 4 | Thu Nov 23 16:20:33.264457 2017 | 0 | 0 | 3 | 5 | Thu Nov 23 16:11:02.929469 2017 | 4 | 2 | 0 | 2 | Thu Nov 23 15:58:49.273421 2017 | 5 | 1 | 2 | 5 | Thu Nov 23 14:40:40.467511 2017 | 1 | 4 | 1 | (10 rows) RESET citus.subquery_pushdown; VACUUM ANALYZE users_table; -- explain tests EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1; QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------------------------- Sort Sort Key: remote_scan.user_id -> HashAggregate Group Key: remote_scan.user_id -> Custom Scan (Citus Real-Time) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=57637 dbname=regression -> HashAggregate Group Key: users_table.user_id -> Hash Join Hash Cond: (users_table.user_id = ru.user_id) -> Seq Scan on users_table_1400000 users_table Filter: ((value_1 >= 1) AND (value_1 < 3)) -> Hash -> Subquery Scan on ru -> Sort Sort Key: (max(users_table_1."time")) DESC -> HashAggregate Group Key: users_table_1.user_id Filter: (max(users_table_1."time") > '2017-11-23 16:20:33.264457'::timestamp without time zone) -> Seq Scan on users_table_1400000 users_table_1 (23 rows) EXPLAIN (COSTS FALSE) SELECT * FROM ( (SELECT user_id FROM recent_users) UNION (SELECT user_id FROM selected_users) ) u WHERE user_id < 4 AND user_id > 1 ORDER BY user_id; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------- Sort Sort Key: remote_scan.user_id -> Custom Scan (Citus Real-Time) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=57637 dbname=regression -> Unique -> Sort Sort Key: recent_users.user_id -> Append -> Subquery Scan on recent_users -> Sort Sort Key: (max(users_table."time")) DESC -> GroupAggregate Group Key: users_table.user_id Filter: (max(users_table."time") > '2017-11-23 16:20:33.264457'::timestamp without time zone) -> Sort Sort Key: users_table.user_id -> Seq Scan on users_table_1400000 users_table Filter: ((user_id < 4) AND (user_id > 1)) -> Seq Scan on users_table_1400000 users_table_1 Filter: ((value_1 >= 1) AND (value_1 < 3) AND (user_id < 4) AND (user_id > 1)) (23 rows) EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; ERROR: cannot push down this subquery DETAIL: Limit in subquery is currently unsupported -> Distributed Subplan 83_1 SET citus.subquery_pushdown to ON; EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; QUERY PLAN --------------------------------------------------------------------------------------------------------------------- Limit -> Sort Sort Key: remote_scan."time" DESC -> Custom Scan (Citus Real-Time) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=57637 dbname=regression -> Limit -> Sort Sort Key: et."time" DESC -> Hash Join Hash Cond: (et.user_id = recent_10_users.user_id) -> Seq Scan on events_table_1400004 et -> Hash -> Subquery Scan on recent_10_users -> Limit -> Sort Sort Key: (max(users_table."time")) DESC -> HashAggregate Group Key: users_table.user_id -> Seq Scan on users_table_1400000 users_table (22 rows) RESET citus.subquery_pushdown; DROP VIEW recent_10_users; DROP VIEW router_view; DROP VIEW cte_view_2; DROP VIEW cte_view_1; DROP VIEW distinct_value_1; DROP VIEW distinct_user_with_value_1_3; DROP VIEW recent_selected_users; DROP VIEW selected_users; DROP VIEW recent_events; DROP VIEW recent_users;