-- -- MULTI_VIEW -- -- This file contains test cases for view support. It verifies various -- Citus features: simple selects, aggregates, joins, outer joins, -- router queries, single row inserts, multi row inserts via insert -- into select, multi row insert via copy commands. SELECT count(*) FROM lineitem_hash_part; count ------- 12000 (1 row) SELECT count(*) FROM orders_hash_part; count ------- 2984 (1 row) -- create a view for priority orders CREATE VIEW priority_orders AS SELECT * FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM'; -- aggregate pushdown SELECT o_orderpriority, count(*) FROM priority_orders GROUP BY 1 ORDER BY 2, 1; o_orderpriority | count -----------------+------- 2-HIGH | 593 1-URGENT | 603 (2 rows) SELECT o_orderpriority, count(*) FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM' GROUP BY 1 ORDER BY 2,1; o_orderpriority | count -----------------+------- 2-HIGH | 593 1-URGENT | 603 (2 rows) -- filters SELECT o_orderpriority, count(*) as all, count(*) FILTER (WHERE o_orderstatus ='F') as fullfilled FROM priority_orders GROUP BY 1 ORDER BY 2, 1; o_orderpriority | all | fullfilled -----------------+-----+------------ 2-HIGH | 593 | 271 1-URGENT | 603 | 280 (2 rows) -- having SELECT o_orderdate, count(*) from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc; o_orderdate | count -------------+------- 08-20-1996 | 5 10-10-1994 | 4 05-05-1994 | 4 04-07-1994 | 4 03-17-1993 | 4 (5 rows) -- having with filters SELECT o_orderdate, count(*) as all, count(*) FILTER(WHERE o_orderstatus = 'F') from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc; o_orderdate | all | count -------------+-----+------- 08-20-1996 | 5 | 0 10-10-1994 | 4 | 4 05-05-1994 | 4 | 4 04-07-1994 | 4 | 4 03-17-1993 | 4 | 4 (5 rows) -- limit SELECT o_orderkey, o_totalprice from orders_hash_part order by 2 desc, 1 asc limit 5 ; o_orderkey | o_totalprice ------------+-------------- 4421 | 401055.62 10209 | 400191.77 11142 | 
395039.05 14179 | 384265.43 11296 | 378166.33 (5 rows) SELECT o_orderkey, o_totalprice from priority_orders order by 2 desc, 1 asc limit 1 ; o_orderkey | o_totalprice ------------+-------------- 14179 | 384265.43 (1 row) -- create a view that joins a table with another view CREATE VIEW priority_lineitem AS SELECT li.* FROM lineitem_hash_part li JOIN priority_orders ON (l_orderkey = o_orderkey); SELECT l_orderkey, count(*) FROM priority_lineitem GROUP BY 1 ORDER BY 2 DESC, 1 LIMIT 5; l_orderkey | count ------------+------- 7 | 7 225 | 7 226 | 7 322 | 7 326 | 7 (5 rows) -- create a view for lineitems shipped by air CREATE VIEW air_shipped_lineitems AS SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR'; -- join between view and table SELECT count(*) FROM orders_hash_part join air_shipped_lineitems ON (o_orderkey = l_orderkey); count ------- 1706 (1 row) -- join between views SELECT count(*) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey); count ------- 700 (1 row) -- count distinct on partition column is not supported on a join between views without a partition column filter SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey); ERROR: cannot compute aggregate (distinct) DETAIL: table partitioning is unsuitable for aggregate (distinct) HINT: You can load the hll extension from contrib packages and enable distinct approximations. 
-- count distinct on partition column is supported on router queries SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey) WHERE (o_orderkey = 231); count ------- 1 (1 row) -- select distinct on router joins of views also works SELECT distinct(o_orderkey) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey) WHERE (o_orderkey = 231); o_orderkey ------------ 231 (1 row) -- left join support depends on flattening of the query -- following query fails since the inner part is kept as subquery SELECT * FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey); ERROR: cannot perform distributed planning on this query DETAIL: Subqueries in outer joins are not supported -- however, this works SELECT count(*) FROM priority_orders left join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR'; count ------- 700 (1 row) -- view at the inner side of the outer join is not supported SELECT count(*) FROM priority_orders right join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR'; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries in outer joins are not supported -- but view at the outer side is supported. This is essentially the same as a left join with arguments reversed. SELECT count(*) FROM lineitem_hash_part right join priority_orders ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR'; count ------- 700 (1 row) -- left join on router query is supported SELECT o_orderkey, l_linenumber FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey) WHERE o_orderkey = 2; o_orderkey | l_linenumber ------------+-------------- 2 | (1 row) -- repartition query on view join -- it passes planning, fails at execution stage SELECT * FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey); ERROR: cannot use real time executor with repartition jobs HINT: Set citus.task_executor_type to "task-tracker". 
SET citus.task_executor_type to "task-tracker"; SELECT count(*) FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey); count ------- 192 (1 row) SET citus.task_executor_type to DEFAULT; -- insert into... select works with views CREATE TABLE temp_lineitem(LIKE lineitem_hash_part); SELECT create_distributed_table('temp_lineitem', 'l_orderkey', 'hash', 'lineitem_hash_part'); create_distributed_table -------------------------- (1 row) INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems; SELECT count(*) FROM temp_lineitem; count ------- 1706 (1 row) -- following is a where false query, should not be inserting anything INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems WHERE l_shipmode = 'MAIL'; SELECT count(*) FROM temp_lineitem; count ------- 1706 (1 row) SET citus.task_executor_type to "task-tracker"; -- single view repartition subqueries are not supported SELECT l_suppkey, count(*) FROM (SELECT l_suppkey, l_shipdate, count(*) FROM air_shipped_lineitems GROUP BY l_suppkey, l_shipdate) supps GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries without group by clause are not supported yet -- logically same query without a view works fine SELECT l_suppkey, count(*) FROM (SELECT l_suppkey, l_shipdate, count(*) FROM lineitem_hash_part WHERE l_shipmode = 'AIR' GROUP BY l_suppkey, l_shipdate) supps GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5; l_suppkey | count -----------+------- 7680 | 4 160 | 3 1042 | 3 1318 | 3 5873 | 3 (5 rows) -- when a view is replaced by actual query it still fails SELECT l_suppkey, count(*) FROM (SELECT l_suppkey, l_shipdate, count(*) FROM (SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR') asi GROUP BY l_suppkey, l_shipdate) supps GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries without group by clause are not supported yet -- repartition query on view with single table subquery
CREATE VIEW supp_count_view AS SELECT * FROM (SELECT l_suppkey, count(*) FROM lineitem_hash_part GROUP BY 1) s1; SELECT * FROM supp_count_view ORDER BY 2 DESC, 1 LIMIT 10; l_suppkey | count -----------+------- 6104 | 8 1868 | 6 5532 | 6 5849 | 6 6169 | 6 6669 | 6 6692 | 6 7703 | 6 7869 | 6 8426 | 6 (10 rows) SET citus.task_executor_type to DEFAULT; -- create a view with aggregate CREATE VIEW lineitems_by_shipping_method AS SELECT l_shipmode, count(*) as cnt FROM lineitem_hash_part GROUP BY 1; -- following will fail due to non GROUP BY of partition key SELECT * FROM lineitems_by_shipping_method; ERROR: Unrecognized range table id 1 -- create a view with group by on partition column CREATE VIEW lineitems_by_orderkey AS SELECT l_orderkey, count(*) FROM lineitem_hash_part GROUP BY 1; -- this should work since we're able to push down this query SELECT * FROM lineitems_by_orderkey ORDER BY 2 DESC, 1 ASC LIMIT 10; l_orderkey | count ------------+------- 7 | 7 68 | 7 129 | 7 164 | 7 194 | 7 225 | 7 226 | 7 322 | 7 326 | 7 354 | 7 (10 rows) -- it would also work since it is made router plannable SELECT * FROM lineitems_by_orderkey WHERE l_orderkey = 100; l_orderkey | count ------------+------- 100 | 5 (1 row) DROP TABLE temp_lineitem CASCADE; DROP VIEW supp_count_view; DROP VIEW lineitems_by_orderkey; DROP VIEW lineitems_by_shipping_method; DROP VIEW air_shipped_lineitems; DROP VIEW priority_lineitem; DROP VIEW priority_orders; -- new tests for real time use case including views and subqueries -- create a view to display recent users who have activity after a timestamp CREATE VIEW recent_users AS SELECT user_id, max(time) as lastseen FROM users_table GROUP BY user_id HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC; -- order explicitly at the top level: the ORDER BY inside the view does not guarantee the order of the final result, which would make this expected output unstable SELECT * FROM recent_users ORDER BY 2 DESC, 1 DESC; user_id | lastseen ---------+--------------------------------- 6 | Tue Jan 21 05:57:47.118755 2014 71 | Tue Jan 21 05:55:52.018461 2014 39 | Tue Jan 21 05:55:18.875997 2014 74 | Tue Jan 21 05:54:04.837808 2014 87 | Tue Jan 21 05:53:51.866813 2014 50 | Tue Jan 21 05:53:44.251016 2014 66 | Tue Jan 21 05:51:31.681997 2014 100 | Tue Jan 21 05:49:04.953009 2014 46 | Tue Jan 21 05:49:00.229807 2014 86 | Tue Jan 21 05:48:54.381334 2014 13 | Tue Jan 21 05:48:45.418146 2014 90 | Tue Jan 21 05:48:25.027491 2014 58 | Tue Jan 21 05:47:30.418553 2014 44 | Tue Jan 21 05:47:01.104523 2014 (14 rows)
-- create a view for recent_events CREATE VIEW recent_events AS SELECT user_id, time FROM events_table WHERE time > '2014-01-20 01:45:49.978738'::timestamp; SELECT count(*) FROM recent_events; count ------- 1105 (1 row) -- count number of events of recent_users SELECT count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id); count ------- 1336 (1 row) -- count number of events per recent user, ordered by count SELECT ru.user_id, count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id) GROUP BY ru.user_id ORDER BY 2 DESC, 1; user_id | count ---------+------- 13 | 118 44 | 109 90 | 109 87 | 105 46 | 103 86 | 100 66 | 98 39 | 96 71 | 95 74 | 93 6 | 89 58 | 87 50 | 79 100 | 55 (14 rows) -- the same query with a left join however, it would still generate the same result SELECT ru.user_id, count(*) FROM recent_users ru LEFT JOIN events_table et ON (ru.user_id = et.user_id) GROUP BY ru.user_id ORDER BY 2 DESC, 1; user_id | count ---------+------- 13 | 118 44 | 109 90 | 109 87 | 105 46 | 103 86 | 100 66 | 98 39 | 96 71 | 95 74 | 93 6 | 89 58 | 87 50 | 79 100 | 55 (14 rows) -- query wrapped inside a subquery, it needs another top level order by SELECT * FROM (SELECT ru.user_id, count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id) GROUP BY ru.user_id ORDER BY 2 DESC, 1) s1 ORDER BY 2 DESC, 1; user_id | count ---------+------- 13 | 118 44 | 109 90 | 109 87 | 105 46 | 103 86 | 100 66 | 98 39 | 96 71 | 95 74 | 93 6 | 89 58 | 87 50 | 79 100 | 55 (14 rows)
-- non-partition key joins are not supported inside subquery SELECT * FROM (SELECT ru.user_id, count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.event_type) GROUP BY ru.user_id ORDER BY 2 DESC, 1) s1 ORDER BY 2 DESC, 1; ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. -- join between views -- recent users who have an event in recent events SELECT ru.user_id FROM recent_users ru JOIN recent_events re USING(user_id) GROUP BY ru.user_id ORDER BY ru.user_id; user_id --------- 6 13 39 44 46 50 58 66 71 74 86 87 90 100 (14 rows) -- outer join inside a subquery -- recent events that are not done by recent users SELECT count(*) FROM ( SELECT re.*, ru.user_id AS recent_user FROM recent_events re LEFT JOIN recent_users ru USING(user_id)) reu WHERE recent_user IS NULL; count ------- 957 (1 row) -- same query with anti-join SELECT count(*) FROM recent_events re LEFT JOIN recent_users ru ON(ru.user_id = re.user_id) WHERE ru.user_id IS NULL; count ------- 957 (1 row) -- join between view and table -- users who have recent activity and have an entry with value_1 less than 15 SELECT ut.* FROM recent_users ru JOIN users_table ut USING (user_id) WHERE ut.value_1 < 15 ORDER BY 1,2; user_id | time | value_1 | value_2 | value_3 | value_4 ---------+---------------------------------+---------+---------+---------+--------- 6 | Mon Jan 13 05:30:08.289267 2014 | 12 | 140 | 618 | 6 | Thu Jan 16 15:17:16.779695 2014 | 6 | 978 | 430 | 6 | Sun Jan 19 06:09:39.900888 2014 | 3 | 908 | 688 | 13 | Sun Jan 19 22:09:26.256209 2014 | 2 | 755 | 584 | 39 | Wed Jan 15 05:46:51.48765 2014 | 14 | 657 | 137 | 39 | Sun Jan 19 11:26:47.45937 2014 | 12 | 118 | 165 | 44 | Wed Jan 15 14:23:52.532426 2014 | 8 | 204 | 735 | 44 | Sun Jan 19 05:53:34.829093 2014 | 4 | 758 | 205 | 46 | Mon Jan 13 20:39:11.211169 2014 | 0 | 235 | 475 | 46 | Wed Jan 15 09:14:57.471944 2014 | 2 | 407 | 664 | 50 | Sat Jan 11 11:07:13.089216 2014 | 6 | 292 | 425 | 58 | Sun Jan 19 22:36:14.795396 2014 | 2 | 86 | 311 | 66 | Tue Jan 14 20:16:31.219213 2014 | 14 | 347 | 655 | 74 | Tue Jan 21 01:38:39.570986 2014 | 9 | 334 | 642 | 86 | Sun Jan 19 06:18:51.466578 2014 | 14 | 712 | 490 | 87 | Sat Jan 11 20:46:28.439073 2014 | 2 | 528 | 311 | 90 | Sun Jan 12 21:37:30.778206 2014 | 11 | 458 | 377 | 100 | Sun Jan 19 22:32:08.284043 2014 | 2 | 384 | 149 | (18 rows)
-- determine if a recent user has done a given event type or not SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event FROM recent_users ru LEFT JOIN events_table et ON(ru.user_id = et.user_id AND et.event_type = 625) ORDER BY 2 DESC, 1; user_id | done_event ---------+------------ 6 | YES 13 | NO 39 | NO 44 | NO 46 | NO 50 | NO 58 | NO 66 | NO 71 | NO 74 | NO 86 | NO 87 | NO 90 | NO 100 | NO (14 rows) -- view vs table join wrapped inside a subquery SELECT * FROM (SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event FROM recent_users ru LEFT JOIN events_table et ON(ru.user_id = et.user_id AND et.event_type = 625) ) s1 ORDER BY 2 DESC, 1; user_id | done_event ---------+------------ 6 | YES 13 | NO 39 | NO 44 | NO 46 | NO 50 | NO 58 | NO 66 | NO 71 | NO 74 | NO 86 | NO 87 | NO 90 | NO 100 | NO (14 rows) -- view vs table non-partition-key join is not supported SELECT * FROM (SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event FROM recent_users ru LEFT JOIN events_table et ON(ru.user_id = et.event_type) ) s1 ORDER BY 2 DESC, 1; ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. 
-- create a select only view CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150; CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id); SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1; user_id --------- 6 13 39 44 46 50 58 66 71 74 86 90 (12 rows) -- where partition_key in (subquery over a view) works SELECT et.user_id, et.time FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users) GROUP BY 1,2 ORDER BY 1 DESC,2 DESC LIMIT 5; user_id | time ---------+--------------------------------- 90 | Tue Jan 21 02:50:05.379732 2014 90 | Tue Jan 21 00:08:33.911898 2014 90 | Mon Jan 20 22:25:39.21906 2014 90 | Mon Jan 20 21:11:10.814326 2014 90 | Mon Jan 20 19:16:33.359257 2014 (5 rows) -- it is supported when it is a router query SELECT count(*) FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users WHERE user_id = 90); count ------- 109 (1 row) -- expected this to work but it does not: a top-level UNION of views is rejected (SELECT user_id FROM recent_users) UNION (SELECT user_id FROM selected_users); ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT HINT: Consider using an equality filter on the distributed table's partition column. 
-- wrapping it inside a SELECT * works SELECT * FROM ( (SELECT user_id FROM recent_users) UNION (SELECT user_id FROM selected_users) ) u WHERE user_id < 15 AND user_id > 10 ORDER BY user_id; user_id --------- 11 12 13 14 (4 rows) -- union all also works for views SELECT * FROM ( (SELECT user_id FROM recent_users) UNION ALL (SELECT user_id FROM selected_users) ) u WHERE user_id < 15 AND user_id > 10 ORDER BY user_id; user_id --------- 11 11 11 12 12 12 12 12 12 13 13 13 13 13 14 (15 rows) -- count(*) over the wrapped UNION of views works SELECT count(*) FROM ( (SELECT user_id FROM recent_users) UNION (SELECT user_id FROM selected_users) ) u WHERE user_id < 15 AND user_id > 10; count ------- 4 (1 row) -- expected count(*) over the wrapped UNION ALL to work as well but it does not SELECT count(*) FROM ( (SELECT user_id FROM recent_users) UNION ALL (SELECT user_id FROM selected_users) ) u WHERE user_id < 15 AND user_id > 10; ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. 
-- expand view definitions and re-run last 2 queries SELECT count(*) FROM ( (SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table GROUP BY user_id HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa ) UNION (SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u WHERE user_id < 15 AND user_id > 10; count ------- 4 (1 row) SELECT count(*) FROM ( (SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table GROUP BY user_id HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa ) UNION ALL (SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u WHERE user_id < 15 AND user_id > 10; ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. -- test distinct -- distinct is supported if it is on a partition key CREATE VIEW distinct_user_with_value_1_15 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 15; SELECT * FROM distinct_user_with_value_1_15 ORDER BY user_id; user_id --------- 7 8 35 42 46 53 70 82 87 88 96 (11 rows) -- distinct is not supported if it is on a non-partition key CREATE VIEW distinct_value_1 AS SELECT DISTINCT value_1 FROM users_table WHERE value_2 = 15; SELECT * FROM distinct_value_1; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries without group by clause are not supported yet -- CTEs are not supported even if they are on views CREATE VIEW cte_view_1 AS WITH c1 AS (SELECT * FROM users_table WHERE value_1 = 15) SELECT * FROM c1 WHERE value_2 < 500; SELECT * FROM cte_view_1; ERROR: cannot push down this subquery DETAIL: Table expressions other than simple relations and subqueries are currently unsupported -- this is a single shard query but still not supported since it has view + cte -- 
router planner can't detect it SELECT * FROM cte_view_1 WHERE user_id = 8; ERROR: cannot push down this subquery DETAIL: Table expressions other than simple relations and subqueries are currently unsupported -- if CTE itself prunes down to a single shard then the view is supported (router plannable) CREATE VIEW cte_view_2 AS WITH c1 AS (SELECT * FROM users_table WHERE user_id = 8) SELECT * FROM c1 WHERE value_1 = 15; SELECT * FROM cte_view_2; user_id | time | value_1 | value_2 | value_3 | value_4 ---------+---------------------------------+---------+---------+---------+--------- 8 | Tue Jan 21 00:52:36.967785 2014 | 15 | 10 | 868 | (1 row) CREATE VIEW router_view AS SELECT * FROM users_table WHERE user_id = 2; -- router plannable SELECT user_id FROM router_view GROUP BY 1; user_id --------- 2 (1 row) -- join a router view SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN recent_events USING (user_id) ORDER BY 2 LIMIT 3; user_id | time ---------+--------------------------------- 2 | Mon Jan 20 02:02:03.208351 2014 2 | Mon Jan 20 02:34:14.54301 2014 2 | Mon Jan 20 03:16:38.418772 2014 (3 rows) SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN (SELECT * FROM recent_events) re USING (user_id) ORDER BY 2 LIMIT 3; user_id | time ---------+--------------------------------- 2 | Mon Jan 20 02:02:03.208351 2014 2 | Mon Jan 20 02:34:14.54301 2014 2 | Mon Jan 20 03:16:38.418772 2014 (3 rows) -- views with limits CREATE VIEW recent_10_users AS SELECT user_id, max(time) as lastseen FROM users_table GROUP BY user_id ORDER BY lastseen DESC LIMIT 10; -- this is not supported since it has limit in it and subquery_pushdown is not set SELECT * FROM recent_10_users; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries with limit are not supported yet SET citus.subquery_pushdown to ON; -- still not supported since outer query does not have limit -- it shows a different (subquery with single relation) error message SELECT * FROM 
recent_10_users; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries with limit are not supported yet -- now it displays a more accurate error message SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id); ERROR: cannot push down this subquery DETAIL: Limit in subquery without limit in the outermost query is unsupported -- now both are supported when there is a limit on the outermost query SELECT * FROM recent_10_users ORDER BY lastseen DESC LIMIT 10; user_id | lastseen ---------+--------------------------------- 6 | Tue Jan 21 05:57:47.118755 2014 71 | Tue Jan 21 05:55:52.018461 2014 39 | Tue Jan 21 05:55:18.875997 2014 74 | Tue Jan 21 05:54:04.837808 2014 87 | Tue Jan 21 05:53:51.866813 2014 50 | Tue Jan 21 05:53:44.251016 2014 66 | Tue Jan 21 05:51:31.681997 2014 100 | Tue Jan 21 05:49:04.953009 2014 46 | Tue Jan 21 05:49:00.229807 2014 86 | Tue Jan 21 05:48:54.381334 2014 (10 rows) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; user_id | time | event_type | value_2 | value_3 | value_4 ---------+---------------------------------+------------+---------+---------+--------- 65 | Tue Jan 21 05:56:52.624231 2014 | 241 | 30 | 543 | 42 | Tue Jan 21 05:46:35.158342 2014 | 761 | 877 | 335 | 54 | Tue Jan 21 05:46:19.103645 2014 | 595 | 477 | 996 | 44 | Tue Jan 21 05:43:00.838945 2014 | 682 | 641 | 448 | 27 | Tue Jan 21 05:34:10.935865 2014 | 912 | 605 | 989 | 61 | Tue Jan 21 05:25:27.452065 2014 | 392 | 472 | 925 | 19 | Tue Jan 21 05:23:09.26298 2014 | 202 | 888 | 640 | 65 | Tue Jan 21 05:22:56.725329 2014 | 519 | 457 | 259 | 27 | Tue Jan 21 05:19:14.38026 2014 | 19 | 19 | 205 | 11 | Tue Jan 21 05:15:14.879531 2014 | 459 | 545 | 80 | (10 rows) RESET citus.subquery_pushdown; -- explain tests EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1; QUERY PLAN 
--------------------------------------------------------------------------------------------------------------------------------------------------------- Sort Sort Key: remote_scan.user_id -> HashAggregate Group Key: remote_scan.user_id -> Custom Scan (Citus Real-Time) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=57637 dbname=regression -> HashAggregate Group Key: users_table.user_id -> Hash Join Hash Cond: (users_table.user_id = ru.user_id) -> Bitmap Heap Scan on users_table_1400000 users_table Recheck Cond: ((value_1 >= 120) AND (value_1 < 150)) -> Bitmap Index Scan on is_index3_1400000 Index Cond: ((value_1 >= 120) AND (value_1 < 150)) -> Hash -> Subquery Scan on ru -> Sort Sort Key: (max(users_table_1."time")) DESC -> HashAggregate Group Key: users_table_1.user_id Filter: (max(users_table_1."time") > '2014-01-21 05:45:49.978738'::timestamp without time zone) -> Seq Scan on users_table_1400000 users_table_1 (25 rows) EXPLAIN (COSTS FALSE) SELECT * FROM ( (SELECT user_id FROM recent_users) UNION (SELECT user_id FROM selected_users) ) u WHERE user_id < 15 AND user_id > 10 ORDER BY user_id; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------- Sort Sort Key: remote_scan.user_id -> Custom Scan (Citus Real-Time) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=57637 dbname=regression -> Unique -> Sort Sort Key: recent_users.user_id -> Append -> Subquery Scan on recent_users -> Sort Sort Key: (max(users_table."time")) DESC -> GroupAggregate Group Key: users_table.user_id Filter: (max(users_table."time") > '2014-01-21 05:45:49.978738'::timestamp without time zone) -> Index Scan using is_index1_1400000 on users_table_1400000 users_table Index Cond: ((user_id < 15) AND (user_id > 10)) -> Index Scan using is_index1_1400000 on users_table_1400000 users_table_1 Index Cond: ((user_id < 15) AND (user_id > 10)) Filter: ((value_1 
>= 120) AND (value_1 < 150)) (22 rows) -- explain fails after subquery_pushdown is reset, since the view has a limit in it EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; ERROR: cannot push down this subquery DETAIL: Limit in subquery is currently unsupported SET citus.subquery_pushdown to ON; -- the same explain works once subquery_pushdown is set EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; QUERY PLAN --------------------------------------------------------------------------------------------------------------------- Limit -> Sort Sort Key: remote_scan."time" DESC -> Custom Scan (Citus Real-Time) Task Count: 4 Tasks Shown: One of 4 -> Task Node: host=localhost port=57637 dbname=regression -> Limit -> Sort Sort Key: et."time" DESC -> Hash Join Hash Cond: (et.user_id = recent_10_users.user_id) -> Seq Scan on events_table_1400004 et -> Hash -> Subquery Scan on recent_10_users -> Limit -> Sort Sort Key: (max(users_table."time")) DESC -> HashAggregate Group Key: users_table.user_id -> Seq Scan on users_table_1400000 users_table (22 rows) RESET citus.subquery_pushdown; DROP VIEW recent_10_users; DROP VIEW router_view; DROP VIEW cte_view_2; DROP VIEW cte_view_1; DROP VIEW distinct_value_1; DROP VIEW distinct_user_with_value_1_15; DROP VIEW recent_selected_users; DROP VIEW selected_users; DROP VIEW recent_events; DROP VIEW recent_users;