citus/src/test/regress/expected/multi_view.out

907 lines
32 KiB
Plaintext

--
-- MULTI_VIEW
--
-- This file contains test cases for view support. It verifies various
-- Citus features: simple selects, aggregates, joins, outer joins
-- router queries, single row inserts, multi row inserts via insert
-- into select, multi row insert via copy commands.
SELECT count(*) FROM lineitem_hash_part;
count
-------
12000
(1 row)
SELECT count(*) FROM orders_hash_part;
count
-------
2984
(1 row)
-- create a view for priority orders
CREATE VIEW priority_orders AS SELECT * FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM';
-- aggregate pushdown
SELECT o_orderpriority, count(*) FROM priority_orders GROUP BY 1 ORDER BY 2, 1;
o_orderpriority | count
-----------------+-------
2-HIGH | 593
1-URGENT | 603
(2 rows)
SELECT o_orderpriority, count(*) FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM' GROUP BY 1 ORDER BY 2,1;
o_orderpriority | count
-----------------+-------
2-HIGH | 593
1-URGENT | 603
(2 rows)
-- filters
SELECT o_orderpriority, count(*) as all, count(*) FILTER (WHERE o_orderstatus ='F') as fullfilled FROM priority_orders GROUP BY 1 ORDER BY 2, 1;
o_orderpriority | all | fullfilled
-----------------+-----+------------
2-HIGH | 593 | 271
1-URGENT | 603 | 280
(2 rows)
-- having
SELECT o_orderdate, count(*) from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc;
o_orderdate | count
-------------+-------
08-20-1996 | 5
10-10-1994 | 4
05-05-1994 | 4
04-07-1994 | 4
03-17-1993 | 4
(5 rows)
-- having with filters
SELECT o_orderdate, count(*) as all, count(*) FILTER(WHERE o_orderstatus = 'F') from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc;
o_orderdate | all | count
-------------+-----+-------
08-20-1996 | 5 | 0
10-10-1994 | 4 | 4
05-05-1994 | 4 | 4
04-07-1994 | 4 | 4
03-17-1993 | 4 | 4
(5 rows)
-- limit
SELECT o_orderkey, o_totalprice from orders_hash_part order by 2 desc, 1 asc limit 5 ;
o_orderkey | o_totalprice
------------+--------------
4421 | 401055.62
10209 | 400191.77
11142 | 395039.05
14179 | 384265.43
11296 | 378166.33
(5 rows)
SELECT o_orderkey, o_totalprice from priority_orders order by 2 desc, 1 asc limit 1 ;
o_orderkey | o_totalprice
------------+--------------
14179 | 384265.43
(1 row)
CREATE VIEW priority_lineitem AS SELECT li.* FROM lineitem_hash_part li JOIN priority_orders ON (l_orderkey = o_orderkey);
SELECT l_orderkey, count(*) FROM priority_lineitem GROUP BY 1 ORDER BY 2 DESC, 1 LIMIT 5;
l_orderkey | count
------------+-------
7 | 7
225 | 7
226 | 7
322 | 7
326 | 7
(5 rows)
CREATE VIEW air_shipped_lineitems AS SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR';
-- join between view and table
SELECT count(*) FROM orders_hash_part join air_shipped_lineitems ON (o_orderkey = l_orderkey);
count
-------
1706
(1 row)
-- join between views
SELECT count(*) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey);
count
-------
700
(1 row)
-- count distinct on partition column is not supported
SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey);
ERROR: cannot compute aggregate (distinct)
DETAIL: table partitioning is unsuitable for aggregate (distinct)
HINT: You can load the hll extension from contrib packages and enable distinct approximations.
-- count distinct on partition column is supported on router queries
SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems
ON (o_orderkey = l_orderkey)
WHERE (o_orderkey = 231);
count
-------
1
(1 row)
-- select distinct on router joins of views also works
SELECT distinct(o_orderkey) FROM priority_orders join air_shipped_lineitems
ON (o_orderkey = l_orderkey)
WHERE (o_orderkey = 231);
o_orderkey
------------
231
(1 row)
-- left join support depends on flattening of the query
-- following query fails since the inner part is kept as subquery
SELECT * FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey);
ERROR: cannot perform distributed planning on this query
DETAIL: Subqueries in outer joins are not supported
-- however, this works
SELECT count(*) FROM priority_orders left join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
count
-------
700
(1 row)
-- view at the inner side of is not supported
SELECT count(*) FROM priority_orders right join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
ERROR: cannot perform distributed planning on this query
DETAIL: Subqueries in outer joins are not supported
-- but view at the outer side is. This is essentially the same as a left join with arguments reversed.
SELECT count(*) FROM lineitem_hash_part right join priority_orders ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
count
-------
700
(1 row)
-- left join on router query is supported
SELECT o_orderkey, l_linenumber FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey)
WHERE o_orderkey = 2;
o_orderkey | l_linenumber
------------+--------------
2 |
(1 row)
-- repartition query on view join
-- it passes planning, fails at execution stage
SELECT * FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey);
ERROR: cannot use real time executor with repartition jobs
HINT: Set citus.task_executor_type to "task-tracker".
SET citus.task_executor_type to "task-tracker";
SELECT count(*) FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey);
count
-------
192
(1 row)
SET citus.task_executor_type to DEFAULT;
-- insert into... select works with views
CREATE TABLE temp_lineitem(LIKE lineitem_hash_part);
SELECT create_distributed_table('temp_lineitem', 'l_orderkey', 'hash', 'lineitem_hash_part');
create_distributed_table
--------------------------
(1 row)
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems;
SELECT count(*) FROM temp_lineitem;
count
-------
1706
(1 row)
-- following is a where false query, should not be inserting anything
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems WHERE l_shipmode = 'MAIL';
SELECT count(*) FROM temp_lineitem;
count
-------
1706
(1 row)
SET citus.task_executor_type to "task-tracker";
-- single view repartition subqueries are not supported
SELECT l_suppkey, count(*) FROM
(SELECT l_suppkey, l_shipdate, count(*)
FROM air_shipped_lineitems GROUP BY l_suppkey, l_shipdate) supps
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
ERROR: cannot perform distributed planning on this query
DETAIL: Subqueries without group by clause are not supported yet
-- logically same query without a view works fine
SELECT l_suppkey, count(*) FROM
(SELECT l_suppkey, l_shipdate, count(*)
FROM lineitem_hash_part WHERE l_shipmode = 'AIR' GROUP BY l_suppkey, l_shipdate) supps
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
l_suppkey | count
-----------+-------
7680 | 4
160 | 3
1042 | 3
1318 | 3
5873 | 3
(5 rows)
-- when a view is replaced by actual query it still fails
SELECT l_suppkey, count(*) FROM
(SELECT l_suppkey, l_shipdate, count(*)
FROM (SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR') asi
GROUP BY l_suppkey, l_shipdate) supps
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
ERROR: cannot perform distributed planning on this query
DETAIL: Subqueries without group by clause are not supported yet
-- repartition query on view with single table subquery
CREATE VIEW supp_count_view AS SELECT * FROM (SELECT l_suppkey, count(*) FROM lineitem_hash_part GROUP BY 1) s1;
SELECT * FROM supp_count_view ORDER BY 2 DESC, 1 LIMIT 10;
l_suppkey | count
-----------+-------
6104 | 8
1868 | 6
5532 | 6
5849 | 6
6169 | 6
6669 | 6
6692 | 6
7703 | 6
7869 | 6
8426 | 6
(10 rows)
SET citus.task_executor_type to DEFAULT;
-- create a view with aggregate
CREATE VIEW lineitems_by_shipping_method AS
SELECT l_shipmode, count(*) as cnt FROM lineitem_hash_part GROUP BY 1;
-- following will fail due to non GROUP BY of partition key
SELECT * FROM lineitems_by_shipping_method;
ERROR: Unrecognized range table id 1
-- create a view with group by on partition column
CREATE VIEW lineitems_by_orderkey AS
SELECT
l_orderkey, count(*)
FROM
lineitem_hash_part
GROUP BY 1;
-- this should work since we're able to push down this query
SELECT * FROM lineitems_by_orderkey ORDER BY 2 DESC, 1 ASC LIMIT 10;
l_orderkey | count
------------+-------
7 | 7
68 | 7
129 | 7
164 | 7
194 | 7
225 | 7
226 | 7
322 | 7
326 | 7
354 | 7
(10 rows)
-- it would also work since it is made router plannable
SELECT * FROM lineitems_by_orderkey WHERE l_orderkey = 100;
l_orderkey | count
------------+-------
100 | 5
(1 row)
DROP TABLE temp_lineitem CASCADE;
DROP VIEW supp_count_view;
DROP VIEW lineitems_by_orderkey;
DROP VIEW lineitems_by_shipping_method;
DROP VIEW air_shipped_lineitems;
DROP VIEW priority_lineitem;
DROP VIEW priority_orders;
-- new tests for real time use case including views and subqueries
-- create view to display recent user who has an activity after a timestamp
CREATE VIEW recent_users AS
SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC;
SELECT * FROM recent_users;
user_id | lastseen
---------+---------------------------------
87 | Tue Jan 21 05:53:51.866813 2014
50 | Tue Jan 21 05:53:44.251016 2014
74 | Tue Jan 21 05:54:04.837808 2014
6 | Tue Jan 21 05:57:47.118755 2014
71 | Tue Jan 21 05:55:52.018461 2014
39 | Tue Jan 21 05:55:18.875997 2014
66 | Tue Jan 21 05:51:31.681997 2014
100 | Tue Jan 21 05:49:04.953009 2014
46 | Tue Jan 21 05:49:00.229807 2014
86 | Tue Jan 21 05:48:54.381334 2014
13 | Tue Jan 21 05:48:45.418146 2014
90 | Tue Jan 21 05:48:25.027491 2014
58 | Tue Jan 21 05:47:30.418553 2014
44 | Tue Jan 21 05:47:01.104523 2014
(14 rows)
-- create a view for recent_events
CREATE VIEW recent_events AS
SELECT user_id, time FROM events_table
WHERE time > '2014-01-20 01:45:49.978738'::timestamp;
SELECT count(*) FROM recent_events;
count
-------
1105
(1 row)
-- count number of events of recent_users
SELECT count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id);
count
-------
1336
(1 row)
-- count number of events of per recent users order by count
SELECT ru.user_id, count(*)
FROM recent_users ru
JOIN events_table et
ON (ru.user_id = et.user_id)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1;
user_id | count
---------+-------
13 | 118
44 | 109
90 | 109
87 | 105
46 | 103
86 | 100
66 | 98
39 | 96
71 | 95
74 | 93
6 | 89
58 | 87
50 | 79
100 | 55
(14 rows)
-- the same query with a left join however, it would still generate the same result
SELECT ru.user_id, count(*)
FROM recent_users ru
LEFT JOIN events_table et
ON (ru.user_id = et.user_id)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1;
user_id | count
---------+-------
13 | 118
44 | 109
90 | 109
87 | 105
46 | 103
86 | 100
66 | 98
39 | 96
71 | 95
74 | 93
6 | 89
58 | 87
50 | 79
100 | 55
(14 rows)
-- query wrapped inside a subquery, it needs another top level order by
SELECT * FROM
(SELECT ru.user_id, count(*)
FROM recent_users ru
JOIN events_table et
ON (ru.user_id = et.user_id)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1) s1
ORDER BY 2 DESC, 1;
user_id | count
---------+-------
13 | 118
44 | 109
90 | 109
87 | 105
46 | 103
86 | 100
66 | 98
39 | 96
71 | 95
74 | 93
6 | 89
58 | 87
50 | 79
100 | 55
(14 rows)
-- non-partition key joins are not supported inside subquery
SELECT * FROM
(SELECT ru.user_id, count(*)
FROM recent_users ru
JOIN events_table et
ON (ru.user_id = et.event_type)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1) s1
ORDER BY 2 DESC, 1;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- join between views
-- recent users who has an event in recent events
SELECT ru.user_id FROM recent_users ru JOIN recent_events re USING(user_id) GROUP BY ru.user_id ORDER BY ru.user_id;
user_id
---------
6
13
39
44
46
50
58
66
71
74
86
87
90
100
(14 rows)
-- outer join inside a subquery
-- recent_events who are not done by recent users
SELECT count(*) FROM (
SELECT re.*, ru.user_id AS recent_user
FROM recent_events re LEFT JOIN recent_users ru USING(user_id)) reu
WHERE recent_user IS NULL;
count
-------
957
(1 row)
-- same query with anti-join
SELECT count(*)
FROM recent_events re LEFT JOIN recent_users ru ON(ru.user_id = re.user_id)
WHERE ru.user_id IS NULL;
count
-------
957
(1 row)
-- join between view and table
-- users who has recent activity and they have an entry with value_1 is less than 15
SELECT ut.* FROM recent_users ru JOIN users_table ut USING (user_id) WHERE ut.value_1 < 15 ORDER BY 1,2;
user_id | time | value_1 | value_2 | value_3 | value_4
---------+---------------------------------+---------+---------+---------+---------
6 | Mon Jan 13 05:30:08.289267 2014 | 12 | 140 | 618 |
6 | Thu Jan 16 15:17:16.779695 2014 | 6 | 978 | 430 |
6 | Sun Jan 19 06:09:39.900888 2014 | 3 | 908 | 688 |
13 | Sun Jan 19 22:09:26.256209 2014 | 2 | 755 | 584 |
39 | Wed Jan 15 05:46:51.48765 2014 | 14 | 657 | 137 |
39 | Sun Jan 19 11:26:47.45937 2014 | 12 | 118 | 165 |
44 | Wed Jan 15 14:23:52.532426 2014 | 8 | 204 | 735 |
44 | Sun Jan 19 05:53:34.829093 2014 | 4 | 758 | 205 |
46 | Mon Jan 13 20:39:11.211169 2014 | 0 | 235 | 475 |
46 | Wed Jan 15 09:14:57.471944 2014 | 2 | 407 | 664 |
50 | Sat Jan 11 11:07:13.089216 2014 | 6 | 292 | 425 |
58 | Sun Jan 19 22:36:14.795396 2014 | 2 | 86 | 311 |
66 | Tue Jan 14 20:16:31.219213 2014 | 14 | 347 | 655 |
74 | Tue Jan 21 01:38:39.570986 2014 | 9 | 334 | 642 |
86 | Sun Jan 19 06:18:51.466578 2014 | 14 | 712 | 490 |
87 | Sat Jan 11 20:46:28.439073 2014 | 2 | 528 | 311 |
90 | Sun Jan 12 21:37:30.778206 2014 | 11 | 458 | 377 |
100 | Sun Jan 19 22:32:08.284043 2014 | 2 | 384 | 149 |
(18 rows)
-- determine if a recent user has done a given event type or not
SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
FROM recent_users ru
LEFT JOIN events_table et
ON(ru.user_id = et.user_id AND et.event_type = 625)
ORDER BY 2 DESC, 1;
user_id | done_event
---------+------------
6 | YES
13 | NO
39 | NO
44 | NO
46 | NO
50 | NO
58 | NO
66 | NO
71 | NO
74 | NO
86 | NO
87 | NO
90 | NO
100 | NO
(14 rows)
-- view vs table join wrapped inside a subquery
SELECT * FROM
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
FROM recent_users ru
LEFT JOIN events_table et
ON(ru.user_id = et.user_id AND et.event_type = 625)
) s1
ORDER BY 2 DESC, 1;
user_id | done_event
---------+------------
6 | YES
13 | NO
39 | NO
44 | NO
46 | NO
50 | NO
58 | NO
66 | NO
71 | NO
74 | NO
86 | NO
87 | NO
90 | NO
100 | NO
(14 rows)
-- event vs table non-partition-key join is not supported
SELECT * FROM
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
FROM recent_users ru
LEFT JOIN events_table et
ON(ru.user_id = et.event_type)
) s1
ORDER BY 2 DESC, 1;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- create a select only view
CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150;
CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id);
SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
user_id
---------
6
13
39
44
46
50
58
66
71
74
86
90
(12 rows)
-- this would be supported when we implement where partition_key in (subquery) support
SELECT et.user_id, et.time FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users) GROUP BY 1,2 ORDER BY 1 DESC,2 DESC LIMIT 5;
user_id | time
---------+---------------------------------
90 | Tue Jan 21 02:50:05.379732 2014
90 | Tue Jan 21 00:08:33.911898 2014
90 | Mon Jan 20 22:25:39.21906 2014
90 | Mon Jan 20 21:11:10.814326 2014
90 | Mon Jan 20 19:16:33.359257 2014
(5 rows)
-- it is supported when it is a router query
SELECT count(*) FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users WHERE user_id = 90);
count
-------
109
(1 row)
-- expected this to work but it did not
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users);
ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT
HINT: Consider using an equality filter on the distributed table's partition column.
-- wrapping it inside a SELECT * works
SELECT *
FROM (
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 15 AND user_id > 10
ORDER BY user_id;
user_id
---------
11
12
13
14
(4 rows)
-- union all also works for views
SELECT *
FROM (
(SELECT user_id FROM recent_users)
UNION ALL
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 15 AND user_id > 10
ORDER BY user_id;
user_id
---------
11
11
11
12
12
12
12
12
12
13
13
13
13
13
14
(15 rows)
SELECT count(*)
FROM (
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 15 AND user_id > 10;
count
-------
4
(1 row)
-- expected this to work but it does not
SELECT count(*)
FROM (
(SELECT user_id FROM recent_users)
UNION ALL
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 15 AND user_id > 10;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
-- expand view definitions and re-run last 2 queries
SELECT count(*)
FROM (
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa
)
UNION
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u
WHERE user_id < 15 AND user_id > 10;
count
-------
4
(1 row)
SELECT count(*)
FROM (
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa
)
UNION ALL
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u
WHERE user_id < 15 AND user_id > 10;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
-- test distinct
-- distinct is supported if it is on a partition key
CREATE VIEW distinct_user_with_value_1_15 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 15;
SELECT * FROM distinct_user_with_value_1_15 ORDER BY user_id;
user_id
---------
7
8
35
42
46
53
70
82
87
88
96
(11 rows)
-- distinct is not supported if it is on a non-partition key
CREATE VIEW distinct_value_1 AS SELECT DISTINCT value_1 FROM users_table WHERE value_2 = 15;
SELECT * FROM distinct_value_1;
ERROR: cannot perform distributed planning on this query
DETAIL: Subqueries without group by clause are not supported yet
-- CTEs are not supported even if they are on views
CREATE VIEW cte_view_1 AS
WITH c1 AS (SELECT * FROM users_table WHERE value_1 = 15) SELECT * FROM c1 WHERE value_2 < 500;
SELECT * FROM cte_view_1;
ERROR: cannot push down this subquery
DETAIL: Table expressions other than simple relations and subqueries are currently unsupported
-- this is single shard query but still not supported since it has view + cte
-- router planner can't detect it
SELECT * FROM cte_view_1 WHERE user_id = 8;
ERROR: cannot push down this subquery
DETAIL: Table expressions other than simple relations and subqueries are currently unsupported
-- if CTE itself prunes down to a single shard than the view is supported (router plannable)
CREATE VIEW cte_view_2 AS
WITH c1 AS (SELECT * FROM users_table WHERE user_id = 8) SELECT * FROM c1 WHERE value_1 = 15;
SELECT * FROM cte_view_2;
user_id | time | value_1 | value_2 | value_3 | value_4
---------+---------------------------------+---------+---------+---------+---------
8 | Tue Jan 21 00:52:36.967785 2014 | 15 | 10 | 868 |
(1 row)
CREATE VIEW router_view AS SELECT * FROM users_table WHERE user_id = 2;
-- router plannable
SELECT user_id FROM router_view GROUP BY 1;
user_id
---------
2
(1 row)
-- join a router view
SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN recent_events USING (user_id) ORDER BY 2 LIMIT 3;
user_id | time
---------+---------------------------------
2 | Mon Jan 20 02:02:03.208351 2014
2 | Mon Jan 20 02:34:14.54301 2014
2 | Mon Jan 20 03:16:38.418772 2014
(3 rows)
SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN (SELECT * FROM recent_events) re USING (user_id) ORDER BY 2 LIMIT 3;
user_id | time
---------+---------------------------------
2 | Mon Jan 20 02:02:03.208351 2014
2 | Mon Jan 20 02:34:14.54301 2014
2 | Mon Jan 20 03:16:38.418772 2014
(3 rows)
-- views with limits
CREATE VIEW recent_10_users AS
SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
ORDER BY lastseen DESC
LIMIT 10;
-- this is not supported since it has limit in it and subquery_pushdown is not set
SELECT * FROM recent_10_users;
ERROR: cannot perform distributed planning on this query
DETAIL: Subqueries with limit are not supported yet
SET citus.subquery_pushdown to ON;
-- still not supported since outer query does not have limit
-- it shows a different (subquery with single relation) error message
SELECT * FROM recent_10_users;
ERROR: cannot perform distributed planning on this query
DETAIL: Subqueries with limit are not supported yet
-- now it displays more correct error message
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id);
ERROR: cannot push down this subquery
DETAIL: Limit in subquery without limit in the outermost query is unsupported
-- now both are supported when there is a limit on the outer most query
SELECT * FROM recent_10_users ORDER BY lastseen DESC LIMIT 10;
user_id | lastseen
---------+---------------------------------
6 | Tue Jan 21 05:57:47.118755 2014
71 | Tue Jan 21 05:55:52.018461 2014
39 | Tue Jan 21 05:55:18.875997 2014
74 | Tue Jan 21 05:54:04.837808 2014
87 | Tue Jan 21 05:53:51.866813 2014
50 | Tue Jan 21 05:53:44.251016 2014
66 | Tue Jan 21 05:51:31.681997 2014
100 | Tue Jan 21 05:49:04.953009 2014
46 | Tue Jan 21 05:49:00.229807 2014
86 | Tue Jan 21 05:48:54.381334 2014
(10 rows)
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
user_id | time | event_type | value_2 | value_3 | value_4
---------+---------------------------------+------------+---------+---------+---------
65 | Tue Jan 21 05:56:52.624231 2014 | 241 | 30 | 543 |
42 | Tue Jan 21 05:46:35.158342 2014 | 761 | 877 | 335 |
54 | Tue Jan 21 05:46:19.103645 2014 | 595 | 477 | 996 |
44 | Tue Jan 21 05:43:00.838945 2014 | 682 | 641 | 448 |
27 | Tue Jan 21 05:34:10.935865 2014 | 912 | 605 | 989 |
61 | Tue Jan 21 05:25:27.452065 2014 | 392 | 472 | 925 |
19 | Tue Jan 21 05:23:09.26298 2014 | 202 | 888 | 640 |
65 | Tue Jan 21 05:22:56.725329 2014 | 519 | 457 | 259 |
27 | Tue Jan 21 05:19:14.38026 2014 | 19 | 19 | 205 |
11 | Tue Jan 21 05:15:14.879531 2014 | 459 | 545 | 80 |
(10 rows)
RESET citus.subquery_pushdown;
-- explain tests
EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Sort Key: remote_scan.user_id
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: users_table.user_id
-> Hash Join
Hash Cond: (users_table.user_id = ru.user_id)
-> Bitmap Heap Scan on users_table_1400000 users_table
Recheck Cond: ((value_1 >= 120) AND (value_1 < 150))
-> Bitmap Index Scan on is_index3_1400000
Index Cond: ((value_1 >= 120) AND (value_1 < 150))
-> Hash
-> Subquery Scan on ru
-> Sort
Sort Key: (max(users_table_1."time")) DESC
-> HashAggregate
Group Key: users_table_1.user_id
Filter: (max(users_table_1."time") > '2014-01-21 05:45:49.978738'::timestamp without time zone)
-> Seq Scan on users_table_1400000 users_table_1
(25 rows)
EXPLAIN (COSTS FALSE) SELECT *
FROM (
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 15 AND user_id > 10
ORDER BY user_id;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Sort Key: remote_scan.user_id
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Unique
-> Sort
Sort Key: recent_users.user_id
-> Append
-> Subquery Scan on recent_users
-> Sort
Sort Key: (max(users_table."time")) DESC
-> GroupAggregate
Group Key: users_table.user_id
Filter: (max(users_table."time") > '2014-01-21 05:45:49.978738'::timestamp without time zone)
-> Index Scan using is_index1_1400000 on users_table_1400000 users_table
Index Cond: ((user_id < 15) AND (user_id > 10))
-> Index Scan using is_index1_1400000 on users_table_1400000 users_table_1
Index Cond: ((user_id < 15) AND (user_id > 10))
Filter: ((value_1 >= 120) AND (value_1 < 150))
(22 rows)
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
ERROR: cannot push down this subquery
DETAIL: Limit in subquery is currently unsupported
SET citus.subquery_pushdown to ON;
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: remote_scan."time" DESC
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Limit
-> Sort
Sort Key: et."time" DESC
-> Hash Join
Hash Cond: (et.user_id = recent_10_users.user_id)
-> Seq Scan on events_table_1400004 et
-> Hash
-> Subquery Scan on recent_10_users
-> Limit
-> Sort
Sort Key: (max(users_table."time")) DESC
-> HashAggregate
Group Key: users_table.user_id
-> Seq Scan on users_table_1400000 users_table
(22 rows)
RESET citus.subquery_pushdown;
DROP VIEW recent_10_users;
DROP VIEW router_view;
DROP VIEW cte_view_2;
DROP VIEW cte_view_1;
DROP VIEW distinct_value_1;
DROP VIEW distinct_user_with_value_1_15;
DROP VIEW recent_selected_users;
DROP VIEW selected_users;
DROP VIEW recent_events;
DROP VIEW recent_users;