mirror of https://github.com/citusdata/citus.git
907 lines
32 KiB
Plaintext
907 lines
32 KiB
Plaintext
--
|
|
-- MULTI_VIEW
|
|
--
|
|
-- This file contains test cases for view support. It verifies various
|
|
-- Citus features: simple selects, aggregates, joins, outer joins,
|
|
-- router queries, single row inserts, multi row inserts via insert
|
|
-- into select, multi row insert via copy commands.
|
|
SELECT count(*) FROM lineitem_hash_part;
|
|
count
|
|
-------
|
|
12000
|
|
(1 row)
|
|
|
|
SELECT count(*) FROM orders_hash_part;
|
|
count
|
|
-------
|
|
2984
|
|
(1 row)
|
|
|
|
-- create a view for priority orders
|
|
CREATE VIEW priority_orders AS SELECT * FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM';
|
|
-- aggregate pushdown
|
|
SELECT o_orderpriority, count(*) FROM priority_orders GROUP BY 1 ORDER BY 2, 1;
|
|
o_orderpriority | count
|
|
-----------------+-------
|
|
2-HIGH | 593
|
|
1-URGENT | 603
|
|
(2 rows)
|
|
|
|
SELECT o_orderpriority, count(*) FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM' GROUP BY 1 ORDER BY 2,1;
|
|
o_orderpriority | count
|
|
-----------------+-------
|
|
2-HIGH | 593
|
|
1-URGENT | 603
|
|
(2 rows)
|
|
|
|
-- filters
|
|
SELECT o_orderpriority, count(*) as all, count(*) FILTER (WHERE o_orderstatus ='F') as fullfilled FROM priority_orders GROUP BY 1 ORDER BY 2, 1;
|
|
o_orderpriority | all | fullfilled
|
|
-----------------+-----+------------
|
|
2-HIGH | 593 | 271
|
|
1-URGENT | 603 | 280
|
|
(2 rows)
|
|
|
|
-- having
|
|
SELECT o_orderdate, count(*) from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc;
|
|
o_orderdate | count
|
|
-------------+-------
|
|
08-20-1996 | 5
|
|
10-10-1994 | 4
|
|
05-05-1994 | 4
|
|
04-07-1994 | 4
|
|
03-17-1993 | 4
|
|
(5 rows)
|
|
|
|
-- having with filters
|
|
SELECT o_orderdate, count(*) as all, count(*) FILTER(WHERE o_orderstatus = 'F') from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc;
|
|
o_orderdate | all | count
|
|
-------------+-----+-------
|
|
08-20-1996 | 5 | 0
|
|
10-10-1994 | 4 | 4
|
|
05-05-1994 | 4 | 4
|
|
04-07-1994 | 4 | 4
|
|
03-17-1993 | 4 | 4
|
|
(5 rows)
|
|
|
|
-- limit
|
|
SELECT o_orderkey, o_totalprice from orders_hash_part order by 2 desc, 1 asc limit 5 ;
|
|
o_orderkey | o_totalprice
|
|
------------+--------------
|
|
4421 | 401055.62
|
|
10209 | 400191.77
|
|
11142 | 395039.05
|
|
14179 | 384265.43
|
|
11296 | 378166.33
|
|
(5 rows)
|
|
|
|
SELECT o_orderkey, o_totalprice from priority_orders order by 2 desc, 1 asc limit 1 ;
|
|
o_orderkey | o_totalprice
|
|
------------+--------------
|
|
14179 | 384265.43
|
|
(1 row)
|
|
|
|
CREATE VIEW priority_lineitem AS SELECT li.* FROM lineitem_hash_part li JOIN priority_orders ON (l_orderkey = o_orderkey);
|
|
SELECT l_orderkey, count(*) FROM priority_lineitem GROUP BY 1 ORDER BY 2 DESC, 1 LIMIT 5;
|
|
l_orderkey | count
|
|
------------+-------
|
|
7 | 7
|
|
225 | 7
|
|
226 | 7
|
|
322 | 7
|
|
326 | 7
|
|
(5 rows)
|
|
|
|
CREATE VIEW air_shipped_lineitems AS SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR';
|
|
-- join between view and table
|
|
SELECT count(*) FROM orders_hash_part join air_shipped_lineitems ON (o_orderkey = l_orderkey);
|
|
count
|
|
-------
|
|
1706
|
|
(1 row)
|
|
|
|
-- join between views
|
|
SELECT count(*) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey);
|
|
count
|
|
-------
|
|
700
|
|
(1 row)
|
|
|
|
-- count distinct on partition column is not supported
|
|
SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey);
|
|
ERROR: cannot compute aggregate (distinct)
|
|
DETAIL: table partitioning is unsuitable for aggregate (distinct)
|
|
HINT: You can load the hll extension from contrib packages and enable distinct approximations.
|
|
-- count distinct on partition column is supported on router queries
|
|
SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems
|
|
ON (o_orderkey = l_orderkey)
|
|
WHERE (o_orderkey = 231);
|
|
count
|
|
-------
|
|
1
|
|
(1 row)
|
|
|
|
-- select distinct on router joins of views also works
|
|
SELECT distinct(o_orderkey) FROM priority_orders join air_shipped_lineitems
|
|
ON (o_orderkey = l_orderkey)
|
|
WHERE (o_orderkey = 231);
|
|
o_orderkey
|
|
------------
|
|
231
|
|
(1 row)
|
|
|
|
-- left join support depends on flattening of the query
|
|
-- following query fails since the inner part is kept as subquery
|
|
SELECT * FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey);
|
|
ERROR: cannot perform distributed planning on this query
|
|
DETAIL: Subqueries in outer joins are not supported
|
|
-- however, this works
|
|
SELECT count(*) FROM priority_orders left join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
|
|
count
|
|
-------
|
|
700
|
|
(1 row)
|
|
|
|
-- view at the inner side of an outer join is not supported
|
|
SELECT count(*) FROM priority_orders right join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
|
|
ERROR: cannot perform distributed planning on this query
|
|
DETAIL: Subqueries in outer joins are not supported
|
|
-- but view at the outer side is. This is essentially the same as a left join with arguments reversed.
|
|
SELECT count(*) FROM lineitem_hash_part right join priority_orders ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
|
|
count
|
|
-------
|
|
700
|
|
(1 row)
|
|
|
|
-- left join on router query is supported
|
|
SELECT o_orderkey, l_linenumber FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey)
|
|
WHERE o_orderkey = 2;
|
|
o_orderkey | l_linenumber
|
|
------------+--------------
|
|
2 |
|
|
(1 row)
|
|
|
|
-- repartition query on view join
|
|
-- it passes planning, fails at execution stage
|
|
SELECT * FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey);
|
|
ERROR: cannot use real time executor with repartition jobs
|
|
HINT: Set citus.task_executor_type to "task-tracker".
|
|
SET citus.task_executor_type to "task-tracker";
|
|
SELECT count(*) FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey);
|
|
count
|
|
-------
|
|
192
|
|
(1 row)
|
|
|
|
SET citus.task_executor_type to DEFAULT;
|
|
-- insert into... select works with views
|
|
CREATE TABLE temp_lineitem(LIKE lineitem_hash_part);
|
|
SELECT create_distributed_table('temp_lineitem', 'l_orderkey', 'hash', 'lineitem_hash_part');
|
|
create_distributed_table
|
|
--------------------------
|
|
|
|
(1 row)
|
|
|
|
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems;
|
|
SELECT count(*) FROM temp_lineitem;
|
|
count
|
|
-------
|
|
1706
|
|
(1 row)
|
|
|
|
-- the following is a WHERE-false query; it should not insert anything
|
|
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems WHERE l_shipmode = 'MAIL';
|
|
SELECT count(*) FROM temp_lineitem;
|
|
count
|
|
-------
|
|
1706
|
|
(1 row)
|
|
|
|
SET citus.task_executor_type to "task-tracker";
|
|
-- single view repartition subqueries are not supported
|
|
SELECT l_suppkey, count(*) FROM
|
|
(SELECT l_suppkey, l_shipdate, count(*)
|
|
FROM air_shipped_lineitems GROUP BY l_suppkey, l_shipdate) supps
|
|
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
|
|
ERROR: cannot perform distributed planning on this query
|
|
DETAIL: Subqueries without group by clause are not supported yet
|
|
-- logically same query without a view works fine
|
|
SELECT l_suppkey, count(*) FROM
|
|
(SELECT l_suppkey, l_shipdate, count(*)
|
|
FROM lineitem_hash_part WHERE l_shipmode = 'AIR' GROUP BY l_suppkey, l_shipdate) supps
|
|
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
|
|
l_suppkey | count
|
|
-----------+-------
|
|
7680 | 4
|
|
160 | 3
|
|
1042 | 3
|
|
1318 | 3
|
|
5873 | 3
|
|
(5 rows)
|
|
|
|
-- when a view is replaced by actual query it still fails
|
|
SELECT l_suppkey, count(*) FROM
|
|
(SELECT l_suppkey, l_shipdate, count(*)
|
|
FROM (SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR') asi
|
|
GROUP BY l_suppkey, l_shipdate) supps
|
|
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
|
|
ERROR: cannot perform distributed planning on this query
|
|
DETAIL: Subqueries without group by clause are not supported yet
|
|
-- repartition query on view with single table subquery
|
|
CREATE VIEW supp_count_view AS SELECT * FROM (SELECT l_suppkey, count(*) FROM lineitem_hash_part GROUP BY 1) s1;
|
|
SELECT * FROM supp_count_view ORDER BY 2 DESC, 1 LIMIT 10;
|
|
l_suppkey | count
|
|
-----------+-------
|
|
6104 | 8
|
|
1868 | 6
|
|
5532 | 6
|
|
5849 | 6
|
|
6169 | 6
|
|
6669 | 6
|
|
6692 | 6
|
|
7703 | 6
|
|
7869 | 6
|
|
8426 | 6
|
|
(10 rows)
|
|
|
|
SET citus.task_executor_type to DEFAULT;
|
|
-- create a view with aggregate
|
|
CREATE VIEW lineitems_by_shipping_method AS
|
|
SELECT l_shipmode, count(*) as cnt FROM lineitem_hash_part GROUP BY 1;
|
|
-- the following will fail because the view does not GROUP BY the partition key
|
|
SELECT * FROM lineitems_by_shipping_method;
|
|
ERROR: Unrecognized range table id 1
|
|
-- create a view with group by on partition column
|
|
CREATE VIEW lineitems_by_orderkey AS
|
|
SELECT
|
|
l_orderkey, count(*)
|
|
FROM
|
|
lineitem_hash_part
|
|
GROUP BY 1;
|
|
-- this should work since we're able to push down this query
|
|
SELECT * FROM lineitems_by_orderkey ORDER BY 2 DESC, 1 ASC LIMIT 10;
|
|
l_orderkey | count
|
|
------------+-------
|
|
7 | 7
|
|
68 | 7
|
|
129 | 7
|
|
164 | 7
|
|
194 | 7
|
|
225 | 7
|
|
226 | 7
|
|
322 | 7
|
|
326 | 7
|
|
354 | 7
|
|
(10 rows)
|
|
|
|
-- this also works since the query is router plannable
|
|
SELECT * FROM lineitems_by_orderkey WHERE l_orderkey = 100;
|
|
l_orderkey | count
|
|
------------+-------
|
|
100 | 5
|
|
(1 row)
|
|
|
|
DROP TABLE temp_lineitem CASCADE;
|
|
DROP VIEW supp_count_view;
|
|
DROP VIEW lineitems_by_orderkey;
|
|
DROP VIEW lineitems_by_shipping_method;
|
|
DROP VIEW air_shipped_lineitems;
|
|
DROP VIEW priority_lineitem;
|
|
DROP VIEW priority_orders;
|
|
-- new tests for real time use case including views and subqueries
|
|
-- create a view to display recent users who have had activity after a given timestamp
|
|
CREATE VIEW recent_users AS
|
|
SELECT user_id, max(time) as lastseen FROM users_table
|
|
GROUP BY user_id
|
|
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC;
|
|
SELECT * FROM recent_users;
|
|
user_id | lastseen
|
|
---------+---------------------------------
|
|
87 | Tue Jan 21 05:53:51.866813 2014
|
|
50 | Tue Jan 21 05:53:44.251016 2014
|
|
74 | Tue Jan 21 05:54:04.837808 2014
|
|
6 | Tue Jan 21 05:57:47.118755 2014
|
|
71 | Tue Jan 21 05:55:52.018461 2014
|
|
39 | Tue Jan 21 05:55:18.875997 2014
|
|
66 | Tue Jan 21 05:51:31.681997 2014
|
|
100 | Tue Jan 21 05:49:04.953009 2014
|
|
46 | Tue Jan 21 05:49:00.229807 2014
|
|
86 | Tue Jan 21 05:48:54.381334 2014
|
|
13 | Tue Jan 21 05:48:45.418146 2014
|
|
90 | Tue Jan 21 05:48:25.027491 2014
|
|
58 | Tue Jan 21 05:47:30.418553 2014
|
|
44 | Tue Jan 21 05:47:01.104523 2014
|
|
(14 rows)
|
|
|
|
-- create a view for recent_events
|
|
CREATE VIEW recent_events AS
|
|
SELECT user_id, time FROM events_table
|
|
WHERE time > '2014-01-20 01:45:49.978738'::timestamp;
|
|
SELECT count(*) FROM recent_events;
|
|
count
|
|
-------
|
|
1105
|
|
(1 row)
|
|
|
|
-- count number of events of recent_users
|
|
SELECT count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id);
|
|
count
|
|
-------
|
|
1336
|
|
(1 row)
|
|
|
|
-- count the number of events per recent user, ordered by count
|
|
SELECT ru.user_id, count(*)
|
|
FROM recent_users ru
|
|
JOIN events_table et
|
|
ON (ru.user_id = et.user_id)
|
|
GROUP BY ru.user_id
|
|
ORDER BY 2 DESC, 1;
|
|
user_id | count
|
|
---------+-------
|
|
13 | 118
|
|
44 | 109
|
|
90 | 109
|
|
87 | 105
|
|
46 | 103
|
|
86 | 100
|
|
66 | 98
|
|
39 | 96
|
|
71 | 95
|
|
74 | 93
|
|
6 | 89
|
|
58 | 87
|
|
50 | 79
|
|
100 | 55
|
|
(14 rows)
|
|
|
|
-- the same query with a left join; it still generates the same result
|
|
SELECT ru.user_id, count(*)
|
|
FROM recent_users ru
|
|
LEFT JOIN events_table et
|
|
ON (ru.user_id = et.user_id)
|
|
GROUP BY ru.user_id
|
|
ORDER BY 2 DESC, 1;
|
|
user_id | count
|
|
---------+-------
|
|
13 | 118
|
|
44 | 109
|
|
90 | 109
|
|
87 | 105
|
|
46 | 103
|
|
86 | 100
|
|
66 | 98
|
|
39 | 96
|
|
71 | 95
|
|
74 | 93
|
|
6 | 89
|
|
58 | 87
|
|
50 | 79
|
|
100 | 55
|
|
(14 rows)
|
|
|
|
-- query wrapped inside a subquery, it needs another top level order by
|
|
SELECT * FROM
|
|
(SELECT ru.user_id, count(*)
|
|
FROM recent_users ru
|
|
JOIN events_table et
|
|
ON (ru.user_id = et.user_id)
|
|
GROUP BY ru.user_id
|
|
ORDER BY 2 DESC, 1) s1
|
|
ORDER BY 2 DESC, 1;
|
|
user_id | count
|
|
---------+-------
|
|
13 | 118
|
|
44 | 109
|
|
90 | 109
|
|
87 | 105
|
|
46 | 103
|
|
86 | 100
|
|
66 | 98
|
|
39 | 96
|
|
71 | 95
|
|
74 | 93
|
|
6 | 89
|
|
58 | 87
|
|
50 | 79
|
|
100 | 55
|
|
(14 rows)
|
|
|
|
-- non-partition key joins are not supported inside subquery
|
|
SELECT * FROM
|
|
(SELECT ru.user_id, count(*)
|
|
FROM recent_users ru
|
|
JOIN events_table et
|
|
ON (ru.user_id = et.event_type)
|
|
GROUP BY ru.user_id
|
|
ORDER BY 2 DESC, 1) s1
|
|
ORDER BY 2 DESC, 1;
|
|
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
|
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
|
-- join between views
|
|
-- recent users who have an event in recent events
|
|
SELECT ru.user_id FROM recent_users ru JOIN recent_events re USING(user_id) GROUP BY ru.user_id ORDER BY ru.user_id;
|
|
user_id
|
|
---------
|
|
6
|
|
13
|
|
39
|
|
44
|
|
46
|
|
50
|
|
58
|
|
66
|
|
71
|
|
74
|
|
86
|
|
87
|
|
90
|
|
100
|
|
(14 rows)
|
|
|
|
-- outer join inside a subquery
|
|
-- recent events that were not done by recent users
|
|
SELECT count(*) FROM (
|
|
SELECT re.*, ru.user_id AS recent_user
|
|
FROM recent_events re LEFT JOIN recent_users ru USING(user_id)) reu
|
|
WHERE recent_user IS NULL;
|
|
count
|
|
-------
|
|
957
|
|
(1 row)
|
|
|
|
-- same query with anti-join
|
|
SELECT count(*)
|
|
FROM recent_events re LEFT JOIN recent_users ru ON(ru.user_id = re.user_id)
|
|
WHERE ru.user_id IS NULL;
|
|
count
|
|
-------
|
|
957
|
|
(1 row)
|
|
|
|
-- join between view and table
|
|
-- users who have recent activity and have an entry with value_1 less than 15
|
|
SELECT ut.* FROM recent_users ru JOIN users_table ut USING (user_id) WHERE ut.value_1 < 15 ORDER BY 1,2;
|
|
user_id | time | value_1 | value_2 | value_3 | value_4
|
|
---------+---------------------------------+---------+---------+---------+---------
|
|
6 | Mon Jan 13 05:30:08.289267 2014 | 12 | 140 | 618 |
|
|
6 | Thu Jan 16 15:17:16.779695 2014 | 6 | 978 | 430 |
|
|
6 | Sun Jan 19 06:09:39.900888 2014 | 3 | 908 | 688 |
|
|
13 | Sun Jan 19 22:09:26.256209 2014 | 2 | 755 | 584 |
|
|
39 | Wed Jan 15 05:46:51.48765 2014 | 14 | 657 | 137 |
|
|
39 | Sun Jan 19 11:26:47.45937 2014 | 12 | 118 | 165 |
|
|
44 | Wed Jan 15 14:23:52.532426 2014 | 8 | 204 | 735 |
|
|
44 | Sun Jan 19 05:53:34.829093 2014 | 4 | 758 | 205 |
|
|
46 | Mon Jan 13 20:39:11.211169 2014 | 0 | 235 | 475 |
|
|
46 | Wed Jan 15 09:14:57.471944 2014 | 2 | 407 | 664 |
|
|
50 | Sat Jan 11 11:07:13.089216 2014 | 6 | 292 | 425 |
|
|
58 | Sun Jan 19 22:36:14.795396 2014 | 2 | 86 | 311 |
|
|
66 | Tue Jan 14 20:16:31.219213 2014 | 14 | 347 | 655 |
|
|
74 | Tue Jan 21 01:38:39.570986 2014 | 9 | 334 | 642 |
|
|
86 | Sun Jan 19 06:18:51.466578 2014 | 14 | 712 | 490 |
|
|
87 | Sat Jan 11 20:46:28.439073 2014 | 2 | 528 | 311 |
|
|
90 | Sun Jan 12 21:37:30.778206 2014 | 11 | 458 | 377 |
|
|
100 | Sun Jan 19 22:32:08.284043 2014 | 2 | 384 | 149 |
|
|
(18 rows)
|
|
|
|
-- determine if a recent user has done a given event type or not
|
|
SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
|
|
FROM recent_users ru
|
|
LEFT JOIN events_table et
|
|
ON(ru.user_id = et.user_id AND et.event_type = 625)
|
|
ORDER BY 2 DESC, 1;
|
|
user_id | done_event
|
|
---------+------------
|
|
6 | YES
|
|
13 | NO
|
|
39 | NO
|
|
44 | NO
|
|
46 | NO
|
|
50 | NO
|
|
58 | NO
|
|
66 | NO
|
|
71 | NO
|
|
74 | NO
|
|
86 | NO
|
|
87 | NO
|
|
90 | NO
|
|
100 | NO
|
|
(14 rows)
|
|
|
|
-- view vs table join wrapped inside a subquery
|
|
SELECT * FROM
|
|
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
|
|
FROM recent_users ru
|
|
LEFT JOIN events_table et
|
|
ON(ru.user_id = et.user_id AND et.event_type = 625)
|
|
) s1
|
|
ORDER BY 2 DESC, 1;
|
|
user_id | done_event
|
|
---------+------------
|
|
6 | YES
|
|
13 | NO
|
|
39 | NO
|
|
44 | NO
|
|
46 | NO
|
|
50 | NO
|
|
58 | NO
|
|
66 | NO
|
|
71 | NO
|
|
74 | NO
|
|
86 | NO
|
|
87 | NO
|
|
90 | NO
|
|
100 | NO
|
|
(14 rows)
|
|
|
|
-- view vs table non-partition-key join is not supported
|
|
SELECT * FROM
|
|
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
|
|
FROM recent_users ru
|
|
LEFT JOIN events_table et
|
|
ON(ru.user_id = et.event_type)
|
|
) s1
|
|
ORDER BY 2 DESC, 1;
|
|
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
|
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
|
-- create a select only view
|
|
CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150;
|
|
CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id);
|
|
SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
|
|
user_id
|
|
---------
|
|
6
|
|
13
|
|
39
|
|
44
|
|
46
|
|
50
|
|
58
|
|
66
|
|
71
|
|
74
|
|
86
|
|
90
|
|
(12 rows)
|
|
|
|
-- where partition_key in (subquery) works with views as well
|
|
SELECT et.user_id, et.time FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users) GROUP BY 1,2 ORDER BY 1 DESC,2 DESC LIMIT 5;
|
|
user_id | time
|
|
---------+---------------------------------
|
|
90 | Tue Jan 21 02:50:05.379732 2014
|
|
90 | Tue Jan 21 00:08:33.911898 2014
|
|
90 | Mon Jan 20 22:25:39.21906 2014
|
|
90 | Mon Jan 20 21:11:10.814326 2014
|
|
90 | Mon Jan 20 19:16:33.359257 2014
|
|
(5 rows)
|
|
|
|
-- it is supported when it is a router query
|
|
SELECT count(*) FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users WHERE user_id = 90);
|
|
count
|
|
-------
|
|
109
|
|
(1 row)
|
|
|
|
-- expected this to work but it did not
|
|
(SELECT user_id FROM recent_users)
|
|
UNION
|
|
(SELECT user_id FROM selected_users);
|
|
ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT
|
|
HINT: Consider using an equality filter on the distributed table's partition column.
|
|
-- wrapping it inside a SELECT * works
|
|
SELECT *
|
|
FROM (
|
|
(SELECT user_id FROM recent_users)
|
|
UNION
|
|
(SELECT user_id FROM selected_users) ) u
|
|
WHERE user_id < 15 AND user_id > 10
|
|
ORDER BY user_id;
|
|
user_id
|
|
---------
|
|
11
|
|
12
|
|
13
|
|
14
|
|
(4 rows)
|
|
|
|
-- union all also works for views
|
|
SELECT *
|
|
FROM (
|
|
(SELECT user_id FROM recent_users)
|
|
UNION ALL
|
|
(SELECT user_id FROM selected_users) ) u
|
|
WHERE user_id < 15 AND user_id > 10
|
|
ORDER BY user_id;
|
|
user_id
|
|
---------
|
|
11
|
|
11
|
|
11
|
|
12
|
|
12
|
|
12
|
|
12
|
|
12
|
|
12
|
|
13
|
|
13
|
|
13
|
|
13
|
|
13
|
|
14
|
|
(15 rows)
|
|
|
|
SELECT count(*)
|
|
FROM (
|
|
(SELECT user_id FROM recent_users)
|
|
UNION
|
|
(SELECT user_id FROM selected_users) ) u
|
|
WHERE user_id < 15 AND user_id > 10;
|
|
count
|
|
-------
|
|
4
|
|
(1 row)
|
|
|
|
-- expected this to work but it does not
|
|
SELECT count(*)
|
|
FROM (
|
|
(SELECT user_id FROM recent_users)
|
|
UNION ALL
|
|
(SELECT user_id FROM selected_users) ) u
|
|
WHERE user_id < 15 AND user_id > 10;
|
|
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
|
|
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
|
|
-- expand view definitions and re-run last 2 queries
|
|
SELECT count(*)
|
|
FROM (
|
|
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
|
|
GROUP BY user_id
|
|
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa
|
|
)
|
|
UNION
|
|
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u
|
|
WHERE user_id < 15 AND user_id > 10;
|
|
count
|
|
-------
|
|
4
|
|
(1 row)
|
|
|
|
SELECT count(*)
|
|
FROM (
|
|
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
|
|
GROUP BY user_id
|
|
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa
|
|
)
|
|
UNION ALL
|
|
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u
|
|
WHERE user_id < 15 AND user_id > 10;
|
|
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
|
|
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
|
|
-- test distinct
|
|
-- distinct is supported if it is on a partition key
|
|
CREATE VIEW distinct_user_with_value_1_15 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 15;
|
|
SELECT * FROM distinct_user_with_value_1_15 ORDER BY user_id;
|
|
user_id
|
|
---------
|
|
7
|
|
8
|
|
35
|
|
42
|
|
46
|
|
53
|
|
70
|
|
82
|
|
87
|
|
88
|
|
96
|
|
(11 rows)
|
|
|
|
-- distinct is not supported if it is on a non-partition key
|
|
CREATE VIEW distinct_value_1 AS SELECT DISTINCT value_1 FROM users_table WHERE value_2 = 15;
|
|
SELECT * FROM distinct_value_1;
|
|
ERROR: cannot perform distributed planning on this query
|
|
DETAIL: Subqueries without group by clause are not supported yet
|
|
-- CTEs are not supported even if they are on views
|
|
CREATE VIEW cte_view_1 AS
|
|
WITH c1 AS (SELECT * FROM users_table WHERE value_1 = 15) SELECT * FROM c1 WHERE value_2 < 500;
|
|
SELECT * FROM cte_view_1;
|
|
ERROR: cannot push down this subquery
|
|
DETAIL: Table expressions other than simple relations and subqueries are currently unsupported
|
|
-- this is single shard query but still not supported since it has view + cte
|
|
-- router planner can't detect it
|
|
SELECT * FROM cte_view_1 WHERE user_id = 8;
|
|
ERROR: cannot push down this subquery
|
|
DETAIL: Table expressions other than simple relations and subqueries are currently unsupported
|
|
-- if the CTE itself prunes down to a single shard then the view is supported (router plannable)
|
|
CREATE VIEW cte_view_2 AS
|
|
WITH c1 AS (SELECT * FROM users_table WHERE user_id = 8) SELECT * FROM c1 WHERE value_1 = 15;
|
|
SELECT * FROM cte_view_2;
|
|
user_id | time | value_1 | value_2 | value_3 | value_4
|
|
---------+---------------------------------+---------+---------+---------+---------
|
|
8 | Tue Jan 21 00:52:36.967785 2014 | 15 | 10 | 868 |
|
|
(1 row)
|
|
|
|
CREATE VIEW router_view AS SELECT * FROM users_table WHERE user_id = 2;
|
|
-- router plannable
|
|
SELECT user_id FROM router_view GROUP BY 1;
|
|
user_id
|
|
---------
|
|
2
|
|
(1 row)
|
|
|
|
-- join a router view
|
|
SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN recent_events USING (user_id) ORDER BY 2 LIMIT 3;
|
|
user_id | time
|
|
---------+---------------------------------
|
|
2 | Mon Jan 20 02:02:03.208351 2014
|
|
2 | Mon Jan 20 02:34:14.54301 2014
|
|
2 | Mon Jan 20 03:16:38.418772 2014
|
|
(3 rows)
|
|
|
|
SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN (SELECT * FROM recent_events) re USING (user_id) ORDER BY 2 LIMIT 3;
|
|
user_id | time
|
|
---------+---------------------------------
|
|
2 | Mon Jan 20 02:02:03.208351 2014
|
|
2 | Mon Jan 20 02:34:14.54301 2014
|
|
2 | Mon Jan 20 03:16:38.418772 2014
|
|
(3 rows)
|
|
|
|
-- views with limits
|
|
CREATE VIEW recent_10_users AS
|
|
SELECT user_id, max(time) as lastseen FROM users_table
|
|
GROUP BY user_id
|
|
ORDER BY lastseen DESC
|
|
LIMIT 10;
|
|
-- this is not supported since it has limit in it and subquery_pushdown is not set
|
|
SELECT * FROM recent_10_users;
|
|
ERROR: cannot perform distributed planning on this query
|
|
DETAIL: Subqueries with limit are not supported yet
|
|
SET citus.subquery_pushdown to ON;
|
|
-- still not supported since outer query does not have limit
|
|
-- it still shows the same (subquery with single relation) error message
|
|
SELECT * FROM recent_10_users;
|
|
ERROR: cannot perform distributed planning on this query
|
|
DETAIL: Subqueries with limit are not supported yet
|
|
-- now it displays a more accurate error message
|
|
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id);
|
|
ERROR: cannot push down this subquery
|
|
DETAIL: Limit in subquery without limit in the outermost query is unsupported
|
|
-- now both are supported when there is a limit on the outermost query
|
|
SELECT * FROM recent_10_users ORDER BY lastseen DESC LIMIT 10;
|
|
user_id | lastseen
|
|
---------+---------------------------------
|
|
6 | Tue Jan 21 05:57:47.118755 2014
|
|
71 | Tue Jan 21 05:55:52.018461 2014
|
|
39 | Tue Jan 21 05:55:18.875997 2014
|
|
74 | Tue Jan 21 05:54:04.837808 2014
|
|
87 | Tue Jan 21 05:53:51.866813 2014
|
|
50 | Tue Jan 21 05:53:44.251016 2014
|
|
66 | Tue Jan 21 05:51:31.681997 2014
|
|
100 | Tue Jan 21 05:49:04.953009 2014
|
|
46 | Tue Jan 21 05:49:00.229807 2014
|
|
86 | Tue Jan 21 05:48:54.381334 2014
|
|
(10 rows)
|
|
|
|
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
|
|
user_id | time | event_type | value_2 | value_3 | value_4
|
|
---------+---------------------------------+------------+---------+---------+---------
|
|
65 | Tue Jan 21 05:56:52.624231 2014 | 241 | 30 | 543 |
|
|
42 | Tue Jan 21 05:46:35.158342 2014 | 761 | 877 | 335 |
|
|
54 | Tue Jan 21 05:46:19.103645 2014 | 595 | 477 | 996 |
|
|
44 | Tue Jan 21 05:43:00.838945 2014 | 682 | 641 | 448 |
|
|
27 | Tue Jan 21 05:34:10.935865 2014 | 912 | 605 | 989 |
|
|
61 | Tue Jan 21 05:25:27.452065 2014 | 392 | 472 | 925 |
|
|
19 | Tue Jan 21 05:23:09.26298 2014 | 202 | 888 | 640 |
|
|
65 | Tue Jan 21 05:22:56.725329 2014 | 519 | 457 | 259 |
|
|
27 | Tue Jan 21 05:19:14.38026 2014 | 19 | 19 | 205 |
|
|
11 | Tue Jan 21 05:15:14.879531 2014 | 459 | 545 | 80 |
|
|
(10 rows)
|
|
|
|
RESET citus.subquery_pushdown;
|
|
-- explain tests
|
|
EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------------------------------------------------------------------------------------------
|
|
Sort
|
|
Sort Key: remote_scan.user_id
|
|
-> HashAggregate
|
|
Group Key: remote_scan.user_id
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> HashAggregate
|
|
Group Key: users_table.user_id
|
|
-> Hash Join
|
|
Hash Cond: (users_table.user_id = ru.user_id)
|
|
-> Bitmap Heap Scan on users_table_1400000 users_table
|
|
Recheck Cond: ((value_1 >= 120) AND (value_1 < 150))
|
|
-> Bitmap Index Scan on is_index3_1400000
|
|
Index Cond: ((value_1 >= 120) AND (value_1 < 150))
|
|
-> Hash
|
|
-> Subquery Scan on ru
|
|
-> Sort
|
|
Sort Key: (max(users_table_1."time")) DESC
|
|
-> HashAggregate
|
|
Group Key: users_table_1.user_id
|
|
Filter: (max(users_table_1."time") > '2014-01-21 05:45:49.978738'::timestamp without time zone)
|
|
-> Seq Scan on users_table_1400000 users_table_1
|
|
(25 rows)
|
|
|
|
EXPLAIN (COSTS FALSE) SELECT *
|
|
FROM (
|
|
(SELECT user_id FROM recent_users)
|
|
UNION
|
|
(SELECT user_id FROM selected_users) ) u
|
|
WHERE user_id < 15 AND user_id > 10
|
|
ORDER BY user_id;
|
|
QUERY PLAN
|
|
-------------------------------------------------------------------------------------------------------------------------------------------------
|
|
Sort
|
|
Sort Key: remote_scan.user_id
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: recent_users.user_id
|
|
-> Append
|
|
-> Subquery Scan on recent_users
|
|
-> Sort
|
|
Sort Key: (max(users_table."time")) DESC
|
|
-> GroupAggregate
|
|
Group Key: users_table.user_id
|
|
Filter: (max(users_table."time") > '2014-01-21 05:45:49.978738'::timestamp without time zone)
|
|
-> Index Scan using is_index1_1400000 on users_table_1400000 users_table
|
|
Index Cond: ((user_id < 15) AND (user_id > 10))
|
|
-> Index Scan using is_index1_1400000 on users_table_1400000 users_table_1
|
|
Index Cond: ((user_id < 15) AND (user_id > 10))
|
|
Filter: ((value_1 >= 120) AND (value_1 < 150))
|
|
(22 rows)
|
|
|
|
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
|
|
ERROR: cannot push down this subquery
|
|
DETAIL: Limit in subquery is currently unsupported
|
|
SET citus.subquery_pushdown to ON;
|
|
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------------------------------------------------------
|
|
Limit
|
|
-> Sort
|
|
Sort Key: remote_scan."time" DESC
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Limit
|
|
-> Sort
|
|
Sort Key: et."time" DESC
|
|
-> Hash Join
|
|
Hash Cond: (et.user_id = recent_10_users.user_id)
|
|
-> Seq Scan on events_table_1400004 et
|
|
-> Hash
|
|
-> Subquery Scan on recent_10_users
|
|
-> Limit
|
|
-> Sort
|
|
Sort Key: (max(users_table."time")) DESC
|
|
-> HashAggregate
|
|
Group Key: users_table.user_id
|
|
-> Seq Scan on users_table_1400000 users_table
|
|
(22 rows)
|
|
|
|
RESET citus.subquery_pushdown;
|
|
DROP VIEW recent_10_users;
|
|
DROP VIEW router_view;
|
|
DROP VIEW cte_view_2;
|
|
DROP VIEW cte_view_1;
|
|
DROP VIEW distinct_value_1;
|
|
DROP VIEW distinct_user_with_value_1_15;
|
|
DROP VIEW recent_selected_users;
|
|
DROP VIEW selected_users;
|
|
DROP VIEW recent_events;
|
|
DROP VIEW recent_users;
|