mirror of https://github.com/citusdata/citus.git
Add view regression tests for increased subquery coverage (#1348)
- joins between views and tables - joins between views - union/union all queries involving views - views with limit - explain queries with viewpull/1342/head
parent
7f38f07368
commit
6b1de6914e
|
@ -230,6 +230,23 @@ SELECT l_suppkey, count(*) FROM
|
|||
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
|
||||
ERROR: cannot perform distributed planning on this query
|
||||
DETAIL: Subqueries without group by clause are not supported yet
|
||||
-- repartition query on view with single table subquery
|
||||
CREATE VIEW supp_count_view AS SELECT * FROM (SELECT l_suppkey, count(*) FROM lineitem_hash_part GROUP BY 1) s1;
|
||||
SELECT * FROM supp_count_view ORDER BY 2 DESC, 1 LIMIT 10;
|
||||
l_suppkey | count
|
||||
-----------+-------
|
||||
6104 | 8
|
||||
1868 | 6
|
||||
5532 | 6
|
||||
5849 | 6
|
||||
6169 | 6
|
||||
6669 | 6
|
||||
6692 | 6
|
||||
7703 | 6
|
||||
7869 | 6
|
||||
8426 | 6
|
||||
(10 rows)
|
||||
|
||||
SET citus.task_executor_type to DEFAULT;
|
||||
-- create a view with aggregate
|
||||
CREATE VIEW lineitems_by_shipping_method AS
|
||||
|
@ -268,3 +285,613 @@ SELECT * FROM lineitems_by_orderkey WHERE l_orderkey = 100;
|
|||
(1 row)
|
||||
|
||||
DROP TABLE temp_lineitem CASCADE;
|
||||
DROP VIEW supp_count_view;
|
||||
DROP VIEW lineitems_by_orderkey;
|
||||
DROP VIEW lineitems_by_shipping_method;
|
||||
DROP VIEW air_shipped_lineitems;
|
||||
DROP VIEW priority_lineitem;
|
||||
DROP VIEW priority_orders;
|
||||
-- new tests for real time use case including views and subqueries
|
||||
-- create view to display recent user who has an activity after a timestamp
|
||||
CREATE VIEW recent_users AS
|
||||
SELECT user_id, max(time) as lastseen FROM users_table
|
||||
GROUP BY user_id
|
||||
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC;
|
||||
SELECT * FROM recent_users;
|
||||
user_id | lastseen
|
||||
---------+---------------------------------
|
||||
87 | Tue Jan 21 05:53:51.866813 2014
|
||||
50 | Tue Jan 21 05:53:44.251016 2014
|
||||
74 | Tue Jan 21 05:54:04.837808 2014
|
||||
6 | Tue Jan 21 05:57:47.118755 2014
|
||||
71 | Tue Jan 21 05:55:52.018461 2014
|
||||
39 | Tue Jan 21 05:55:18.875997 2014
|
||||
66 | Tue Jan 21 05:51:31.681997 2014
|
||||
100 | Tue Jan 21 05:49:04.953009 2014
|
||||
46 | Tue Jan 21 05:49:00.229807 2014
|
||||
86 | Tue Jan 21 05:48:54.381334 2014
|
||||
13 | Tue Jan 21 05:48:45.418146 2014
|
||||
90 | Tue Jan 21 05:48:25.027491 2014
|
||||
58 | Tue Jan 21 05:47:30.418553 2014
|
||||
44 | Tue Jan 21 05:47:01.104523 2014
|
||||
(14 rows)
|
||||
|
||||
-- create a view for recent_events
|
||||
CREATE VIEW recent_events AS
|
||||
SELECT user_id, time FROM events_table
|
||||
WHERE time > '2014-01-20 01:45:49.978738'::timestamp;
|
||||
SELECT count(*) FROM recent_events;
|
||||
count
|
||||
-------
|
||||
1105
|
||||
(1 row)
|
||||
|
||||
-- count number of events of recent_users
|
||||
SELECT count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id);
|
||||
count
|
||||
-------
|
||||
1336
|
||||
(1 row)
|
||||
|
||||
-- count number of events of per recent users order by count
|
||||
SELECT ru.user_id, count(*)
|
||||
FROM recent_users ru
|
||||
JOIN events_table et
|
||||
ON (ru.user_id = et.user_id)
|
||||
GROUP BY ru.user_id
|
||||
ORDER BY 2 DESC, 1;
|
||||
user_id | count
|
||||
---------+-------
|
||||
13 | 118
|
||||
44 | 109
|
||||
90 | 109
|
||||
87 | 105
|
||||
46 | 103
|
||||
86 | 100
|
||||
66 | 98
|
||||
39 | 96
|
||||
71 | 95
|
||||
74 | 93
|
||||
6 | 89
|
||||
58 | 87
|
||||
50 | 79
|
||||
100 | 55
|
||||
(14 rows)
|
||||
|
||||
-- the same query with a left join however, it would still generate the same result
|
||||
SELECT ru.user_id, count(*)
|
||||
FROM recent_users ru
|
||||
LEFT JOIN events_table et
|
||||
ON (ru.user_id = et.user_id)
|
||||
GROUP BY ru.user_id
|
||||
ORDER BY 2 DESC, 1;
|
||||
user_id | count
|
||||
---------+-------
|
||||
13 | 118
|
||||
44 | 109
|
||||
90 | 109
|
||||
87 | 105
|
||||
46 | 103
|
||||
86 | 100
|
||||
66 | 98
|
||||
39 | 96
|
||||
71 | 95
|
||||
74 | 93
|
||||
6 | 89
|
||||
58 | 87
|
||||
50 | 79
|
||||
100 | 55
|
||||
(14 rows)
|
||||
|
||||
-- query wrapped inside a subquery, it needs another top level order by
|
||||
SELECT * FROM
|
||||
(SELECT ru.user_id, count(*)
|
||||
FROM recent_users ru
|
||||
JOIN events_table et
|
||||
ON (ru.user_id = et.user_id)
|
||||
GROUP BY ru.user_id
|
||||
ORDER BY 2 DESC, 1) s1
|
||||
ORDER BY 2 DESC, 1;
|
||||
user_id | count
|
||||
---------+-------
|
||||
13 | 118
|
||||
44 | 109
|
||||
90 | 109
|
||||
87 | 105
|
||||
46 | 103
|
||||
86 | 100
|
||||
66 | 98
|
||||
39 | 96
|
||||
71 | 95
|
||||
74 | 93
|
||||
6 | 89
|
||||
58 | 87
|
||||
50 | 79
|
||||
100 | 55
|
||||
(14 rows)
|
||||
|
||||
-- non-partition key joins are not supported inside subquery
|
||||
SELECT * FROM
|
||||
(SELECT ru.user_id, count(*)
|
||||
FROM recent_users ru
|
||||
JOIN events_table et
|
||||
ON (ru.user_id = et.event_type)
|
||||
GROUP BY ru.user_id
|
||||
ORDER BY 2 DESC, 1) s1
|
||||
ORDER BY 2 DESC, 1;
|
||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||
-- join between views
|
||||
-- recent users who has an event in recent events
|
||||
SELECT ru.user_id FROM recent_users ru JOIN recent_events re USING(user_id) GROUP BY ru.user_id ORDER BY ru.user_id;
|
||||
user_id
|
||||
---------
|
||||
6
|
||||
13
|
||||
39
|
||||
44
|
||||
46
|
||||
50
|
||||
58
|
||||
66
|
||||
71
|
||||
74
|
||||
86
|
||||
87
|
||||
90
|
||||
100
|
||||
(14 rows)
|
||||
|
||||
-- outer join inside a subquery
|
||||
-- recent_events who are not done by recent users
|
||||
SELECT count(*) FROM (
|
||||
SELECT re.*, ru.user_id AS recent_user
|
||||
FROM recent_events re LEFT JOIN recent_users ru USING(user_id)) reu
|
||||
WHERE recent_user IS NULL;
|
||||
count
|
||||
-------
|
||||
957
|
||||
(1 row)
|
||||
|
||||
-- same query with anti-join
|
||||
SELECT count(*)
|
||||
FROM recent_events re LEFT JOIN recent_users ru ON(ru.user_id = re.user_id)
|
||||
WHERE ru.user_id IS NULL;
|
||||
count
|
||||
-------
|
||||
957
|
||||
(1 row)
|
||||
|
||||
-- join between view and table
|
||||
-- users who has recent activity and they have an entry with value_1 is less than 15
|
||||
SELECT ut.* FROM recent_users ru JOIN users_table ut USING (user_id) WHERE ut.value_1 < 15 ORDER BY 1,2;
|
||||
user_id | time | value_1 | value_2 | value_3 | value_4
|
||||
---------+---------------------------------+---------+---------+---------+---------
|
||||
6 | Mon Jan 13 05:30:08.289267 2014 | 12 | 140 | 618 |
|
||||
6 | Thu Jan 16 15:17:16.779695 2014 | 6 | 978 | 430 |
|
||||
6 | Sun Jan 19 06:09:39.900888 2014 | 3 | 908 | 688 |
|
||||
13 | Sun Jan 19 22:09:26.256209 2014 | 2 | 755 | 584 |
|
||||
39 | Wed Jan 15 05:46:51.48765 2014 | 14 | 657 | 137 |
|
||||
39 | Sun Jan 19 11:26:47.45937 2014 | 12 | 118 | 165 |
|
||||
44 | Wed Jan 15 14:23:52.532426 2014 | 8 | 204 | 735 |
|
||||
44 | Sun Jan 19 05:53:34.829093 2014 | 4 | 758 | 205 |
|
||||
46 | Mon Jan 13 20:39:11.211169 2014 | 0 | 235 | 475 |
|
||||
46 | Wed Jan 15 09:14:57.471944 2014 | 2 | 407 | 664 |
|
||||
50 | Sat Jan 11 11:07:13.089216 2014 | 6 | 292 | 425 |
|
||||
58 | Sun Jan 19 22:36:14.795396 2014 | 2 | 86 | 311 |
|
||||
66 | Tue Jan 14 20:16:31.219213 2014 | 14 | 347 | 655 |
|
||||
74 | Tue Jan 21 01:38:39.570986 2014 | 9 | 334 | 642 |
|
||||
86 | Sun Jan 19 06:18:51.466578 2014 | 14 | 712 | 490 |
|
||||
87 | Sat Jan 11 20:46:28.439073 2014 | 2 | 528 | 311 |
|
||||
90 | Sun Jan 12 21:37:30.778206 2014 | 11 | 458 | 377 |
|
||||
100 | Sun Jan 19 22:32:08.284043 2014 | 2 | 384 | 149 |
|
||||
(18 rows)
|
||||
|
||||
-- determine if a recent user has done a given event type or not
|
||||
SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
|
||||
FROM recent_users ru
|
||||
LEFT JOIN events_table et
|
||||
ON(ru.user_id = et.user_id AND et.event_type = 625)
|
||||
ORDER BY 2 DESC, 1;
|
||||
user_id | done_event
|
||||
---------+------------
|
||||
6 | YES
|
||||
13 | NO
|
||||
39 | NO
|
||||
44 | NO
|
||||
46 | NO
|
||||
50 | NO
|
||||
58 | NO
|
||||
66 | NO
|
||||
71 | NO
|
||||
74 | NO
|
||||
86 | NO
|
||||
87 | NO
|
||||
90 | NO
|
||||
100 | NO
|
||||
(14 rows)
|
||||
|
||||
-- view vs table join wrapped inside a subquery
|
||||
SELECT * FROM
|
||||
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
|
||||
FROM recent_users ru
|
||||
LEFT JOIN events_table et
|
||||
ON(ru.user_id = et.user_id AND et.event_type = 625)
|
||||
) s1
|
||||
ORDER BY 2 DESC, 1;
|
||||
user_id | done_event
|
||||
---------+------------
|
||||
6 | YES
|
||||
13 | NO
|
||||
39 | NO
|
||||
44 | NO
|
||||
46 | NO
|
||||
50 | NO
|
||||
58 | NO
|
||||
66 | NO
|
||||
71 | NO
|
||||
74 | NO
|
||||
86 | NO
|
||||
87 | NO
|
||||
90 | NO
|
||||
100 | NO
|
||||
(14 rows)
|
||||
|
||||
-- event vs table non-partition-key join is not supported
|
||||
SELECT * FROM
|
||||
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
|
||||
FROM recent_users ru
|
||||
LEFT JOIN events_table et
|
||||
ON(ru.user_id = et.event_type)
|
||||
) s1
|
||||
ORDER BY 2 DESC, 1;
|
||||
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
|
||||
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
|
||||
-- create a select only view
|
||||
CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150;
|
||||
CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id);
|
||||
SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
|
||||
user_id
|
||||
---------
|
||||
6
|
||||
13
|
||||
39
|
||||
44
|
||||
46
|
||||
50
|
||||
58
|
||||
66
|
||||
71
|
||||
74
|
||||
86
|
||||
90
|
||||
(12 rows)
|
||||
|
||||
-- this would be supported when we implement where partition_key in (subquery) support
|
||||
SELECT et.* FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users);
|
||||
ERROR: could not run distributed query with subquery outside the FROM clause
|
||||
HINT: Consider using an equality filter on the distributed table's partition column.
|
||||
-- it is supported when it is a router query
|
||||
SELECT count(*) FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users WHERE user_id = 90);
|
||||
count
|
||||
-------
|
||||
109
|
||||
(1 row)
|
||||
|
||||
-- expected this to work but it did not
|
||||
(SELECT user_id FROM recent_users)
|
||||
UNION
|
||||
(SELECT user_id FROM selected_users);
|
||||
ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT
|
||||
HINT: Consider using an equality filter on the distributed table's partition column.
|
||||
-- wrapping it inside a SELECT * works
|
||||
SELECT *
|
||||
FROM (
|
||||
(SELECT user_id FROM recent_users)
|
||||
UNION
|
||||
(SELECT user_id FROM selected_users) ) u
|
||||
WHERE user_id < 15 AND user_id > 10
|
||||
ORDER BY user_id;
|
||||
user_id
|
||||
---------
|
||||
11
|
||||
12
|
||||
13
|
||||
14
|
||||
(4 rows)
|
||||
|
||||
-- union all also works for views
|
||||
SELECT *
|
||||
FROM (
|
||||
(SELECT user_id FROM recent_users)
|
||||
UNION ALL
|
||||
(SELECT user_id FROM selected_users) ) u
|
||||
WHERE user_id < 15 AND user_id > 10
|
||||
ORDER BY user_id;
|
||||
user_id
|
||||
---------
|
||||
11
|
||||
11
|
||||
11
|
||||
12
|
||||
12
|
||||
12
|
||||
12
|
||||
12
|
||||
12
|
||||
13
|
||||
13
|
||||
13
|
||||
13
|
||||
13
|
||||
14
|
||||
(15 rows)
|
||||
|
||||
SELECT count(*)
|
||||
FROM (
|
||||
(SELECT user_id FROM recent_users)
|
||||
UNION
|
||||
(SELECT user_id FROM selected_users) ) u
|
||||
WHERE user_id < 15 AND user_id > 10;
|
||||
count
|
||||
-------
|
||||
4
|
||||
(1 row)
|
||||
|
||||
-- expected this to work but it does not
|
||||
SELECT count(*)
|
||||
FROM (
|
||||
(SELECT user_id FROM recent_users)
|
||||
UNION ALL
|
||||
(SELECT user_id FROM selected_users) ) u
|
||||
WHERE user_id < 15 AND user_id > 10;
|
||||
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
|
||||
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
|
||||
-- expand view definitions and re-run last 2 queries
|
||||
SELECT count(*)
|
||||
FROM (
|
||||
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
|
||||
GROUP BY user_id
|
||||
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa
|
||||
)
|
||||
UNION
|
||||
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u
|
||||
WHERE user_id < 15 AND user_id > 10;
|
||||
count
|
||||
-------
|
||||
4
|
||||
(1 row)
|
||||
|
||||
SELECT count(*)
|
||||
FROM (
|
||||
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
|
||||
GROUP BY user_id
|
||||
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa
|
||||
)
|
||||
UNION ALL
|
||||
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u
|
||||
WHERE user_id < 15 AND user_id > 10;
|
||||
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
|
||||
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
|
||||
-- test distinct
|
||||
-- distinct is supported if it is on a partition key
|
||||
CREATE VIEW distinct_user_with_value_1_15 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 15;
|
||||
SELECT * FROM distinct_user_with_value_1_15 ORDER BY user_id;
|
||||
user_id
|
||||
---------
|
||||
7
|
||||
8
|
||||
35
|
||||
42
|
||||
46
|
||||
53
|
||||
70
|
||||
82
|
||||
87
|
||||
88
|
||||
96
|
||||
(11 rows)
|
||||
|
||||
-- distinct is not supported if it is on a non-partition key
|
||||
CREATE VIEW distinct_value_1 AS SELECT DISTINCT value_1 FROM users_table WHERE value_2 = 15;
|
||||
SELECT * FROM distinct_value_1;
|
||||
ERROR: cannot perform distributed planning on this query
|
||||
DETAIL: Subqueries without group by clause are not supported yet
|
||||
-- CTEs are not supported even if they are on views
|
||||
CREATE VIEW cte_view_1 AS
|
||||
WITH c1 AS (SELECT * FROM users_table WHERE value_1 = 15) SELECT * FROM c1 WHERE value_2 < 500;
|
||||
SELECT * FROM cte_view_1;
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Table expressions other than simple relations and subqueries are currently unsupported
|
||||
-- this is single shard query but still not supported since it has view + cte
|
||||
-- router planner can't detect it
|
||||
SELECT * FROM cte_view_1 WHERE user_id = 8;
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Table expressions other than simple relations and subqueries are currently unsupported
|
||||
-- if CTE itself prunes down to a single shard than the view is supported (router plannable)
|
||||
CREATE VIEW cte_view_2 AS
|
||||
WITH c1 AS (SELECT * FROM users_table WHERE user_id = 8) SELECT * FROM c1 WHERE value_1 = 15;
|
||||
SELECT * FROM cte_view_2;
|
||||
user_id | time | value_1 | value_2 | value_3 | value_4
|
||||
---------+---------------------------------+---------+---------+---------+---------
|
||||
8 | Tue Jan 21 00:52:36.967785 2014 | 15 | 10 | 868 |
|
||||
(1 row)
|
||||
|
||||
CREATE VIEW router_view AS SELECT * FROM users_table WHERE user_id = 2;
|
||||
-- router plannable
|
||||
SELECT user_id FROM router_view GROUP BY 1;
|
||||
user_id
|
||||
---------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
-- There is a known issue with router plannable subqueries joined with non-router
|
||||
-- plannable subqueries. Following tests should be uncommented when we fix it
|
||||
-- join a router view (not implement error)
|
||||
-- SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN recent_events USING (user_id);
|
||||
-- it still does not work when converted to 2 subquery join
|
||||
-- SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN (SELECT * FROM recent_events) re USING (user_id);
|
||||
-- views are completely removed and still it does not work
|
||||
-- SELECT * FROM
|
||||
-- (SELECT user_id FROM (SELECT * FROM users_table WHERE user_id = 2) rv1 GROUP BY 1) rv2
|
||||
-- JOIN (SELECT user_id, time FROM events_table
|
||||
-- WHERE time > '2014-01-20 01:45:49.978738'::timestamp) re
|
||||
-- USING (user_id);
|
||||
-- views with limits
|
||||
CREATE VIEW recent_10_users AS
|
||||
SELECT user_id, max(time) as lastseen FROM users_table
|
||||
GROUP BY user_id
|
||||
ORDER BY lastseen DESC
|
||||
LIMIT 10;
|
||||
-- this is not supported since it has limit in it and subquery_pushdown is not set
|
||||
SELECT * FROM recent_10_users;
|
||||
ERROR: cannot perform distributed planning on this query
|
||||
DETAIL: Subqueries with limit are not supported yet
|
||||
SET citus.subquery_pushdown to ON;
|
||||
-- still not supported since outer query does not have limit
|
||||
-- it shows a different (subquery with single relation) error message
|
||||
SELECT * FROM recent_10_users;
|
||||
ERROR: cannot perform distributed planning on this query
|
||||
DETAIL: Subqueries with limit are not supported yet
|
||||
-- now it displays more correct error message
|
||||
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id);
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Limit in subquery without limit in the outermost query is unsupported
|
||||
-- now both are supported when there is a limit on the outer most query
|
||||
SELECT * FROM recent_10_users ORDER BY lastseen DESC LIMIT 10;
|
||||
user_id | lastseen
|
||||
---------+---------------------------------
|
||||
6 | Tue Jan 21 05:57:47.118755 2014
|
||||
71 | Tue Jan 21 05:55:52.018461 2014
|
||||
39 | Tue Jan 21 05:55:18.875997 2014
|
||||
74 | Tue Jan 21 05:54:04.837808 2014
|
||||
87 | Tue Jan 21 05:53:51.866813 2014
|
||||
50 | Tue Jan 21 05:53:44.251016 2014
|
||||
66 | Tue Jan 21 05:51:31.681997 2014
|
||||
100 | Tue Jan 21 05:49:04.953009 2014
|
||||
46 | Tue Jan 21 05:49:00.229807 2014
|
||||
86 | Tue Jan 21 05:48:54.381334 2014
|
||||
(10 rows)
|
||||
|
||||
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
|
||||
user_id | time | event_type | value_2 | value_3 | value_4
|
||||
---------+---------------------------------+------------+---------+---------+---------
|
||||
65 | Tue Jan 21 05:56:52.624231 2014 | 241 | 30 | 543 |
|
||||
42 | Tue Jan 21 05:46:35.158342 2014 | 761 | 877 | 335 |
|
||||
54 | Tue Jan 21 05:46:19.103645 2014 | 595 | 477 | 996 |
|
||||
44 | Tue Jan 21 05:43:00.838945 2014 | 682 | 641 | 448 |
|
||||
27 | Tue Jan 21 05:34:10.935865 2014 | 912 | 605 | 989 |
|
||||
61 | Tue Jan 21 05:25:27.452065 2014 | 392 | 472 | 925 |
|
||||
19 | Tue Jan 21 05:23:09.26298 2014 | 202 | 888 | 640 |
|
||||
65 | Tue Jan 21 05:22:56.725329 2014 | 519 | 457 | 259 |
|
||||
27 | Tue Jan 21 05:19:14.38026 2014 | 19 | 19 | 205 |
|
||||
11 | Tue Jan 21 05:15:14.879531 2014 | 459 | 545 | 80 |
|
||||
(10 rows)
|
||||
|
||||
RESET citus.subquery_pushdown;
|
||||
-- explain tests
|
||||
EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: remote_scan.user_id
|
||||
-> HashAggregate
|
||||
Group Key: remote_scan.user_id
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> HashAggregate
|
||||
Group Key: users_table.user_id
|
||||
-> Hash Join
|
||||
Hash Cond: (users_table.user_id = ru.user_id)
|
||||
-> Bitmap Heap Scan on users_table_1400000 users_table
|
||||
Recheck Cond: ((value_1 >= 120) AND (value_1 < 150))
|
||||
-> Bitmap Index Scan on is_index3_1400000
|
||||
Index Cond: ((value_1 >= 120) AND (value_1 < 150))
|
||||
-> Hash
|
||||
-> Subquery Scan on ru
|
||||
-> Sort
|
||||
Sort Key: (max(users_table_1."time")) DESC
|
||||
-> HashAggregate
|
||||
Group Key: users_table_1.user_id
|
||||
Filter: (max(users_table_1."time") > '2014-01-21 05:45:49.978738'::timestamp without time zone)
|
||||
-> Seq Scan on users_table_1400000 users_table_1
|
||||
(25 rows)
|
||||
|
||||
EXPLAIN (COSTS FALSE) SELECT *
|
||||
FROM (
|
||||
(SELECT user_id FROM recent_users)
|
||||
UNION
|
||||
(SELECT user_id FROM selected_users) ) u
|
||||
WHERE user_id < 15 AND user_id > 10
|
||||
ORDER BY user_id;
|
||||
QUERY PLAN
|
||||
-------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: remote_scan.user_id
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> Unique
|
||||
-> Sort
|
||||
Sort Key: recent_users.user_id
|
||||
-> Append
|
||||
-> Subquery Scan on recent_users
|
||||
-> Sort
|
||||
Sort Key: (max(users_table."time")) DESC
|
||||
-> GroupAggregate
|
||||
Group Key: users_table.user_id
|
||||
Filter: (max(users_table."time") > '2014-01-21 05:45:49.978738'::timestamp without time zone)
|
||||
-> Index Scan using is_index1_1400000 on users_table_1400000 users_table
|
||||
Index Cond: ((user_id < 15) AND (user_id > 10))
|
||||
-> Index Scan using is_index1_1400000 on users_table_1400000 users_table_1
|
||||
Index Cond: ((user_id < 15) AND (user_id > 10))
|
||||
Filter: ((value_1 >= 120) AND (value_1 < 150))
|
||||
(22 rows)
|
||||
|
||||
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
|
||||
ERROR: cannot push down this subquery
|
||||
DETAIL: Limit in subquery is currently unsupported
|
||||
SET citus.subquery_pushdown to ON;
|
||||
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------------------------------------------------------
|
||||
Limit
|
||||
-> Sort
|
||||
Sort Key: remote_scan."time" DESC
|
||||
-> Custom Scan (Citus Real-Time)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=57637 dbname=regression
|
||||
-> Limit
|
||||
-> Sort
|
||||
Sort Key: et."time" DESC
|
||||
-> Hash Join
|
||||
Hash Cond: (et.user_id = recent_10_users.user_id)
|
||||
-> Seq Scan on events_table_1400004 et
|
||||
-> Hash
|
||||
-> Subquery Scan on recent_10_users
|
||||
-> Limit
|
||||
-> Sort
|
||||
Sort Key: (max(users_table."time")) DESC
|
||||
-> HashAggregate
|
||||
Group Key: users_table.user_id
|
||||
-> Seq Scan on users_table_1400000 users_table
|
||||
(22 rows)
|
||||
|
||||
RESET citus.subquery_pushdown;
|
||||
DROP VIEW recent_10_users;
|
||||
DROP VIEW router_view;
|
||||
DROP VIEW cte_view_2;
|
||||
DROP VIEW cte_view_1;
|
||||
DROP VIEW distinct_value_1;
|
||||
DROP VIEW distinct_user_with_value_1_15;
|
||||
DROP VIEW recent_selected_users;
|
||||
DROP VIEW selected_users;
|
||||
DROP VIEW recent_events;
|
||||
DROP VIEW recent_users;
|
||||
|
|
|
@ -117,6 +117,10 @@ SELECT l_suppkey, count(*) FROM
|
|||
GROUP BY l_suppkey, l_shipdate) supps
|
||||
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
|
||||
|
||||
-- repartition query on view with single table subquery
|
||||
CREATE VIEW supp_count_view AS SELECT * FROM (SELECT l_suppkey, count(*) FROM lineitem_hash_part GROUP BY 1) s1;
|
||||
SELECT * FROM supp_count_view ORDER BY 2 DESC, 1 LIMIT 10;
|
||||
|
||||
SET citus.task_executor_type to DEFAULT;
|
||||
|
||||
-- create a view with aggregate
|
||||
|
@ -141,3 +145,275 @@ SELECT * FROM lineitems_by_orderkey ORDER BY 2 DESC, 1 ASC LIMIT 10;
|
|||
SELECT * FROM lineitems_by_orderkey WHERE l_orderkey = 100;
|
||||
|
||||
DROP TABLE temp_lineitem CASCADE;
|
||||
|
||||
DROP VIEW supp_count_view;
|
||||
DROP VIEW lineitems_by_orderkey;
|
||||
DROP VIEW lineitems_by_shipping_method;
|
||||
DROP VIEW air_shipped_lineitems;
|
||||
DROP VIEW priority_lineitem;
|
||||
DROP VIEW priority_orders;
|
||||
|
||||
-- new tests for real time use case including views and subqueries
|
||||
|
||||
-- create view to display recent user who has an activity after a timestamp
|
||||
CREATE VIEW recent_users AS
|
||||
SELECT user_id, max(time) as lastseen FROM users_table
|
||||
GROUP BY user_id
|
||||
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC;
|
||||
SELECT * FROM recent_users;
|
||||
|
||||
-- create a view for recent_events
|
||||
CREATE VIEW recent_events AS
|
||||
SELECT user_id, time FROM events_table
|
||||
WHERE time > '2014-01-20 01:45:49.978738'::timestamp;
|
||||
|
||||
SELECT count(*) FROM recent_events;
|
||||
|
||||
-- count number of events of recent_users
|
||||
SELECT count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id);
|
||||
-- count number of events of per recent users order by count
|
||||
SELECT ru.user_id, count(*)
|
||||
FROM recent_users ru
|
||||
JOIN events_table et
|
||||
ON (ru.user_id = et.user_id)
|
||||
GROUP BY ru.user_id
|
||||
ORDER BY 2 DESC, 1;
|
||||
|
||||
-- the same query with a left join however, it would still generate the same result
|
||||
SELECT ru.user_id, count(*)
|
||||
FROM recent_users ru
|
||||
LEFT JOIN events_table et
|
||||
ON (ru.user_id = et.user_id)
|
||||
GROUP BY ru.user_id
|
||||
ORDER BY 2 DESC, 1;
|
||||
|
||||
-- query wrapped inside a subquery, it needs another top level order by
|
||||
SELECT * FROM
|
||||
(SELECT ru.user_id, count(*)
|
||||
FROM recent_users ru
|
||||
JOIN events_table et
|
||||
ON (ru.user_id = et.user_id)
|
||||
GROUP BY ru.user_id
|
||||
ORDER BY 2 DESC, 1) s1
|
||||
ORDER BY 2 DESC, 1;
|
||||
|
||||
-- non-partition key joins are not supported inside subquery
|
||||
SELECT * FROM
|
||||
(SELECT ru.user_id, count(*)
|
||||
FROM recent_users ru
|
||||
JOIN events_table et
|
||||
ON (ru.user_id = et.event_type)
|
||||
GROUP BY ru.user_id
|
||||
ORDER BY 2 DESC, 1) s1
|
||||
ORDER BY 2 DESC, 1;
|
||||
|
||||
-- join between views
|
||||
-- recent users who has an event in recent events
|
||||
SELECT ru.user_id FROM recent_users ru JOIN recent_events re USING(user_id) GROUP BY ru.user_id ORDER BY ru.user_id;
|
||||
|
||||
-- outer join inside a subquery
|
||||
-- recent_events who are not done by recent users
|
||||
SELECT count(*) FROM (
|
||||
SELECT re.*, ru.user_id AS recent_user
|
||||
FROM recent_events re LEFT JOIN recent_users ru USING(user_id)) reu
|
||||
WHERE recent_user IS NULL;
|
||||
|
||||
-- same query with anti-join
|
||||
SELECT count(*)
|
||||
FROM recent_events re LEFT JOIN recent_users ru ON(ru.user_id = re.user_id)
|
||||
WHERE ru.user_id IS NULL;
|
||||
|
||||
-- join between view and table
|
||||
-- users who has recent activity and they have an entry with value_1 is less than 15
|
||||
SELECT ut.* FROM recent_users ru JOIN users_table ut USING (user_id) WHERE ut.value_1 < 15 ORDER BY 1,2;
|
||||
|
||||
-- determine if a recent user has done a given event type or not
|
||||
SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
|
||||
FROM recent_users ru
|
||||
LEFT JOIN events_table et
|
||||
ON(ru.user_id = et.user_id AND et.event_type = 625)
|
||||
ORDER BY 2 DESC, 1;
|
||||
|
||||
-- view vs table join wrapped inside a subquery
|
||||
SELECT * FROM
|
||||
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
|
||||
FROM recent_users ru
|
||||
LEFT JOIN events_table et
|
||||
ON(ru.user_id = et.user_id AND et.event_type = 625)
|
||||
) s1
|
||||
ORDER BY 2 DESC, 1;
|
||||
|
||||
-- event vs table non-partition-key join is not supported
|
||||
SELECT * FROM
|
||||
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
|
||||
FROM recent_users ru
|
||||
LEFT JOIN events_table et
|
||||
ON(ru.user_id = et.event_type)
|
||||
) s1
|
||||
ORDER BY 2 DESC, 1;
|
||||
|
||||
-- create a select only view
|
||||
CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150;
|
||||
CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id);
|
||||
|
||||
SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
|
||||
|
||||
-- this would be supported when we implement where partition_key in (subquery) support
|
||||
SELECT et.* FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users);
|
||||
|
||||
-- it is supported when it is a router query
|
||||
SELECT count(*) FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users WHERE user_id = 90);
|
||||
|
||||
-- expected this to work but it did not
|
||||
(SELECT user_id FROM recent_users)
|
||||
UNION
|
||||
(SELECT user_id FROM selected_users);
|
||||
|
||||
-- wrapping it inside a SELECT * works
|
||||
SELECT *
|
||||
FROM (
|
||||
(SELECT user_id FROM recent_users)
|
||||
UNION
|
||||
(SELECT user_id FROM selected_users) ) u
|
||||
WHERE user_id < 15 AND user_id > 10
|
||||
ORDER BY user_id;
|
||||
|
||||
-- union all also works for views
|
||||
SELECT *
|
||||
FROM (
|
||||
(SELECT user_id FROM recent_users)
|
||||
UNION ALL
|
||||
(SELECT user_id FROM selected_users) ) u
|
||||
WHERE user_id < 15 AND user_id > 10
|
||||
ORDER BY user_id;
|
||||
|
||||
SELECT count(*)
|
||||
FROM (
|
||||
(SELECT user_id FROM recent_users)
|
||||
UNION
|
||||
(SELECT user_id FROM selected_users) ) u
|
||||
WHERE user_id < 15 AND user_id > 10;
|
||||
|
||||
-- expected this to work but it does not
|
||||
SELECT count(*)
|
||||
FROM (
|
||||
(SELECT user_id FROM recent_users)
|
||||
UNION ALL
|
||||
(SELECT user_id FROM selected_users) ) u
|
||||
WHERE user_id < 15 AND user_id > 10;
|
||||
|
||||
-- expand view definitions and re-run last 2 queries
|
||||
SELECT count(*)
|
||||
FROM (
|
||||
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
|
||||
GROUP BY user_id
|
||||
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa
|
||||
)
|
||||
UNION
|
||||
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u
|
||||
WHERE user_id < 15 AND user_id > 10;
|
||||
|
||||
SELECT count(*)
|
||||
FROM (
|
||||
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
|
||||
GROUP BY user_id
|
||||
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa
|
||||
)
|
||||
UNION ALL
|
||||
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u
|
||||
WHERE user_id < 15 AND user_id > 10;
|
||||
|
||||
-- test distinct
|
||||
-- distinct is supported if it is on a partition key
|
||||
CREATE VIEW distinct_user_with_value_1_15 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 15;
|
||||
SELECT * FROM distinct_user_with_value_1_15 ORDER BY user_id;
|
||||
|
||||
-- distinct is not supported if it is on a non-partition key
|
||||
CREATE VIEW distinct_value_1 AS SELECT DISTINCT value_1 FROM users_table WHERE value_2 = 15;
|
||||
SELECT * FROM distinct_value_1;
|
||||
|
||||
-- CTEs are not supported even if they are on views
|
||||
CREATE VIEW cte_view_1 AS
|
||||
WITH c1 AS (SELECT * FROM users_table WHERE value_1 = 15) SELECT * FROM c1 WHERE value_2 < 500;
|
||||
|
||||
SELECT * FROM cte_view_1;
|
||||
|
||||
-- this is single shard query but still not supported since it has view + cte
|
||||
-- router planner can't detect it
|
||||
SELECT * FROM cte_view_1 WHERE user_id = 8;
|
||||
|
||||
-- if CTE itself prunes down to a single shard than the view is supported (router plannable)
|
||||
CREATE VIEW cte_view_2 AS
|
||||
WITH c1 AS (SELECT * FROM users_table WHERE user_id = 8) SELECT * FROM c1 WHERE value_1 = 15;
|
||||
SELECT * FROM cte_view_2;
|
||||
|
||||
CREATE VIEW router_view AS SELECT * FROM users_table WHERE user_id = 2;
|
||||
-- router plannable
|
||||
SELECT user_id FROM router_view GROUP BY 1;
|
||||
|
||||
-- There is a known issue with router plannable subqueries joined with non-router
|
||||
-- plannable subqueries. Following tests should be uncommented when we fix it
|
||||
|
||||
-- join a router view (not implement error)
|
||||
-- SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN recent_events USING (user_id);
|
||||
|
||||
-- it still does not work when converted to 2 subquery join
|
||||
-- SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN (SELECT * FROM recent_events) re USING (user_id);
|
||||
|
||||
-- views are completely removed and still it does not work
|
||||
-- SELECT * FROM
|
||||
-- (SELECT user_id FROM (SELECT * FROM users_table WHERE user_id = 2) rv1 GROUP BY 1) rv2
|
||||
-- JOIN (SELECT user_id, time FROM events_table
|
||||
-- WHERE time > '2014-01-20 01:45:49.978738'::timestamp) re
|
||||
-- USING (user_id);
|
||||
|
||||
-- views with limits
|
||||
CREATE VIEW recent_10_users AS
|
||||
SELECT user_id, max(time) as lastseen FROM users_table
|
||||
GROUP BY user_id
|
||||
ORDER BY lastseen DESC
|
||||
LIMIT 10;
|
||||
|
||||
-- this is not supported since it has limit in it and subquery_pushdown is not set
|
||||
SELECT * FROM recent_10_users;
|
||||
|
||||
SET citus.subquery_pushdown to ON;
|
||||
-- still not supported since outer query does not have limit
|
||||
-- it shows a different (subquery with single relation) error message
|
||||
SELECT * FROM recent_10_users;
|
||||
-- now it displays more correct error message
|
||||
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id);
|
||||
|
||||
-- now both are supported when there is a limit on the outer most query
|
||||
SELECT * FROM recent_10_users ORDER BY lastseen DESC LIMIT 10;
|
||||
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
|
||||
|
||||
RESET citus.subquery_pushdown;
|
||||
|
||||
-- explain tests
|
||||
EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
|
||||
|
||||
EXPLAIN (COSTS FALSE) SELECT *
|
||||
FROM (
|
||||
(SELECT user_id FROM recent_users)
|
||||
UNION
|
||||
(SELECT user_id FROM selected_users) ) u
|
||||
WHERE user_id < 15 AND user_id > 10
|
||||
ORDER BY user_id;
|
||||
|
||||
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
|
||||
SET citus.subquery_pushdown to ON;
|
||||
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
|
||||
|
||||
RESET citus.subquery_pushdown;
|
||||
|
||||
DROP VIEW recent_10_users;
|
||||
DROP VIEW router_view;
|
||||
DROP VIEW cte_view_2;
|
||||
DROP VIEW cte_view_1;
|
||||
DROP VIEW distinct_value_1;
|
||||
DROP VIEW distinct_user_with_value_1_15;
|
||||
DROP VIEW recent_selected_users;
|
||||
DROP VIEW selected_users;
|
||||
DROP VIEW recent_events;
|
||||
DROP VIEW recent_users;
|
||||
|
|
Loading…
Reference in New Issue