Add view regression tests for increased subquery coverage (#1348)

- joins between views and tables
- joins between views
- union/union all queries involving views
- views with limit
- explain queries with view
pull/1342/head
Murat Tuncer 2017-04-28 12:17:53 +03:00 committed by GitHub
parent 7f38f07368
commit 6b1de6914e
2 changed files with 903 additions and 0 deletions

View File

@ -230,6 +230,23 @@ SELECT l_suppkey, count(*) FROM
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
ERROR: cannot perform distributed planning on this query
DETAIL: Subqueries without group by clause are not supported yet
-- repartition query on view with single table subquery
CREATE VIEW supp_count_view AS SELECT * FROM (SELECT l_suppkey, count(*) FROM lineitem_hash_part GROUP BY 1) s1;
SELECT * FROM supp_count_view ORDER BY 2 DESC, 1 LIMIT 10;
l_suppkey | count
-----------+-------
6104 | 8
1868 | 6
5532 | 6
5849 | 6
6169 | 6
6669 | 6
6692 | 6
7703 | 6
7869 | 6
8426 | 6
(10 rows)
SET citus.task_executor_type to DEFAULT;
-- create a view with aggregate
CREATE VIEW lineitems_by_shipping_method AS
@ -268,3 +285,613 @@ SELECT * FROM lineitems_by_orderkey WHERE l_orderkey = 100;
(1 row)
DROP TABLE temp_lineitem CASCADE;
DROP VIEW supp_count_view;
DROP VIEW lineitems_by_orderkey;
DROP VIEW lineitems_by_shipping_method;
DROP VIEW air_shipped_lineitems;
DROP VIEW priority_lineitem;
DROP VIEW priority_orders;
-- new tests for real time use case including views and subqueries
-- create view to display recent user who has an activity after a timestamp
CREATE VIEW recent_users AS
SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC;
SELECT * FROM recent_users;
user_id | lastseen
---------+---------------------------------
87 | Tue Jan 21 05:53:51.866813 2014
50 | Tue Jan 21 05:53:44.251016 2014
74 | Tue Jan 21 05:54:04.837808 2014
6 | Tue Jan 21 05:57:47.118755 2014
71 | Tue Jan 21 05:55:52.018461 2014
39 | Tue Jan 21 05:55:18.875997 2014
66 | Tue Jan 21 05:51:31.681997 2014
100 | Tue Jan 21 05:49:04.953009 2014
46 | Tue Jan 21 05:49:00.229807 2014
86 | Tue Jan 21 05:48:54.381334 2014
13 | Tue Jan 21 05:48:45.418146 2014
90 | Tue Jan 21 05:48:25.027491 2014
58 | Tue Jan 21 05:47:30.418553 2014
44 | Tue Jan 21 05:47:01.104523 2014
(14 rows)
-- create a view for recent_events
CREATE VIEW recent_events AS
SELECT user_id, time FROM events_table
WHERE time > '2014-01-20 01:45:49.978738'::timestamp;
SELECT count(*) FROM recent_events;
count
-------
1105
(1 row)
-- count number of events of recent_users
SELECT count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id);
count
-------
1336
(1 row)
-- count number of events of per recent users order by count
SELECT ru.user_id, count(*)
FROM recent_users ru
JOIN events_table et
ON (ru.user_id = et.user_id)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1;
user_id | count
---------+-------
13 | 118
44 | 109
90 | 109
87 | 105
46 | 103
86 | 100
66 | 98
39 | 96
71 | 95
74 | 93
6 | 89
58 | 87
50 | 79
100 | 55
(14 rows)
-- the same query with a left join however, it would still generate the same result
SELECT ru.user_id, count(*)
FROM recent_users ru
LEFT JOIN events_table et
ON (ru.user_id = et.user_id)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1;
user_id | count
---------+-------
13 | 118
44 | 109
90 | 109
87 | 105
46 | 103
86 | 100
66 | 98
39 | 96
71 | 95
74 | 93
6 | 89
58 | 87
50 | 79
100 | 55
(14 rows)
-- query wrapped inside a subquery, it needs another top level order by
SELECT * FROM
(SELECT ru.user_id, count(*)
FROM recent_users ru
JOIN events_table et
ON (ru.user_id = et.user_id)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1) s1
ORDER BY 2 DESC, 1;
user_id | count
---------+-------
13 | 118
44 | 109
90 | 109
87 | 105
46 | 103
86 | 100
66 | 98
39 | 96
71 | 95
74 | 93
6 | 89
58 | 87
50 | 79
100 | 55
(14 rows)
-- non-partition key joins are not supported inside subquery
SELECT * FROM
(SELECT ru.user_id, count(*)
FROM recent_users ru
JOIN events_table et
ON (ru.user_id = et.event_type)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1) s1
ORDER BY 2 DESC, 1;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- join between views
-- recent users who has an event in recent events
SELECT ru.user_id FROM recent_users ru JOIN recent_events re USING(user_id) GROUP BY ru.user_id ORDER BY ru.user_id;
user_id
---------
6
13
39
44
46
50
58
66
71
74
86
87
90
100
(14 rows)
-- outer join inside a subquery
-- recent_events who are not done by recent users
SELECT count(*) FROM (
SELECT re.*, ru.user_id AS recent_user
FROM recent_events re LEFT JOIN recent_users ru USING(user_id)) reu
WHERE recent_user IS NULL;
count
-------
957
(1 row)
-- same query with anti-join
SELECT count(*)
FROM recent_events re LEFT JOIN recent_users ru ON(ru.user_id = re.user_id)
WHERE ru.user_id IS NULL;
count
-------
957
(1 row)
-- join between view and table
-- users who has recent activity and they have an entry with value_1 is less than 15
SELECT ut.* FROM recent_users ru JOIN users_table ut USING (user_id) WHERE ut.value_1 < 15 ORDER BY 1,2;
user_id | time | value_1 | value_2 | value_3 | value_4
---------+---------------------------------+---------+---------+---------+---------
6 | Mon Jan 13 05:30:08.289267 2014 | 12 | 140 | 618 |
6 | Thu Jan 16 15:17:16.779695 2014 | 6 | 978 | 430 |
6 | Sun Jan 19 06:09:39.900888 2014 | 3 | 908 | 688 |
13 | Sun Jan 19 22:09:26.256209 2014 | 2 | 755 | 584 |
39 | Wed Jan 15 05:46:51.48765 2014 | 14 | 657 | 137 |
39 | Sun Jan 19 11:26:47.45937 2014 | 12 | 118 | 165 |
44 | Wed Jan 15 14:23:52.532426 2014 | 8 | 204 | 735 |
44 | Sun Jan 19 05:53:34.829093 2014 | 4 | 758 | 205 |
46 | Mon Jan 13 20:39:11.211169 2014 | 0 | 235 | 475 |
46 | Wed Jan 15 09:14:57.471944 2014 | 2 | 407 | 664 |
50 | Sat Jan 11 11:07:13.089216 2014 | 6 | 292 | 425 |
58 | Sun Jan 19 22:36:14.795396 2014 | 2 | 86 | 311 |
66 | Tue Jan 14 20:16:31.219213 2014 | 14 | 347 | 655 |
74 | Tue Jan 21 01:38:39.570986 2014 | 9 | 334 | 642 |
86 | Sun Jan 19 06:18:51.466578 2014 | 14 | 712 | 490 |
87 | Sat Jan 11 20:46:28.439073 2014 | 2 | 528 | 311 |
90 | Sun Jan 12 21:37:30.778206 2014 | 11 | 458 | 377 |
100 | Sun Jan 19 22:32:08.284043 2014 | 2 | 384 | 149 |
(18 rows)
-- determine if a recent user has done a given event type or not
SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
FROM recent_users ru
LEFT JOIN events_table et
ON(ru.user_id = et.user_id AND et.event_type = 625)
ORDER BY 2 DESC, 1;
user_id | done_event
---------+------------
6 | YES
13 | NO
39 | NO
44 | NO
46 | NO
50 | NO
58 | NO
66 | NO
71 | NO
74 | NO
86 | NO
87 | NO
90 | NO
100 | NO
(14 rows)
-- view vs table join wrapped inside a subquery
SELECT * FROM
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
FROM recent_users ru
LEFT JOIN events_table et
ON(ru.user_id = et.user_id AND et.event_type = 625)
) s1
ORDER BY 2 DESC, 1;
user_id | done_event
---------+------------
6 | YES
13 | NO
39 | NO
44 | NO
46 | NO
50 | NO
58 | NO
66 | NO
71 | NO
74 | NO
86 | NO
87 | NO
90 | NO
100 | NO
(14 rows)
-- event vs table non-partition-key join is not supported
SELECT * FROM
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
FROM recent_users ru
LEFT JOIN events_table et
ON(ru.user_id = et.event_type)
) s1
ORDER BY 2 DESC, 1;
ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys
DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator.
-- create a select only view
CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150;
CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id);
SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
user_id
---------
6
13
39
44
46
50
58
66
71
74
86
90
(12 rows)
-- this would be supported when we implement where partition_key in (subquery) support
SELECT et.* FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users);
ERROR: could not run distributed query with subquery outside the FROM clause
HINT: Consider using an equality filter on the distributed table's partition column.
-- it is supported when it is a router query
SELECT count(*) FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users WHERE user_id = 90);
count
-------
109
(1 row)
-- expected this to work but it did not
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users);
ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT
HINT: Consider using an equality filter on the distributed table's partition column.
-- wrapping it inside a SELECT * works
SELECT *
FROM (
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 15 AND user_id > 10
ORDER BY user_id;
user_id
---------
11
12
13
14
(4 rows)
-- union all also works for views
SELECT *
FROM (
(SELECT user_id FROM recent_users)
UNION ALL
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 15 AND user_id > 10
ORDER BY user_id;
user_id
---------
11
11
11
12
12
12
12
12
12
13
13
13
13
13
14
(15 rows)
SELECT count(*)
FROM (
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 15 AND user_id > 10;
count
-------
4
(1 row)
-- expected this to work but it does not
SELECT count(*)
FROM (
(SELECT user_id FROM recent_users)
UNION ALL
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 15 AND user_id > 10;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
-- expand view definitions and re-run last 2 queries
SELECT count(*)
FROM (
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa
)
UNION
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u
WHERE user_id < 15 AND user_id > 10;
count
-------
4
(1 row)
SELECT count(*)
FROM (
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa
)
UNION ALL
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u
WHERE user_id < 15 AND user_id > 10;
ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position
DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list.
-- test distinct
-- distinct is supported if it is on a partition key
CREATE VIEW distinct_user_with_value_1_15 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 15;
SELECT * FROM distinct_user_with_value_1_15 ORDER BY user_id;
user_id
---------
7
8
35
42
46
53
70
82
87
88
96
(11 rows)
-- distinct is not supported if it is on a non-partition key
CREATE VIEW distinct_value_1 AS SELECT DISTINCT value_1 FROM users_table WHERE value_2 = 15;
SELECT * FROM distinct_value_1;
ERROR: cannot perform distributed planning on this query
DETAIL: Subqueries without group by clause are not supported yet
-- CTEs are not supported even if they are on views
CREATE VIEW cte_view_1 AS
WITH c1 AS (SELECT * FROM users_table WHERE value_1 = 15) SELECT * FROM c1 WHERE value_2 < 500;
SELECT * FROM cte_view_1;
ERROR: cannot push down this subquery
DETAIL: Table expressions other than simple relations and subqueries are currently unsupported
-- this is single shard query but still not supported since it has view + cte
-- router planner can't detect it
SELECT * FROM cte_view_1 WHERE user_id = 8;
ERROR: cannot push down this subquery
DETAIL: Table expressions other than simple relations and subqueries are currently unsupported
-- if CTE itself prunes down to a single shard than the view is supported (router plannable)
CREATE VIEW cte_view_2 AS
WITH c1 AS (SELECT * FROM users_table WHERE user_id = 8) SELECT * FROM c1 WHERE value_1 = 15;
SELECT * FROM cte_view_2;
user_id | time | value_1 | value_2 | value_3 | value_4
---------+---------------------------------+---------+---------+---------+---------
8 | Tue Jan 21 00:52:36.967785 2014 | 15 | 10 | 868 |
(1 row)
CREATE VIEW router_view AS SELECT * FROM users_table WHERE user_id = 2;
-- router plannable
SELECT user_id FROM router_view GROUP BY 1;
user_id
---------
2
(1 row)
-- There is a known issue with router plannable subqueries joined with non-router
-- plannable subqueries. Following tests should be uncommented when we fix it
-- join a router view (not implement error)
-- SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN recent_events USING (user_id);
-- it still does not work when converted to 2 subquery join
-- SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN (SELECT * FROM recent_events) re USING (user_id);
-- views are completely removed and still it does not work
-- SELECT * FROM
-- (SELECT user_id FROM (SELECT * FROM users_table WHERE user_id = 2) rv1 GROUP BY 1) rv2
-- JOIN (SELECT user_id, time FROM events_table
-- WHERE time > '2014-01-20 01:45:49.978738'::timestamp) re
-- USING (user_id);
-- views with limits
CREATE VIEW recent_10_users AS
SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
ORDER BY lastseen DESC
LIMIT 10;
-- this is not supported since it has limit in it and subquery_pushdown is not set
SELECT * FROM recent_10_users;
ERROR: cannot perform distributed planning on this query
DETAIL: Subqueries with limit are not supported yet
SET citus.subquery_pushdown to ON;
-- still not supported since outer query does not have limit
-- it shows a different (subquery with single relation) error message
SELECT * FROM recent_10_users;
ERROR: cannot perform distributed planning on this query
DETAIL: Subqueries with limit are not supported yet
-- now it displays more correct error message
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id);
ERROR: cannot push down this subquery
DETAIL: Limit in subquery without limit in the outermost query is unsupported
-- now both are supported when there is a limit on the outer most query
SELECT * FROM recent_10_users ORDER BY lastseen DESC LIMIT 10;
user_id | lastseen
---------+---------------------------------
6 | Tue Jan 21 05:57:47.118755 2014
71 | Tue Jan 21 05:55:52.018461 2014
39 | Tue Jan 21 05:55:18.875997 2014
74 | Tue Jan 21 05:54:04.837808 2014
87 | Tue Jan 21 05:53:51.866813 2014
50 | Tue Jan 21 05:53:44.251016 2014
66 | Tue Jan 21 05:51:31.681997 2014
100 | Tue Jan 21 05:49:04.953009 2014
46 | Tue Jan 21 05:49:00.229807 2014
86 | Tue Jan 21 05:48:54.381334 2014
(10 rows)
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
user_id | time | event_type | value_2 | value_3 | value_4
---------+---------------------------------+------------+---------+---------+---------
65 | Tue Jan 21 05:56:52.624231 2014 | 241 | 30 | 543 |
42 | Tue Jan 21 05:46:35.158342 2014 | 761 | 877 | 335 |
54 | Tue Jan 21 05:46:19.103645 2014 | 595 | 477 | 996 |
44 | Tue Jan 21 05:43:00.838945 2014 | 682 | 641 | 448 |
27 | Tue Jan 21 05:34:10.935865 2014 | 912 | 605 | 989 |
61 | Tue Jan 21 05:25:27.452065 2014 | 392 | 472 | 925 |
19 | Tue Jan 21 05:23:09.26298 2014 | 202 | 888 | 640 |
65 | Tue Jan 21 05:22:56.725329 2014 | 519 | 457 | 259 |
27 | Tue Jan 21 05:19:14.38026 2014 | 19 | 19 | 205 |
11 | Tue Jan 21 05:15:14.879531 2014 | 459 | 545 | 80 |
(10 rows)
RESET citus.subquery_pushdown;
-- explain tests
EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Sort Key: remote_scan.user_id
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: users_table.user_id
-> Hash Join
Hash Cond: (users_table.user_id = ru.user_id)
-> Bitmap Heap Scan on users_table_1400000 users_table
Recheck Cond: ((value_1 >= 120) AND (value_1 < 150))
-> Bitmap Index Scan on is_index3_1400000
Index Cond: ((value_1 >= 120) AND (value_1 < 150))
-> Hash
-> Subquery Scan on ru
-> Sort
Sort Key: (max(users_table_1."time")) DESC
-> HashAggregate
Group Key: users_table_1.user_id
Filter: (max(users_table_1."time") > '2014-01-21 05:45:49.978738'::timestamp without time zone)
-> Seq Scan on users_table_1400000 users_table_1
(25 rows)
EXPLAIN (COSTS FALSE) SELECT *
FROM (
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 15 AND user_id > 10
ORDER BY user_id;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Sort Key: remote_scan.user_id
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Unique
-> Sort
Sort Key: recent_users.user_id
-> Append
-> Subquery Scan on recent_users
-> Sort
Sort Key: (max(users_table."time")) DESC
-> GroupAggregate
Group Key: users_table.user_id
Filter: (max(users_table."time") > '2014-01-21 05:45:49.978738'::timestamp without time zone)
-> Index Scan using is_index1_1400000 on users_table_1400000 users_table
Index Cond: ((user_id < 15) AND (user_id > 10))
-> Index Scan using is_index1_1400000 on users_table_1400000 users_table_1
Index Cond: ((user_id < 15) AND (user_id > 10))
Filter: ((value_1 >= 120) AND (value_1 < 150))
(22 rows)
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
ERROR: cannot push down this subquery
DETAIL: Limit in subquery is currently unsupported
SET citus.subquery_pushdown to ON;
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: remote_scan."time" DESC
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Limit
-> Sort
Sort Key: et."time" DESC
-> Hash Join
Hash Cond: (et.user_id = recent_10_users.user_id)
-> Seq Scan on events_table_1400004 et
-> Hash
-> Subquery Scan on recent_10_users
-> Limit
-> Sort
Sort Key: (max(users_table."time")) DESC
-> HashAggregate
Group Key: users_table.user_id
-> Seq Scan on users_table_1400000 users_table
(22 rows)
RESET citus.subquery_pushdown;
DROP VIEW recent_10_users;
DROP VIEW router_view;
DROP VIEW cte_view_2;
DROP VIEW cte_view_1;
DROP VIEW distinct_value_1;
DROP VIEW distinct_user_with_value_1_15;
DROP VIEW recent_selected_users;
DROP VIEW selected_users;
DROP VIEW recent_events;
DROP VIEW recent_users;

View File

@ -117,6 +117,10 @@ SELECT l_suppkey, count(*) FROM
GROUP BY l_suppkey, l_shipdate) supps
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
-- repartition query on view with single table subquery
CREATE VIEW supp_count_view AS SELECT * FROM (SELECT l_suppkey, count(*) FROM lineitem_hash_part GROUP BY 1) s1;
SELECT * FROM supp_count_view ORDER BY 2 DESC, 1 LIMIT 10;
SET citus.task_executor_type to DEFAULT;
-- create a view with aggregate
@ -141,3 +145,275 @@ SELECT * FROM lineitems_by_orderkey ORDER BY 2 DESC, 1 ASC LIMIT 10;
SELECT * FROM lineitems_by_orderkey WHERE l_orderkey = 100;
DROP TABLE temp_lineitem CASCADE;
DROP VIEW supp_count_view;
DROP VIEW lineitems_by_orderkey;
DROP VIEW lineitems_by_shipping_method;
DROP VIEW air_shipped_lineitems;
DROP VIEW priority_lineitem;
DROP VIEW priority_orders;
-- new tests for real time use case including views and subqueries
-- create view to display recent user who has an activity after a timestamp
CREATE VIEW recent_users AS
SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC;
SELECT * FROM recent_users;
-- create a view for recent_events
CREATE VIEW recent_events AS
SELECT user_id, time FROM events_table
WHERE time > '2014-01-20 01:45:49.978738'::timestamp;
SELECT count(*) FROM recent_events;
-- count number of events of recent_users
SELECT count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id);
-- count number of events of per recent users order by count
SELECT ru.user_id, count(*)
FROM recent_users ru
JOIN events_table et
ON (ru.user_id = et.user_id)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1;
-- the same query with a left join however, it would still generate the same result
SELECT ru.user_id, count(*)
FROM recent_users ru
LEFT JOIN events_table et
ON (ru.user_id = et.user_id)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1;
-- query wrapped inside a subquery, it needs another top level order by
SELECT * FROM
(SELECT ru.user_id, count(*)
FROM recent_users ru
JOIN events_table et
ON (ru.user_id = et.user_id)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1) s1
ORDER BY 2 DESC, 1;
-- non-partition key joins are not supported inside subquery
SELECT * FROM
(SELECT ru.user_id, count(*)
FROM recent_users ru
JOIN events_table et
ON (ru.user_id = et.event_type)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1) s1
ORDER BY 2 DESC, 1;
-- join between views
-- recent users who has an event in recent events
SELECT ru.user_id FROM recent_users ru JOIN recent_events re USING(user_id) GROUP BY ru.user_id ORDER BY ru.user_id;
-- outer join inside a subquery
-- recent_events who are not done by recent users
SELECT count(*) FROM (
SELECT re.*, ru.user_id AS recent_user
FROM recent_events re LEFT JOIN recent_users ru USING(user_id)) reu
WHERE recent_user IS NULL;
-- same query with anti-join
SELECT count(*)
FROM recent_events re LEFT JOIN recent_users ru ON(ru.user_id = re.user_id)
WHERE ru.user_id IS NULL;
-- join between view and table
-- users who has recent activity and they have an entry with value_1 is less than 15
SELECT ut.* FROM recent_users ru JOIN users_table ut USING (user_id) WHERE ut.value_1 < 15 ORDER BY 1,2;
-- determine if a recent user has done a given event type or not
SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
FROM recent_users ru
LEFT JOIN events_table et
ON(ru.user_id = et.user_id AND et.event_type = 625)
ORDER BY 2 DESC, 1;
-- view vs table join wrapped inside a subquery
SELECT * FROM
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
FROM recent_users ru
LEFT JOIN events_table et
ON(ru.user_id = et.user_id AND et.event_type = 625)
) s1
ORDER BY 2 DESC, 1;
-- event vs table non-partition-key join is not supported
SELECT * FROM
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
FROM recent_users ru
LEFT JOIN events_table et
ON(ru.user_id = et.event_type)
) s1
ORDER BY 2 DESC, 1;
-- create a select only view
CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150;
CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id);
SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
-- this would be supported when we implement where partition_key in (subquery) support
SELECT et.* FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users);
-- it is supported when it is a router query
SELECT count(*) FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users WHERE user_id = 90);
-- expected this to work but it did not
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users);
-- wrapping it inside a SELECT * works
SELECT *
FROM (
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 15 AND user_id > 10
ORDER BY user_id;
-- union all also works for views
SELECT *
FROM (
(SELECT user_id FROM recent_users)
UNION ALL
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 15 AND user_id > 10
ORDER BY user_id;
SELECT count(*)
FROM (
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 15 AND user_id > 10;
-- expected this to work but it does not
SELECT count(*)
FROM (
(SELECT user_id FROM recent_users)
UNION ALL
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 15 AND user_id > 10;
-- expand view definitions and re-run last 2 queries
SELECT count(*)
FROM (
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa
)
UNION
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u
WHERE user_id < 15 AND user_id > 10;
SELECT count(*)
FROM (
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa
)
UNION ALL
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u
WHERE user_id < 15 AND user_id > 10;
-- test distinct
-- distinct is supported if it is on a partition key
CREATE VIEW distinct_user_with_value_1_15 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 15;
SELECT * FROM distinct_user_with_value_1_15 ORDER BY user_id;
-- distinct is not supported if it is on a non-partition key
CREATE VIEW distinct_value_1 AS SELECT DISTINCT value_1 FROM users_table WHERE value_2 = 15;
SELECT * FROM distinct_value_1;
-- CTEs are not supported even if they are on views
CREATE VIEW cte_view_1 AS
WITH c1 AS (SELECT * FROM users_table WHERE value_1 = 15) SELECT * FROM c1 WHERE value_2 < 500;
SELECT * FROM cte_view_1;
-- this is single shard query but still not supported since it has view + cte
-- router planner can't detect it
SELECT * FROM cte_view_1 WHERE user_id = 8;
-- if CTE itself prunes down to a single shard than the view is supported (router plannable)
CREATE VIEW cte_view_2 AS
WITH c1 AS (SELECT * FROM users_table WHERE user_id = 8) SELECT * FROM c1 WHERE value_1 = 15;
SELECT * FROM cte_view_2;
CREATE VIEW router_view AS SELECT * FROM users_table WHERE user_id = 2;
-- router plannable
SELECT user_id FROM router_view GROUP BY 1;
-- There is a known issue with router plannable subqueries joined with non-router
-- plannable subqueries. Following tests should be uncommented when we fix it
-- join a router view (not implement error)
-- SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN recent_events USING (user_id);
-- it still does not work when converted to 2 subquery join
-- SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN (SELECT * FROM recent_events) re USING (user_id);
-- views are completely removed and still it does not work
-- SELECT * FROM
-- (SELECT user_id FROM (SELECT * FROM users_table WHERE user_id = 2) rv1 GROUP BY 1) rv2
-- JOIN (SELECT user_id, time FROM events_table
-- WHERE time > '2014-01-20 01:45:49.978738'::timestamp) re
-- USING (user_id);
-- views with limits
CREATE VIEW recent_10_users AS
SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
ORDER BY lastseen DESC
LIMIT 10;
-- this is not supported since it has limit in it and subquery_pushdown is not set
SELECT * FROM recent_10_users;
SET citus.subquery_pushdown to ON;
-- still not supported since outer query does not have limit
-- it shows a different (subquery with single relation) error message
SELECT * FROM recent_10_users;
-- now it displays more correct error message
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id);
-- now both are supported when there is a limit on the outer most query
SELECT * FROM recent_10_users ORDER BY lastseen DESC LIMIT 10;
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
RESET citus.subquery_pushdown;
-- explain tests
EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
EXPLAIN (COSTS FALSE) SELECT *
FROM (
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 15 AND user_id > 10
ORDER BY user_id;
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
SET citus.subquery_pushdown to ON;
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
RESET citus.subquery_pushdown;
DROP VIEW recent_10_users;
DROP VIEW router_view;
DROP VIEW cte_view_2;
DROP VIEW cte_view_1;
DROP VIEW distinct_value_1;
DROP VIEW distinct_user_with_value_1_15;
DROP VIEW recent_selected_users;
DROP VIEW selected_users;
DROP VIEW recent_events;
DROP VIEW recent_users;