diff --git a/src/test/regress/expected/multi_view.out b/src/test/regress/expected/multi_view.out index 159ca78f2..a8d4c9b45 100644 --- a/src/test/regress/expected/multi_view.out +++ b/src/test/regress/expected/multi_view.out @@ -230,6 +230,23 @@ SELECT l_suppkey, count(*) FROM GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5; ERROR: cannot perform distributed planning on this query DETAIL: Subqueries without group by clause are not supported yet +-- repartition query on view with single table subquery +CREATE VIEW supp_count_view AS SELECT * FROM (SELECT l_suppkey, count(*) FROM lineitem_hash_part GROUP BY 1) s1; +SELECT * FROM supp_count_view ORDER BY 2 DESC, 1 LIMIT 10; + l_suppkey | count +-----------+------- + 6104 | 8 + 1868 | 6 + 5532 | 6 + 5849 | 6 + 6169 | 6 + 6669 | 6 + 6692 | 6 + 7703 | 6 + 7869 | 6 + 8426 | 6 +(10 rows) + SET citus.task_executor_type to DEFAULT; -- create a view with aggregate CREATE VIEW lineitems_by_shipping_method AS @@ -268,3 +285,613 @@ SELECT * FROM lineitems_by_orderkey WHERE l_orderkey = 100; (1 row) DROP TABLE temp_lineitem CASCADE; +DROP VIEW supp_count_view; +DROP VIEW lineitems_by_orderkey; +DROP VIEW lineitems_by_shipping_method; +DROP VIEW air_shipped_lineitems; +DROP VIEW priority_lineitem; +DROP VIEW priority_orders; +-- new tests for real time use case including views and subqueries +-- create view to display recent user who has an activity after a timestamp +CREATE VIEW recent_users AS + SELECT user_id, max(time) as lastseen FROM users_table + GROUP BY user_id + HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC; +SELECT * FROM recent_users; + user_id | lastseen +---------+--------------------------------- + 87 | Tue Jan 21 05:53:51.866813 2014 + 50 | Tue Jan 21 05:53:44.251016 2014 + 74 | Tue Jan 21 05:54:04.837808 2014 + 6 | Tue Jan 21 05:57:47.118755 2014 + 71 | Tue Jan 21 05:55:52.018461 2014 + 39 | Tue Jan 21 05:55:18.875997 2014 + 66 | Tue Jan 21 05:51:31.681997 2014 + 100 | Tue Jan 21 05:49:04.953009 2014 + 46 | Tue Jan 21 05:49:00.229807 2014 + 86 | Tue Jan 21 05:48:54.381334 2014 + 13 | Tue Jan 21 05:48:45.418146 2014 + 90 | Tue Jan 21 05:48:25.027491 2014 + 58 | Tue Jan 21 05:47:30.418553 2014 + 44 | Tue Jan 21 05:47:01.104523 2014 +(14 rows) + +-- create a view for recent_events +CREATE VIEW recent_events AS + SELECT user_id, time FROM events_table + WHERE time > '2014-01-20 01:45:49.978738'::timestamp; +SELECT count(*) FROM recent_events; + count +------- + 1105 +(1 row) + +-- count number of events of recent_users +SELECT count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id); + count +------- + 1336 +(1 row) + +-- count number of events of per recent users order by count +SELECT ru.user_id, count(*) + FROM recent_users ru + JOIN events_table et + ON (ru.user_id = et.user_id) + GROUP BY ru.user_id + ORDER BY 2 DESC, 1; + user_id | count +---------+------- + 13 | 118 + 44 | 109 + 90 | 109 + 87 | 105 + 46 | 103 + 86 | 100 + 66 | 98 + 39 | 96 + 71 | 95 + 74 | 93 + 6 | 89 + 58 | 87 + 50 | 79 + 100 | 55 +(14 rows) + +-- the same query with a left join however, it would still generate the same result +SELECT ru.user_id, count(*) + FROM recent_users ru + LEFT JOIN events_table et + ON (ru.user_id = et.user_id) + GROUP BY ru.user_id + ORDER BY 2 DESC, 1; + user_id | count +---------+------- + 13 | 118 + 44 | 109 + 90 | 109 + 87 | 105 + 46 | 103 + 86 | 100 + 66 | 98 + 39 | 96 + 71 | 95 + 74 | 93 + 6 | 89 + 58 | 87 + 50 | 79 + 100 | 55 +(14 rows) + +-- query wrapped inside a subquery, it needs another top level order by +SELECT * FROM + (SELECT ru.user_id, count(*) + FROM recent_users ru + JOIN events_table et + ON (ru.user_id = et.user_id) + GROUP BY ru.user_id + ORDER BY 2 DESC, 1) s1 +ORDER BY 2 DESC, 1; + user_id | count +---------+------- + 13 | 118 + 44 | 109 + 90 | 109 + 87 | 105 + 46 | 103 + 86 | 100 + 66 | 98 + 39 | 96 + 71 | 95 + 74 | 93 + 6 | 89 + 58 | 87 + 50 | 79 + 100 | 55 +(14 rows) + +-- non-partition key joins are not supported inside subquery +SELECT * FROM + (SELECT ru.user_id, count(*) + FROM recent_users ru + JOIN events_table et + ON (ru.user_id = et.event_type) + GROUP BY ru.user_id + ORDER BY 2 DESC, 1) s1 +ORDER BY 2 DESC, 1; +ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys +DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +-- join between views +-- recent users who has an event in recent events +SELECT ru.user_id FROM recent_users ru JOIN recent_events re USING(user_id) GROUP BY ru.user_id ORDER BY ru.user_id; + user_id +--------- + 6 + 13 + 39 + 44 + 46 + 50 + 58 + 66 + 71 + 74 + 86 + 87 + 90 + 100 +(14 rows) + +-- outer join inside a subquery +-- recent_events who are not done by recent users +SELECT count(*) FROM ( + SELECT re.*, ru.user_id AS recent_user + FROM recent_events re LEFT JOIN recent_users ru USING(user_id)) reu + WHERE recent_user IS NULL; + count +------- + 957 +(1 row) + +-- same query with anti-join +SELECT count(*) + FROM recent_events re LEFT JOIN recent_users ru ON(ru.user_id = re.user_id) + WHERE ru.user_id IS NULL; + count +------- + 957 +(1 row) + +-- join between view and table +-- users who has recent activity and they have an entry with value_1 is less than 15 +SELECT ut.* FROM recent_users ru JOIN users_table ut USING (user_id) WHERE ut.value_1 < 15 ORDER BY 1,2; + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 6 | Mon Jan 13 05:30:08.289267 2014 | 12 | 140 | 618 | + 6 | Thu Jan 16 15:17:16.779695 2014 | 6 | 978 | 430 | + 6 | Sun Jan 19 06:09:39.900888 2014 | 3 | 908 | 688 | + 13 | Sun Jan 19 22:09:26.256209 2014 | 2 | 755 | 584 | + 39 | Wed Jan 15 05:46:51.48765 2014 | 14 | 657 | 137 | + 39 | Sun Jan 19 11:26:47.45937 2014 | 12 | 118 | 165 | + 44 | Wed Jan 15 14:23:52.532426 2014 | 8 | 204 | 735 | + 44 | Sun Jan 19 05:53:34.829093 2014 | 4 | 758 | 205 | + 46 | Mon Jan 13 20:39:11.211169 2014 | 0 | 235 | 475 | + 46 | Wed Jan 15 09:14:57.471944 2014 | 2 | 407 | 664 | + 50 | Sat Jan 11 11:07:13.089216 2014 | 6 | 292 | 425 | + 58 | Sun Jan 19 22:36:14.795396 2014 | 2 | 86 | 311 | + 66 | Tue Jan 14 20:16:31.219213 2014 | 14 | 347 | 655 | + 74 | Tue Jan 21 01:38:39.570986 2014 | 9 | 334 | 642 | + 86 | Sun Jan 19 06:18:51.466578 2014 | 14 | 712 | 490 | + 87 | Sat Jan 11 20:46:28.439073 2014 | 2 | 528 | 311 | + 90 | Sun Jan 12 21:37:30.778206 2014 | 11 | 458 | 377 | + 100 | Sun Jan 19 22:32:08.284043 2014 | 2 | 384 | 149 | +(18 rows) + +-- determine if a recent user has done a given event type or not +SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event + FROM recent_users ru + LEFT JOIN events_table et + ON(ru.user_id = et.user_id AND et.event_type = 625) + ORDER BY 2 DESC, 1; + user_id | done_event +---------+------------ + 6 | YES + 13 | NO + 39 | NO + 44 | NO + 46 | NO + 50 | NO + 58 | NO + 66 | NO + 71 | NO + 74 | NO + 86 | NO + 87 | NO + 90 | NO + 100 | NO +(14 rows) + +-- view vs table join wrapped inside a subquery +SELECT * FROM + (SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event + FROM recent_users ru + LEFT JOIN events_table et + ON(ru.user_id = et.user_id AND et.event_type = 625) + ) s1 +ORDER BY 2 DESC, 1; + user_id | done_event +---------+------------ + 6 | YES + 13 | NO + 39 | NO + 44 | NO + 46 | NO + 50 | NO + 58 | NO + 66 | NO + 71 | NO + 74 | NO + 86 | NO + 87 | NO + 90 | NO + 100 | NO +(14 rows) + +-- event vs table non-partition-key join is not supported +SELECT * FROM + (SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event + FROM recent_users ru + LEFT JOIN events_table et + ON(ru.user_id = et.event_type) + ) s1 +ORDER BY 2 DESC, 1; +ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys +DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. +-- create a select only view +CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150; +CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id); +SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1; + user_id +--------- + 6 + 13 + 39 + 44 + 46 + 50 + 58 + 66 + 71 + 74 + 86 + 90 +(12 rows) + +-- this would be supported when we implement where partition_key in (subquery) support +SELECT et.* FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users); +ERROR: could not run distributed query with subquery outside the FROM clause +HINT: Consider using an equality filter on the distributed table's partition column. +-- it is supported when it is a router query +SELECT count(*) FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users WHERE user_id = 90); + count +------- + 109 +(1 row) + +-- expected this to work but it did not +(SELECT user_id FROM recent_users) +UNION +(SELECT user_id FROM selected_users); +ERROR: could not run distributed query with UNION, INTERSECT, or EXCEPT +HINT: Consider using an equality filter on the distributed table's partition column. +-- wrapping it inside a SELECT * works +SELECT * + FROM ( + (SELECT user_id FROM recent_users) + UNION + (SELECT user_id FROM selected_users) ) u + WHERE user_id < 15 AND user_id > 10 + ORDER BY user_id; + user_id +--------- + 11 + 12 + 13 + 14 +(4 rows) + +-- union all also works for views +SELECT * + FROM ( + (SELECT user_id FROM recent_users) + UNION ALL + (SELECT user_id FROM selected_users) ) u + WHERE user_id < 15 AND user_id > 10 + ORDER BY user_id; + user_id +--------- + 11 + 11 + 11 + 12 + 12 + 12 + 12 + 12 + 12 + 13 + 13 + 13 + 13 + 13 + 14 +(15 rows) + +SELECT count(*) + FROM ( + (SELECT user_id FROM recent_users) + UNION + (SELECT user_id FROM selected_users) ) u + WHERE user_id < 15 AND user_id > 10; + count +------- + 4 +(1 row) + +-- expected this to work but it does not +SELECT count(*) + FROM ( + (SELECT user_id FROM recent_users) + UNION ALL + (SELECT user_id FROM selected_users) ) u + WHERE user_id < 15 AND user_id > 10; +ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position +DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. +-- expand view definitions and re-run last 2 queries +SELECT count(*) + FROM ( + (SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table + GROUP BY user_id + HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa + ) + UNION + (SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u + WHERE user_id < 15 AND user_id > 10; + count +------- + 4 +(1 row) + +SELECT count(*) + FROM ( + (SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table + GROUP BY user_id + HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa + ) + UNION ALL + (SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u + WHERE user_id < 15 AND user_id > 10; +ERROR: cannot pushdown the subquery since all leaves of the UNION does not include partition key at the same position +DETAIL: Each leaf query of the UNION should return partition key at the same position on its target list. +-- test distinct +-- distinct is supported if it is on a partition key +CREATE VIEW distinct_user_with_value_1_15 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 15; +SELECT * FROM distinct_user_with_value_1_15 ORDER BY user_id; + user_id +--------- + 7 + 8 + 35 + 42 + 46 + 53 + 70 + 82 + 87 + 88 + 96 +(11 rows) + +-- distinct is not supported if it is on a non-partition key +CREATE VIEW distinct_value_1 AS SELECT DISTINCT value_1 FROM users_table WHERE value_2 = 15; +SELECT * FROM distinct_value_1; +ERROR: cannot perform distributed planning on this query +DETAIL: Subqueries without group by clause are not supported yet +-- CTEs are not supported even if they are on views +CREATE VIEW cte_view_1 AS +WITH c1 AS (SELECT * FROM users_table WHERE value_1 = 15) SELECT * FROM c1 WHERE value_2 < 500; +SELECT * FROM cte_view_1; +ERROR: cannot push down this subquery +DETAIL: Table expressions other than simple relations and subqueries are currently unsupported +-- this is single shard query but still not supported since it has view + cte +-- router planner can't detect it +SELECT * FROM cte_view_1 WHERE user_id = 8; +ERROR: cannot push down this subquery +DETAIL: Table expressions other than simple relations and subqueries are currently unsupported +-- if CTE itself prunes down to a single shard than the view is supported (router plannable) +CREATE VIEW cte_view_2 AS +WITH c1 AS (SELECT * FROM users_table WHERE user_id = 8) SELECT * FROM c1 WHERE value_1 = 15; +SELECT * FROM cte_view_2; + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 8 | Tue Jan 21 00:52:36.967785 2014 | 15 | 10 | 868 | +(1 row) + +CREATE VIEW router_view AS SELECT * FROM users_table WHERE user_id = 2; +-- router plannable +SELECT user_id FROM router_view GROUP BY 1; + user_id +--------- + 2 +(1 row) + +-- There is a known issue with router plannable subqueries joined with non-router +-- plannable subqueries. Following tests should be uncommented when we fix it +-- join a router view (not implement error) +-- SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN recent_events USING (user_id); +-- it still does not work when converted to 2 subquery join +-- SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN (SELECT * FROM recent_events) re USING (user_id); +-- views are completely removed and still it does not work +-- SELECT * FROM +-- (SELECT user_id FROM (SELECT * FROM users_table WHERE user_id = 2) rv1 GROUP BY 1) rv2 +-- JOIN (SELECT user_id, time FROM events_table +-- WHERE time > '2014-01-20 01:45:49.978738'::timestamp) re +-- USING (user_id); +-- views with limits +CREATE VIEW recent_10_users AS + SELECT user_id, max(time) as lastseen FROM users_table + GROUP BY user_id + ORDER BY lastseen DESC + LIMIT 10; +-- this is not supported since it has limit in it and subquery_pushdown is not set +SELECT * FROM recent_10_users; +ERROR: cannot perform distributed planning on this query +DETAIL: Subqueries with limit are not supported yet +SET citus.subquery_pushdown to ON; +-- still not supported since outer query does not have limit +-- it shows a different (subquery with single relation) error message +SELECT * FROM recent_10_users; +ERROR: cannot perform distributed planning on this query +DETAIL: Subqueries with limit are not supported yet +-- now it displays more correct error message +SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id); +ERROR: cannot push down this subquery +DETAIL: Limit in subquery without limit in the outermost query is unsupported +-- now both are supported when there is a limit on the outer most query +SELECT * FROM recent_10_users ORDER BY lastseen DESC LIMIT 10; + user_id | lastseen +---------+--------------------------------- + 6 | Tue Jan 21 05:57:47.118755 2014 + 71 | Tue Jan 21 05:55:52.018461 2014 + 39 | Tue Jan 21 05:55:18.875997 2014 + 74 | Tue Jan 21 05:54:04.837808 2014 + 87 | Tue Jan 21 05:53:51.866813 2014 + 50 | Tue Jan 21 05:53:44.251016 2014 + 66 | Tue Jan 21 05:51:31.681997 2014 + 100 | Tue Jan 21 05:49:04.953009 2014 + 46 | Tue Jan 21 05:49:00.229807 2014 + 86 | Tue Jan 21 05:48:54.381334 2014 +(10 rows) + +SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; + user_id | time | event_type | value_2 | value_3 | value_4 +---------+---------------------------------+------------+---------+---------+--------- + 65 | Tue Jan 21 05:56:52.624231 2014 | 241 | 30 | 543 | + 42 | Tue Jan 21 05:46:35.158342 2014 | 761 | 877 | 335 | + 54 | Tue Jan 21 05:46:19.103645 2014 | 595 | 477 | 996 | + 44 | Tue Jan 21 05:43:00.838945 2014 | 682 | 641 | 448 | + 27 | Tue Jan 21 05:34:10.935865 2014 | 912 | 605 | 989 | + 61 | Tue Jan 21 05:25:27.452065 2014 | 392 | 472 | 925 | + 19 | Tue Jan 21 05:23:09.26298 2014 | 202 | 888 | 640 | + 65 | Tue Jan 21 05:22:56.725329 2014 | 519 | 457 | 259 | + 27 | Tue Jan 21 05:19:14.38026 2014 | 19 | 19 | 205 | + 11 | Tue Jan 21 05:15:14.879531 2014 | 459 | 545 | 80 | +(10 rows) + +RESET citus.subquery_pushdown; +-- explain tests +EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: remote_scan.user_id + -> HashAggregate + Group Key: remote_scan.user_id + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=57637 dbname=regression + -> HashAggregate + Group Key: users_table.user_id + -> Hash Join + Hash Cond: (users_table.user_id = ru.user_id) + -> Bitmap Heap Scan on users_table_1400000 users_table + Recheck Cond: ((value_1 >= 120) AND (value_1 < 150)) + -> Bitmap Index Scan on is_index3_1400000 + Index Cond: ((value_1 >= 120) AND (value_1 < 150)) + -> Hash + -> Subquery Scan on ru + -> Sort + Sort Key: (max(users_table_1."time")) DESC + -> HashAggregate + Group Key: users_table_1.user_id + Filter: (max(users_table_1."time") > '2014-01-21 05:45:49.978738'::timestamp without time zone) + -> Seq Scan on users_table_1400000 users_table_1 +(25 rows) + +EXPLAIN (COSTS FALSE) SELECT * + FROM ( + (SELECT user_id FROM recent_users) + UNION + (SELECT user_id FROM selected_users) ) u + WHERE user_id < 15 AND user_id > 10 + ORDER BY user_id; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: remote_scan.user_id + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=57637 dbname=regression + -> Unique + -> Sort + Sort Key: recent_users.user_id + -> Append + -> Subquery Scan on recent_users + -> Sort + Sort Key: (max(users_table."time")) DESC + -> GroupAggregate + Group Key: users_table.user_id + Filter: (max(users_table."time") > '2014-01-21 05:45:49.978738'::timestamp without time zone) + -> Index Scan using is_index1_1400000 on users_table_1400000 users_table + Index Cond: ((user_id < 15) AND (user_id > 10)) + -> Index Scan using is_index1_1400000 on users_table_1400000 users_table_1 + Index Cond: ((user_id < 15) AND (user_id > 10)) + Filter: ((value_1 >= 120) AND (value_1 < 150)) +(22 rows) + +EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; +ERROR: cannot push down this subquery +DETAIL: Limit in subquery is currently unsupported +SET citus.subquery_pushdown to ON; +EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------- + Limit + -> Sort + Sort Key: remote_scan."time" DESC + -> Custom Scan (Citus Real-Time) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=57637 dbname=regression + -> Limit + -> Sort + Sort Key: et."time" DESC + -> Hash Join + Hash Cond: (et.user_id = recent_10_users.user_id) + -> Seq Scan on events_table_1400004 et + -> Hash + -> Subquery Scan on recent_10_users + -> Limit + -> Sort + Sort Key: (max(users_table."time")) DESC + -> HashAggregate + Group Key: users_table.user_id + -> Seq Scan on users_table_1400000 users_table +(22 rows) + +RESET citus.subquery_pushdown; +DROP VIEW recent_10_users; +DROP VIEW router_view; +DROP VIEW cte_view_2; +DROP VIEW cte_view_1; +DROP VIEW distinct_value_1; +DROP VIEW distinct_user_with_value_1_15; +DROP VIEW recent_selected_users; +DROP VIEW selected_users; +DROP VIEW recent_events; +DROP VIEW recent_users; diff --git a/src/test/regress/sql/multi_view.sql b/src/test/regress/sql/multi_view.sql index af7294334..26619b439 100644 --- a/src/test/regress/sql/multi_view.sql +++ b/src/test/regress/sql/multi_view.sql @@ -117,6 +117,10 @@ SELECT l_suppkey, count(*) FROM GROUP BY l_suppkey, l_shipdate) supps GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5; +-- repartition query on view with single table subquery +CREATE VIEW supp_count_view AS SELECT * FROM (SELECT l_suppkey, count(*) FROM lineitem_hash_part GROUP BY 1) s1; +SELECT * FROM supp_count_view ORDER BY 2 DESC, 1 LIMIT 10; + SET citus.task_executor_type to DEFAULT; -- create a view with aggregate @@ -141,3 +145,275 @@ SELECT * FROM lineitems_by_orderkey ORDER BY 2 DESC, 1 ASC LIMIT 10; SELECT * FROM lineitems_by_orderkey WHERE l_orderkey = 100; DROP TABLE temp_lineitem CASCADE; + +DROP VIEW supp_count_view; +DROP VIEW lineitems_by_orderkey; +DROP VIEW lineitems_by_shipping_method; +DROP VIEW air_shipped_lineitems; +DROP VIEW priority_lineitem; +DROP VIEW priority_orders; + +-- new tests for real time use case including views and subqueries + +-- create view to display recent user who has an activity after a timestamp +CREATE VIEW recent_users AS + SELECT user_id, max(time) as lastseen FROM users_table + GROUP BY user_id + HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC; +SELECT * FROM recent_users; + +-- create a view for recent_events +CREATE VIEW recent_events AS + SELECT user_id, time FROM events_table + WHERE time > '2014-01-20 01:45:49.978738'::timestamp; + +SELECT count(*) FROM recent_events; + +-- count number of events of recent_users +SELECT count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id); +-- count number of events of per recent users order by count +SELECT ru.user_id, count(*) + FROM recent_users ru + JOIN events_table et + ON (ru.user_id = et.user_id) + GROUP BY ru.user_id + ORDER BY 2 DESC, 1; + +-- the same query with a left join however, it would still generate the same result +SELECT ru.user_id, count(*) + FROM recent_users ru + LEFT JOIN events_table et + ON (ru.user_id = et.user_id) + GROUP BY ru.user_id + ORDER BY 2 DESC, 1; + +-- query wrapped inside a subquery, it needs another top level order by +SELECT * FROM + (SELECT ru.user_id, count(*) + FROM recent_users ru + JOIN events_table et + ON (ru.user_id = et.user_id) + GROUP BY ru.user_id + ORDER BY 2 DESC, 1) s1 +ORDER BY 2 DESC, 1; + +-- non-partition key joins are not supported inside subquery +SELECT * FROM + (SELECT ru.user_id, count(*) + FROM recent_users ru + JOIN events_table et + ON (ru.user_id = et.event_type) + GROUP BY ru.user_id + ORDER BY 2 DESC, 1) s1 +ORDER BY 2 DESC, 1; + +-- join between views +-- recent users who has an event in recent events +SELECT ru.user_id FROM recent_users ru JOIN recent_events re USING(user_id) GROUP BY ru.user_id ORDER BY ru.user_id; + +-- outer join inside a subquery +-- recent_events who are not done by recent users +SELECT count(*) FROM ( + SELECT re.*, ru.user_id AS recent_user + FROM recent_events re LEFT JOIN recent_users ru USING(user_id)) reu + WHERE recent_user IS NULL; + +-- same query with anti-join +SELECT count(*) + FROM recent_events re LEFT JOIN recent_users ru ON(ru.user_id = re.user_id) + WHERE ru.user_id IS NULL; + +-- join between view and table +-- users who has recent activity and they have an entry with value_1 is less than 15 +SELECT ut.* FROM recent_users ru JOIN users_table ut USING (user_id) WHERE ut.value_1 < 15 ORDER BY 1,2; + +-- determine if a recent user has done a given event type or not +SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event + FROM recent_users ru + LEFT JOIN events_table et + ON(ru.user_id = et.user_id AND et.event_type = 625) + ORDER BY 2 DESC, 1; + +-- view vs table join wrapped inside a subquery +SELECT * FROM + (SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event + FROM recent_users ru + LEFT JOIN events_table et + ON(ru.user_id = et.user_id AND et.event_type = 625) + ) s1 +ORDER BY 2 DESC, 1; + +-- event vs table non-partition-key join is not supported +SELECT * FROM + (SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event + FROM recent_users ru + LEFT JOIN events_table et + ON(ru.user_id = et.event_type) + ) s1 +ORDER BY 2 DESC, 1; + +-- create a select only view +CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150; +CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id); + +SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1; + +-- this would be supported when we implement where partition_key in (subquery) support +SELECT et.* FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users); + +-- it is supported when it is a router query +SELECT count(*) FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users WHERE user_id = 90); + +-- expected this to work but it did not +(SELECT user_id FROM recent_users) +UNION +(SELECT user_id FROM selected_users); + +-- wrapping it inside a SELECT * works +SELECT * + FROM ( + (SELECT user_id FROM recent_users) + UNION + (SELECT user_id FROM selected_users) ) u + WHERE user_id < 15 AND user_id > 10 + ORDER BY user_id; + +-- union all also works for views +SELECT * + FROM ( + (SELECT user_id FROM recent_users) + UNION ALL + (SELECT user_id FROM selected_users) ) u + WHERE user_id < 15 AND user_id > 10 + ORDER BY user_id; + +SELECT count(*) + FROM ( + (SELECT user_id FROM recent_users) + UNION + (SELECT user_id FROM selected_users) ) u + WHERE user_id < 15 AND user_id > 10; + +-- expected this to work but it does not +SELECT count(*) + FROM ( + (SELECT user_id FROM recent_users) + UNION ALL + (SELECT user_id FROM selected_users) ) u + WHERE user_id < 15 AND user_id > 10; + +-- expand view definitions and re-run last 2 queries +SELECT count(*) + FROM ( + (SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table + GROUP BY user_id + HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa + ) + UNION + (SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u + WHERE user_id < 15 AND user_id > 10; + +SELECT count(*) + FROM ( + (SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table + GROUP BY user_id + HAVING max(time) > '2014-01-21 05:45:49.978738'::timestamp order by 2 DESC) aa + ) + UNION ALL + (SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 120 and value_1 <150) bb) ) u + WHERE user_id < 15 AND user_id > 10; + +-- test distinct +-- distinct is supported if it is on a partition key +CREATE VIEW distinct_user_with_value_1_15 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 15; +SELECT * FROM distinct_user_with_value_1_15 ORDER BY user_id; + +-- distinct is not supported if it is on a non-partition key +CREATE VIEW distinct_value_1 AS SELECT DISTINCT value_1 FROM users_table WHERE value_2 = 15; +SELECT * FROM distinct_value_1; + +-- CTEs are not supported even if they are on views +CREATE VIEW cte_view_1 AS +WITH c1 AS (SELECT * FROM users_table WHERE value_1 = 15) SELECT * FROM c1 WHERE value_2 < 500; + +SELECT * FROM cte_view_1; + +-- this is single shard query but still not supported since it has view + cte +-- router planner can't detect it +SELECT * FROM cte_view_1 WHERE user_id = 8; + +-- if CTE itself prunes down to a single shard than the view is supported (router plannable) +CREATE VIEW cte_view_2 AS +WITH c1 AS (SELECT * FROM users_table WHERE user_id = 8) SELECT * FROM c1 WHERE value_1 = 15; +SELECT * FROM cte_view_2; + +CREATE VIEW router_view AS SELECT * FROM users_table WHERE user_id = 2; +-- router plannable +SELECT user_id FROM router_view GROUP BY 1; + +-- There is a known issue with router plannable subqueries joined with non-router +-- plannable subqueries. Following tests should be uncommented when we fix it + +-- join a router view (not implement error) +-- SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN recent_events USING (user_id); + +-- it still does not work when converted to 2 subquery join +-- SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN (SELECT * FROM recent_events) re USING (user_id); + +-- views are completely removed and still it does not work +-- SELECT * FROM +-- (SELECT user_id FROM (SELECT * FROM users_table WHERE user_id = 2) rv1 GROUP BY 1) rv2 +-- JOIN (SELECT user_id, time FROM events_table +-- WHERE time > '2014-01-20 01:45:49.978738'::timestamp) re +-- USING (user_id); + +-- views with limits +CREATE VIEW recent_10_users AS + SELECT user_id, max(time) as lastseen FROM users_table + GROUP BY user_id + ORDER BY lastseen DESC + LIMIT 10; + +-- this is not supported since it has limit in it and subquery_pushdown is not set +SELECT * FROM recent_10_users; + +SET citus.subquery_pushdown to ON; +-- still not supported since outer query does not have limit +-- it shows a different (subquery with single relation) error message +SELECT * FROM recent_10_users; +-- now it displays more correct error message +SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id); + +-- now both are supported when there is a limit on the outer most query +SELECT * FROM recent_10_users ORDER BY lastseen DESC LIMIT 10; +SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; + +RESET citus.subquery_pushdown; + +-- explain tests +EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1; + +EXPLAIN (COSTS FALSE) SELECT * + FROM ( + (SELECT user_id FROM recent_users) + UNION + (SELECT user_id FROM selected_users) ) u + WHERE user_id < 15 AND user_id > 10 + ORDER BY user_id; + +EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; +SET citus.subquery_pushdown to ON; +EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10; + +RESET citus.subquery_pushdown; + +DROP VIEW recent_10_users; +DROP VIEW router_view; +DROP VIEW cte_view_2; +DROP VIEW cte_view_1; +DROP VIEW distinct_value_1; +DROP VIEW distinct_user_with_value_1_15; +DROP VIEW recent_selected_users; +DROP VIEW selected_users; +DROP VIEW recent_events; +DROP VIEW recent_users;