citus/src/test/regress/sql/multi_view.sql

436 lines
16 KiB
SQL

--
-- MULTI_VIEW
--
-- This file contains test cases for view support. It verifies various
-- Citus features: simple selects, aggregates, joins, outer joins
-- router queries, single row inserts, multi row inserts via insert
-- into select, multi row insert via copy commands.
-- print whether we're using version > 10 to make version-specific tests clear
SHOW server_version \gset
SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten;
SELECT count(*) FROM lineitem_hash_part;
SELECT count(*) FROM orders_hash_part;
-- create a view for priority orders
CREATE VIEW priority_orders AS SELECT * FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM';
-- aggregate pushdown
SELECT o_orderpriority, count(*) FROM priority_orders GROUP BY 1 ORDER BY 2, 1;
SELECT o_orderpriority, count(*) FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM' GROUP BY 1 ORDER BY 2,1;
-- filters
SELECT o_orderpriority, count(*) as all, count(*) FILTER (WHERE o_orderstatus ='F') as fullfilled FROM priority_orders GROUP BY 1 ORDER BY 2, 1;
-- having
SELECT o_orderdate, count(*) from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc;
-- having with filters
SELECT o_orderdate, count(*) as all, count(*) FILTER(WHERE o_orderstatus = 'F') from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc;
-- limit
SELECT o_orderkey, o_totalprice from orders_hash_part order by 2 desc, 1 asc limit 5 ;
SELECT o_orderkey, o_totalprice from priority_orders order by 2 desc, 1 asc limit 1 ;
CREATE VIEW priority_lineitem AS SELECT li.* FROM lineitem_hash_part li JOIN priority_orders ON (l_orderkey = o_orderkey);
SELECT l_orderkey, count(*) FROM priority_lineitem GROUP BY 1 ORDER BY 2 DESC, 1 LIMIT 5;
CREATE VIEW air_shipped_lineitems AS SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR';
-- join between view and table
SELECT count(*) FROM orders_hash_part join air_shipped_lineitems ON (o_orderkey = l_orderkey);
-- join between views
SELECT count(*) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey);
-- count distinct on partition column is supported
SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey);
-- count distinct on non-partition column is supported
SELECT count(distinct o_orderpriority) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey);
-- count distinct on partition column is supported on router queries
SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems
ON (o_orderkey = l_orderkey)
WHERE (o_orderkey = 231);
-- select distinct on router joins of views also works
SELECT distinct(o_orderkey) FROM priority_orders join air_shipped_lineitems
ON (o_orderkey = l_orderkey)
WHERE (o_orderkey = 231);
-- left join support depends on flattening of the query
SELECT o_orderkey, l_orderkey FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey) ORDER BY o_orderkey LIMIT 1;
-- however, this works
SELECT count(*) FROM priority_orders left join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
-- view on the inner side is supported
SELECT count(*) FROM priority_orders right join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
-- view on the outer side is supported
SELECT count(*) FROM lineitem_hash_part right join priority_orders ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
-- left join on router query is supported
SELECT o_orderkey, l_linenumber FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey)
WHERE o_orderkey = 2;
-- repartition query on view join
-- it passes planning, fails at execution stage
SET client_min_messages TO DEBUG1;
SELECT * FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey) ORDER BY o_orderkey DESC, o_custkey DESC, o_orderpriority DESC LIMIT 5;
RESET client_min_messages;
SELECT count(*) FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey);
-- materialized views work
-- insert into... select works with views
CREATE TABLE temp_lineitem(LIKE lineitem_hash_part);
SELECT create_distributed_table('temp_lineitem', 'l_orderkey', 'hash', 'lineitem_hash_part');
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems;
SELECT count(*) FROM temp_lineitem;
-- following is a where false query, should not be inserting anything
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems WHERE l_shipmode = 'MAIL';
SELECT count(*) FROM temp_lineitem;
-- can create and query materialized views
CREATE MATERIALIZED VIEW mode_counts
AS SELECT l_shipmode, count(*) FROM temp_lineitem GROUP BY l_shipmode;
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
-- materialized views are local, cannot join with distributed tables
SELECT count(*) FROM mode_counts JOIN temp_lineitem USING (l_shipmode);
-- new data is not immediately reflected in the view
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems;
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
-- refresh updates the materialised view with new data
REFRESH MATERIALIZED VIEW mode_counts;
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
DROP MATERIALIZED VIEW mode_counts;
SET citus.task_executor_type to "task-tracker";
-- single view repartition subqueries are not supported
SELECT l_suppkey, count(*) FROM
(SELECT l_suppkey, l_shipdate, count(*)
FROM air_shipped_lineitems GROUP BY l_suppkey, l_shipdate) supps
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
-- logically same query without a view works fine
SELECT l_suppkey, count(*) FROM
(SELECT l_suppkey, l_shipdate, count(*)
FROM lineitem_hash_part WHERE l_shipmode = 'AIR' GROUP BY l_suppkey, l_shipdate) supps
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
-- when a view is replaced by actual query it still fails
SELECT l_suppkey, count(*) FROM
(SELECT l_suppkey, l_shipdate, count(*)
FROM (SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR') asi
GROUP BY l_suppkey, l_shipdate) supps
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
-- repartition query on view with single table subquery
CREATE VIEW supp_count_view AS SELECT * FROM (SELECT l_suppkey, count(*) FROM lineitem_hash_part GROUP BY 1) s1;
SELECT * FROM supp_count_view ORDER BY 2 DESC, 1 LIMIT 10;
SET citus.task_executor_type to DEFAULT;
-- create a view with aggregate
CREATE VIEW lineitems_by_shipping_method AS
SELECT l_shipmode, count(*) as cnt FROM lineitem_hash_part GROUP BY 1;
-- following will be supported via recursive planning
SELECT * FROM lineitems_by_shipping_method ORDER BY 1,2 LIMIT 5;
-- create a view with group by on partition column
CREATE VIEW lineitems_by_orderkey AS
SELECT
l_orderkey, count(*)
FROM
lineitem_hash_part
GROUP BY 1;
-- this should work since we're able to push down this query
SELECT * FROM lineitems_by_orderkey ORDER BY 2 DESC, 1 ASC LIMIT 10;
-- it would also work since it is made router plannable
SELECT * FROM lineitems_by_orderkey WHERE l_orderkey = 100;
DROP TABLE temp_lineitem CASCADE;
DROP VIEW supp_count_view;
DROP VIEW lineitems_by_orderkey;
DROP VIEW lineitems_by_shipping_method;
DROP VIEW air_shipped_lineitems;
DROP VIEW priority_lineitem;
DROP VIEW priority_orders;
-- new tests for real time use case including views and subqueries
-- create view to display recent user who has an activity after a timestamp
CREATE VIEW recent_users AS
SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
HAVING max(time) > '2017-11-23 16:20:33.264457'::timestamp order by 2 DESC;
SELECT * FROM recent_users ORDER BY 2 DESC, 1 DESC;
-- create a view for recent_events
CREATE VIEW recent_events AS
SELECT user_id, time FROM events_table
WHERE time > '2017-11-23 16:20:33.264457'::timestamp;
SELECT count(*) FROM recent_events;
-- count number of events of recent_users
SELECT count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id);
-- count number of events of per recent users order by count
SELECT ru.user_id, count(*)
FROM recent_users ru
JOIN events_table et
ON (ru.user_id = et.user_id)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1;
-- the same query with a left join however, it would still generate the same result
SELECT ru.user_id, count(*)
FROM recent_users ru
LEFT JOIN events_table et
ON (ru.user_id = et.user_id)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1;
-- query wrapped inside a subquery, it needs another top level order by
SELECT * FROM
(SELECT ru.user_id, count(*)
FROM recent_users ru
JOIN events_table et
ON (ru.user_id = et.user_id)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1) s1
ORDER BY 2 DESC, 1;
-- non-partition key joins are supported inside subquery
-- via pull-push execution
SELECT * FROM
(SELECT ru.user_id, count(*)
FROM recent_users ru
JOIN events_table et
ON (ru.user_id = et.event_type)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1) s1
ORDER BY 2 DESC, 1;
-- join between views
-- recent users who has an event in recent events
SELECT ru.user_id FROM recent_users ru JOIN recent_events re USING(user_id) GROUP BY ru.user_id ORDER BY ru.user_id;
-- outer join inside a subquery
-- recent_events who are not done by recent users
SELECT count(*) FROM (
SELECT re.*, ru.user_id AS recent_user
FROM recent_events re LEFT JOIN recent_users ru USING(user_id)) reu
WHERE recent_user IS NULL;
-- same query with anti-join
SELECT count(*)
FROM recent_events re LEFT JOIN recent_users ru ON(ru.user_id = re.user_id)
WHERE ru.user_id IS NULL;
-- join between view and table
-- users who has recent activity and they have an entry with value_1 is less than 3
SELECT ut.* FROM recent_users ru JOIN users_table ut USING (user_id) WHERE ut.value_1 < 3 ORDER BY 1,2;
-- determine if a recent user has done a given event type or not
SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
FROM recent_users ru
LEFT JOIN events_table et
ON(ru.user_id = et.user_id AND et.event_type = 6)
ORDER BY 2 DESC, 1;
-- view vs table join wrapped inside a subquery
SELECT * FROM
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
FROM recent_users ru
LEFT JOIN events_table et
ON(ru.user_id = et.user_id AND et.event_type = 6)
) s1
ORDER BY 2 DESC, 1;
-- event vs table non-partition-key join is not supported
-- given that we cannot recursively plan tables yet
SELECT * FROM
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
FROM recent_users ru
LEFT JOIN events_table et
ON(ru.user_id = et.event_type)
) s1
ORDER BY 2 DESC, 1;
-- create a select only view
CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 1 and value_1 <3;
CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id);
SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
-- this would be supported when we implement where partition_key in (subquery) support
SELECT et.user_id, et.time FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users) GROUP BY 1,2 ORDER BY 1 DESC,2 DESC LIMIT 5;
-- it is supported when it is a router query
SELECT count(*) FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users WHERE user_id = 1);
-- union between views is supported through recursive planning
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users)
ORDER BY 1;
-- wrapping it inside a SELECT * works
SELECT *
FROM (
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 2 AND user_id > 0
ORDER BY user_id;
-- union all also works for views
SELECT *
FROM (
(SELECT user_id FROM recent_users)
UNION ALL
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 2 AND user_id > 0
ORDER BY user_id;
SELECT count(*)
FROM (
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 2 AND user_id > 0;
-- UNION ALL between views is supported through recursive planning
SELECT count(*)
FROM (
(SELECT user_id FROM recent_users)
UNION ALL
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 2 AND user_id > 0;
-- expand view definitions and re-run last 2 queries
SELECT count(*)
FROM (
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
HAVING max(time) > '2017-11-22 05:45:49.978738'::timestamp order by 2 DESC) aa
)
UNION
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 1 and value_1 < 3) bb) ) u
WHERE user_id < 2 AND user_id > 0;
SELECT count(*)
FROM (
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
HAVING max(time) > '2017-11-22 05:45:49.978738'::timestamp order by 2 DESC) aa
)
UNION ALL
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 1 and value_1 < 3) bb) ) u
WHERE user_id < 2 AND user_id > 0;
-- test distinct
-- distinct is supported if it is on a partition key
CREATE VIEW distinct_user_with_value_1_3 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 3;
SELECT * FROM distinct_user_with_value_1_3 ORDER BY user_id;
-- distinct is not supported if it is on a non-partition key
-- but will be supported via recursive planning
CREATE VIEW distinct_value_1 AS SELECT DISTINCT value_1 FROM users_table WHERE value_2 = 3;
SELECT * FROM distinct_value_1 ORDER BY 1 DESC LIMIT 5;
-- CTEs are supported even if they are on views
CREATE VIEW cte_view_1 AS
WITH c1 AS (SELECT * FROM users_table WHERE value_1 = 3) SELECT * FROM c1 WHERE value_2 < 4;
SELECT * FROM cte_view_1 ORDER BY 1,2,3,4,5 LIMIT 5;
-- this is single shard query and still not supported since it has view + cte
-- router planner can't detect it
SELECT * FROM cte_view_1 WHERE user_id = 2 ORDER BY 1,2,3,4,5;
-- if CTE itself prunes down to a single shard than the view is supported (router plannable)
CREATE VIEW cte_view_2 AS
WITH c1 AS (SELECT * FROM users_table WHERE user_id = 2) SELECT * FROM c1 WHERE value_1 = 3;
SELECT * FROM cte_view_2;
CREATE VIEW router_view AS SELECT * FROM users_table WHERE user_id = 2;
-- router plannable
SELECT user_id FROM router_view GROUP BY 1;
-- join a router view
SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN recent_events USING (user_id) ORDER BY 2 LIMIT 3;
SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN (SELECT * FROM recent_events) re USING (user_id) ORDER BY 2 LIMIT 3;
-- views with limits
CREATE VIEW recent_10_users AS
SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
ORDER BY lastseen DESC
LIMIT 10;
-- this is not supported since it has limit in it and subquery_pushdown is not set
SELECT * FROM recent_10_users;
SET citus.subquery_pushdown to ON;
-- still not supported since outer query does not have limit
-- it shows a different (subquery with single relation) error message
SELECT * FROM recent_10_users;
-- now it displays more correct error message
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id);
-- now both are supported when there is a limit on the outer most query
SELECT * FROM recent_10_users ORDER BY lastseen DESC LIMIT 10;
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
RESET citus.subquery_pushdown;
VACUUM ANALYZE users_table;
-- explain tests
EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
EXPLAIN (COSTS FALSE) SELECT *
FROM (
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 4 AND user_id > 1
ORDER BY user_id;
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
SET citus.subquery_pushdown to ON;
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
RESET citus.subquery_pushdown;
DROP VIEW recent_10_users;
DROP VIEW router_view;
DROP VIEW cte_view_2;
DROP VIEW cte_view_1;
DROP VIEW distinct_value_1;
DROP VIEW distinct_user_with_value_1_3;
DROP VIEW recent_selected_users;
DROP VIEW selected_users;
DROP VIEW recent_events;
DROP VIEW recent_users;