mirror of https://github.com/citusdata/citus.git
437 lines
16 KiB
SQL
437 lines
16 KiB
SQL
--
|
|
-- MULTI_VIEW
|
|
--
|
|
|
|
-- This file contains test cases for view support. It verifies various
|
|
-- Citus features: simple selects, aggregates, joins, outer joins
|
|
-- router queries, single row inserts, multi row inserts via insert
|
|
-- into select, multi row insert via copy commands.
|
|
|
|
-- print whether we're using version > 10 to make version-specific tests clear
|
|
SHOW server_version \gset
|
|
SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten;
|
|
|
|
SELECT count(*) FROM lineitem_hash_part;
|
|
|
|
SELECT count(*) FROM orders_hash_part;
|
|
|
|
-- create a view for priority orders
|
|
CREATE VIEW priority_orders AS SELECT * FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM';
|
|
|
|
-- aggregate pushdown
|
|
SELECT o_orderpriority, count(*) FROM priority_orders GROUP BY 1 ORDER BY 2, 1;
|
|
|
|
SELECT o_orderpriority, count(*) FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM' GROUP BY 1 ORDER BY 2,1;
|
|
|
|
-- filters
|
|
SELECT o_orderpriority, count(*) as all, count(*) FILTER (WHERE o_orderstatus ='F') as fullfilled FROM priority_orders GROUP BY 1 ORDER BY 2, 1;
|
|
|
|
-- having
|
|
SELECT o_orderdate, count(*) from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc;
|
|
|
|
-- having with filters
|
|
SELECT o_orderdate, count(*) as all, count(*) FILTER(WHERE o_orderstatus = 'F') from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc;
|
|
|
|
-- limit
|
|
SELECT o_orderkey, o_totalprice from orders_hash_part order by 2 desc, 1 asc limit 5 ;
|
|
|
|
SELECT o_orderkey, o_totalprice from priority_orders order by 2 desc, 1 asc limit 1 ;
|
|
|
|
CREATE VIEW priority_lineitem AS SELECT li.* FROM lineitem_hash_part li JOIN priority_orders ON (l_orderkey = o_orderkey);
|
|
|
|
SELECT l_orderkey, count(*) FROM priority_lineitem GROUP BY 1 ORDER BY 2 DESC, 1 LIMIT 5;
|
|
|
|
CREATE VIEW air_shipped_lineitems AS SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR';
|
|
|
|
-- join between view and table
|
|
SELECT count(*) FROM orders_hash_part join air_shipped_lineitems ON (o_orderkey = l_orderkey);
|
|
|
|
-- join between views
|
|
SELECT count(*) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey);
|
|
|
|
-- count distinct on partition column is supported
|
|
SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey);
|
|
|
|
-- count distinct on non-partition column is supported
|
|
SELECT count(distinct o_orderpriority) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey);
|
|
|
|
-- count distinct on partition column is supported on router queries
|
|
SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems
|
|
ON (o_orderkey = l_orderkey)
|
|
WHERE (o_orderkey = 231);
|
|
|
|
-- select distinct on router joins of views also works
|
|
SELECT distinct(o_orderkey) FROM priority_orders join air_shipped_lineitems
|
|
ON (o_orderkey = l_orderkey)
|
|
WHERE (o_orderkey = 231);
|
|
|
|
-- left join support depends on flattening of the query
|
|
-- following query fails since the inner part is kept as subquery
|
|
SELECT * FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey);
|
|
|
|
-- however, this works
|
|
SELECT count(*) FROM priority_orders left join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
|
|
|
|
-- view at the inner side of is not supported
|
|
SELECT count(*) FROM priority_orders right join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
|
|
|
|
-- but view at the outer side is. This is essentially the same as a left join with arguments reversed.
|
|
SELECT count(*) FROM lineitem_hash_part right join priority_orders ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
|
|
|
|
-- left join on router query is supported
|
|
SELECT o_orderkey, l_linenumber FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey)
|
|
WHERE o_orderkey = 2;
|
|
|
|
-- repartition query on view join
|
|
-- it passes planning, fails at execution stage
|
|
SET client_min_messages TO DEBUG1;
|
|
SELECT * FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey) ORDER BY o_orderkey DESC, o_custkey DESC, o_orderpriority DESC LIMIT 5;
|
|
RESET client_min_messages;
|
|
|
|
SELECT count(*) FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey);
|
|
|
|
-- materialized views work
|
|
-- insert into... select works with views
|
|
CREATE TABLE temp_lineitem(LIKE lineitem_hash_part);
|
|
SELECT create_distributed_table('temp_lineitem', 'l_orderkey', 'hash', 'lineitem_hash_part');
|
|
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems;
|
|
SELECT count(*) FROM temp_lineitem;
|
|
-- following is a where false query, should not be inserting anything
|
|
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems WHERE l_shipmode = 'MAIL';
|
|
SELECT count(*) FROM temp_lineitem;
|
|
|
|
-- can create and query materialized views
|
|
CREATE MATERIALIZED VIEW mode_counts
|
|
AS SELECT l_shipmode, count(*) FROM temp_lineitem GROUP BY l_shipmode;
|
|
|
|
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
|
|
|
|
-- materialized views are local, cannot join with distributed tables
|
|
SELECT count(*) FROM mode_counts JOIN temp_lineitem USING (l_shipmode);
|
|
|
|
-- new data is not immediately reflected in the view
|
|
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems;
|
|
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
|
|
|
|
-- refresh updates the materialised view with new data
|
|
REFRESH MATERIALIZED VIEW mode_counts;
|
|
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
|
|
|
|
DROP MATERIALIZED VIEW mode_counts;
|
|
|
|
SET citus.task_executor_type to "task-tracker";
|
|
|
|
-- single view repartition subqueries are not supported
|
|
SELECT l_suppkey, count(*) FROM
|
|
(SELECT l_suppkey, l_shipdate, count(*)
|
|
FROM air_shipped_lineitems GROUP BY l_suppkey, l_shipdate) supps
|
|
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
|
|
|
|
-- logically same query without a view works fine
|
|
SELECT l_suppkey, count(*) FROM
|
|
(SELECT l_suppkey, l_shipdate, count(*)
|
|
FROM lineitem_hash_part WHERE l_shipmode = 'AIR' GROUP BY l_suppkey, l_shipdate) supps
|
|
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
|
|
|
|
-- when a view is replaced by actual query it still fails
|
|
SELECT l_suppkey, count(*) FROM
|
|
(SELECT l_suppkey, l_shipdate, count(*)
|
|
FROM (SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR') asi
|
|
GROUP BY l_suppkey, l_shipdate) supps
|
|
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
|
|
|
|
-- repartition query on view with single table subquery
|
|
CREATE VIEW supp_count_view AS SELECT * FROM (SELECT l_suppkey, count(*) FROM lineitem_hash_part GROUP BY 1) s1;
|
|
SELECT * FROM supp_count_view ORDER BY 2 DESC, 1 LIMIT 10;
|
|
|
|
SET citus.task_executor_type to DEFAULT;
|
|
|
|
-- create a view with aggregate
|
|
CREATE VIEW lineitems_by_shipping_method AS
|
|
SELECT l_shipmode, count(*) as cnt FROM lineitem_hash_part GROUP BY 1;
|
|
|
|
-- following will be supported via recursive planning
|
|
SELECT * FROM lineitems_by_shipping_method ORDER BY 1,2 LIMIT 5;
|
|
|
|
-- create a view with group by on partition column
|
|
CREATE VIEW lineitems_by_orderkey AS
|
|
SELECT
|
|
l_orderkey, count(*)
|
|
FROM
|
|
lineitem_hash_part
|
|
GROUP BY 1;
|
|
|
|
-- this should work since we're able to push down this query
|
|
SELECT * FROM lineitems_by_orderkey ORDER BY 2 DESC, 1 ASC LIMIT 10;
|
|
|
|
-- it would also work since it is made router plannable
|
|
SELECT * FROM lineitems_by_orderkey WHERE l_orderkey = 100;
|
|
|
|
DROP TABLE temp_lineitem CASCADE;
|
|
|
|
DROP VIEW supp_count_view;
|
|
DROP VIEW lineitems_by_orderkey;
|
|
DROP VIEW lineitems_by_shipping_method;
|
|
DROP VIEW air_shipped_lineitems;
|
|
DROP VIEW priority_lineitem;
|
|
DROP VIEW priority_orders;
|
|
|
|
-- new tests for real time use case including views and subqueries
|
|
|
|
-- create view to display recent user who has an activity after a timestamp
|
|
CREATE VIEW recent_users AS
|
|
SELECT user_id, max(time) as lastseen FROM users_table
|
|
GROUP BY user_id
|
|
HAVING max(time) > '2017-11-23 16:20:33.264457'::timestamp order by 2 DESC;
|
|
SELECT * FROM recent_users;
|
|
|
|
-- create a view for recent_events
|
|
CREATE VIEW recent_events AS
|
|
SELECT user_id, time FROM events_table
|
|
WHERE time > '2017-11-23 16:20:33.264457'::timestamp;
|
|
|
|
SELECT count(*) FROM recent_events;
|
|
|
|
-- count number of events of recent_users
|
|
SELECT count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id);
|
|
-- count number of events of per recent users order by count
|
|
SELECT ru.user_id, count(*)
|
|
FROM recent_users ru
|
|
JOIN events_table et
|
|
ON (ru.user_id = et.user_id)
|
|
GROUP BY ru.user_id
|
|
ORDER BY 2 DESC, 1;
|
|
|
|
-- the same query with a left join however, it would still generate the same result
|
|
SELECT ru.user_id, count(*)
|
|
FROM recent_users ru
|
|
LEFT JOIN events_table et
|
|
ON (ru.user_id = et.user_id)
|
|
GROUP BY ru.user_id
|
|
ORDER BY 2 DESC, 1;
|
|
|
|
-- query wrapped inside a subquery, it needs another top level order by
|
|
SELECT * FROM
|
|
(SELECT ru.user_id, count(*)
|
|
FROM recent_users ru
|
|
JOIN events_table et
|
|
ON (ru.user_id = et.user_id)
|
|
GROUP BY ru.user_id
|
|
ORDER BY 2 DESC, 1) s1
|
|
ORDER BY 2 DESC, 1;
|
|
|
|
-- non-partition key joins are supported inside subquery
|
|
-- via pull-push execution
|
|
SELECT * FROM
|
|
(SELECT ru.user_id, count(*)
|
|
FROM recent_users ru
|
|
JOIN events_table et
|
|
ON (ru.user_id = et.event_type)
|
|
GROUP BY ru.user_id
|
|
ORDER BY 2 DESC, 1) s1
|
|
ORDER BY 2 DESC, 1;
|
|
|
|
-- join between views
|
|
-- recent users who has an event in recent events
|
|
SELECT ru.user_id FROM recent_users ru JOIN recent_events re USING(user_id) GROUP BY ru.user_id ORDER BY ru.user_id;
|
|
|
|
-- outer join inside a subquery
|
|
-- recent_events who are not done by recent users
|
|
SELECT count(*) FROM (
|
|
SELECT re.*, ru.user_id AS recent_user
|
|
FROM recent_events re LEFT JOIN recent_users ru USING(user_id)) reu
|
|
WHERE recent_user IS NULL;
|
|
|
|
-- same query with anti-join
|
|
SELECT count(*)
|
|
FROM recent_events re LEFT JOIN recent_users ru ON(ru.user_id = re.user_id)
|
|
WHERE ru.user_id IS NULL;
|
|
|
|
-- join between view and table
|
|
-- users who has recent activity and they have an entry with value_1 is less than 3
|
|
SELECT ut.* FROM recent_users ru JOIN users_table ut USING (user_id) WHERE ut.value_1 < 3 ORDER BY 1,2;
|
|
|
|
-- determine if a recent user has done a given event type or not
|
|
SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
|
|
FROM recent_users ru
|
|
LEFT JOIN events_table et
|
|
ON(ru.user_id = et.user_id AND et.event_type = 6)
|
|
ORDER BY 2 DESC, 1;
|
|
|
|
-- view vs table join wrapped inside a subquery
|
|
SELECT * FROM
|
|
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
|
|
FROM recent_users ru
|
|
LEFT JOIN events_table et
|
|
ON(ru.user_id = et.user_id AND et.event_type = 6)
|
|
) s1
|
|
ORDER BY 2 DESC, 1;
|
|
|
|
-- event vs table non-partition-key join is not supported
|
|
-- given that we cannot recursively plan tables yet
|
|
SELECT * FROM
|
|
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
|
|
FROM recent_users ru
|
|
LEFT JOIN events_table et
|
|
ON(ru.user_id = et.event_type)
|
|
) s1
|
|
ORDER BY 2 DESC, 1;
|
|
|
|
-- create a select only view
|
|
CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 1 and value_1 <3;
|
|
CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id);
|
|
|
|
SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
|
|
|
|
-- this would be supported when we implement where partition_key in (subquery) support
|
|
SELECT et.user_id, et.time FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users) GROUP BY 1,2 ORDER BY 1 DESC,2 DESC LIMIT 5;
|
|
|
|
-- it is supported when it is a router query
|
|
SELECT count(*) FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users WHERE user_id = 1);
|
|
|
|
-- union between views is supported through recursive planning
|
|
(SELECT user_id FROM recent_users)
|
|
UNION
|
|
(SELECT user_id FROM selected_users)
|
|
ORDER BY 1;
|
|
|
|
-- wrapping it inside a SELECT * works
|
|
SELECT *
|
|
FROM (
|
|
(SELECT user_id FROM recent_users)
|
|
UNION
|
|
(SELECT user_id FROM selected_users) ) u
|
|
WHERE user_id < 2 AND user_id > 0
|
|
ORDER BY user_id;
|
|
|
|
-- union all also works for views
|
|
SELECT *
|
|
FROM (
|
|
(SELECT user_id FROM recent_users)
|
|
UNION ALL
|
|
(SELECT user_id FROM selected_users) ) u
|
|
WHERE user_id < 2 AND user_id > 0
|
|
ORDER BY user_id;
|
|
|
|
SELECT count(*)
|
|
FROM (
|
|
(SELECT user_id FROM recent_users)
|
|
UNION
|
|
(SELECT user_id FROM selected_users) ) u
|
|
WHERE user_id < 2 AND user_id > 0;
|
|
|
|
-- UNION ALL between views is supported through recursive planning
|
|
SELECT count(*)
|
|
FROM (
|
|
(SELECT user_id FROM recent_users)
|
|
UNION ALL
|
|
(SELECT user_id FROM selected_users) ) u
|
|
WHERE user_id < 2 AND user_id > 0;
|
|
|
|
-- expand view definitions and re-run last 2 queries
|
|
SELECT count(*)
|
|
FROM (
|
|
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
|
|
GROUP BY user_id
|
|
HAVING max(time) > '2017-11-22 05:45:49.978738'::timestamp order by 2 DESC) aa
|
|
)
|
|
UNION
|
|
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 1 and value_1 < 3) bb) ) u
|
|
WHERE user_id < 2 AND user_id > 0;
|
|
|
|
SELECT count(*)
|
|
FROM (
|
|
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
|
|
GROUP BY user_id
|
|
HAVING max(time) > '2017-11-22 05:45:49.978738'::timestamp order by 2 DESC) aa
|
|
)
|
|
UNION ALL
|
|
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 1 and value_1 < 3) bb) ) u
|
|
WHERE user_id < 2 AND user_id > 0;
|
|
|
|
-- test distinct
|
|
-- distinct is supported if it is on a partition key
|
|
CREATE VIEW distinct_user_with_value_1_3 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 3;
|
|
SELECT * FROM distinct_user_with_value_1_3 ORDER BY user_id;
|
|
|
|
-- distinct is not supported if it is on a non-partition key
|
|
-- but will be supported via recursive planning
|
|
CREATE VIEW distinct_value_1 AS SELECT DISTINCT value_1 FROM users_table WHERE value_2 = 3;
|
|
SELECT * FROM distinct_value_1 ORDER BY 1 DESC LIMIT 5;
|
|
|
|
-- CTEs are supported even if they are on views
|
|
CREATE VIEW cte_view_1 AS
|
|
WITH c1 AS (SELECT * FROM users_table WHERE value_1 = 3) SELECT * FROM c1 WHERE value_2 < 4;
|
|
|
|
SELECT * FROM cte_view_1 ORDER BY 1,2,3,4,5 LIMIT 5;
|
|
|
|
-- this is single shard query and still not supported since it has view + cte
|
|
-- router planner can't detect it
|
|
SELECT * FROM cte_view_1 WHERE user_id = 2 ORDER BY 1,2,3,4,5;
|
|
|
|
-- if CTE itself prunes down to a single shard than the view is supported (router plannable)
|
|
CREATE VIEW cte_view_2 AS
|
|
WITH c1 AS (SELECT * FROM users_table WHERE user_id = 2) SELECT * FROM c1 WHERE value_1 = 3;
|
|
SELECT * FROM cte_view_2;
|
|
|
|
CREATE VIEW router_view AS SELECT * FROM users_table WHERE user_id = 2;
|
|
-- router plannable
|
|
SELECT user_id FROM router_view GROUP BY 1;
|
|
|
|
-- join a router view
|
|
SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN recent_events USING (user_id) ORDER BY 2 LIMIT 3;
|
|
SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN (SELECT * FROM recent_events) re USING (user_id) ORDER BY 2 LIMIT 3;
|
|
|
|
-- views with limits
|
|
CREATE VIEW recent_10_users AS
|
|
SELECT user_id, max(time) as lastseen FROM users_table
|
|
GROUP BY user_id
|
|
ORDER BY lastseen DESC
|
|
LIMIT 10;
|
|
|
|
-- this is not supported since it has limit in it and subquery_pushdown is not set
|
|
SELECT * FROM recent_10_users;
|
|
|
|
SET citus.subquery_pushdown to ON;
|
|
-- still not supported since outer query does not have limit
|
|
-- it shows a different (subquery with single relation) error message
|
|
SELECT * FROM recent_10_users;
|
|
-- now it displays more correct error message
|
|
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id);
|
|
|
|
-- now both are supported when there is a limit on the outer most query
|
|
SELECT * FROM recent_10_users ORDER BY lastseen DESC LIMIT 10;
|
|
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
|
|
|
|
RESET citus.subquery_pushdown;
|
|
|
|
VACUUM ANALYZE users_table;
|
|
|
|
-- explain tests
|
|
EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
|
|
|
|
EXPLAIN (COSTS FALSE) SELECT *
|
|
FROM (
|
|
(SELECT user_id FROM recent_users)
|
|
UNION
|
|
(SELECT user_id FROM selected_users) ) u
|
|
WHERE user_id < 4 AND user_id > 1
|
|
ORDER BY user_id;
|
|
|
|
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
|
|
SET citus.subquery_pushdown to ON;
|
|
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
|
|
|
|
RESET citus.subquery_pushdown;
|
|
|
|
DROP VIEW recent_10_users;
|
|
DROP VIEW router_view;
|
|
DROP VIEW cte_view_2;
|
|
DROP VIEW cte_view_1;
|
|
DROP VIEW distinct_value_1;
|
|
DROP VIEW distinct_user_with_value_1_3;
|
|
DROP VIEW recent_selected_users;
|
|
DROP VIEW selected_users;
|
|
DROP VIEW recent_events;
|
|
DROP VIEW recent_users;
|