citus/src/test/regress/expected/multi_view_0.out

938 lines
40 KiB
Plaintext

--
-- MULTI_VIEW
--
-- This file contains test cases for view support. It verifies various
-- Citus features: simple selects, aggregates, joins, outer joins
-- router queries, single row inserts, multi row inserts via insert
-- into select, multi row insert via copy commands.
-- print whether we're using version > 10 to make version-specific tests clear
SHOW server_version \gset
SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten;
version_above_ten
-------------------
f
(1 row)
SELECT count(*) FROM lineitem_hash_part;
count
-------
12000
(1 row)
SELECT count(*) FROM orders_hash_part;
count
-------
2985
(1 row)
-- create a view for priority orders
CREATE VIEW priority_orders AS SELECT * FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM';
-- aggregate pushdown
SELECT o_orderpriority, count(*) FROM priority_orders GROUP BY 1 ORDER BY 2, 1;
o_orderpriority | count
-----------------+-------
2-HIGH | 593
1-URGENT | 604
(2 rows)
SELECT o_orderpriority, count(*) FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM' GROUP BY 1 ORDER BY 2,1;
o_orderpriority | count
-----------------+-------
2-HIGH | 593
1-URGENT | 604
(2 rows)
-- filters
SELECT o_orderpriority, count(*) as all, count(*) FILTER (WHERE o_orderstatus ='F') as fullfilled FROM priority_orders GROUP BY 1 ORDER BY 2, 1;
o_orderpriority | all | fullfilled
-----------------+-----+------------
2-HIGH | 593 | 271
1-URGENT | 604 | 280
(2 rows)
-- having
SELECT o_orderdate, count(*) from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc;
o_orderdate | count
-------------+-------
08-20-1996 | 5
10-10-1994 | 4
05-05-1994 | 4
04-07-1994 | 4
03-17-1993 | 4
(5 rows)
-- having with filters
SELECT o_orderdate, count(*) as all, count(*) FILTER(WHERE o_orderstatus = 'F') from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc;
o_orderdate | all | count
-------------+-----+-------
08-20-1996 | 5 | 0
10-10-1994 | 4 | 4
05-05-1994 | 4 | 4
04-07-1994 | 4 | 4
03-17-1993 | 4 | 4
(5 rows)
-- limit
SELECT o_orderkey, o_totalprice from orders_hash_part order by 2 desc, 1 asc limit 5 ;
o_orderkey | o_totalprice
------------+--------------
4421 | 401055.62
10209 | 400191.77
11142 | 395039.05
14179 | 384265.43
11296 | 378166.33
(5 rows)
SELECT o_orderkey, o_totalprice from priority_orders order by 2 desc, 1 asc limit 1 ;
o_orderkey | o_totalprice
------------+--------------
14179 | 384265.43
(1 row)
CREATE VIEW priority_lineitem AS SELECT li.* FROM lineitem_hash_part li JOIN priority_orders ON (l_orderkey = o_orderkey);
SELECT l_orderkey, count(*) FROM priority_lineitem GROUP BY 1 ORDER BY 2 DESC, 1 LIMIT 5;
l_orderkey | count
------------+-------
7 | 7
225 | 7
226 | 7
322 | 7
326 | 7
(5 rows)
CREATE VIEW air_shipped_lineitems AS SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR';
-- join between view and table
SELECT count(*) FROM orders_hash_part join air_shipped_lineitems ON (o_orderkey = l_orderkey);
count
-------
1706
(1 row)
-- join between views
SELECT count(*) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey);
count
-------
700
(1 row)
-- count distinct on partition column is supported
SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey);
count
-------
551
(1 row)
-- count distinct on non-partition column is supported
SELECT count(distinct o_orderpriority) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey);
count
-------
2
(1 row)
-- count distinct on partition column is supported on router queries
SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems
ON (o_orderkey = l_orderkey)
WHERE (o_orderkey = 231);
count
-------
1
(1 row)
-- select distinct on router joins of views also works
SELECT distinct(o_orderkey) FROM priority_orders join air_shipped_lineitems
ON (o_orderkey = l_orderkey)
WHERE (o_orderkey = 231);
o_orderkey
------------
231
(1 row)
-- left join support depends on flattening of the query
-- following query fails since the inner part is kept as subquery
SELECT * FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey);
ERROR: cannot perform distributed planning on this query
DETAIL: Subqueries in outer joins are not supported
-- however, this works
SELECT count(*) FROM priority_orders left join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
count
-------
700
(1 row)
-- view at the inner side of is not supported
SELECT count(*) FROM priority_orders right join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
ERROR: cannot perform distributed planning on this query
DETAIL: Subqueries in outer joins are not supported
-- but view at the outer side is. This is essentially the same as a left join with arguments reversed.
SELECT count(*) FROM lineitem_hash_part right join priority_orders ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
count
-------
700
(1 row)
-- left join on router query is supported
SELECT o_orderkey, l_linenumber FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey)
WHERE o_orderkey = 2;
o_orderkey | l_linenumber
------------+--------------
2 |
(1 row)
-- repartition query on view join
-- it passes planning, fails at execution stage
SET client_min_messages TO DEBUG1;
SELECT * FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey) ORDER BY o_orderkey DESC, o_custkey DESC, o_orderpriority DESC LIMIT 5;
DEBUG: generating subplan 22_1 for subquery SELECT lineitem_hash_part.l_orderkey, lineitem_hash_part.l_partkey, lineitem_hash_part.l_suppkey, lineitem_hash_part.l_linenumber, lineitem_hash_part.l_quantity, lineitem_hash_part.l_extendedprice, lineitem_hash_part.l_discount, lineitem_hash_part.l_tax, lineitem_hash_part.l_returnflag, lineitem_hash_part.l_linestatus, lineitem_hash_part.l_shipdate, lineitem_hash_part.l_commitdate, lineitem_hash_part.l_receiptdate, lineitem_hash_part.l_shipinstruct, lineitem_hash_part.l_shipmode, lineitem_hash_part.l_comment FROM public.lineitem_hash_part WHERE (lineitem_hash_part.l_shipmode = 'AIR'::bpchar)
DEBUG: Plan 22 query after replacing subqueries and CTEs: SELECT priority_orders.o_orderkey, priority_orders.o_custkey, priority_orders.o_orderstatus, priority_orders.o_totalprice, priority_orders.o_orderdate, priority_orders.o_orderpriority, priority_orders.o_clerk, priority_orders.o_shippriority, priority_orders.o_comment, air_shipped_lineitems.l_orderkey, air_shipped_lineitems.l_partkey, air_shipped_lineitems.l_suppkey, air_shipped_lineitems.l_linenumber, air_shipped_lineitems.l_quantity, air_shipped_lineitems.l_extendedprice, air_shipped_lineitems.l_discount, air_shipped_lineitems.l_tax, air_shipped_lineitems.l_returnflag, air_shipped_lineitems.l_linestatus, air_shipped_lineitems.l_shipdate, air_shipped_lineitems.l_commitdate, air_shipped_lineitems.l_receiptdate, air_shipped_lineitems.l_shipinstruct, air_shipped_lineitems.l_shipmode, air_shipped_lineitems.l_comment FROM ((SELECT orders_hash_part.o_orderkey, orders_hash_part.o_custkey, orders_hash_part.o_orderstatus, orders_hash_part.o_totalprice, orders_hash_part.o_orderdate, orders_hash_part.o_orderpriority, orders_hash_part.o_clerk, orders_hash_part.o_shippriority, orders_hash_part.o_comment FROM public.orders_hash_part WHERE (orders_hash_part.o_orderpriority < '3-MEDIUM'::bpchar)) priority_orders JOIN (SELECT intermediate_result.l_orderkey, intermediate_result.l_partkey, intermediate_result.l_suppkey, intermediate_result.l_linenumber, intermediate_result.l_quantity, intermediate_result.l_extendedprice, intermediate_result.l_discount, intermediate_result.l_tax, intermediate_result.l_returnflag, intermediate_result.l_linestatus, intermediate_result.l_shipdate, intermediate_result.l_commitdate, intermediate_result.l_receiptdate, intermediate_result.l_shipinstruct, intermediate_result.l_shipmode, intermediate_result.l_comment FROM read_intermediate_result('22_1'::text, 'binary'::citus_copy_format) intermediate_result(l_orderkey bigint, l_partkey integer, l_suppkey integer, l_linenumber integer, l_quantity numeric(15,2), l_extendedprice numeric(15,2), l_discount numeric(15,2), l_tax numeric(15,2), l_returnflag character(1), l_linestatus character(1), l_shipdate date, l_commitdate date, l_receiptdate date, l_shipinstruct character(25), l_shipmode character(10), l_comment character varying(44))) air_shipped_lineitems ON ((priority_orders.o_custkey = air_shipped_lineitems.l_suppkey))) ORDER BY priority_orders.o_orderkey DESC, priority_orders.o_custkey DESC, priority_orders.o_orderpriority DESC LIMIT 5
DEBUG: push down of limit count: 5
o_orderkey | o_custkey | o_orderstatus | o_totalprice | o_orderdate | o_orderpriority | o_clerk | o_shippriority | o_comment | l_orderkey | l_partkey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate | l_shipinstruct | l_shipmode | l_comment
------------+-----------+---------------+--------------+-------------+-----------------+-----------------+----------------+-------------------------------------------------------+------------+-----------+-----------+--------------+------------+-----------------+------------+-------+--------------+--------------+------------+--------------+---------------+---------------------------+------------+-------------------------------------------
14821 | 1435 | O | 322002.95 | 06-12-1998 | 2-HIGH | Clerk#000000630 | 0 | n packages are furiously ironic ideas. d | 1607 | 118923 | 1435 | 2 | 37.00 | 71851.04 | 0.05 | 0.02 | N | O | 02-27-1996 | 02-18-1996 | 03-16-1996 | NONE | AIR | alongside
14790 | 613 | O | 270163.54 | 08-21-1996 | 2-HIGH | Clerk#000000347 | 0 | p. regular deposits wake. final n | 2629 | 123076 | 613 | 2 | 31.00 | 34071.17 | 0.08 | 0.03 | N | O | 05-24-1998 | 05-26-1998 | 06-10-1998 | COLLECT COD | AIR | ate blithely bold, regular deposits. bold
14758 | 1225 | F | 37812.49 | 10-27-1993 | 2-HIGH | Clerk#000000687 | 0 | ages nag about the furio | 9156 | 176190 | 1225 | 2 | 22.00 | 27856.18 | 0.03 | 0.00 | R | F | 02-08-1994 | 04-01-1994 | 02-24-1994 | DELIVER IN PERSON | AIR | equests dete
14725 | 569 | O | 261801.45 | 06-17-1995 | 2-HIGH | Clerk#000000177 | 0 | ng asymptotes. final, ironic accounts cajole after | 14688 | 173017 | 569 | 3 | 10.00 | 10900.10 | 0.02 | 0.08 | N | O | 03-14-1997 | 04-22-1997 | 04-05-1997 | COLLECT COD | AIR | riously even packages sleep a
14657 | 370 | F | 116160.53 | 02-28-1994 | 1-URGENT | Clerk#000000756 | 0 | ly across the ironic, ironic instructions. bold ideas | 5153 | 67863 | 370 | 3 | 30.00 | 54925.80 | 0.09 | 0.01 | N | O | 11-10-1995 | 11-14-1995 | 11-16-1995 | DELIVER IN PERSON | AIR | beans sleep bl
(5 rows)
RESET client_min_messages;
SELECT count(*) FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey);
count
-------
192
(1 row)
-- materialized views work
-- insert into... select works with views
CREATE TABLE temp_lineitem(LIKE lineitem_hash_part);
SELECT create_distributed_table('temp_lineitem', 'l_orderkey', 'hash', 'lineitem_hash_part');
create_distributed_table
--------------------------
(1 row)
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems;
SELECT count(*) FROM temp_lineitem;
count
-------
1706
(1 row)
-- following is a where false query, should not be inserting anything
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems WHERE l_shipmode = 'MAIL';
SELECT count(*) FROM temp_lineitem;
count
-------
1706
(1 row)
-- can create and query materialized views
CREATE MATERIALIZED VIEW mode_counts
AS SELECT l_shipmode, count(*) FROM temp_lineitem GROUP BY l_shipmode;
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
l_shipmode | count
------------+-------
AIR | 1706
(1 row)
-- materialized views are local, cannot join with distributed tables
SELECT count(*) FROM mode_counts JOIN temp_lineitem USING (l_shipmode);
ERROR: relation mode_counts is not distributed
-- new data is not immediately reflected in the view
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems;
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
l_shipmode | count
------------+-------
AIR | 1706
(1 row)
-- refresh updates the materialised view with new data
REFRESH MATERIALIZED VIEW mode_counts;
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
l_shipmode | count
------------+-------
AIR | 3412
(1 row)
DROP MATERIALIZED VIEW mode_counts;
SET citus.task_executor_type to "task-tracker";
-- single view repartition subqueries are not supported
SELECT l_suppkey, count(*) FROM
(SELECT l_suppkey, l_shipdate, count(*)
FROM air_shipped_lineitems GROUP BY l_suppkey, l_shipdate) supps
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
ERROR: cannot perform distributed planning on this query
DETAIL: Subqueries without group by clause are not supported yet
-- logically same query without a view works fine
SELECT l_suppkey, count(*) FROM
(SELECT l_suppkey, l_shipdate, count(*)
FROM lineitem_hash_part WHERE l_shipmode = 'AIR' GROUP BY l_suppkey, l_shipdate) supps
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
l_suppkey | count
-----------+-------
7680 | 4
160 | 3
1042 | 3
1318 | 3
5873 | 3
(5 rows)
-- when a view is replaced by actual query it still fails
SELECT l_suppkey, count(*) FROM
(SELECT l_suppkey, l_shipdate, count(*)
FROM (SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR') asi
GROUP BY l_suppkey, l_shipdate) supps
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
ERROR: cannot perform distributed planning on this query
DETAIL: Subqueries without group by clause are not supported yet
-- repartition query on view with single table subquery
CREATE VIEW supp_count_view AS SELECT * FROM (SELECT l_suppkey, count(*) FROM lineitem_hash_part GROUP BY 1) s1;
SELECT * FROM supp_count_view ORDER BY 2 DESC, 1 LIMIT 10;
l_suppkey | count
-----------+-------
6104 | 8
1868 | 6
5532 | 6
5849 | 6
6169 | 6
6669 | 6
6692 | 6
7703 | 6
7869 | 6
8426 | 6
(10 rows)
SET citus.task_executor_type to DEFAULT;
-- create a view with aggregate
CREATE VIEW lineitems_by_shipping_method AS
SELECT l_shipmode, count(*) as cnt FROM lineitem_hash_part GROUP BY 1;
-- following will be supported via recursive planning
SELECT * FROM lineitems_by_shipping_method ORDER BY 1,2 LIMIT 5;
l_shipmode | cnt
------------+------
AIR | 1706
FOB | 1709
MAIL | 1739
RAIL | 1706
REG AIR | 1679
(5 rows)
-- create a view with group by on partition column
CREATE VIEW lineitems_by_orderkey AS
SELECT
l_orderkey, count(*)
FROM
lineitem_hash_part
GROUP BY 1;
-- this should work since we're able to push down this query
SELECT * FROM lineitems_by_orderkey ORDER BY 2 DESC, 1 ASC LIMIT 10;
l_orderkey | count
------------+-------
7 | 7
68 | 7
129 | 7
164 | 7
194 | 7
225 | 7
226 | 7
322 | 7
326 | 7
354 | 7
(10 rows)
-- it would also work since it is made router plannable
SELECT * FROM lineitems_by_orderkey WHERE l_orderkey = 100;
l_orderkey | count
------------+-------
100 | 5
(1 row)
DROP TABLE temp_lineitem CASCADE;
DROP VIEW supp_count_view;
DROP VIEW lineitems_by_orderkey;
DROP VIEW lineitems_by_shipping_method;
DROP VIEW air_shipped_lineitems;
DROP VIEW priority_lineitem;
DROP VIEW priority_orders;
-- new tests for real time use case including views and subqueries
-- create view to display recent user who has an activity after a timestamp
CREATE VIEW recent_users AS
SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
HAVING max(time) > '2017-11-23 16:20:33.264457'::timestamp order by 2 DESC;
SELECT * FROM recent_users;
user_id | lastseen
---------+---------------------------------
1 | Thu Nov 23 17:30:34.635085 2017
5 | Thu Nov 23 16:48:32.08896 2017
3 | Thu Nov 23 17:18:51.048758 2017
(3 rows)
-- create a view for recent_events
CREATE VIEW recent_events AS
SELECT user_id, time FROM events_table
WHERE time > '2017-11-23 16:20:33.264457'::timestamp;
SELECT count(*) FROM recent_events;
count
-------
6
(1 row)
-- count number of events of recent_users
SELECT count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id);
count
-------
50
(1 row)
-- count number of events of per recent users order by count
SELECT ru.user_id, count(*)
FROM recent_users ru
JOIN events_table et
ON (ru.user_id = et.user_id)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1;
user_id | count
---------+-------
3 | 21
1 | 15
5 | 14
(3 rows)
-- the same query with a left join however, it would still generate the same result
SELECT ru.user_id, count(*)
FROM recent_users ru
LEFT JOIN events_table et
ON (ru.user_id = et.user_id)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1;
user_id | count
---------+-------
3 | 21
1 | 15
5 | 14
(3 rows)
-- query wrapped inside a subquery, it needs another top level order by
SELECT * FROM
(SELECT ru.user_id, count(*)
FROM recent_users ru
JOIN events_table et
ON (ru.user_id = et.user_id)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1) s1
ORDER BY 2 DESC, 1;
user_id | count
---------+-------
3 | 21
1 | 15
5 | 14
(3 rows)
-- non-partition key joins are not supported inside subquery
-- since the join with a table
SELECT * FROM
(SELECT ru.user_id, count(*)
FROM recent_users ru
JOIN events_table et
ON (ru.user_id = et.event_type)
GROUP BY ru.user_id
ORDER BY 2 DESC, 1) s1
ORDER BY 2 DESC, 1;
ERROR: bogus varno: 3
-- join between views
-- recent users who has an event in recent events
SELECT ru.user_id FROM recent_users ru JOIN recent_events re USING(user_id) GROUP BY ru.user_id ORDER BY ru.user_id;
user_id
---------
1
3
(2 rows)
-- outer join inside a subquery
-- recent_events who are not done by recent users
SELECT count(*) FROM (
SELECT re.*, ru.user_id AS recent_user
FROM recent_events re LEFT JOIN recent_users ru USING(user_id)) reu
WHERE recent_user IS NULL;
count
-------
2
(1 row)
-- same query with anti-join
SELECT count(*)
FROM recent_events re LEFT JOIN recent_users ru ON(ru.user_id = re.user_id)
WHERE ru.user_id IS NULL;
count
-------
2
(1 row)
-- join between view and table
-- users who has recent activity and they have an entry with value_1 is less than 3
SELECT ut.* FROM recent_users ru JOIN users_table ut USING (user_id) WHERE ut.value_1 < 3 ORDER BY 1,2;
user_id | time | value_1 | value_2 | value_3 | value_4
---------+---------------------------------+---------+---------+---------+---------
1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 |
3 | Wed Nov 22 18:43:51.450263 2017 | 1 | 1 | 4 |
3 | Wed Nov 22 20:43:31.008625 2017 | 1 | 3 | 2 |
3 | Thu Nov 23 00:15:45.610845 2017 | 1 | 1 | 4 |
3 | Thu Nov 23 03:23:24.702501 2017 | 1 | 2 | 5 |
3 | Thu Nov 23 06:20:05.854857 2017 | 1 | 4 | 2 |
3 | Thu Nov 23 09:57:41.540228 2017 | 2 | 2 | 3 |
3 | Thu Nov 23 11:18:53.114408 2017 | 2 | 2 | 0 |
3 | Thu Nov 23 12:56:49.29191 2017 | 0 | 5 | 1 |
3 | Thu Nov 23 17:18:51.048758 2017 | 1 | 5 | 5 |
5 | Wed Nov 22 20:43:18.667473 2017 | 0 | 3 | 2 |
5 | Wed Nov 22 21:02:07.575129 2017 | 2 | 0 | 2 |
5 | Wed Nov 22 22:10:24.315371 2017 | 1 | 2 | 1 |
5 | Thu Nov 23 00:54:44.192608 2017 | 1 | 3 | 2 |
5 | Thu Nov 23 07:47:09.542999 2017 | 1 | 4 | 3 |
5 | Thu Nov 23 09:05:08.53142 2017 | 2 | 2 | 2 |
5 | Thu Nov 23 09:17:47.706703 2017 | 2 | 5 | 3 |
5 | Thu Nov 23 10:15:31.764558 2017 | 2 | 2 | 2 |
5 | Thu Nov 23 14:29:02.557934 2017 | 2 | 1 | 2 |
5 | Thu Nov 23 15:55:08.493462 2017 | 0 | 3 | 3 |
5 | Thu Nov 23 16:28:38.455322 2017 | 2 | 5 | 4 |
(21 rows)
-- determine if a recent user has done a given event type or not
SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
FROM recent_users ru
LEFT JOIN events_table et
ON(ru.user_id = et.user_id AND et.event_type = 6)
ORDER BY 2 DESC, 1;
user_id | done_event
---------+------------
1 | YES
3 | NO
5 | NO
(3 rows)
-- view vs table join wrapped inside a subquery
SELECT * FROM
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
FROM recent_users ru
LEFT JOIN events_table et
ON(ru.user_id = et.user_id AND et.event_type = 6)
) s1
ORDER BY 2 DESC, 1;
user_id | done_event
---------+------------
1 | YES
3 | NO
5 | NO
(3 rows)
-- event vs table non-partition-key join is not supported
-- given that we cannot recursively plan tables yet
SELECT * FROM
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
FROM recent_users ru
LEFT JOIN events_table et
ON(ru.user_id = et.event_type)
) s1
ORDER BY 2 DESC, 1;
ERROR: bogus varno: 3
-- create a select only view
CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 1 and value_1 <3;
CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id);
SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
user_id
---------
1
3
5
(3 rows)
-- this would be supported when we implement where partition_key in (subquery) support
SELECT et.user_id, et.time FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users) GROUP BY 1,2 ORDER BY 1 DESC,2 DESC LIMIT 5;
user_id | time
---------+---------------------------------
5 | Thu Nov 23 16:11:02.929469 2017
5 | Thu Nov 23 14:40:40.467511 2017
5 | Thu Nov 23 14:28:51.833214 2017
5 | Thu Nov 23 14:23:09.889786 2017
5 | Thu Nov 23 13:26:45.571108 2017
(5 rows)
-- it is supported when it is a router query
SELECT count(*) FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users WHERE user_id = 1);
count
-------
15
(1 row)
-- union between views is supported through recursive planning
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users)
ORDER BY 1;
user_id
---------
1
2
3
4
5
6
(6 rows)
-- wrapping it inside a SELECT * works
SELECT *
FROM (
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 2 AND user_id > 0
ORDER BY user_id;
user_id
---------
1
(1 row)
-- union all also works for views
SELECT *
FROM (
(SELECT user_id FROM recent_users)
UNION ALL
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 2 AND user_id > 0
ORDER BY user_id;
user_id
---------
1
1
(2 rows)
SELECT count(*)
FROM (
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 2 AND user_id > 0;
count
-------
1
(1 row)
-- UNION ALL between views is supported through recursive planning
SELECT count(*)
FROM (
(SELECT user_id FROM recent_users)
UNION ALL
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 2 AND user_id > 0;
count
-------
2
(1 row)
-- expand view definitions and re-run last 2 queries
SELECT count(*)
FROM (
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
HAVING max(time) > '2017-11-22 05:45:49.978738'::timestamp order by 2 DESC) aa
)
UNION
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 1 and value_1 < 3) bb) ) u
WHERE user_id < 2 AND user_id > 0;
count
-------
1
(1 row)
SELECT count(*)
FROM (
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
HAVING max(time) > '2017-11-22 05:45:49.978738'::timestamp order by 2 DESC) aa
)
UNION ALL
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 1 and value_1 < 3) bb) ) u
WHERE user_id < 2 AND user_id > 0;
count
-------
2
(1 row)
-- test distinct
-- distinct is supported if it is on a partition key
CREATE VIEW distinct_user_with_value_1_3 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 3;
SELECT * FROM distinct_user_with_value_1_3 ORDER BY user_id;
user_id
---------
1
2
3
4
5
6
(6 rows)
-- distinct is not supported if it is on a non-partition key
-- but will be supported via recursive planning
CREATE VIEW distinct_value_1 AS SELECT DISTINCT value_1 FROM users_table WHERE value_2 = 3;
SELECT * FROM distinct_value_1 ORDER BY 1 DESC LIMIT 5;
value_1
---------
5
4
3
2
1
(5 rows)
-- CTEs are supported even if they are on views
CREATE VIEW cte_view_1 AS
WITH c1 AS (SELECT * FROM users_table WHERE value_1 = 3) SELECT * FROM c1 WHERE value_2 < 4;
SELECT * FROM cte_view_1 ORDER BY 1,2,3,4,5 LIMIT 5;
user_id | time | value_1 | value_2 | value_3 | value_4
---------+---------------------------------+---------+---------+---------+---------
1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 |
2 | Thu Nov 23 13:52:54.83829 2017 | 3 | 1 | 4 |
3 | Wed Nov 22 23:24:32.080584 2017 | 3 | 2 | 5 |
4 | Wed Nov 22 23:59:46.493416 2017 | 3 | 1 | 3 |
4 | Thu Nov 23 01:55:21.824618 2017 | 3 | 1 | 4 |
(5 rows)
-- this is single shard query and still not supported since it has view + cte
-- router planner can't detect it
SELECT * FROM cte_view_1 WHERE user_id = 2 ORDER BY 1,2,3,4,5;
user_id | time | value_1 | value_2 | value_3 | value_4
---------+--------------------------------+---------+---------+---------+---------
2 | Thu Nov 23 13:52:54.83829 2017 | 3 | 1 | 4 |
(1 row)
-- if CTE itself prunes down to a single shard than the view is supported (router plannable)
CREATE VIEW cte_view_2 AS
WITH c1 AS (SELECT * FROM users_table WHERE user_id = 2) SELECT * FROM c1 WHERE value_1 = 3;
SELECT * FROM cte_view_2;
user_id | time | value_1 | value_2 | value_3 | value_4
---------+---------------------------------+---------+---------+---------+---------
2 | Thu Nov 23 00:19:14.138058 2017 | 3 | 4 | 0 |
2 | Thu Nov 23 13:52:54.83829 2017 | 3 | 1 | 4 |
2 | Wed Nov 22 18:19:49.944985 2017 | 3 | 5 | 1 |
2 | Thu Nov 23 11:41:04.042936 2017 | 3 | 4 | 1 |
(4 rows)
CREATE VIEW router_view AS SELECT * FROM users_table WHERE user_id = 2;
-- router plannable
SELECT user_id FROM router_view GROUP BY 1;
user_id
---------
2
(1 row)
-- join a router view
SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN recent_events USING (user_id) ORDER BY 2 LIMIT 3;
user_id | time
---------+---------------------------------
2 | Thu Nov 23 17:26:14.563216 2017
(1 row)
SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN (SELECT * FROM recent_events) re USING (user_id) ORDER BY 2 LIMIT 3;
user_id | time
---------+---------------------------------
2 | Thu Nov 23 17:26:14.563216 2017
(1 row)
-- views with limits
CREATE VIEW recent_10_users AS
SELECT user_id, max(time) as lastseen FROM users_table
GROUP BY user_id
ORDER BY lastseen DESC
LIMIT 10;
-- this is not supported since it has limit in it and subquery_pushdown is not set
SELECT * FROM recent_10_users;
user_id | lastseen
---------+---------------------------------
1 | Thu Nov 23 17:30:34.635085 2017
3 | Thu Nov 23 17:18:51.048758 2017
5 | Thu Nov 23 16:48:32.08896 2017
4 | Thu Nov 23 15:32:02.360969 2017
6 | Thu Nov 23 14:43:18.024104 2017
2 | Thu Nov 23 13:52:54.83829 2017
(6 rows)
SET citus.subquery_pushdown to ON;
-- still not supported since outer query does not have limit
-- it shows a different (subquery with single relation) error message
SELECT * FROM recent_10_users;
ERROR: cannot perform distributed planning on this query
DETAIL: Subqueries with limit are not supported yet
-- now it displays more correct error message
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id);
ERROR: cannot push down this subquery
DETAIL: Limit in subquery without limit in the outermost query is unsupported
-- now both are supported when there is a limit on the outer most query
SELECT * FROM recent_10_users ORDER BY lastseen DESC LIMIT 10;
user_id | lastseen
---------+---------------------------------
1 | Thu Nov 23 17:30:34.635085 2017
3 | Thu Nov 23 17:18:51.048758 2017
5 | Thu Nov 23 16:48:32.08896 2017
4 | Thu Nov 23 15:32:02.360969 2017
6 | Thu Nov 23 14:43:18.024104 2017
2 | Thu Nov 23 13:52:54.83829 2017
(6 rows)
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
user_id | time | event_type | value_2 | value_3 | value_4
---------+---------------------------------+------------+---------+---------+---------
1 | Thu Nov 23 21:54:46.924477 2017 | 6 | 4 | 5 |
4 | Thu Nov 23 18:10:21.338399 2017 | 1 | 2 | 4 |
3 | Thu Nov 23 18:08:26.550729 2017 | 2 | 4 | 3 |
2 | Thu Nov 23 17:26:14.563216 2017 | 1 | 5 | 3 |
3 | Thu Nov 23 16:44:41.903713 2017 | 4 | 2 | 2 |
3 | Thu Nov 23 16:31:56.219594 2017 | 5 | 1 | 2 |
4 | Thu Nov 23 16:20:33.264457 2017 | 0 | 0 | 3 |
5 | Thu Nov 23 16:11:02.929469 2017 | 4 | 2 | 0 |
2 | Thu Nov 23 15:58:49.273421 2017 | 5 | 1 | 2 |
5 | Thu Nov 23 14:40:40.467511 2017 | 1 | 4 | 1 |
(10 rows)
RESET citus.subquery_pushdown;
VACUUM ANALYZE users_table;
-- explain tests
EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Sort Key: remote_scan.user_id
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> HashAggregate
Group Key: users_table.user_id
-> Hash Join
Hash Cond: (users_table.user_id = ru.user_id)
-> Seq Scan on users_table_1400000 users_table
Filter: ((value_1 >= 1) AND (value_1 < 3))
-> Hash
-> Subquery Scan on ru
-> Sort
Sort Key: (max(users_table_1."time")) DESC
-> HashAggregate
Group Key: users_table_1.user_id
Filter: (max(users_table_1."time") > '2017-11-23 16:20:33.264457'::timestamp without time zone)
-> Seq Scan on users_table_1400000 users_table_1
(23 rows)
EXPLAIN (COSTS FALSE) SELECT *
FROM (
(SELECT user_id FROM recent_users)
UNION
(SELECT user_id FROM selected_users) ) u
WHERE user_id < 4 AND user_id > 1
ORDER BY user_id;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Sort Key: remote_scan.user_id
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Unique
-> Sort
Sort Key: recent_users.user_id
-> Append
-> Subquery Scan on recent_users
-> Sort
Sort Key: (max(users_table."time")) DESC
-> GroupAggregate
Group Key: users_table.user_id
Filter: (max(users_table."time") > '2017-11-23 16:20:33.264457'::timestamp without time zone)
-> Sort
Sort Key: users_table.user_id
-> Seq Scan on users_table_1400000 users_table
Filter: ((user_id < 4) AND (user_id > 1))
-> Seq Scan on users_table_1400000 users_table_1
Filter: ((value_1 >= 1) AND (value_1 < 3) AND (user_id < 4) AND (user_id > 1))
(23 rows)
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: remote_scan."time" DESC
-> Custom Scan (Citus Real-Time)
-> Distributed Subplan 95_1
-> Limit
-> Sort
Sort Key: max((max(remote_scan.lastseen))) DESC
-> HashAggregate
Group Key: remote_scan.user_id
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Limit
-> Sort
Sort Key: (max("time")) DESC
-> HashAggregate
Group Key: user_id
-> Seq Scan on users_table_1400000 users_table
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Limit
-> Sort
Sort Key: et."time" DESC
-> Hash Join
Hash Cond: (intermediate_result.user_id = et.user_id)
-> Function Scan on read_intermediate_result intermediate_result
-> Hash
-> Seq Scan on events_table_1400004 et
(33 rows)
SET citus.subquery_pushdown to ON;
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: remote_scan."time" DESC
-> Custom Scan (Citus Real-Time)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=57637 dbname=regression
-> Limit
-> Sort
Sort Key: et."time" DESC
-> Hash Join
Hash Cond: (et.user_id = recent_10_users.user_id)
-> Seq Scan on events_table_1400004 et
-> Hash
-> Subquery Scan on recent_10_users
-> Limit
-> Sort
Sort Key: (max(users_table."time")) DESC
-> HashAggregate
Group Key: users_table.user_id
-> Seq Scan on users_table_1400000 users_table
(22 rows)
RESET citus.subquery_pushdown;
DROP VIEW recent_10_users;
DROP VIEW router_view;
DROP VIEW cte_view_2;
DROP VIEW cte_view_1;
DROP VIEW distinct_value_1;
DROP VIEW distinct_user_with_value_1_3;
DROP VIEW recent_selected_users;
DROP VIEW selected_users;
DROP VIEW recent_events;
DROP VIEW recent_users;