mirror of https://github.com/citusdata/citus.git
938 lines
40 KiB
Plaintext
938 lines
40 KiB
Plaintext
--
|
|
-- MULTI_VIEW
|
|
--
|
|
-- This file contains test cases for view support. It verifies various
|
|
-- Citus features: simple selects, aggregates, joins, outer joins
|
|
-- router queries, single row inserts, multi row inserts via insert
|
|
-- into select, multi row insert via copy commands.
|
|
-- print whether we're using version > 10 to make version-specific tests clear
|
|
SHOW server_version \gset
|
|
SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten;
|
|
version_above_ten
|
|
-------------------
|
|
f
|
|
(1 row)
|
|
|
|
SELECT count(*) FROM lineitem_hash_part;
|
|
count
|
|
-------
|
|
12000
|
|
(1 row)
|
|
|
|
SELECT count(*) FROM orders_hash_part;
|
|
count
|
|
-------
|
|
2985
|
|
(1 row)
|
|
|
|
-- create a view for priority orders
|
|
CREATE VIEW priority_orders AS SELECT * FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM';
|
|
-- aggregate pushdown
|
|
SELECT o_orderpriority, count(*) FROM priority_orders GROUP BY 1 ORDER BY 2, 1;
|
|
o_orderpriority | count
|
|
-----------------+-------
|
|
2-HIGH | 593
|
|
1-URGENT | 604
|
|
(2 rows)
|
|
|
|
SELECT o_orderpriority, count(*) FROM orders_hash_part WHERE o_orderpriority < '3-MEDIUM' GROUP BY 1 ORDER BY 2,1;
|
|
o_orderpriority | count
|
|
-----------------+-------
|
|
2-HIGH | 593
|
|
1-URGENT | 604
|
|
(2 rows)
|
|
|
|
-- filters
|
|
SELECT o_orderpriority, count(*) as all, count(*) FILTER (WHERE o_orderstatus ='F') as fullfilled FROM priority_orders GROUP BY 1 ORDER BY 2, 1;
|
|
o_orderpriority | all | fullfilled
|
|
-----------------+-----+------------
|
|
2-HIGH | 593 | 271
|
|
1-URGENT | 604 | 280
|
|
(2 rows)
|
|
|
|
-- having
|
|
SELECT o_orderdate, count(*) from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc;
|
|
o_orderdate | count
|
|
-------------+-------
|
|
08-20-1996 | 5
|
|
10-10-1994 | 4
|
|
05-05-1994 | 4
|
|
04-07-1994 | 4
|
|
03-17-1993 | 4
|
|
(5 rows)
|
|
|
|
-- having with filters
|
|
SELECT o_orderdate, count(*) as all, count(*) FILTER(WHERE o_orderstatus = 'F') from priority_orders group by 1 having (count(*) > 3) order by 2 desc, 1 desc;
|
|
o_orderdate | all | count
|
|
-------------+-----+-------
|
|
08-20-1996 | 5 | 0
|
|
10-10-1994 | 4 | 4
|
|
05-05-1994 | 4 | 4
|
|
04-07-1994 | 4 | 4
|
|
03-17-1993 | 4 | 4
|
|
(5 rows)
|
|
|
|
-- limit
|
|
SELECT o_orderkey, o_totalprice from orders_hash_part order by 2 desc, 1 asc limit 5 ;
|
|
o_orderkey | o_totalprice
|
|
------------+--------------
|
|
4421 | 401055.62
|
|
10209 | 400191.77
|
|
11142 | 395039.05
|
|
14179 | 384265.43
|
|
11296 | 378166.33
|
|
(5 rows)
|
|
|
|
SELECT o_orderkey, o_totalprice from priority_orders order by 2 desc, 1 asc limit 1 ;
|
|
o_orderkey | o_totalprice
|
|
------------+--------------
|
|
14179 | 384265.43
|
|
(1 row)
|
|
|
|
CREATE VIEW priority_lineitem AS SELECT li.* FROM lineitem_hash_part li JOIN priority_orders ON (l_orderkey = o_orderkey);
|
|
SELECT l_orderkey, count(*) FROM priority_lineitem GROUP BY 1 ORDER BY 2 DESC, 1 LIMIT 5;
|
|
l_orderkey | count
|
|
------------+-------
|
|
7 | 7
|
|
225 | 7
|
|
226 | 7
|
|
322 | 7
|
|
326 | 7
|
|
(5 rows)
|
|
|
|
CREATE VIEW air_shipped_lineitems AS SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR';
|
|
-- join between view and table
|
|
SELECT count(*) FROM orders_hash_part join air_shipped_lineitems ON (o_orderkey = l_orderkey);
|
|
count
|
|
-------
|
|
1706
|
|
(1 row)
|
|
|
|
-- join between views
|
|
SELECT count(*) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey);
|
|
count
|
|
-------
|
|
700
|
|
(1 row)
|
|
|
|
-- count distinct on partition column is supported
|
|
SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey);
|
|
count
|
|
-------
|
|
551
|
|
(1 row)
|
|
|
|
-- count distinct on non-partition column is supported
|
|
SELECT count(distinct o_orderpriority) FROM priority_orders join air_shipped_lineitems ON (o_orderkey = l_orderkey);
|
|
count
|
|
-------
|
|
2
|
|
(1 row)
|
|
|
|
-- count distinct on partition column is supported on router queries
|
|
SELECT count(distinct o_orderkey) FROM priority_orders join air_shipped_lineitems
|
|
ON (o_orderkey = l_orderkey)
|
|
WHERE (o_orderkey = 231);
|
|
count
|
|
-------
|
|
1
|
|
(1 row)
|
|
|
|
-- select distinct on router joins of views also works
|
|
SELECT distinct(o_orderkey) FROM priority_orders join air_shipped_lineitems
|
|
ON (o_orderkey = l_orderkey)
|
|
WHERE (o_orderkey = 231);
|
|
o_orderkey
|
|
------------
|
|
231
|
|
(1 row)
|
|
|
|
-- left join support depends on flattening of the query
|
|
-- following query fails since the inner part is kept as subquery
|
|
SELECT * FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey);
|
|
ERROR: cannot perform distributed planning on this query
|
|
DETAIL: Subqueries in outer joins are not supported
|
|
-- however, this works
|
|
SELECT count(*) FROM priority_orders left join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
|
|
count
|
|
-------
|
|
700
|
|
(1 row)
|
|
|
|
-- view at the inner side of is not supported
|
|
SELECT count(*) FROM priority_orders right join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
|
|
ERROR: cannot perform distributed planning on this query
|
|
DETAIL: Subqueries in outer joins are not supported
|
|
-- but view at the outer side is. This is essentially the same as a left join with arguments reversed.
|
|
SELECT count(*) FROM lineitem_hash_part right join priority_orders ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
|
|
count
|
|
-------
|
|
700
|
|
(1 row)
|
|
|
|
-- left join on router query is supported
|
|
SELECT o_orderkey, l_linenumber FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey)
|
|
WHERE o_orderkey = 2;
|
|
o_orderkey | l_linenumber
|
|
------------+--------------
|
|
2 |
|
|
(1 row)
|
|
|
|
-- repartition query on view join
|
|
-- it passes planning, fails at execution stage
|
|
SET client_min_messages TO DEBUG1;
|
|
SELECT * FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey) ORDER BY o_orderkey DESC, o_custkey DESC, o_orderpriority DESC LIMIT 5;
|
|
DEBUG: generating subplan 22_1 for subquery SELECT lineitem_hash_part.l_orderkey, lineitem_hash_part.l_partkey, lineitem_hash_part.l_suppkey, lineitem_hash_part.l_linenumber, lineitem_hash_part.l_quantity, lineitem_hash_part.l_extendedprice, lineitem_hash_part.l_discount, lineitem_hash_part.l_tax, lineitem_hash_part.l_returnflag, lineitem_hash_part.l_linestatus, lineitem_hash_part.l_shipdate, lineitem_hash_part.l_commitdate, lineitem_hash_part.l_receiptdate, lineitem_hash_part.l_shipinstruct, lineitem_hash_part.l_shipmode, lineitem_hash_part.l_comment FROM public.lineitem_hash_part WHERE (lineitem_hash_part.l_shipmode = 'AIR'::bpchar)
|
|
DEBUG: Plan 22 query after replacing subqueries and CTEs: SELECT priority_orders.o_orderkey, priority_orders.o_custkey, priority_orders.o_orderstatus, priority_orders.o_totalprice, priority_orders.o_orderdate, priority_orders.o_orderpriority, priority_orders.o_clerk, priority_orders.o_shippriority, priority_orders.o_comment, air_shipped_lineitems.l_orderkey, air_shipped_lineitems.l_partkey, air_shipped_lineitems.l_suppkey, air_shipped_lineitems.l_linenumber, air_shipped_lineitems.l_quantity, air_shipped_lineitems.l_extendedprice, air_shipped_lineitems.l_discount, air_shipped_lineitems.l_tax, air_shipped_lineitems.l_returnflag, air_shipped_lineitems.l_linestatus, air_shipped_lineitems.l_shipdate, air_shipped_lineitems.l_commitdate, air_shipped_lineitems.l_receiptdate, air_shipped_lineitems.l_shipinstruct, air_shipped_lineitems.l_shipmode, air_shipped_lineitems.l_comment FROM ((SELECT orders_hash_part.o_orderkey, orders_hash_part.o_custkey, orders_hash_part.o_orderstatus, orders_hash_part.o_totalprice, orders_hash_part.o_orderdate, orders_hash_part.o_orderpriority, orders_hash_part.o_clerk, orders_hash_part.o_shippriority, orders_hash_part.o_comment FROM public.orders_hash_part WHERE (orders_hash_part.o_orderpriority < '3-MEDIUM'::bpchar)) priority_orders JOIN (SELECT intermediate_result.l_orderkey, intermediate_result.l_partkey, intermediate_result.l_suppkey, intermediate_result.l_linenumber, intermediate_result.l_quantity, intermediate_result.l_extendedprice, intermediate_result.l_discount, intermediate_result.l_tax, intermediate_result.l_returnflag, intermediate_result.l_linestatus, intermediate_result.l_shipdate, intermediate_result.l_commitdate, intermediate_result.l_receiptdate, intermediate_result.l_shipinstruct, intermediate_result.l_shipmode, intermediate_result.l_comment FROM read_intermediate_result('22_1'::text, 'binary'::citus_copy_format) intermediate_result(l_orderkey bigint, l_partkey integer, l_suppkey integer, l_linenumber integer, l_quantity numeric(15,2), l_extendedprice numeric(15,2), l_discount numeric(15,2), l_tax numeric(15,2), l_returnflag character(1), l_linestatus character(1), l_shipdate date, l_commitdate date, l_receiptdate date, l_shipinstruct character(25), l_shipmode character(10), l_comment character varying(44))) air_shipped_lineitems ON ((priority_orders.o_custkey = air_shipped_lineitems.l_suppkey))) ORDER BY priority_orders.o_orderkey DESC, priority_orders.o_custkey DESC, priority_orders.o_orderpriority DESC LIMIT 5
|
|
DEBUG: push down of limit count: 5
|
|
o_orderkey | o_custkey | o_orderstatus | o_totalprice | o_orderdate | o_orderpriority | o_clerk | o_shippriority | o_comment | l_orderkey | l_partkey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate | l_shipinstruct | l_shipmode | l_comment
|
|
------------+-----------+---------------+--------------+-------------+-----------------+-----------------+----------------+-------------------------------------------------------+------------+-----------+-----------+--------------+------------+-----------------+------------+-------+--------------+--------------+------------+--------------+---------------+---------------------------+------------+-------------------------------------------
|
|
14821 | 1435 | O | 322002.95 | 06-12-1998 | 2-HIGH | Clerk#000000630 | 0 | n packages are furiously ironic ideas. d | 1607 | 118923 | 1435 | 2 | 37.00 | 71851.04 | 0.05 | 0.02 | N | O | 02-27-1996 | 02-18-1996 | 03-16-1996 | NONE | AIR | alongside
|
|
14790 | 613 | O | 270163.54 | 08-21-1996 | 2-HIGH | Clerk#000000347 | 0 | p. regular deposits wake. final n | 2629 | 123076 | 613 | 2 | 31.00 | 34071.17 | 0.08 | 0.03 | N | O | 05-24-1998 | 05-26-1998 | 06-10-1998 | COLLECT COD | AIR | ate blithely bold, regular deposits. bold
|
|
14758 | 1225 | F | 37812.49 | 10-27-1993 | 2-HIGH | Clerk#000000687 | 0 | ages nag about the furio | 9156 | 176190 | 1225 | 2 | 22.00 | 27856.18 | 0.03 | 0.00 | R | F | 02-08-1994 | 04-01-1994 | 02-24-1994 | DELIVER IN PERSON | AIR | equests dete
|
|
14725 | 569 | O | 261801.45 | 06-17-1995 | 2-HIGH | Clerk#000000177 | 0 | ng asymptotes. final, ironic accounts cajole after | 14688 | 173017 | 569 | 3 | 10.00 | 10900.10 | 0.02 | 0.08 | N | O | 03-14-1997 | 04-22-1997 | 04-05-1997 | COLLECT COD | AIR | riously even packages sleep a
|
|
14657 | 370 | F | 116160.53 | 02-28-1994 | 1-URGENT | Clerk#000000756 | 0 | ly across the ironic, ironic instructions. bold ideas | 5153 | 67863 | 370 | 3 | 30.00 | 54925.80 | 0.09 | 0.01 | N | O | 11-10-1995 | 11-14-1995 | 11-16-1995 | DELIVER IN PERSON | AIR | beans sleep bl
|
|
(5 rows)
|
|
|
|
RESET client_min_messages;
|
|
SELECT count(*) FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey);
|
|
count
|
|
-------
|
|
192
|
|
(1 row)
|
|
|
|
-- materialized views work
|
|
-- insert into... select works with views
|
|
CREATE TABLE temp_lineitem(LIKE lineitem_hash_part);
|
|
SELECT create_distributed_table('temp_lineitem', 'l_orderkey', 'hash', 'lineitem_hash_part');
|
|
create_distributed_table
|
|
--------------------------
|
|
|
|
(1 row)
|
|
|
|
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems;
|
|
SELECT count(*) FROM temp_lineitem;
|
|
count
|
|
-------
|
|
1706
|
|
(1 row)
|
|
|
|
-- following is a where false query, should not be inserting anything
|
|
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems WHERE l_shipmode = 'MAIL';
|
|
SELECT count(*) FROM temp_lineitem;
|
|
count
|
|
-------
|
|
1706
|
|
(1 row)
|
|
|
|
-- can create and query materialized views
|
|
CREATE MATERIALIZED VIEW mode_counts
|
|
AS SELECT l_shipmode, count(*) FROM temp_lineitem GROUP BY l_shipmode;
|
|
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
|
|
l_shipmode | count
|
|
------------+-------
|
|
AIR | 1706
|
|
(1 row)
|
|
|
|
-- materialized views are local, cannot join with distributed tables
|
|
SELECT count(*) FROM mode_counts JOIN temp_lineitem USING (l_shipmode);
|
|
ERROR: relation mode_counts is not distributed
|
|
-- new data is not immediately reflected in the view
|
|
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems;
|
|
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
|
|
l_shipmode | count
|
|
------------+-------
|
|
AIR | 1706
|
|
(1 row)
|
|
|
|
-- refresh updates the materialised view with new data
|
|
REFRESH MATERIALIZED VIEW mode_counts;
|
|
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
|
|
l_shipmode | count
|
|
------------+-------
|
|
AIR | 3412
|
|
(1 row)
|
|
|
|
DROP MATERIALIZED VIEW mode_counts;
|
|
SET citus.task_executor_type to "task-tracker";
|
|
-- single view repartition subqueries are not supported
|
|
SELECT l_suppkey, count(*) FROM
|
|
(SELECT l_suppkey, l_shipdate, count(*)
|
|
FROM air_shipped_lineitems GROUP BY l_suppkey, l_shipdate) supps
|
|
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
|
|
ERROR: cannot perform distributed planning on this query
|
|
DETAIL: Subqueries without group by clause are not supported yet
|
|
-- logically same query without a view works fine
|
|
SELECT l_suppkey, count(*) FROM
|
|
(SELECT l_suppkey, l_shipdate, count(*)
|
|
FROM lineitem_hash_part WHERE l_shipmode = 'AIR' GROUP BY l_suppkey, l_shipdate) supps
|
|
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
|
|
l_suppkey | count
|
|
-----------+-------
|
|
7680 | 4
|
|
160 | 3
|
|
1042 | 3
|
|
1318 | 3
|
|
5873 | 3
|
|
(5 rows)
|
|
|
|
-- when a view is replaced by actual query it still fails
|
|
SELECT l_suppkey, count(*) FROM
|
|
(SELECT l_suppkey, l_shipdate, count(*)
|
|
FROM (SELECT * FROM lineitem_hash_part WHERE l_shipmode = 'AIR') asi
|
|
GROUP BY l_suppkey, l_shipdate) supps
|
|
GROUP BY l_suppkey ORDER BY 2 DESC, 1 LIMIT 5;
|
|
ERROR: cannot perform distributed planning on this query
|
|
DETAIL: Subqueries without group by clause are not supported yet
|
|
-- repartition query on view with single table subquery
|
|
CREATE VIEW supp_count_view AS SELECT * FROM (SELECT l_suppkey, count(*) FROM lineitem_hash_part GROUP BY 1) s1;
|
|
SELECT * FROM supp_count_view ORDER BY 2 DESC, 1 LIMIT 10;
|
|
l_suppkey | count
|
|
-----------+-------
|
|
6104 | 8
|
|
1868 | 6
|
|
5532 | 6
|
|
5849 | 6
|
|
6169 | 6
|
|
6669 | 6
|
|
6692 | 6
|
|
7703 | 6
|
|
7869 | 6
|
|
8426 | 6
|
|
(10 rows)
|
|
|
|
SET citus.task_executor_type to DEFAULT;
|
|
-- create a view with aggregate
|
|
CREATE VIEW lineitems_by_shipping_method AS
|
|
SELECT l_shipmode, count(*) as cnt FROM lineitem_hash_part GROUP BY 1;
|
|
-- following will be supported via recursive planning
|
|
SELECT * FROM lineitems_by_shipping_method ORDER BY 1,2 LIMIT 5;
|
|
l_shipmode | cnt
|
|
------------+------
|
|
AIR | 1706
|
|
FOB | 1709
|
|
MAIL | 1739
|
|
RAIL | 1706
|
|
REG AIR | 1679
|
|
(5 rows)
|
|
|
|
-- create a view with group by on partition column
|
|
CREATE VIEW lineitems_by_orderkey AS
|
|
SELECT
|
|
l_orderkey, count(*)
|
|
FROM
|
|
lineitem_hash_part
|
|
GROUP BY 1;
|
|
-- this should work since we're able to push down this query
|
|
SELECT * FROM lineitems_by_orderkey ORDER BY 2 DESC, 1 ASC LIMIT 10;
|
|
l_orderkey | count
|
|
------------+-------
|
|
7 | 7
|
|
68 | 7
|
|
129 | 7
|
|
164 | 7
|
|
194 | 7
|
|
225 | 7
|
|
226 | 7
|
|
322 | 7
|
|
326 | 7
|
|
354 | 7
|
|
(10 rows)
|
|
|
|
-- it would also work since it is made router plannable
|
|
SELECT * FROM lineitems_by_orderkey WHERE l_orderkey = 100;
|
|
l_orderkey | count
|
|
------------+-------
|
|
100 | 5
|
|
(1 row)
|
|
|
|
DROP TABLE temp_lineitem CASCADE;
|
|
DROP VIEW supp_count_view;
|
|
DROP VIEW lineitems_by_orderkey;
|
|
DROP VIEW lineitems_by_shipping_method;
|
|
DROP VIEW air_shipped_lineitems;
|
|
DROP VIEW priority_lineitem;
|
|
DROP VIEW priority_orders;
|
|
-- new tests for real time use case including views and subqueries
|
|
-- create view to display recent user who has an activity after a timestamp
|
|
CREATE VIEW recent_users AS
|
|
SELECT user_id, max(time) as lastseen FROM users_table
|
|
GROUP BY user_id
|
|
HAVING max(time) > '2017-11-23 16:20:33.264457'::timestamp order by 2 DESC;
|
|
SELECT * FROM recent_users;
|
|
user_id | lastseen
|
|
---------+---------------------------------
|
|
1 | Thu Nov 23 17:30:34.635085 2017
|
|
5 | Thu Nov 23 16:48:32.08896 2017
|
|
3 | Thu Nov 23 17:18:51.048758 2017
|
|
(3 rows)
|
|
|
|
-- create a view for recent_events
|
|
CREATE VIEW recent_events AS
|
|
SELECT user_id, time FROM events_table
|
|
WHERE time > '2017-11-23 16:20:33.264457'::timestamp;
|
|
SELECT count(*) FROM recent_events;
|
|
count
|
|
-------
|
|
6
|
|
(1 row)
|
|
|
|
-- count number of events of recent_users
|
|
SELECT count(*) FROM recent_users ru JOIN events_table et ON (ru.user_id = et.user_id);
|
|
count
|
|
-------
|
|
50
|
|
(1 row)
|
|
|
|
-- count number of events of per recent users order by count
|
|
SELECT ru.user_id, count(*)
|
|
FROM recent_users ru
|
|
JOIN events_table et
|
|
ON (ru.user_id = et.user_id)
|
|
GROUP BY ru.user_id
|
|
ORDER BY 2 DESC, 1;
|
|
user_id | count
|
|
---------+-------
|
|
3 | 21
|
|
1 | 15
|
|
5 | 14
|
|
(3 rows)
|
|
|
|
-- the same query with a left join however, it would still generate the same result
|
|
SELECT ru.user_id, count(*)
|
|
FROM recent_users ru
|
|
LEFT JOIN events_table et
|
|
ON (ru.user_id = et.user_id)
|
|
GROUP BY ru.user_id
|
|
ORDER BY 2 DESC, 1;
|
|
user_id | count
|
|
---------+-------
|
|
3 | 21
|
|
1 | 15
|
|
5 | 14
|
|
(3 rows)
|
|
|
|
-- query wrapped inside a subquery, it needs another top level order by
|
|
SELECT * FROM
|
|
(SELECT ru.user_id, count(*)
|
|
FROM recent_users ru
|
|
JOIN events_table et
|
|
ON (ru.user_id = et.user_id)
|
|
GROUP BY ru.user_id
|
|
ORDER BY 2 DESC, 1) s1
|
|
ORDER BY 2 DESC, 1;
|
|
user_id | count
|
|
---------+-------
|
|
3 | 21
|
|
1 | 15
|
|
5 | 14
|
|
(3 rows)
|
|
|
|
-- non-partition key joins are not supported inside subquery
|
|
-- since the join with a table
|
|
SELECT * FROM
|
|
(SELECT ru.user_id, count(*)
|
|
FROM recent_users ru
|
|
JOIN events_table et
|
|
ON (ru.user_id = et.event_type)
|
|
GROUP BY ru.user_id
|
|
ORDER BY 2 DESC, 1) s1
|
|
ORDER BY 2 DESC, 1;
|
|
ERROR: bogus varno: 3
|
|
-- join between views
|
|
-- recent users who has an event in recent events
|
|
SELECT ru.user_id FROM recent_users ru JOIN recent_events re USING(user_id) GROUP BY ru.user_id ORDER BY ru.user_id;
|
|
user_id
|
|
---------
|
|
1
|
|
3
|
|
(2 rows)
|
|
|
|
-- outer join inside a subquery
|
|
-- recent_events who are not done by recent users
|
|
SELECT count(*) FROM (
|
|
SELECT re.*, ru.user_id AS recent_user
|
|
FROM recent_events re LEFT JOIN recent_users ru USING(user_id)) reu
|
|
WHERE recent_user IS NULL;
|
|
count
|
|
-------
|
|
2
|
|
(1 row)
|
|
|
|
-- same query with anti-join
|
|
SELECT count(*)
|
|
FROM recent_events re LEFT JOIN recent_users ru ON(ru.user_id = re.user_id)
|
|
WHERE ru.user_id IS NULL;
|
|
count
|
|
-------
|
|
2
|
|
(1 row)
|
|
|
|
-- join between view and table
|
|
-- users who has recent activity and they have an entry with value_1 is less than 3
|
|
SELECT ut.* FROM recent_users ru JOIN users_table ut USING (user_id) WHERE ut.value_1 < 3 ORDER BY 1,2;
|
|
user_id | time | value_1 | value_2 | value_3 | value_4
|
|
---------+---------------------------------+---------+---------+---------+---------
|
|
1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 |
|
|
3 | Wed Nov 22 18:43:51.450263 2017 | 1 | 1 | 4 |
|
|
3 | Wed Nov 22 20:43:31.008625 2017 | 1 | 3 | 2 |
|
|
3 | Thu Nov 23 00:15:45.610845 2017 | 1 | 1 | 4 |
|
|
3 | Thu Nov 23 03:23:24.702501 2017 | 1 | 2 | 5 |
|
|
3 | Thu Nov 23 06:20:05.854857 2017 | 1 | 4 | 2 |
|
|
3 | Thu Nov 23 09:57:41.540228 2017 | 2 | 2 | 3 |
|
|
3 | Thu Nov 23 11:18:53.114408 2017 | 2 | 2 | 0 |
|
|
3 | Thu Nov 23 12:56:49.29191 2017 | 0 | 5 | 1 |
|
|
3 | Thu Nov 23 17:18:51.048758 2017 | 1 | 5 | 5 |
|
|
5 | Wed Nov 22 20:43:18.667473 2017 | 0 | 3 | 2 |
|
|
5 | Wed Nov 22 21:02:07.575129 2017 | 2 | 0 | 2 |
|
|
5 | Wed Nov 22 22:10:24.315371 2017 | 1 | 2 | 1 |
|
|
5 | Thu Nov 23 00:54:44.192608 2017 | 1 | 3 | 2 |
|
|
5 | Thu Nov 23 07:47:09.542999 2017 | 1 | 4 | 3 |
|
|
5 | Thu Nov 23 09:05:08.53142 2017 | 2 | 2 | 2 |
|
|
5 | Thu Nov 23 09:17:47.706703 2017 | 2 | 5 | 3 |
|
|
5 | Thu Nov 23 10:15:31.764558 2017 | 2 | 2 | 2 |
|
|
5 | Thu Nov 23 14:29:02.557934 2017 | 2 | 1 | 2 |
|
|
5 | Thu Nov 23 15:55:08.493462 2017 | 0 | 3 | 3 |
|
|
5 | Thu Nov 23 16:28:38.455322 2017 | 2 | 5 | 4 |
|
|
(21 rows)
|
|
|
|
-- determine if a recent user has done a given event type or not
|
|
SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
|
|
FROM recent_users ru
|
|
LEFT JOIN events_table et
|
|
ON(ru.user_id = et.user_id AND et.event_type = 6)
|
|
ORDER BY 2 DESC, 1;
|
|
user_id | done_event
|
|
---------+------------
|
|
1 | YES
|
|
3 | NO
|
|
5 | NO
|
|
(3 rows)
|
|
|
|
-- view vs table join wrapped inside a subquery
|
|
SELECT * FROM
|
|
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
|
|
FROM recent_users ru
|
|
LEFT JOIN events_table et
|
|
ON(ru.user_id = et.user_id AND et.event_type = 6)
|
|
) s1
|
|
ORDER BY 2 DESC, 1;
|
|
user_id | done_event
|
|
---------+------------
|
|
1 | YES
|
|
3 | NO
|
|
5 | NO
|
|
(3 rows)
|
|
|
|
-- event vs table non-partition-key join is not supported
|
|
-- given that we cannot recursively plan tables yet
|
|
SELECT * FROM
|
|
(SELECT ru.user_id, CASE WHEN et.user_id IS NULL THEN 'NO' ELSE 'YES' END as done_event
|
|
FROM recent_users ru
|
|
LEFT JOIN events_table et
|
|
ON(ru.user_id = et.event_type)
|
|
) s1
|
|
ORDER BY 2 DESC, 1;
|
|
ERROR: bogus varno: 3
|
|
-- create a select only view
|
|
CREATE VIEW selected_users AS SELECT * FROM users_table WHERE value_1 >= 1 and value_1 <3;
|
|
CREATE VIEW recent_selected_users AS SELECT su.* FROM selected_users su JOIN recent_users ru USING(user_id);
|
|
SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
|
|
user_id
|
|
---------
|
|
1
|
|
3
|
|
5
|
|
(3 rows)
|
|
|
|
-- this would be supported when we implement where partition_key in (subquery) support
|
|
SELECT et.user_id, et.time FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users) GROUP BY 1,2 ORDER BY 1 DESC,2 DESC LIMIT 5;
|
|
user_id | time
|
|
---------+---------------------------------
|
|
5 | Thu Nov 23 16:11:02.929469 2017
|
|
5 | Thu Nov 23 14:40:40.467511 2017
|
|
5 | Thu Nov 23 14:28:51.833214 2017
|
|
5 | Thu Nov 23 14:23:09.889786 2017
|
|
5 | Thu Nov 23 13:26:45.571108 2017
|
|
(5 rows)
|
|
|
|
-- it is supported when it is a router query
|
|
SELECT count(*) FROM events_table et WHERE et.user_id IN (SELECT user_id FROM recent_selected_users WHERE user_id = 1);
|
|
count
|
|
-------
|
|
15
|
|
(1 row)
|
|
|
|
-- union between views is supported through recursive planning
|
|
(SELECT user_id FROM recent_users)
|
|
UNION
|
|
(SELECT user_id FROM selected_users)
|
|
ORDER BY 1;
|
|
user_id
|
|
---------
|
|
1
|
|
2
|
|
3
|
|
4
|
|
5
|
|
6
|
|
(6 rows)
|
|
|
|
-- wrapping it inside a SELECT * works
|
|
SELECT *
|
|
FROM (
|
|
(SELECT user_id FROM recent_users)
|
|
UNION
|
|
(SELECT user_id FROM selected_users) ) u
|
|
WHERE user_id < 2 AND user_id > 0
|
|
ORDER BY user_id;
|
|
user_id
|
|
---------
|
|
1
|
|
(1 row)
|
|
|
|
-- union all also works for views
|
|
SELECT *
|
|
FROM (
|
|
(SELECT user_id FROM recent_users)
|
|
UNION ALL
|
|
(SELECT user_id FROM selected_users) ) u
|
|
WHERE user_id < 2 AND user_id > 0
|
|
ORDER BY user_id;
|
|
user_id
|
|
---------
|
|
1
|
|
1
|
|
(2 rows)
|
|
|
|
SELECT count(*)
|
|
FROM (
|
|
(SELECT user_id FROM recent_users)
|
|
UNION
|
|
(SELECT user_id FROM selected_users) ) u
|
|
WHERE user_id < 2 AND user_id > 0;
|
|
count
|
|
-------
|
|
1
|
|
(1 row)
|
|
|
|
-- UNION ALL between views is supported through recursive planning
|
|
SELECT count(*)
|
|
FROM (
|
|
(SELECT user_id FROM recent_users)
|
|
UNION ALL
|
|
(SELECT user_id FROM selected_users) ) u
|
|
WHERE user_id < 2 AND user_id > 0;
|
|
count
|
|
-------
|
|
2
|
|
(1 row)
|
|
|
|
-- expand view definitions and re-run last 2 queries
|
|
SELECT count(*)
|
|
FROM (
|
|
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
|
|
GROUP BY user_id
|
|
HAVING max(time) > '2017-11-22 05:45:49.978738'::timestamp order by 2 DESC) aa
|
|
)
|
|
UNION
|
|
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 1 and value_1 < 3) bb) ) u
|
|
WHERE user_id < 2 AND user_id > 0;
|
|
count
|
|
-------
|
|
1
|
|
(1 row)
|
|
|
|
SELECT count(*)
|
|
FROM (
|
|
(SELECT user_id FROM (SELECT user_id, max(time) as lastseen FROM users_table
|
|
GROUP BY user_id
|
|
HAVING max(time) > '2017-11-22 05:45:49.978738'::timestamp order by 2 DESC) aa
|
|
)
|
|
UNION ALL
|
|
(SELECT user_id FROM (SELECT * FROM users_table WHERE value_1 >= 1 and value_1 < 3) bb) ) u
|
|
WHERE user_id < 2 AND user_id > 0;
|
|
count
|
|
-------
|
|
2
|
|
(1 row)
|
|
|
|
-- test distinct
|
|
-- distinct is supported if it is on a partition key
|
|
CREATE VIEW distinct_user_with_value_1_3 AS SELECT DISTINCT user_id FROM users_table WHERE value_1 = 3;
|
|
SELECT * FROM distinct_user_with_value_1_3 ORDER BY user_id;
|
|
user_id
|
|
---------
|
|
1
|
|
2
|
|
3
|
|
4
|
|
5
|
|
6
|
|
(6 rows)
|
|
|
|
-- distinct is not supported if it is on a non-partition key
|
|
-- but will be supported via recursive planning
|
|
CREATE VIEW distinct_value_1 AS SELECT DISTINCT value_1 FROM users_table WHERE value_2 = 3;
|
|
SELECT * FROM distinct_value_1 ORDER BY 1 DESC LIMIT 5;
|
|
value_1
|
|
---------
|
|
5
|
|
4
|
|
3
|
|
2
|
|
1
|
|
(5 rows)
|
|
|
|
-- CTEs are supported even if they are on views
|
|
CREATE VIEW cte_view_1 AS
|
|
WITH c1 AS (SELECT * FROM users_table WHERE value_1 = 3) SELECT * FROM c1 WHERE value_2 < 4;
|
|
SELECT * FROM cte_view_1 ORDER BY 1,2,3,4,5 LIMIT 5;
|
|
user_id | time | value_1 | value_2 | value_3 | value_4
|
|
---------+---------------------------------+---------+---------+---------+---------
|
|
1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 |
|
|
2 | Thu Nov 23 13:52:54.83829 2017 | 3 | 1 | 4 |
|
|
3 | Wed Nov 22 23:24:32.080584 2017 | 3 | 2 | 5 |
|
|
4 | Wed Nov 22 23:59:46.493416 2017 | 3 | 1 | 3 |
|
|
4 | Thu Nov 23 01:55:21.824618 2017 | 3 | 1 | 4 |
|
|
(5 rows)
|
|
|
|
-- this is single shard query and still not supported since it has view + cte
|
|
-- router planner can't detect it
|
|
SELECT * FROM cte_view_1 WHERE user_id = 2 ORDER BY 1,2,3,4,5;
|
|
user_id | time | value_1 | value_2 | value_3 | value_4
|
|
---------+--------------------------------+---------+---------+---------+---------
|
|
2 | Thu Nov 23 13:52:54.83829 2017 | 3 | 1 | 4 |
|
|
(1 row)
|
|
|
|
-- if CTE itself prunes down to a single shard than the view is supported (router plannable)
|
|
CREATE VIEW cte_view_2 AS
|
|
WITH c1 AS (SELECT * FROM users_table WHERE user_id = 2) SELECT * FROM c1 WHERE value_1 = 3;
|
|
SELECT * FROM cte_view_2;
|
|
user_id | time | value_1 | value_2 | value_3 | value_4
|
|
---------+---------------------------------+---------+---------+---------+---------
|
|
2 | Thu Nov 23 00:19:14.138058 2017 | 3 | 4 | 0 |
|
|
2 | Thu Nov 23 13:52:54.83829 2017 | 3 | 1 | 4 |
|
|
2 | Wed Nov 22 18:19:49.944985 2017 | 3 | 5 | 1 |
|
|
2 | Thu Nov 23 11:41:04.042936 2017 | 3 | 4 | 1 |
|
|
(4 rows)
|
|
|
|
CREATE VIEW router_view AS SELECT * FROM users_table WHERE user_id = 2;
|
|
-- router plannable
|
|
SELECT user_id FROM router_view GROUP BY 1;
|
|
user_id
|
|
---------
|
|
2
|
|
(1 row)
|
|
|
|
-- join a router view
|
|
SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN recent_events USING (user_id) ORDER BY 2 LIMIT 3;
|
|
user_id | time
|
|
---------+---------------------------------
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
(1 row)
|
|
|
|
SELECT * FROM (SELECT user_id FROM router_view GROUP BY 1) rv JOIN (SELECT * FROM recent_events) re USING (user_id) ORDER BY 2 LIMIT 3;
|
|
user_id | time
|
|
---------+---------------------------------
|
|
2 | Thu Nov 23 17:26:14.563216 2017
|
|
(1 row)
|
|
|
|
-- views with limits
|
|
CREATE VIEW recent_10_users AS
|
|
SELECT user_id, max(time) as lastseen FROM users_table
|
|
GROUP BY user_id
|
|
ORDER BY lastseen DESC
|
|
LIMIT 10;
|
|
-- this is not supported since it has limit in it and subquery_pushdown is not set
|
|
SELECT * FROM recent_10_users;
|
|
user_id | lastseen
|
|
---------+---------------------------------
|
|
1 | Thu Nov 23 17:30:34.635085 2017
|
|
3 | Thu Nov 23 17:18:51.048758 2017
|
|
5 | Thu Nov 23 16:48:32.08896 2017
|
|
4 | Thu Nov 23 15:32:02.360969 2017
|
|
6 | Thu Nov 23 14:43:18.024104 2017
|
|
2 | Thu Nov 23 13:52:54.83829 2017
|
|
(6 rows)
|
|
|
|
SET citus.subquery_pushdown to ON;
|
|
-- still not supported since outer query does not have limit
|
|
-- it shows a different (subquery with single relation) error message
|
|
SELECT * FROM recent_10_users;
|
|
ERROR: cannot perform distributed planning on this query
|
|
DETAIL: Subqueries with limit are not supported yet
|
|
-- now it displays more correct error message
|
|
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id);
|
|
ERROR: cannot push down this subquery
|
|
DETAIL: Limit in subquery without limit in the outermost query is unsupported
|
|
-- now both are supported when there is a limit on the outer most query
|
|
SELECT * FROM recent_10_users ORDER BY lastseen DESC LIMIT 10;
|
|
user_id | lastseen
|
|
---------+---------------------------------
|
|
1 | Thu Nov 23 17:30:34.635085 2017
|
|
3 | Thu Nov 23 17:18:51.048758 2017
|
|
5 | Thu Nov 23 16:48:32.08896 2017
|
|
4 | Thu Nov 23 15:32:02.360969 2017
|
|
6 | Thu Nov 23 14:43:18.024104 2017
|
|
2 | Thu Nov 23 13:52:54.83829 2017
|
|
(6 rows)
|
|
|
|
SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
|
|
user_id | time | event_type | value_2 | value_3 | value_4
|
|
---------+---------------------------------+------------+---------+---------+---------
|
|
1 | Thu Nov 23 21:54:46.924477 2017 | 6 | 4 | 5 |
|
|
4 | Thu Nov 23 18:10:21.338399 2017 | 1 | 2 | 4 |
|
|
3 | Thu Nov 23 18:08:26.550729 2017 | 2 | 4 | 3 |
|
|
2 | Thu Nov 23 17:26:14.563216 2017 | 1 | 5 | 3 |
|
|
3 | Thu Nov 23 16:44:41.903713 2017 | 4 | 2 | 2 |
|
|
3 | Thu Nov 23 16:31:56.219594 2017 | 5 | 1 | 2 |
|
|
4 | Thu Nov 23 16:20:33.264457 2017 | 0 | 0 | 3 |
|
|
5 | Thu Nov 23 16:11:02.929469 2017 | 4 | 2 | 0 |
|
|
2 | Thu Nov 23 15:58:49.273421 2017 | 5 | 1 | 2 |
|
|
5 | Thu Nov 23 14:40:40.467511 2017 | 1 | 4 | 1 |
|
|
(10 rows)
|
|
|
|
RESET citus.subquery_pushdown;
|
|
VACUUM ANALYZE users_table;
|
|
-- explain tests
|
|
EXPLAIN (COSTS FALSE) SELECT user_id FROM recent_selected_users GROUP BY 1 ORDER BY 1;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------------------------------------------------------------------------------------------
|
|
Sort
|
|
Sort Key: remote_scan.user_id
|
|
-> HashAggregate
|
|
Group Key: remote_scan.user_id
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> HashAggregate
|
|
Group Key: users_table.user_id
|
|
-> Hash Join
|
|
Hash Cond: (users_table.user_id = ru.user_id)
|
|
-> Seq Scan on users_table_1400000 users_table
|
|
Filter: ((value_1 >= 1) AND (value_1 < 3))
|
|
-> Hash
|
|
-> Subquery Scan on ru
|
|
-> Sort
|
|
Sort Key: (max(users_table_1."time")) DESC
|
|
-> HashAggregate
|
|
Group Key: users_table_1.user_id
|
|
Filter: (max(users_table_1."time") > '2017-11-23 16:20:33.264457'::timestamp without time zone)
|
|
-> Seq Scan on users_table_1400000 users_table_1
|
|
(23 rows)
|
|
|
|
EXPLAIN (COSTS FALSE) SELECT *
|
|
FROM (
|
|
(SELECT user_id FROM recent_users)
|
|
UNION
|
|
(SELECT user_id FROM selected_users) ) u
|
|
WHERE user_id < 4 AND user_id > 1
|
|
ORDER BY user_id;
|
|
QUERY PLAN
|
|
-------------------------------------------------------------------------------------------------------------------------------------------------
|
|
Sort
|
|
Sort Key: remote_scan.user_id
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Unique
|
|
-> Sort
|
|
Sort Key: recent_users.user_id
|
|
-> Append
|
|
-> Subquery Scan on recent_users
|
|
-> Sort
|
|
Sort Key: (max(users_table."time")) DESC
|
|
-> GroupAggregate
|
|
Group Key: users_table.user_id
|
|
Filter: (max(users_table."time") > '2017-11-23 16:20:33.264457'::timestamp without time zone)
|
|
-> Sort
|
|
Sort Key: users_table.user_id
|
|
-> Seq Scan on users_table_1400000 users_table
|
|
Filter: ((user_id < 4) AND (user_id > 1))
|
|
-> Seq Scan on users_table_1400000 users_table_1
|
|
Filter: ((value_1 >= 1) AND (value_1 < 3) AND (user_id < 4) AND (user_id > 1))
|
|
(23 rows)
|
|
|
|
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------------------------------------------------------
|
|
Limit
|
|
-> Sort
|
|
Sort Key: remote_scan."time" DESC
|
|
-> Custom Scan (Citus Real-Time)
|
|
-> Distributed Subplan 95_1
|
|
-> Limit
|
|
-> Sort
|
|
Sort Key: max((max(remote_scan.lastseen))) DESC
|
|
-> HashAggregate
|
|
Group Key: remote_scan.user_id
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Limit
|
|
-> Sort
|
|
Sort Key: (max("time")) DESC
|
|
-> HashAggregate
|
|
Group Key: user_id
|
|
-> Seq Scan on users_table_1400000 users_table
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Limit
|
|
-> Sort
|
|
Sort Key: et."time" DESC
|
|
-> Hash Join
|
|
Hash Cond: (intermediate_result.user_id = et.user_id)
|
|
-> Function Scan on read_intermediate_result intermediate_result
|
|
-> Hash
|
|
-> Seq Scan on events_table_1400004 et
|
|
(33 rows)
|
|
|
|
SET citus.subquery_pushdown to ON;
|
|
EXPLAIN (COSTS FALSE) SELECT et.* FROM recent_10_users JOIN events_table et USING(user_id) ORDER BY et.time DESC LIMIT 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------------------------------------------------------
|
|
Limit
|
|
-> Sort
|
|
Sort Key: remote_scan."time" DESC
|
|
-> Custom Scan (Citus Real-Time)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=57637 dbname=regression
|
|
-> Limit
|
|
-> Sort
|
|
Sort Key: et."time" DESC
|
|
-> Hash Join
|
|
Hash Cond: (et.user_id = recent_10_users.user_id)
|
|
-> Seq Scan on events_table_1400004 et
|
|
-> Hash
|
|
-> Subquery Scan on recent_10_users
|
|
-> Limit
|
|
-> Sort
|
|
Sort Key: (max(users_table."time")) DESC
|
|
-> HashAggregate
|
|
Group Key: users_table.user_id
|
|
-> Seq Scan on users_table_1400000 users_table
|
|
(22 rows)
|
|
|
|
RESET citus.subquery_pushdown;
|
|
DROP VIEW recent_10_users;
|
|
DROP VIEW router_view;
|
|
DROP VIEW cte_view_2;
|
|
DROP VIEW cte_view_1;
|
|
DROP VIEW distinct_value_1;
|
|
DROP VIEW distinct_user_with_value_1_3;
|
|
DROP VIEW recent_selected_users;
|
|
DROP VIEW selected_users;
|
|
DROP VIEW recent_events;
|
|
DROP VIEW recent_users;
|