diff --git a/src/test/regress/expected/with_basics.out b/src/test/regress/expected/with_basics.out new file mode 100644 index 000000000..5172ad302 --- /dev/null +++ b/src/test/regress/expected/with_basics.out @@ -0,0 +1,604 @@ +-- Test the basic CTE functionality and expected error messages +CREATE TYPE xy AS (x int, y int); +SELECT run_command_on_workers('CREATE TYPE xy AS (x int, y int)'); + run_command_on_workers +----------------------------------- + (localhost,57637,t,"CREATE TYPE") + (localhost,57638,t,"CREATE TYPE") +(2 rows) + +-- CTEs in FROM should work +WITH cte AS ( + SELECT user_id, value_2 from users_table WHERE user_id IN (1, 2) ORDER BY 1,2 LIMIT 5 +) +SELECT * FROM cte; + user_id | value_2 +---------+--------- + 1 | 0 + 1 | 2 + 1 | 3 + 1 | 3 + 1 | 4 +(5 rows) + +-- CTEs in WHERE should work +WITH cte AS ( + SELECT user_id from users_table ORDER BY user_id DESC LIMIT 10 +) +SELECT + value_2 +FROM + users_table +WHERE + user_id IN (SELECT user_id FROM users_table) +ORDER BY + value_2 +LIMIT + 5; + value_2 +--------- + 0 + 0 + 0 + 0 + 0 +(5 rows) + +-- nested CTEs should work +WITH cte_1 AS ( + WITH cte_1_1 AS ( + SELECT user_id, value_2 from users_table WHERE user_id IN (1, 2) ORDER BY 2 LIMIT 5 + ), + cte_1_2 AS ( + SELECT max(user_id) AS user_id FROM cte_1_1 + ) + SELECT user_id FROM cte_1_2 ORDER BY user_id +) +SELECT value_2 FROM users_table WHERE user_id IN (SELECT user_id FROM cte_1) ORDER BY value_2 LIMIT 1; + value_2 +--------- + 0 +(1 row) + +-- Mix of FROM/WHERE queries +WITH cte_from AS ( + SELECT max(user_id) AS user_id, value_2, value_1 FROM users_table GROUP BY value_2, value_1 +), +cte_where AS ( + SELECT value_2 FROM events_table +) +SELECT + * +FROM + (SELECT max(user_id), max(value_2) AS value_2 FROM cte_from GROUP BY value_1) f +WHERE + value_2 IN (SELECT * FROM cte_where) +ORDER BY + 1, 2 +LIMIT + 5; + max | value_2 +-----+--------- + 5 | 5 + 6 | 5 + 6 | 5 + 6 | 5 + 6 | 5 +(5 rows) + +-- CTE in subquery errors out +SELECT user_id 
FROM ( + WITH cte AS ( + SELECT user_id, value_2 from users_table WHERE user_id IN (1, 2) ORDER BY 2 LIMIT 5 + ) + SELECT user_id FROM cte WHERE value_2 > 0 +) a; +ERROR: cannot push down this subquery +DETAIL: CTEs in subqueries are currently unsupported +-- CTE outside of FROM/WHERE errors out +WITH cte AS ( + SELECT user_id FROM users_table WHERE value_2 IN (1, 2) +) +SELECT (SELECT * FROM cte); +WARNING: more than one row returned by a subquery used as an expression +CONTEXT: while executing command on localhost:57638 +ERROR: could not receive query results +WITH cte_basic AS ( + SELECT user_id FROM users_table WHERE user_id = 1 +) +SELECT + (SELECT user_id FROM cte_basic), user_id +FROM + users_table; +ERROR: could not run distributed query with subquery outside the FROM and WHERE clauses +HINT: Consider using an equality filter on the distributed table's partition column. +-- single-row sublink is acceptable when there is no FROM +WITH cte AS ( + SELECT user_id FROM users_table WHERE value_2 IN (1, 2) +) +SELECT (SELECT * FROM cte ORDER BY 1 LIMIT 1); + user_id +--------- + 1 +(1 row) + +-- group by partition column +WITH series AS ( + SELECT s AS once, s*2 AS twice FROM generate_series(1,10) s +) +SELECT user_id, count(*) +FROM + users_table +JOIN + series ON (user_id = once) +GROUP BY + user_id +ORDER BY + 1, 2 +LIMIT 5; + user_id | count +---------+------- + 1 | 7 + 2 | 18 + 3 | 17 + 4 | 23 + 5 | 26 +(5 rows) + +-- group by non-partition column +WITH series AS ( + SELECT s AS once, s*2 AS twice FROM generate_series(1,10) s +) +SELECT + twice, min(user_id) +FROM + users_table +JOIN + series ON (user_id = once) +GROUP BY + twice +HAVING + twice > 5 +ORDER BY + 1, 2 +LIMIT 5; + twice | min +-------+----- + 6 | 3 + 8 | 4 + 10 | 5 + 12 | 6 +(4 rows) + +-- distinct in subquery on CTE +WITH one_user AS ( + SELECT user_id from users_table WHERE user_id = 1 +) +SELECT + user_id +FROM + users_table +WHERE + value_2 IN (SELECT DISTINCT user_id FROM one_user) +ORDER 
BY + user_id +LIMIT + 1; + user_id +--------- + 2 +(1 row) + +-- having in subquery on CTE +WITH one_user AS ( + SELECT user_id from users_table WHERE user_id = 1 +) +SELECT + user_id +FROM + users_table +WHERE + value_2 IN (SELECT user_id FROM one_user GROUP BY user_id HAVING count(*) > 0) +ORDER BY + user_id +LIMIT + 1; + user_id +--------- + 2 +(1 row) + +-- aggregate in subquery on CTE +WITH top_users AS ( + SELECT user_id, value_2 FROM users_table ORDER BY user_id DESC LIMIT 10 +) +SELECT + user_id +FROM + (SELECT min(user_id) AS user_id FROM top_users) top_users +JOIN + users_table USING (user_id); +ERROR: cannot push down this subquery +DETAIL: Aggregates without group by are currently unsupported +-- FOR UPDATE in subquery on CTE +WITH top_users AS ( + SELECT user_id, value_2 FROM users_table ORDER BY user_id DESC LIMIT 10 +) +SELECT + user_id +FROM + (SELECT user_id FROM top_users FOR UPDATE) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; + user_id +--------- + 6 + 6 + 6 + 6 + 6 +(5 rows) + +-- LIMIT in subquery on CTE +WITH top_users AS ( + SELECT user_id, value_2 FROM users_table ORDER BY user_id DESC LIMIT 10 +) +SELECT + user_id +FROM + (SELECT user_id FROM top_users LIMIT 5) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; +ERROR: cannot push down this subquery +DETAIL: Limit in subquery is currently unsupported +-- OFFSET in subquery on CTE +WITH top_users AS ( + SELECT user_id, value_2 FROM users_table ORDER BY user_id DESC LIMIT 10 +) +SELECT + user_id +FROM + (SELECT user_id FROM top_users OFFSET 5) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; +ERROR: cannot push down this subquery +DETAIL: Offset clause is currently unsupported +-- Unsupported join in CTE +WITH top_users AS ( + SELECT DISTINCT e.user_id FROM users_table u JOIN events_table e ON (u.user_id = e.user_id AND u.value_1 > e.value_2) +) +SELECT + user_id +FROM + (SELECT user_id FROM top_users 
WHERE user_id > 5) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; +ERROR: unsupported clause type +-- Join can be supported with another CTE +WITH events_table AS ( + SELECT * FROM events_table +), +top_users AS ( + SELECT DISTINCT e.user_id FROM users_table u JOIN events_table e ON (u.user_id = e.user_id AND u.value_1 > e.value_2) +) +SELECT + user_id +FROM + (SELECT user_id FROM top_users WHERE user_id > 5) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; + user_id +--------- + 6 + 6 + 6 + 6 + 6 +(5 rows) + +-- Window functions in CTE +WITH top_users AS ( + SELECT row_number() OVER(), user_id FROM users_table ORDER BY user_id DESC LIMIT 10 +) +SELECT + user_id +FROM + (SELECT user_id FROM top_users WHERE user_id > 5) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; +ERROR: could not run distributed query with window functions +HINT: Consider using an equality filter on the distributed table's partition column. 
+-- Window functions that partition by the distribution column in subqueries in CTEs are ok +WITH top_users AS + (SELECT * + FROM + (SELECT row_number() OVER(PARTITION BY user_id) AS row_number, + user_id + FROM users_table) AS foo +) +SELECT user_id +FROM + (SELECT user_id + FROM top_users + WHERE row_number > 5) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; + user_id +--------- + 1 + 1 + 1 + 1 + 1 +(5 rows) + +-- Unsupported aggregate in CTE +WITH top_users AS ( + SELECT array_agg(user_id ORDER BY value_2) user_ids FROM users_table +) +SELECT + user_id +FROM + (SELECT unnest(user_ids) user_id FROM top_users) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; +ERROR: array_agg with order by is unsupported +-- array_agg in CTE +WITH top_users AS ( + SELECT array_agg(user_id) user_ids FROM users_table +) +SELECT + user_id +FROM + (SELECT unnest(user_ids) user_id FROM top_users) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; + user_id +--------- + 1 + 1 + 1 + 1 + 1 +(5 rows) + +-- composite type array +WITH top_users AS ( + SELECT array_agg((value_1,value_2)::xy) AS p FROM users_table WHERE user_id % 2 = 0 +) +SELECT + e.user_id, sum(y) +FROM + (SELECT (unnest(p)).* FROM top_users) tops +JOIN + events_table e ON (tops.x = e.user_id) +GROUP BY + e.user_id +ORDER BY + 2 DESC, 1 +LIMIT + 5; + user_id | sum +---------+----- + 3 | 651 + 2 | 552 + 4 | 544 + 5 | 126 + 1 | 90 +(5 rows) + +-- SELECT * FROM (SELECT * FROM cte UNION SELECT * FROM distributed_table) a; should error out +WITH cte AS ( + SELECT * FROM users_table +) +SELECT * FROM ( + SELECT * FROM cte UNION (SELECT * FROM events_table) + ) a +ORDER BY + 1,2,3,4,5,6 +LIMIT + 10; +ERROR: cannot push down this subquery +DETAIL: Complex subqueries and CTEs are not supported within a UNION +SELECT * FROM ( + SELECT * FROM (WITH cte AS ( + SELECT * FROM users_table + ) + SELECT * FROM cte + )b UNION (SELECT * FROM events_table)) a 
+ORDER BY +1,2,3,4,5,6 +LIMIT +10; +ERROR: cannot push down this subquery +DETAIL: CTEs in subqueries are currently unsupported +-- SELECT * FROM (SELECT * FROM cte UNION SELECT * FROM cte) a; should work +WITH cte AS ( + SELECT * FROM users_table WHERE user_id IN (1, 2) +) +SELECT + * +FROM + (SELECT * FROM cte UNION (SELECT * FROM cte)) a +ORDER BY + 1,2,3,4,5,6 +LIMIT + 5; + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 | + 1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 | + 1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 | + 1 | Thu Nov 23 11:11:24.40789 2017 | 3 | 4 | 0 | + 1 | Thu Nov 23 11:44:57.515981 2017 | 4 | 3 | 4 | +(5 rows) + +WITH cte AS ( + SELECT * FROM users_table WHERE user_id IN (1, 2) ORDER BY 1,2,3 LIMIT 5 +), +cte_2 AS ( + SELECT * FROM users_table WHERE user_id IN (3, 4) ORDER BY 1,2,3 LIMIT 5 +) +SELECT * FROM cte UNION ALL SELECT * FROM cte_2; + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 | + 1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 | + 1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 | + 1 | Thu Nov 23 11:11:24.40789 2017 | 3 | 4 | 0 | + 1 | Thu Nov 23 11:44:57.515981 2017 | 4 | 3 | 4 | + 3 | Wed Nov 22 18:43:51.450263 2017 | 1 | 1 | 4 | + 3 | Wed Nov 22 20:43:31.008625 2017 | 1 | 3 | 2 | + 3 | Wed Nov 22 23:24:32.080584 2017 | 3 | 2 | 5 | + 3 | Thu Nov 23 00:15:45.610845 2017 | 1 | 1 | 4 | + 3 | Thu Nov 23 03:23:24.702501 2017 | 1 | 2 | 5 | +(10 rows) + +-- basic recursive CTE which should all error out +WITH RECURSIVE basic_recursive(x) AS ( + VALUES (1) + UNION ALL + SELECT user_id + 1 FROM users_table WHERE user_id < 100 +) +SELECT sum(x) FROM basic_recursive; +ERROR: recursive CTEs are not supported in distributed queries +WITH RECURSIVE 
basic_recursive AS ( + SELECT -1 as user_id, '2017-11-22 20:16:16.614779'::timestamp, -1, -1, -1, -1 + UNION ALL + SELECT * FROM users_table WHERE user_id>1 +) +SELECT * FROM basic_recursive ORDER BY user_id LIMIT 1; +ERROR: recursive CTEs are not supported in distributed queries +-- basic_recursive in FROM should error out +SELECT + * +FROM +(WITH RECURSIVE basic_recursive AS ( + SELECT -1 as user_id, '2017-11-22 20:16:16.614779'::timestamp, -1, -1, -1, -1 + UNION ALL + SELECT * FROM users_table WHERE user_id>1 + ) + SELECT * FROM basic_recursive ORDER BY user_id LIMIT 1) cte_rec; +ERROR: cannot push down this subquery +DETAIL: CTEs in subqueries are currently unsupported +-- basic_recursive in WHERE with UNION ALL +SELECT + * +FROM + users_table +WHERE + user_id in +(WITH RECURSIVE basic_recursive AS ( + SELECT -1 as user_id + UNION ALL + SELECT user_id FROM users_table WHERE user_id>1 + ) + SELECT * FROM basic_recursive ORDER BY user_id LIMIT 1); +ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys +DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. 
+-- one recursive one regular CTE should error out +WITH RECURSIVE basic_recursive(x) AS( + VALUES (1) + UNION ALL + SELECT user_id + 1 FROM users_table WHERE user_id < 100 +), +basic AS ( + SELECT count(user_id) FROM users_table +) +SELECT x FROM basic, basic_recursive; +ERROR: recursive CTEs are not supported in distributed queries +-- one recursive one regular which SELECTs from the recursive CTE under a simple SELECT +WITH RECURSIVE basic_recursive(x) AS( + VALUES (1) + UNION ALL + SELECT user_id + 1 FROM users_table WHERE user_id < 100 +), +basic AS ( + SELECT count(x) FROM basic_recursive +) +SELECT * FROM basic; +ERROR: recursive CTEs are not supported in distributed queries +-- recursive CTE in a NESTED manner +WITH regular_cte AS ( + WITH regular_2 AS ( + WITH RECURSIVE recursive AS ( + VALUES (1) + UNION ALL + SELECT user_id + 1 FROM users_table WHERE user_id < 100 + ) + SELECT * FROM recursive + ) + SELECT * FROM regular_2 +) +SELECT * FROM regular_cte; +ERROR: recursive CTEs are not supported in distributed queries +-- CTEs should work with VIEWs as well +CREATE VIEW basic_view AS +SELECT * FROM users_table; +CREATE VIEW cte_view AS +WITH cte AS ( + SELECT * FROM users_table +) +SELECT user_id, max(value_1) as value_1 FROM cte GROUP BY 1; +WITH cte_user AS ( + SELECT basic_view.user_id,events_table.value_2 FROM basic_view join events_table on (basic_view.user_id = events_table.user_id) +) +SELECT user_id, sum(value_2) FROM cte_user GROUP BY 1 ORDER BY 1, 2; + user_id | sum +---------+------ + 1 | 294 + 2 | 1026 + 3 | 782 + 4 | 943 + 5 | 806 + 6 | 220 +(6 rows) + +SELECT * FROM cte_view; +ERROR: cannot push down this subquery +DETAIL: CTEs in subqueries are currently unsupported +WITH cte_user_with_view AS +( + SELECT * FROM cte_view WHERE user_id < 3 +) +SELECT user_id, value_1 FROM cte_user_with_view ORDER BY 1, 2 LIMIT 10 OFFSET 3; +ERROR: cannot push down this subquery +DETAIL: CTEs in subqueries are currently unsupported +DROP VIEW basic_view; +DROP 
VIEW cte_view; diff --git a/src/test/regress/expected/with_executors.out b/src/test/regress/expected/with_executors.out new file mode 100644 index 000000000..5e7955c94 --- /dev/null +++ b/src/test/regress/expected/with_executors.out @@ -0,0 +1,385 @@ +-- Confirm we can use local, router, real-time, and task-tracker execution +CREATE SCHEMA with_executors; +SET search_path TO with_executors, public; +SET citus.enable_repartition_joins TO on; +CREATE TABLE with_executors.local_table (id int); +INSERT INTO local_table VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +-- CTEs should be able to use local queries +WITH cte AS ( + WITH local_cte AS ( + SELECT * FROM local_table + ), + dist_cte AS ( + SELECT user_id FROM events_table + ) + SELECT * FROM local_cte join dist_cte on dist_cte.user_id=local_cte.id +) +SELECT count(*) FROM cte; + count +------- + 101 +(1 row) + +WITH cte AS ( + WITH local_cte AS ( + SELECT * FROM local_table + ), + dist_cte AS ( + SELECT user_id FROM events_table + ), + merger_cte AS ( + SELECT id as user_id FROM local_cte UNION (SELECT * FROM dist_cte) + ) + SELECT * FROM merger_cte WHERE user_id IN (1, 2, 3) +) +SELECT * FROM cte ORDER BY 1; + user_id +--------- + 1 + 2 + 3 +(3 rows) + +WITH cte AS ( + WITH local_cte AS ( + SELECT * FROM local_table WHERE id < 5 + ), + local_cte_2 AS ( + SELECT * FROM local_table WHERE id > 5 + ) + SELECT local_cte.id as id_1, local_cte_2.id as id_2 FROM local_cte,local_cte_2 +) +SELECT + * +FROM + cte +join + users_table +on + cte.id_1 = users_table.user_id +WHERE + cte.id_1 IN (3, 4, 5) +ORDER BY + 1,2,3,4,5,6,7 +LIMIT + 10; + id_1 | id_2 | user_id | time | value_1 | value_2 | value_3 | value_4 +------+------+---------+---------------------------------+---------+---------+---------+--------- + 3 | 6 | 3 | Wed Nov 22 18:43:51.450263 2017 | 1 | 1 | 4 | + 3 | 6 | 3 | Wed Nov 22 20:43:31.008625 2017 | 1 | 3 | 2 | + 3 | 6 | 3 | Wed Nov 22 23:24:32.080584 2017 | 3 | 2 | 5 | + 3 | 6 | 3 | Thu Nov 23 
00:15:45.610845 2017 | 1 | 1 | 4 | + 3 | 6 | 3 | Thu Nov 23 03:23:24.702501 2017 | 1 | 2 | 5 | + 3 | 6 | 3 | Thu Nov 23 03:52:32.008895 2017 | 4 | 2 | 0 | + 3 | 6 | 3 | Thu Nov 23 04:01:08.04806 2017 | 5 | 5 | 3 | + 3 | 6 | 3 | Thu Nov 23 05:01:44.885505 2017 | 3 | 5 | 4 | + 3 | 6 | 3 | Thu Nov 23 06:20:05.854857 2017 | 1 | 4 | 2 | + 3 | 6 | 3 | Thu Nov 23 09:57:41.540228 2017 | 2 | 2 | 3 | +(10 rows) + +-- CTEs should be able to use router queries +WITH cte AS ( + WITH router_cte AS ( + SELECT user_id, value_2 FROM users_table WHERE user_id = 1 + ), + router_cte_2 AS ( + SELECT user_id, event_type, value_2 FROM events_table WHERE user_id = 1 + ) + SELECT + router_cte.user_id as uid, event_type + FROM + router_cte, router_cte_2 +) +SELECT * FROM cte ORDER BY 2 LIMIT 5; + uid | event_type +-----+------------ + 1 | 0 + 1 | 0 + 1 | 0 + 1 | 0 + 1 | 0 +(5 rows) + +-- CTEs should be able to use real-time queries +WITH real_time_cte AS ( + SELECT * FROM users_table WHERE value_2 IN (1, 2, 3) +) +SELECT * FROM real_time_cte ORDER BY 1, 2, 3, 4, 5, 6 LIMIT 10; + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 | + 1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 | + 1 | Thu Nov 23 11:44:57.515981 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 01:04:26.198826 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 03:27:50.327051 2017 | 2 | 2 | 0 | + 2 | Thu Nov 23 06:50:30.797805 2017 | 1 | 1 | 1 | + 2 | Thu Nov 23 06:56:38.46819 2017 | 0 | 1 | 3 | + 2 | Thu Nov 23 08:22:22.169158 2017 | 4 | 2 | 5 | + 2 | Thu Nov 23 08:49:47.029236 2017 | 4 | 2 | 4 | + 2 | Thu Nov 23 09:54:28.13665 2017 | 0 | 3 | 4 | +(10 rows) + +-- router & real-time together +WITH cte AS ( + WITH router_cte AS ( + SELECT user_id, value_2 FROM users_table WHERE user_id = 1 + ), + real_time AS ( + SELECT user_id, event_type, value_2 FROM events_table + ) + SELECT + router_cte.user_id as uid, 
event_type + FROM + router_cte, real_time + WHERE + router_cte.user_id=real_time.user_id +) +SELECT * FROM cte WHERE uid=1 ORDER BY 2 LIMIT 5; + uid | event_type +-----+------------ + 1 | 0 + 1 | 0 + 1 | 0 + 1 | 0 + 1 | 0 +(5 rows) + +-- CTEs should be able to use task-tracker queries +WITH cte AS ( + WITH task_tracker_1 AS ( + SELECT + users_table.user_id as uid_1, users_table.value_2 + FROM + users_table + JOIN + events_table + ON + users_table.value_2=events_table.value_2 + ), + task_tracker_2 AS ( + SELECT + users_table.user_id as uid_2, users_table.value_3 + FROM + users_table + JOIN + events_table + ON + users_table.value_3=events_table.value_3 + ) + SELECT + uid_1, uid_2, value_2, value_3 + FROM + task_tracker_1 + JOIN + task_tracker_2 + ON + value_2 = value_3 +) +SELECT + uid_1, uid_2, cte.value_2, cte.value_3 +FROM + cte +JOIN + events_table +ON + cte.value_2 = events_table.event_type +ORDER BY + 1, 2, 3, 4 +LIMIT 10; + uid_1 | uid_2 | value_2 | value_3 +-------+-------+---------+--------- + 1 | 1 | 0 | 0 + 1 | 1 | 0 | 0 + 1 | 1 | 0 | 0 + 1 | 1 | 0 | 0 + 1 | 1 | 0 | 0 + 1 | 1 | 0 | 0 + 1 | 1 | 0 | 0 + 1 | 1 | 0 | 0 + 1 | 1 | 0 | 0 + 1 | 1 | 0 | 0 +(10 rows) + +-- All combined +WITH cte AS ( + WITH task_tracker AS ( + SELECT + users_table.user_id as uid_1, users_table.value_2 as val_2 + FROM + users_table + JOIN + events_table + ON + users_table.value_2=events_table.value_2 + ), + real_time AS ( + SELECT * FROM users_table + ), + router_exec AS ( + SELECT * FROM events_table WHERE user_id = 1 + ), + local_table AS ( + SELECT * FROM local_table + ), + join_first_two AS ( + SELECT uid_1, time, value_3 FROM task_tracker JOIN real_time ON val_2=value_3 + ), + join_last_two AS ( + SELECT + router_exec.user_id, local_table.id + FROM + router_exec + JOIN + local_table + ON + router_exec.user_id=local_table.id + ) + SELECT * FROM join_first_two JOIN join_last_two ON id = value_3 ORDER BY 1,2,3,4,5 LIMIT 10 +) +SELECT DISTINCT uid_1, time, value_3 FROM cte ORDER BY 
1, 2, 3 LIMIT 20; + uid_1 | time | value_3 +-------+---------------------------------+--------- + 2 | Wed Nov 22 18:19:49.944985 2017 | 1 +(1 row) + +-- All combined with outer join +WITH cte AS ( + WITH task_tracker AS ( + SELECT + users_table.user_id as uid_1, users_table.value_2 as val_2 + FROM + users_table + JOIN + events_table + ON + users_table.value_2=events_table.value_2 + ), + real_time AS ( + SELECT * FROM users_table + ), + router_exec AS ( + SELECT * FROM events_table WHERE user_id = 1 + ), + local_table AS ( + SELECT * FROM local_table + ), + join_first_two AS ( + SELECT uid_1, time, value_3 FROM task_tracker JOIN real_time ON val_2=value_3 + ), + join_last_two AS ( + SELECT + router_exec.user_id, local_table.id + FROM + router_exec + JOIN + local_table + ON + router_exec.user_id=local_table.id + ) + SELECT uid_1, value_3 as val_3 FROM join_first_two JOIN join_last_two ON id = value_3 ORDER BY 1,2 LIMIT 10 +) +SELECT DISTINCT uid_1, val_3 FROM cte join events_table on cte.val_3=events_table.event_type ORDER BY 1, 2; + uid_1 | val_3 +-------+------- + 2 | 1 +(1 row) + +-- CTEs should not be able to terminate (the last SELECT) in a local query +WITH cte AS ( + SELECT * FROM users_table +) +SELECT count(*) FROM cte JOIN local_table ON (user_id = id); +ERROR: relation local_table is not distributed +-- CTEs should be able to terminate a router query +WITH cte AS ( + WITH cte_1 AS ( + SELECT * FROM local_table WHERE id < 7 + ), + cte_2 AS ( + SELECT * FROM local_table WHERE id > 3 + ), + cte_dist AS ( + SELECT count(*) as u_id FROM users_table + ), + cte_merge AS ( + SELECT cte_1.id as id FROM cte_1 join cte_2 on TRUE + ) + SELECT count(*) FROM users_table join cte_merge on id=user_id +) +SELECT + row_number() OVER (), count(*) +FROM + cte, users_table +WHERE + cte.count=user_id and user_id=5; + row_number | count +------------+------- + 1 | 0 +(1 row) + +-- CTEs should be able to terminate a real-time query +WITH cte AS ( + WITH cte_1 AS ( + SELECT * FROM 
local_table WHERE id < 7 + ), + cte_2 AS ( + SELECT * FROM local_table WHERE id > 3 + ), + cte_dist AS ( + SELECT count(*) as u_id FROM users_table + ), + cte_merge AS ( + SELECT cte_1.id as id FROM cte_1 join cte_2 on TRUE + ) + SELECT count(*) FROM users_table join cte_merge on id=user_id +) +SELECT count(*) FROM cte, users_table where cte.count=user_id; + count +------- + 0 +(1 row) + +SET citus.task_executor_type='task-tracker'; +-- CTEs shouldn't be able to terminate a task-tracker query +WITH cte_1 AS ( + SELECT + u_table.user_id as u_id, e_table.event_type + FROM + users_table as u_table + join + events_table as e_table + on + u_table.value_2=e_table.event_type + WHERE + u_table.user_id < 7 +), +cte_2 AS ( + SELECT + u_table.user_id as u_id, e_table.event_type + FROM + users_table as u_table + join + events_table as e_table + on + u_table.value_2=e_table.event_type + WHERE + u_table.user_id > 3 +), +cte_merge AS ( + SELECT + cte_1.u_id, cte_2.event_type + FROM + cte_1 + join + cte_2 + on cte_1.event_type=cte_2.u_id +) +SELECT + count(*) +FROM + users_table, cte_merge +WHERE + users_table.user_id = cte_merge.u_id; +ERROR: Complex subqueries and CTEs are not supported when task_executor_type is set to 'task-tracker' +DROP SCHEMA with_executors CASCADE; +NOTICE: drop cascades to table local_table diff --git a/src/test/regress/expected/with_join.out b/src/test/regress/expected/with_join.out new file mode 100644 index 000000000..dc44d6042 --- /dev/null +++ b/src/test/regress/expected/with_join.out @@ -0,0 +1,263 @@ +CREATE SCHEMA with_join; +SET search_path TO with_join, public; +SET citus.next_shard_id TO 1501000; +CREATE TABLE with_join.reference_table(user_id int); +SELECT create_reference_table('with_join.reference_table'); + create_reference_table +------------------------ + +(1 row) + +INSERT INTO reference_table VALUES (6), (7); +SET citus.enable_repartition_joins TO on; +-- Two colocated CTE under a non-colocated join +WITH colocated_1 AS ( + SELECT + 
users_table.user_id, events_table.value_2 + FROM + users_table, events_table + WHERE + users_table.user_id = events_table.user_id AND event_type IN (1, 2, 3) +), +colocated_2 AS ( + SELECT + users_table.user_id, events_table.value_2 + FROM + users_table, events_table + WHERE + users_table.user_id = events_table.user_id AND event_type IN (4, 5, 6) +) +SELECT colocated_1.user_id, count(*) +FROM + colocated_1, colocated_2 +WHERE + colocated_1.value_2 = colocated_2.value_2 +GROUP BY + 1 +ORDER BY + 2 DESC, 1; + user_id | count +---------+------- + 3 | 26792 + 4 | 25024 + 5 | 22724 + 2 | 22554 + 6 | 5720 + 1 | 5593 +(6 rows) + +-- Two non-colocated CTE under a co-located join +WITH non_colocated_1 AS ( + SELECT + users_table.user_id + FROM + users_table, events_table + WHERE + users_table.user_id = events_table.value_2 AND event_type IN (1, 2, 3) +), +non_colocated_2 AS ( + SELECT + users_table.user_id + FROM + users_table, events_table + WHERE + users_table.user_id = events_table.value_2 AND event_type IN (4, 5, 6) +) +SELECT non_colocated_1.user_id, count(*) +FROM + non_colocated_1, non_colocated_2 +WHERE + non_colocated_1.user_id = non_colocated_2.user_id +GROUP BY + 1 +ORDER BY + 2 DESC, 1; + user_id | count +---------+------- + 2 | 60588 + 4 | 21160 + 3 | 13005 + 5 | 10140 + 1 | 4802 +(5 rows) + + +-- Subqueries in WHERE and FROM are mixed +-- In this query, only subquery in WHERE is not a colocated join +WITH users_events AS ( + WITH colocated_join AS ( + SELECT + users_table.user_id as uid, event_type + FROM + users_table + join + events_table + on (users_table.user_id = events_table.user_id) + WHERE + events_table.event_type IN (1, 2, 3) + ), + colocated_join_2 AS ( + SELECT + users_table.user_id, event_type + FROM + users_table + join + events_table + on (users_table.user_id = events_table.user_id) + WHERE + events_table.event_type IN (4, 5, 6) + ) + SELECT + uid, colocated_join.event_type + FROM + colocated_join, + colocated_join_2 + WHERE + colocated_join.uid 
= colocated_join_2.user_id AND + colocated_join.event_type IN ( + WITH some_events AS ( + SELECT + event_type + FROM + events_table + WHERE + user_id < 100 + GROUP BY + 1 + ORDER BY + 1 + LIMIT 10 + ) + SELECT + * + FROM + some_events + ) +) +SELECT + * +FROM + users_events +ORDER BY + 1, 2 +LIMIT + 20; +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs are not allowed in the FROM clause when the query has subqueries in the WHERE clause +-- cte LEFT JOIN distributed_table should error out +WITH cte AS ( + SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1 +) +SELECT + cte.user_id, cte.time, events_table.event_type +FROM + cte +LEFT JOIN + events_table ON cte.user_id = events_table.user_id +ORDER BY + 1,2,3 +LIMIT + 5; +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +-- cte RIGHT JOIN distributed_table should work +WITH cte AS ( + SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1 +) +SELECT + cte.user_id, cte.time, events_table.event_type +FROM + cte +RIGHT JOIN + events_table ON cte.user_id = events_table.user_id +ORDER BY + 1,2,3 +LIMIT + 5; + user_id | time | event_type +---------+---------------------------------+------------ + 1 | Wed Nov 22 22:51:43.132261 2017 | 0 + 1 | Wed Nov 22 22:51:43.132261 2017 | 0 + 1 | Wed Nov 22 22:51:43.132261 2017 | 1 + 1 | Wed Nov 22 22:51:43.132261 2017 | 1 + 1 | Wed Nov 22 22:51:43.132261 2017 | 2 +(5 rows) + +-- distributed_table LEFT JOIN cte should work +WITH cte AS ( + SELECT * FROM users_table WHERE value_1 = 1 ORDER BY value_1 +) +SELECT + cte.user_id, cte.time, events_table.event_type +FROM + events_table +LEFT JOIN + cte ON cte.user_id = events_table.user_id +ORDER BY + 1,2,3 +LIMIT + 5; + user_id | time | event_type +---------+---------------------------------+------------ + 1 | Thu Nov 23 09:26:42.145043 2017 | 0 + 1 | Thu Nov 23 09:26:42.145043 2017 | 0 + 1 | Thu Nov 23 09:26:42.145043 2017 | 1 + 1 | 
Thu Nov 23 09:26:42.145043 2017 | 1 + 1 | Thu Nov 23 09:26:42.145043 2017 | 2 +(5 rows) + +-- distributed_table RIGHT JOIN cte should error out +WITH cte AS ( + SELECT * FROM users_table WHERE value_1 = 1 ORDER BY value_1 +) +SELECT + cte.user_id, cte.time, events_table.event_type +FROM + events_table +RIGHT JOIN + cte ON cte.user_id = events_table.user_id +ORDER BY + 1,2,3 +LIMIT + 5; +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +-- cte FULL JOIN distributed_table should error out +WITH cte AS ( + SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1 +) +SELECT + cte.user_id, cte.time, events_table.event_type +FROM + events_table +FULL JOIN + cte ON cte.user_id = events_table.user_id +ORDER BY + 1,2,3 +LIMIT + 5; +ERROR: cannot pushdown the subquery +DETAIL: Complex subqueries and CTEs cannot be in the outer part of the outer join +-- Joins with reference tables are planned as router queries +WITH cte AS ( + SELECT value_2, max(user_id) AS user_id FROM users_table WHERE value_2 = 1 GROUP BY value_2 HAVING count(*) > 1 +) +SELECT + row_number() OVER(), cte.user_id +FROM + cte +FULL JOIN + reference_table ON cte.user_id + 1 = reference_table.user_id +ORDER BY + user_id +LIMIT + 5; + row_number | user_id +------------+--------- + 2 | 6 + 1 | +(2 rows) + +RESET client_min_messages; +DROP SCHEMA with_join CASCADE; +NOTICE: drop cascades to table reference_table diff --git a/src/test/regress/expected/with_modifying.out b/src/test/regress/expected/with_modifying.out new file mode 100644 index 000000000..61a2f6371 --- /dev/null +++ b/src/test/regress/expected/with_modifying.out @@ -0,0 +1,127 @@ +-- Tests for modifying CTEs and CTEs in modifications +SET citus.next_shard_id TO 1502000; +CREATE SCHEMA with_modifying; +SET search_path TO with_modifying, public; +CREATE TABLE with_modifying.modify_table (id int, val int); +SELECT create_distributed_table('modify_table', 'id'); + 
create_distributed_table +-------------------------- + +(1 row) + +CREATE TABLE with_modifying.users_table (LIKE public.users_table INCLUDING ALL); +SELECT create_distributed_table('with_modifying.users_table', 'user_id'); + create_distributed_table +-------------------------- + +(1 row) + +INSERT INTO with_modifying.users_table SELECT * FROM public.users_table; +-- basic insert query in CTE +WITH basic_insert AS ( + INSERT INTO users_table VALUES (1), (2), (3) RETURNING * +) +SELECT + * +FROM + basic_insert; +ERROR: data-modifying statements are not supported in the WITH clauses of distributed queries +-- single-shard UPDATE in CTE +WITH basic_update AS ( + UPDATE users_table SET value_3=42 WHERE user_id=0 RETURNING * +) +SELECT + * +FROM + basic_update; +ERROR: data-modifying statements are not supported in the WITH clauses of distributed queries +-- multi-shard UPDATE in CTE +WITH basic_update AS ( + UPDATE users_table SET value_3=42 WHERE value_2=1 RETURNING * +) +SELECT + * +FROM + basic_update; +ERROR: data-modifying statements are not supported in the WITH clauses of distributed queries +-- single-shard DELETE in CTE +WITH basic_delete AS ( + DELETE FROM users_table WHERE user_id=42 RETURNING * +) +SELECT + * +FROM + basic_delete; +ERROR: data-modifying statements are not supported in the WITH clauses of distributed queries +-- multi-shard DELETE in CTE +WITH basic_delete AS ( + DELETE FROM users_table WHERE value_2=42 RETURNING * +) +SELECT + * +FROM + basic_delete; +ERROR: data-modifying statements are not supported in the WITH clauses of distributed queries +-- INSERT...SELECT query in CTE +WITH copy_table AS ( + INSERT INTO users_table SELECT * FROM users_table RETURNING * +) +SELECT + * +FROM + copy_table; +ERROR: data-modifying statements are not supported in the WITH clauses of distributed queries +-- CTEs prior to INSERT...SELECT via the coordinator should work +WITH cte AS ( + SELECT user_id FROM users_table WHERE value_2 IN (1, 2) +) +INSERT INTO 
modify_table (SELECT * FROM cte); +WITH cte_1 AS ( + SELECT user_id, value_2 FROM users_table WHERE value_2 IN (1, 2, 3, 4) +), +cte_2 AS ( + SELECT user_id, value_2 FROM users_table WHERE value_2 IN (3, 4, 5, 6) +) +INSERT INTO modify_table (SELECT cte_1.user_id FROM cte_1 join cte_2 on cte_1.value_2=cte_2.value_2); +-- even if this is an INSERT...SELECT, the CTE is under SELECT +WITH cte AS ( + SELECT user_id, value_2 FROM users_table WHERE value_2 IN (1, 2) +) +INSERT INTO modify_table (SELECT (SELECT value_2 FROM cte GROUP BY value_2)); +WARNING: more than one row returned by a subquery used as an expression +CONTEXT: while executing command on localhost:57638 +ERROR: could not receive query results +-- CTEs prior to any other modification should error out +WITH cte AS ( + SELECT value_2 FROM users_table WHERE user_id IN (1, 2, 3) +) +DELETE FROM modify_table WHERE id IN (SELECT value_2 FROM cte); +ERROR: common table expressions are not supported in distributed modifications +WITH cte AS ( + SELECT value_2 FROM users_table WHERE user_id IN (1, 2, 3) +) +UPDATE modify_table SET val=-1 WHERE val IN (SELECT * FROM cte); +ERROR: common table expressions are not supported in distributed modifications +WITH cte AS ( + WITH basic AS ( + SELECT value_2 FROM users_table WHERE user_id IN (1, 2, 3) + ) + INSERT INTO modify_table (SELECT * FROM basic) RETURNING * +) +UPDATE modify_table SET val=-2 WHERE id IN (SELECT id FROM cte); +ERROR: common table expressions are not supported in distributed modifications +WITH cte AS ( + WITH basic AS ( + SELECT * FROM events_table WHERE event_type = 5 + ), + basic_2 AS ( + SELECT user_id FROM users_table + ) + INSERT INTO modify_table (SELECT user_id FROM events_table) RETURNING * +) +SELECT * FROM cte; +ERROR: data-modifying statements are not supported in the WITH clauses of distributed queries +DROP SCHEMA with_modifying CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table modify_table +drop cascades 
to table users_table diff --git a/src/test/regress/expected/with_nested.out b/src/test/regress/expected/with_nested.out new file mode 100644 index 000000000..cc158b62f --- /dev/null +++ b/src/test/regress/expected/with_nested.out @@ -0,0 +1,408 @@ +-- Complex nested CTEs +CREATE SCHEMA with_nested; +SET search_path tO with_nested, public; +CREATE TABLE with_nested.local_users (user_id int, event_type int); +INSERT INTO local_users VALUES (0, 0), (1, 4), (1, 7), (2, 1), (3, 3), (5, 4), (6, 2), (10, 7); +-- Can refer to outer CTE because it is recursively planned first +WITH cte_1 AS ( + SELECT DISTINCT user_id FROM users_table +), +cte_2 AS ( + WITH cte_1_1 AS ( + SELECT * FROM cte_1 WHERE user_id > 1 + ) + SELECT * FROM cte_1_1 WHERE user_id < 3 +) +SELECT user_id FROM cte_2 LIMIT 1; + user_id +--------- + 2 +(1 row) + +-- Nested CTEs +WITH users_events AS ( + WITH users_events_2 AS ( + SELECT + users_table.user_id as user_id, + events_table.event_type as event_type + FROM + users_table, + events_table + WHERE + users_table.user_id = events_table.user_id + GROUP BY + users_table.user_id, + events_table.event_type + ) + SELECT + u_events.user_id, events_table.event_type + FROM + users_events_2 as u_events, + events_table + WHERE + u_events.user_id = events_table.user_id + ) +SELECT + * +FROM + users_events +ORDER BY + 1, 2 +LIMIT 20; + user_id | event_type +---------+------------ + 1 | 0 + 1 | 0 + 1 | 0 + 1 | 0 + 1 | 0 + 1 | 0 + 1 | 0 + 1 | 0 + 1 | 0 + 1 | 0 + 1 | 0 + 1 | 0 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 +(20 rows) + +-- Nested CTEs +WITH users_events AS ( + WITH users_events AS ( + WITH users_events AS ( + WITH users_events AS ( + WITH users_events AS ( + WITH users_events AS ( + WITH users_events AS ( + SELECT + users_table.user_id as user_id, + events_table.event_type as event_type + FROM + users_table, + events_table + WHERE + users_table.user_id = events_table.user_id + GROUP BY + users_table.user_id, + events_table.event_type + 
+ ) + SELECT + uid, event_type, value_2, value_3 + FROM + ( + ( + SELECT + user_id as uid + FROM + users_events + ) users + join + events_table + on + users.uid = events_table.event_type + ) a + ) + SELECT + * + FROM + users_events + ORDER BY + 1, 3, 2, 4 + LIMIT 100 + ) + SELECT + * + FROM + users_events + LIMIT 90 + ) + SELECT + * + FROM + users_events + LIMIT 50 + ) + SELECT + uid, event_type, value_2, sum(value_3) as sum_3 + FROM + users_events + GROUP BY + 1, 2, 3 + LIMIT 40 + ) + SELECT + uid, event_type, sum(value_2) as sum_2, sum(sum_3) as sum_3 + FROM + users_events + GROUP BY + 1, 2 + LIMIT 30 +) +SELECT + uid, avg(event_type), sum(sum_2), sum(sum_3) +FROM + users_events +GROUP BY + 1; + uid | avg | sum | sum +-----+------------------------+-----+----- + 1 | 1.00000000000000000000 | 3 | 72 +(1 row) + +-- Nested CTEs +WITH users_events AS ( + -- router select query + WITH users_events_1 AS ( + SELECT + * + FROM + users_table + WHERE + user_id = 1 + ), + -- real-time select query + users_events_2_3 AS ( + SELECT + * + FROM + users_table + WHERE + user_id = 2 + OR + user_id = 3 + ), + -- router select query + -- sub CTE is a real-time executor query but the top level is router select + users_events_4 AS ( + WITH users_events_4_5 AS ( + SELECT + * + FROM + users_table + WHERE + user_id = 4 + OR + user_id = 5 + ) + SELECT + * + FROM + users_events_4_5 + WHERE + user_id = 4 + ), + -- merge all the results from CTEs + merged_users AS ( + SELECT + * + FROM + users_events_1 + UNION + SELECT + * + FROM + users_events_2_3 + UNION + SELECT + * + FROM + users_events_4 + ) + SELECT + * + FROM + merged_users +) +SELECT + * +FROM + users_events +ORDER BY + 1, 2, 3, 4, 5, 6 +LIMIT + 20; + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 | + 1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 | + 1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 
3 | 3 | + 1 | Thu Nov 23 11:11:24.40789 2017 | 3 | 4 | 0 | + 1 | Thu Nov 23 11:44:57.515981 2017 | 4 | 3 | 4 | + 1 | Thu Nov 23 17:23:03.441394 2017 | 5 | 4 | 3 | + 1 | Thu Nov 23 17:30:34.635085 2017 | 3 | 4 | 4 | + 2 | Wed Nov 22 18:19:49.944985 2017 | 3 | 5 | 1 | + 2 | Thu Nov 23 00:19:14.138058 2017 | 3 | 4 | 0 | + 2 | Thu Nov 23 01:04:26.198826 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 01:14:27.658529 2017 | 4 | 4 | 4 | + 2 | Thu Nov 23 03:27:50.327051 2017 | 2 | 2 | 0 | + 2 | Thu Nov 23 06:01:08.148777 2017 | 2 | 4 | 2 | + 2 | Thu Nov 23 06:23:53.572592 2017 | 4 | 4 | 5 | + 2 | Thu Nov 23 06:50:30.797805 2017 | 1 | 1 | 1 | + 2 | Thu Nov 23 06:56:38.46819 2017 | 0 | 1 | 3 | + 2 | Thu Nov 23 08:22:22.169158 2017 | 4 | 2 | 5 | + 2 | Thu Nov 23 08:49:47.029236 2017 | 4 | 2 | 4 | + 2 | Thu Nov 23 09:54:28.13665 2017 | 0 | 3 | 4 | + 2 | Thu Nov 23 09:59:08.975079 2017 | 2 | 2 | 4 | +(20 rows) + +-- Nested CTEs - joined with local table. Not supported yet. +WITH users_events AS ( + -- router select query + WITH users_events_1 AS ( + SELECT + * + FROM + users_table + WHERE + user_id = 1 + ), + -- real-time select query + users_events_2_3 AS ( + SELECT + * + FROM + users_table + WHERE + user_id = 2 + OR + user_id = 3 + ), + -- router select query + -- sub CTE is a real-time executor query but the top level is router select + users_events_4 AS ( + WITH users_events_4_5 AS ( + SELECT + * + FROM + users_table + WHERE + user_id = 4 + OR + user_id = 5 + ) + SELECT + * + FROM + users_events_4_5 + WHERE + user_id = 4 + ) + -- merge all the results from CTEs + SELECT + * + FROM + users_events_1 + UNION + SELECT + * + FROM + users_events_2_3 + UNION + SELECT + * + FROM + users_events_4 +) +SELECT + * +FROM + users_events +ORDER BY + 1, 2, 3, 4, 5, 6 +LIMIT + 20; + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 | + 1 | Thu Nov 23 
03:32:50.803031 2017 | 3 | 2 | 1 | + 1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 | + 1 | Thu Nov 23 11:11:24.40789 2017 | 3 | 4 | 0 | + 1 | Thu Nov 23 11:44:57.515981 2017 | 4 | 3 | 4 | + 1 | Thu Nov 23 17:23:03.441394 2017 | 5 | 4 | 3 | + 1 | Thu Nov 23 17:30:34.635085 2017 | 3 | 4 | 4 | + 2 | Wed Nov 22 18:19:49.944985 2017 | 3 | 5 | 1 | + 2 | Thu Nov 23 00:19:14.138058 2017 | 3 | 4 | 0 | + 2 | Thu Nov 23 01:04:26.198826 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 01:14:27.658529 2017 | 4 | 4 | 4 | + 2 | Thu Nov 23 03:27:50.327051 2017 | 2 | 2 | 0 | + 2 | Thu Nov 23 06:01:08.148777 2017 | 2 | 4 | 2 | + 2 | Thu Nov 23 06:23:53.572592 2017 | 4 | 4 | 5 | + 2 | Thu Nov 23 06:50:30.797805 2017 | 1 | 1 | 1 | + 2 | Thu Nov 23 06:56:38.46819 2017 | 0 | 1 | 3 | + 2 | Thu Nov 23 08:22:22.169158 2017 | 4 | 2 | 5 | + 2 | Thu Nov 23 08:49:47.029236 2017 | 4 | 2 | 4 | + 2 | Thu Nov 23 09:54:28.13665 2017 | 0 | 3 | 4 | + 2 | Thu Nov 23 09:59:08.975079 2017 | 2 | 2 | 4 | +(20 rows) + +-- access to uncle, use window function, apply aggregates, use group by, LIMIT/OFFSET +WITH cte1 AS ( + WITH cte1_1 AS ( + WITH cte1_1_1 AS ( + SELECT user_id, time, value_2 FROM users_table + ), + cte1_1_2 AS ( + SELECT + user_id, count + FROM ( + SELECT + user_id, + count(value_2) OVER (PARTITION BY user_id) + FROM + users_table + GROUP BY 1, users_table.value_2 + )aa + GROUP BY + 1,2 + ORDER BY + 1,2 + LIMIT + 4 + OFFSET + 2 + ) + SELECT cte1_1_1.user_id, cte1_1_1.time, cte1_1_2.count FROM cte1_1_1 join cte1_1_2 on cte1_1_1.user_id=cte1_1_2.user_id + ), + cte1_2 AS ( + WITH cte1_2_1 AS ( + SELECT + user_id, time, avg(value_1)::real as value_1, min(value_2) as value_2 + FROM + users_table + GROUP BY + 1, 2 + ), + cte1_2_2 AS ( + SELECT cte1_2_1.user_id, cte1_1.time, cte1_2_1.value_1, cte1_1.count FROM cte1_2_1 join cte1_1 on cte1_2_1.time=cte1_1.time and cte1_2_1.user_id=cte1_1.user_id + ) + SELECT * FROM cte1_2_2 + ) + SELECT * FROM cte1_2 +), +cte2 AS ( + WITH cte2_1 AS ( + WITH cte2_1_1 AS ( + 
SELECT * FROM cte1 + ) + SELECT user_id, time, value_1, min(count) FROM cte2_1_1 GROUP BY 1, 2, 3 ORDER BY 1,2,3 + ) + SELECT * FROM cte2_1 LIMIT 3 OFFSET 2 +) +SELECT * FROM cte2; + user_id | time | value_1 | min +---------+---------------------------------+---------+----- + 3 | Wed Nov 22 23:24:32.080584 2017 | 3 | 5 + 3 | Thu Nov 23 00:15:45.610845 2017 | 1 | 5 + 3 | Thu Nov 23 03:23:24.702501 2017 | 1 | 5 +(3 rows) + +DROP SCHEMA with_nested CASCADE; +NOTICE: drop cascades to table local_users diff --git a/src/test/regress/expected/with_partitioning.out b/src/test/regress/expected/with_partitioning.out new file mode 100644 index 000000000..db812304d --- /dev/null +++ b/src/test/regress/expected/with_partitioning.out @@ -0,0 +1,94 @@ +CREATE SCHEMA with_partitioning; +SET search_path TO with_partitioning, public; +SET citus.shard_replication_factor TO 1; +CREATE TABLE with_partitioning.local_users_2 (user_id int, event_type int); +INSERT INTO local_users_2 VALUES (0, 0), (1, 4), (1, 7), (2, 1), (3, 3), (5, 4), (6, 2), (10, 7); +CREATE TABLE with_partitioning.partitioning_test(id int, time date) PARTITION BY RANGE (time); + +-- create its partitions +CREATE TABLE with_partitioning.partitioning_test_2017 PARTITION OF partitioning_test FOR VALUES FROM ('2017-01-01') TO ('2018-01-01'); +CREATE TABLE with_partitioning.partitioning_test_2010 PARTITION OF partitioning_test FOR VALUES FROM ('2010-01-01') TO ('2011-01-01'); +-- load some data and distribute tables +INSERT INTO partitioning_test VALUES (1, '2017-11-23'); +INSERT INTO partitioning_test VALUES (2, '2010-07-07'); +INSERT INTO partitioning_test_2017 VALUES (3, '2017-11-22'); +INSERT INTO partitioning_test_2010 VALUES (4, '2010-03-03'); +-- distribute partitioned table +SELECT create_distributed_table('with_partitioning.partitioning_test', 'id'); +NOTICE: Copying data from local table... +NOTICE: Copying data from local table... 
+ create_distributed_table +-------------------------- + +(1 row) + +-- Join of a CTE on distributed table and then join with a partitioned table +WITH cte AS ( + SELECT * FROM users_table +) +SELECT DISTINCT ON (id) id, cte.time FROM cte join partitioning_test on cte.time::date=partitioning_test.time ORDER BY 1, 2 LIMIT 3; + id | time +----+--------------------------------- + 1 | Thu Nov 23 00:07:11.068353 2017 + 3 | Wed Nov 22 18:19:49.944985 2017 +(2 rows) + +-- Join of a CTE on distributed table and then join with a partitioned table hitting on only one partition +WITH cte AS ( + SELECT * FROM users_table +) +SELECT DISTINCT ON (id) id, cte.time FROM cte join partitioning_test on cte.time::date=partitioning_test.time WHERE partitioning_test.time >'2017-11-20' ORDER BY 1, 2 LIMIT 3; + id | time +----+--------------------------------- + 1 | Thu Nov 23 00:07:11.068353 2017 + 3 | Wed Nov 22 18:19:49.944985 2017 +(2 rows) + +-- Join with a distributed table and then join of two CTEs +WITH cte AS ( + SELECT id, time FROM partitioning_test +), +cte_2 AS ( + SELECT * FROM partitioning_test WHERE id > 2 +), +cte_joined AS ( + SELECT user_id, cte_2.time FROM users_table join cte_2 on (users_table.time::date = cte_2.time) +), +cte_joined_2 AS ( + SELECT user_id, cte_joined.time FROM cte_joined join cte on (cte_joined.time = cte.time) +) +SELECT DISTINCT ON (event_type) event_type, cte_joined_2.user_id FROM events_table join cte_joined_2 on (cte_joined_2.time=events_table.time::date) ORDER BY 1, 2 LIMIT 10 OFFSET 2; + event_type | user_id +------------+--------- + 2 | 1 + 3 | 1 + 4 | 1 + 5 | 1 +(4 rows) + +-- Join a partitioned table with a local table (both in CTEs) +-- and then with a distributed table. 
After all join with a +-- partitioned table again +WITH cte AS ( + SELECT id, time FROM partitioning_test +), +cte_2 AS ( + SELECT * FROM local_users_2 +), +cte_joined AS ( + SELECT user_id, cte.time FROM cte join cte_2 on (cte.id = cte_2.user_id) +), +cte_joined_2 AS ( + SELECT users_table.user_id, cte_joined.time FROM cte_joined join users_table on (cte_joined.time = users_table.time::date) +) +SELECT DISTINCT ON (id) id, cte_joined_2.time FROM cte_joined_2 join partitioning_test on (cte_joined_2.time=partitioning_test.time) ORDER BY 1, 2; + id | time +----+------------ + 1 | 11-23-2017 + 3 | 11-22-2017 +(2 rows) + +DROP SCHEMA with_partitioning CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table local_users_2 +drop cascades to table partitioning_test diff --git a/src/test/regress/expected/with_partitioning_0.out b/src/test/regress/expected/with_partitioning_0.out new file mode 100644 index 000000000..c7b95adb8 --- /dev/null +++ b/src/test/regress/expected/with_partitioning_0.out @@ -0,0 +1,95 @@ +CREATE SCHEMA with_partitioning; +SET search_path TO with_partitioning, public; +SET citus.shard_replication_factor TO 1; +CREATE TABLE with_partitioning.local_users_2 (user_id int, event_type int); +INSERT INTO local_users_2 VALUES (0, 0), (1, 4), (1, 7), (2, 1), (3, 3), (5, 4), (6, 2), (10, 7); +CREATE TABLE with_partitioning.partitioning_test(id int, time date) PARTITION BY RANGE (time); +ERROR: syntax error at or near "PARTITION" +LINE 1: ...partitioning.partitioning_test(id int, time date) PARTITION ... + ^ + +-- create its partitions +CREATE TABLE with_partitioning.partitioning_test_2017 PARTITION OF partitioning_test FOR VALUES FROM ('2017-01-01') TO ('2018-01-01'); +ERROR: syntax error at or near "PARTITION" +LINE 1: ...TE TABLE with_partitioning.partitioning_test_2017 PARTITION ... 
+ ^ +CREATE TABLE with_partitioning.partitioning_test_2010 PARTITION OF partitioning_test FOR VALUES FROM ('2010-01-01') TO ('2011-01-01'); +ERROR: syntax error at or near "PARTITION" +LINE 1: ...TE TABLE with_partitioning.partitioning_test_2010 PARTITION ... + ^ +-- load some data and distribute tables +INSERT INTO partitioning_test VALUES (1, '2017-11-23'); +ERROR: relation "partitioning_test" does not exist +LINE 1: INSERT INTO partitioning_test VALUES (1, '2017-11-23'); + ^ +INSERT INTO partitioning_test VALUES (2, '2010-07-07'); +ERROR: relation "partitioning_test" does not exist +LINE 1: INSERT INTO partitioning_test VALUES (2, '2010-07-07'); + ^ +INSERT INTO partitioning_test_2017 VALUES (3, '2017-11-22'); +ERROR: relation "partitioning_test_2017" does not exist +LINE 1: INSERT INTO partitioning_test_2017 VALUES (3, '2017-11-22'); + ^ +INSERT INTO partitioning_test_2010 VALUES (4, '2010-03-03'); +ERROR: relation "partitioning_test_2010" does not exist +LINE 1: INSERT INTO partitioning_test_2010 VALUES (4, '2010-03-03'); + ^ +-- distribute partitioned table +SELECT create_distributed_table('with_partitioning.partitioning_test', 'id'); +ERROR: relation "with_partitioning.partitioning_test" does not exist +LINE 1: SELECT create_distributed_table('with_partitioning.partition... + ^ +-- Join of a CTE on distributed table and then join with a partitioned table +WITH cte AS ( + SELECT * FROM users_table +) +SELECT DISTINCT ON (id) id, cte.time FROM cte join partitioning_test on cte.time::date=partitioning_test.time ORDER BY 1, 2 LIMIT 3; +ERROR: relation "partitioning_test" does not exist +LINE 4: ...ELECT DISTINCT ON (id) id, cte.time FROM cte join partitioni... 
+ ^ +-- Join of a CTE on distributed table and then join with a partitioned table hitting on only one partition +WITH cte AS ( + SELECT * FROM users_table +) +SELECT DISTINCT ON (id) id, cte.time FROM cte join partitioning_test on cte.time::date=partitioning_test.time WHERE partitioning_test.time >'2017-11-20' ORDER BY 1, 2 LIMIT 3; +ERROR: relation "partitioning_test" does not exist +LINE 4: ...ELECT DISTINCT ON (id) id, cte.time FROM cte join partitioni... + ^ +-- Join with a distributed table and then join of two CTEs +WITH cte AS ( + SELECT id, time FROM partitioning_test +), +cte_2 AS ( + SELECT * FROM partitioning_test WHERE id > 2 +), +cte_joined AS ( + SELECT user_id, cte_2.time FROM users_table join cte_2 on (users_table.time::date = cte_2.time) +), +cte_joined_2 AS ( + SELECT user_id, cte_joined.time FROM cte_joined join cte on (cte_joined.time = cte.time) +) +SELECT DISTINCT ON (event_type) event_type, cte_joined_2.user_id FROM events_table join cte_joined_2 on (cte_joined_2.time=events_table.time::date) ORDER BY 1, 2 LIMIT 10 OFFSET 2; +ERROR: relation "partitioning_test" does not exist +LINE 2: SELECT id, time FROM partitioning_test + ^ +-- Join a partitioned table with a local table (both in CTEs) +-- and then with a distributed table. 
After all join with a +-- partitioned table again +WITH cte AS ( + SELECT id, time FROM partitioning_test +), +cte_2 AS ( + SELECT * FROM local_users_2 +), +cte_joined AS ( + SELECT user_id, cte.time FROM cte join cte_2 on (cte.id = cte_2.user_id) +), +cte_joined_2 AS ( + SELECT users_table.user_id, cte_joined.time FROM cte_joined join users_table on (cte_joined.time = users_table.time::date) +) +SELECT DISTINCT ON (id) id, cte_joined_2.time FROM cte_joined_2 join partitioning_test on (cte_joined_2.time=partitioning_test.time) ORDER BY 1, 2; +ERROR: relation "partitioning_test" does not exist +LINE 2: SELECT id, time FROM partitioning_test + ^ +DROP SCHEMA with_partitioning CASCADE; +NOTICE: drop cascades to table local_users_2 diff --git a/src/test/regress/expected/with_prepare.out b/src/test/regress/expected/with_prepare.out new file mode 100644 index 000000000..629607c72 --- /dev/null +++ b/src/test/regress/expected/with_prepare.out @@ -0,0 +1,547 @@ +-- prepared statements +PREPARE prepared_test_1 AS +WITH basic AS( + SELECT * FROM users_table +) +SELECT + * +FROM + basic +WHERE + basic.value_2 IN (1, 2, 3) +ORDER BY + 1, 2, 3, 4, 5, 6 +LIMIT 10; +PREPARE prepared_test_2 AS +WITH users_events AS( + SELECT + users_table.user_id as user_id, + events_table.event_type as event_type + FROM + users_table, + events_table + WHERE + users_table.user_id = events_table.user_id + GROUP BY + users_table.user_id, + events_table.event_type +), +event_attendee_count AS( + SELECT + event_type, count(user_id) + FROM + users_events + GROUP BY + 1 +), +user_coolness AS( + SELECT + user_id, + sum(count) + FROM + users_events + join + event_attendee_count + on (users_events.event_type = event_attendee_count.event_type) + GROUP BY + user_id +) +SELECT + * +FROM + user_coolness +ORDER BY + 2, 1 +LIMIT + 10; +PREPARE prepared_test_3(integer) AS +WITH users_events AS( + -- events 1 and 2 only + WITH spec_events AS( + SELECT + * + FROM + events_table + WHERE + event_type IN (1, 2) + ) + 
-- users who have done 1 or 2 + SELECT + users_table.user_id, + spec_events.event_type + FROM + users_table + join + spec_events + on (users_table.user_id = spec_events.user_id) + ORDER BY + 1, + event_type +), +event_attendee_count AS( + -- distinct attendee count of each event in users_event + WITH event_attendee_count AS( + SELECT + event_type, count(user_id) + FROM + users_events + GROUP BY + 1 + ) + -- distinct attendee count of first 3 events + SELECT + * + FROM + event_attendee_count + ORDER BY + event_type + LIMIT 3 +), +-- measure how cool an attendee is by checking the number of events he attended +user_coolness AS( + SELECT + user_id, + sum(count) + FROM + users_events + join + event_attendee_count + on (users_events.event_type = $1) + GROUP BY + user_id +) +SELECT + * +FROM + user_coolness +ORDER BY + 2, 1 +LIMIT + 10; +PREPARE prepared_test_4(integer, integer, integer) AS +WITH basic AS( + SELECT * FROM users_table WHERE value_2 IN ($1, $2, $3) +) +SELECT + * +FROM + basic +ORDER BY + 1, 2, 3, 4, 5, 6 +LIMIT 10; +-- prepared statement which inserts in a CTE should fail +PREPARE prepared_partition_column_insert(integer) AS +WITH prepared_insert AS ( + INSERT INTO users_table VALUES ($1) RETURNING * +) +SELECT * FROM prepared_insert; +PREPARE prepared_test_5(integer, integer, integer) AS +-- router select query +WITH users_events_1 AS ( + SELECT + * + FROM + users_table + WHERE + user_id = $1 +), +-- real-time select query +users_events_2_3 AS ( + SELECT + * + FROM + users_table + WHERE + user_id = $2 + OR + user_id = $3 +), +merged_users AS ( + SELECT + * + FROM + users_events_1 + UNION + SELECT + * + FROM + users_events_2_3 +) +SELECT + * +FROM + merged_users +ORDER BY + 1, 2, 3, 4, 5, 6 +LIMIT 10; +EXECUTE prepared_test_1; + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 | + 1 | Thu Nov 23 09:26:42.145043 2017 
| 1 | 3 | 3 | + 1 | Thu Nov 23 11:44:57.515981 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 01:04:26.198826 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 03:27:50.327051 2017 | 2 | 2 | 0 | + 2 | Thu Nov 23 06:50:30.797805 2017 | 1 | 1 | 1 | + 2 | Thu Nov 23 06:56:38.46819 2017 | 0 | 1 | 3 | + 2 | Thu Nov 23 08:22:22.169158 2017 | 4 | 2 | 5 | + 2 | Thu Nov 23 08:49:47.029236 2017 | 4 | 2 | 4 | + 2 | Thu Nov 23 09:54:28.13665 2017 | 0 | 3 | 4 | +(10 rows) + +EXECUTE prepared_test_1; + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 | + 1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 | + 1 | Thu Nov 23 11:44:57.515981 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 01:04:26.198826 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 03:27:50.327051 2017 | 2 | 2 | 0 | + 2 | Thu Nov 23 06:50:30.797805 2017 | 1 | 1 | 1 | + 2 | Thu Nov 23 06:56:38.46819 2017 | 0 | 1 | 3 | + 2 | Thu Nov 23 08:22:22.169158 2017 | 4 | 2 | 5 | + 2 | Thu Nov 23 08:49:47.029236 2017 | 4 | 2 | 4 | + 2 | Thu Nov 23 09:54:28.13665 2017 | 0 | 3 | 4 | +(10 rows) + +EXECUTE prepared_test_1; + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 | + 1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 | + 1 | Thu Nov 23 11:44:57.515981 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 01:04:26.198826 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 03:27:50.327051 2017 | 2 | 2 | 0 | + 2 | Thu Nov 23 06:50:30.797805 2017 | 1 | 1 | 1 | + 2 | Thu Nov 23 06:56:38.46819 2017 | 0 | 1 | 3 | + 2 | Thu Nov 23 08:22:22.169158 2017 | 4 | 2 | 5 | + 2 | Thu Nov 23 08:49:47.029236 2017 | 4 | 2 | 4 | + 2 | Thu Nov 23 09:54:28.13665 2017 | 0 | 3 | 4 | +(10 rows) + +EXECUTE prepared_test_1; + user_id | time | value_1 | value_2 | value_3 | value_4 
+---------+---------------------------------+---------+---------+---------+--------- + 1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 | + 1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 | + 1 | Thu Nov 23 11:44:57.515981 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 01:04:26.198826 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 03:27:50.327051 2017 | 2 | 2 | 0 | + 2 | Thu Nov 23 06:50:30.797805 2017 | 1 | 1 | 1 | + 2 | Thu Nov 23 06:56:38.46819 2017 | 0 | 1 | 3 | + 2 | Thu Nov 23 08:22:22.169158 2017 | 4 | 2 | 5 | + 2 | Thu Nov 23 08:49:47.029236 2017 | 4 | 2 | 4 | + 2 | Thu Nov 23 09:54:28.13665 2017 | 0 | 3 | 4 | +(10 rows) + +EXECUTE prepared_test_1; + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 | + 1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 | + 1 | Thu Nov 23 11:44:57.515981 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 01:04:26.198826 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 03:27:50.327051 2017 | 2 | 2 | 0 | + 2 | Thu Nov 23 06:50:30.797805 2017 | 1 | 1 | 1 | + 2 | Thu Nov 23 06:56:38.46819 2017 | 0 | 1 | 3 | + 2 | Thu Nov 23 08:22:22.169158 2017 | 4 | 2 | 5 | + 2 | Thu Nov 23 08:49:47.029236 2017 | 4 | 2 | 4 | + 2 | Thu Nov 23 09:54:28.13665 2017 | 0 | 3 | 4 | +(10 rows) + +EXECUTE prepared_test_1; + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 | + 1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 | + 1 | Thu Nov 23 11:44:57.515981 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 01:04:26.198826 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 03:27:50.327051 2017 | 2 | 2 | 0 | + 2 | Thu Nov 23 06:50:30.797805 2017 | 1 | 1 | 1 | + 2 | Thu Nov 23 06:56:38.46819 2017 | 0 | 1 | 3 | + 2 | Thu Nov 23 08:22:22.169158 2017 | 4 | 2 | 5 | + 2 | Thu Nov 23 08:49:47.029236 2017 | 4 | 2 | 4 | + 2 | Thu Nov 23 09:54:28.13665 2017 | 0 | 3 | 4 
| +(10 rows) + +EXECUTE prepared_test_2; + user_id | sum +---------+----- + 1 | 29 + 3 | 29 + 6 | 29 + 2 | 33 + 4 | 33 + 5 | 33 +(6 rows) + +EXECUTE prepared_test_2; + user_id | sum +---------+----- + 1 | 29 + 3 | 29 + 6 | 29 + 2 | 33 + 4 | 33 + 5 | 33 +(6 rows) + +EXECUTE prepared_test_2; + user_id | sum +---------+----- + 1 | 29 + 3 | 29 + 6 | 29 + 2 | 33 + 4 | 33 + 5 | 33 +(6 rows) + +EXECUTE prepared_test_2; + user_id | sum +---------+----- + 1 | 29 + 3 | 29 + 6 | 29 + 2 | 33 + 4 | 33 + 5 | 33 +(6 rows) + +EXECUTE prepared_test_2; + user_id | sum +---------+----- + 1 | 29 + 3 | 29 + 6 | 29 + 2 | 33 + 4 | 33 + 5 | 33 +(6 rows) + +EXECUTE prepared_test_2; + user_id | sum +---------+----- + 1 | 29 + 3 | 29 + 6 | 29 + 2 | 33 + 4 | 33 + 5 | 33 +(6 rows) + +EXECUTE prepared_test_3(1); + user_id | sum +---------+-------- + 1 | 10850 + 6 | 15500 + 3 | 52700 + 4 | 71300 + 2 | 97650 + 5 | 100750 +(6 rows) + +EXECUTE prepared_test_3(2); + user_id | sum +---------+------- + 1 | 10850 + 6 | 15500 + 5 | 20150 + 2 | 41850 + 4 | 71300 + 3 | 92225 +(6 rows) + +EXECUTE prepared_test_3(3); + user_id | sum +---------+----- +(0 rows) + +EXECUTE prepared_test_3(4); + user_id | sum +---------+----- +(0 rows) + +EXECUTE prepared_test_3(5); + user_id | sum +---------+----- +(0 rows) + +EXECUTE prepared_test_3(6); + user_id | sum +---------+----- +(0 rows) + +EXECUTE prepared_test_4(1, 2, 3); + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 | + 1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 | + 1 | Thu Nov 23 11:44:57.515981 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 01:04:26.198826 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 03:27:50.327051 2017 | 2 | 2 | 0 | + 2 | Thu Nov 23 06:50:30.797805 2017 | 1 | 1 | 1 | + 2 | Thu Nov 23 06:56:38.46819 2017 | 0 | 1 | 3 | + 2 | Thu Nov 23 08:22:22.169158 2017 | 4 | 2 | 5 | + 2 | Thu Nov 23 08:49:47.029236 2017 | 4 | 2 | 
4 | + 2 | Thu Nov 23 09:54:28.13665 2017 | 0 | 3 | 4 | +(10 rows) + +EXECUTE prepared_test_4(2, 3, 4); + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 | + 1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 | + 1 | Thu Nov 23 11:11:24.40789 2017 | 3 | 4 | 0 | + 1 | Thu Nov 23 11:44:57.515981 2017 | 4 | 3 | 4 | + 1 | Thu Nov 23 17:23:03.441394 2017 | 5 | 4 | 3 | + 1 | Thu Nov 23 17:30:34.635085 2017 | 3 | 4 | 4 | + 2 | Thu Nov 23 00:19:14.138058 2017 | 3 | 4 | 0 | + 2 | Thu Nov 23 01:04:26.198826 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 01:14:27.658529 2017 | 4 | 4 | 4 | + 2 | Thu Nov 23 03:27:50.327051 2017 | 2 | 2 | 0 | +(10 rows) + +EXECUTE prepared_test_4(3, 4, 5); + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 | + 1 | Thu Nov 23 11:11:24.40789 2017 | 3 | 4 | 0 | + 1 | Thu Nov 23 11:44:57.515981 2017 | 4 | 3 | 4 | + 1 | Thu Nov 23 17:23:03.441394 2017 | 5 | 4 | 3 | + 1 | Thu Nov 23 17:30:34.635085 2017 | 3 | 4 | 4 | + 2 | Wed Nov 22 18:19:49.944985 2017 | 3 | 5 | 1 | + 2 | Thu Nov 23 00:19:14.138058 2017 | 3 | 4 | 0 | + 2 | Thu Nov 23 01:04:26.198826 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 01:14:27.658529 2017 | 4 | 4 | 4 | + 2 | Thu Nov 23 06:01:08.148777 2017 | 2 | 4 | 2 | +(10 rows) + +EXECUTE prepared_test_4(4, 5, 6); + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 1 | Thu Nov 23 11:11:24.40789 2017 | 3 | 4 | 0 | + 1 | Thu Nov 23 17:23:03.441394 2017 | 5 | 4 | 3 | + 1 | Thu Nov 23 17:30:34.635085 2017 | 3 | 4 | 4 | + 2 | Wed Nov 22 18:19:49.944985 2017 | 3 | 5 | 1 | + 2 | Thu Nov 23 00:19:14.138058 2017 | 3 | 4 | 0 | + 2 | Thu Nov 23 01:14:27.658529 2017 | 4 | 4 | 4 | + 2 | Thu Nov 23 
06:01:08.148777 2017 | 2 | 4 | 2 | + 2 | Thu Nov 23 06:23:53.572592 2017 | 4 | 4 | 5 | + 2 | Thu Nov 23 11:41:04.042936 2017 | 3 | 4 | 1 | + 2 | Thu Nov 23 11:48:24.943542 2017 | 0 | 5 | 5 | +(10 rows) + +EXECUTE prepared_test_4(5, 6, 7); + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 2 | Wed Nov 22 18:19:49.944985 2017 | 3 | 5 | 1 | + 2 | Thu Nov 23 11:48:24.943542 2017 | 0 | 5 | 5 | + 3 | Thu Nov 23 04:01:08.04806 2017 | 5 | 5 | 3 | + 3 | Thu Nov 23 05:01:44.885505 2017 | 3 | 5 | 4 | + 3 | Thu Nov 23 11:31:17.403189 2017 | 4 | 5 | 3 | + 3 | Thu Nov 23 11:41:21.157066 2017 | 3 | 5 | 3 | + 3 | Thu Nov 23 12:56:49.29191 2017 | 0 | 5 | 1 | + 3 | Thu Nov 23 17:18:51.048758 2017 | 1 | 5 | 5 | + 4 | Thu Nov 23 07:28:42.537255 2017 | 3 | 5 | 3 | + 4 | Thu Nov 23 11:45:39.744961 2017 | 4 | 5 | 4 | +(10 rows) + +EXECUTE prepared_test_4(6, 7, 8); + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+------+---------+---------+---------+--------- +(0 rows) + +EXECUTE prepared_test_5(1, 2, 3); + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 1 | Wed Nov 22 22:51:43.132261 2017 | 4 | 0 | 3 | + 1 | Thu Nov 23 03:32:50.803031 2017 | 3 | 2 | 1 | + 1 | Thu Nov 23 09:26:42.145043 2017 | 1 | 3 | 3 | + 1 | Thu Nov 23 11:11:24.40789 2017 | 3 | 4 | 0 | + 1 | Thu Nov 23 11:44:57.515981 2017 | 4 | 3 | 4 | + 1 | Thu Nov 23 17:23:03.441394 2017 | 5 | 4 | 3 | + 1 | Thu Nov 23 17:30:34.635085 2017 | 3 | 4 | 4 | + 2 | Wed Nov 22 18:19:49.944985 2017 | 3 | 5 | 1 | + 2 | Thu Nov 23 00:19:14.138058 2017 | 3 | 4 | 0 | + 2 | Thu Nov 23 01:04:26.198826 2017 | 4 | 3 | 4 | +(10 rows) + +EXECUTE prepared_test_5(2, 3, 4); + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 2 | Wed Nov 22 18:19:49.944985 2017 | 
3 | 5 | 1 | + 2 | Thu Nov 23 00:19:14.138058 2017 | 3 | 4 | 0 | + 2 | Thu Nov 23 01:04:26.198826 2017 | 4 | 3 | 4 | + 2 | Thu Nov 23 01:14:27.658529 2017 | 4 | 4 | 4 | + 2 | Thu Nov 23 03:27:50.327051 2017 | 2 | 2 | 0 | + 2 | Thu Nov 23 06:01:08.148777 2017 | 2 | 4 | 2 | + 2 | Thu Nov 23 06:23:53.572592 2017 | 4 | 4 | 5 | + 2 | Thu Nov 23 06:50:30.797805 2017 | 1 | 1 | 1 | + 2 | Thu Nov 23 06:56:38.46819 2017 | 0 | 1 | 3 | + 2 | Thu Nov 23 08:22:22.169158 2017 | 4 | 2 | 5 | +(10 rows) + +EXECUTE prepared_test_5(3, 4, 5); + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 3 | Wed Nov 22 18:43:51.450263 2017 | 1 | 1 | 4 | + 3 | Wed Nov 22 20:43:31.008625 2017 | 1 | 3 | 2 | + 3 | Wed Nov 22 23:24:32.080584 2017 | 3 | 2 | 5 | + 3 | Thu Nov 23 00:15:45.610845 2017 | 1 | 1 | 4 | + 3 | Thu Nov 23 03:23:24.702501 2017 | 1 | 2 | 5 | + 3 | Thu Nov 23 03:52:32.008895 2017 | 4 | 2 | 0 | + 3 | Thu Nov 23 04:01:08.04806 2017 | 5 | 5 | 3 | + 3 | Thu Nov 23 05:01:44.885505 2017 | 3 | 5 | 4 | + 3 | Thu Nov 23 06:20:05.854857 2017 | 1 | 4 | 2 | + 3 | Thu Nov 23 09:57:41.540228 2017 | 2 | 2 | 3 | +(10 rows) + +EXECUTE prepared_test_5(4, 5, 6); + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 4 | Wed Nov 22 21:33:03.616802 2017 | 5 | 4 | 1 | + 4 | Wed Nov 22 23:48:11.949567 2017 | 2 | 0 | 0 | + 4 | Wed Nov 22 23:59:46.493416 2017 | 3 | 1 | 3 | + 4 | Thu Nov 23 00:28:45.060248 2017 | 4 | 4 | 3 | + 4 | Thu Nov 23 01:55:21.824618 2017 | 3 | 1 | 4 | + 4 | Thu Nov 23 02:14:35.047974 2017 | 4 | 4 | 1 | + 4 | Thu Nov 23 03:34:40.419294 2017 | 1 | 0 | 4 | + 4 | Thu Nov 23 05:42:12.89386 2017 | 2 | 3 | 3 | + 4 | Thu Nov 23 06:39:06.287818 2017 | 3 | 3 | 2 | + 4 | Thu Nov 23 06:50:08.101207 2017 | 2 | 1 | 5 | +(10 rows) + +EXECUTE prepared_test_5(5, 6, 7); + user_id | time | value_1 | value_2 | value_3 | 
value_4 +---------+---------------------------------+---------+---------+---------+--------- + 5 | Wed Nov 22 20:43:18.667473 2017 | 0 | 3 | 2 | + 5 | Wed Nov 22 21:02:07.575129 2017 | 2 | 0 | 2 | + 5 | Wed Nov 22 22:10:24.315371 2017 | 1 | 2 | 1 | + 5 | Wed Nov 22 22:31:47.62577 2017 | 3 | 1 | 4 | + 5 | Wed Nov 22 23:10:42.777699 2017 | 3 | 4 | 5 | + 5 | Thu Nov 23 00:46:13.498577 2017 | 3 | 2 | 2 | + 5 | Thu Nov 23 00:54:44.192608 2017 | 1 | 3 | 2 | + 5 | Thu Nov 23 02:09:42.27857 2017 | 3 | 2 | 4 | + 5 | Thu Nov 23 02:50:32.678074 2017 | 4 | 2 | 4 | + 5 | Thu Nov 23 06:35:05.166535 2017 | 5 | 5 | 1 | +(10 rows) + +EXECUTE prepared_test_5(6, 7, 8); + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+---------------------------------+---------+---------+---------+--------- + 6 | Wed Nov 22 20:15:53.317797 2017 | 1 | 1 | 1 | + 6 | Wed Nov 22 23:01:24.82289 2017 | 2 | 4 | 1 | + 6 | Thu Nov 23 00:07:11.068353 2017 | 1 | 1 | 4 | + 6 | Thu Nov 23 00:09:44.19812 2017 | 5 | 2 | 0 | + 6 | Thu Nov 23 01:13:50.526322 2017 | 2 | 4 | 1 | + 6 | Thu Nov 23 01:14:55.769581 2017 | 0 | 0 | 5 | + 6 | Thu Nov 23 10:22:11.02918 2017 | 5 | 0 | 5 | + 6 | Thu Nov 23 11:08:04.244582 2017 | 2 | 3 | 2 | + 6 | Thu Nov 23 13:51:16.92838 2017 | 0 | 4 | 2 | + 6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | +(10 rows) + +EXECUTE prepared_partition_column_insert(1); +ERROR: data-modifying statements are not supported in the WITH clauses of distributed queries +DEALLOCATE ALL; diff --git a/src/test/regress/expected/with_where.out b/src/test/regress/expected/with_where.out new file mode 100644 index 000000000..03fd85f8a --- /dev/null +++ b/src/test/regress/expected/with_where.out @@ -0,0 +1,168 @@ +-- More complex CTEs in WHERE +SET citus.enable_repartition_joins TO on; +-- CTE in WHERE basic +WITH events AS ( + SELECT + event_type + FROM + events_table + WHERE + user_id < 5 + GROUP BY + event_type + ORDER BY + event_type + LIMIT 10 +) +SELECT + count(*) +FROM + events_table 
+WHERE + event_type +IN + (SELECT + event_type + FROM + events); + count +------- + 101 +(1 row) + +WITH users AS ( + SELECT + events_table.user_id + FROM + events_table, users_table + WHERE + events_table.user_id = users_table.user_id + GROUP BY + 1 + ORDER BY + 1 + LIMIT 10 +) +SELECT + count(*) +FROM + events_table +WHERE + user_id IN + ( + SELECT + * + FROM + users + ); + count +------- + 101 +(1 row) + +WITH users AS ( + SELECT + events_table.user_id + FROM + events_table, users_table + WHERE + events_table.user_id = users_table.user_id + GROUP BY + 1 + ORDER BY + 1 + LIMIT 10 +) +SELECT + count(*) +FROM + events_table +WHERE + user_id IN + ( + SELECT + * + FROM + users + ); + count +------- + 101 +(1 row) + +-- CTE with non-colocated join in WHERE +WITH users AS ( + SELECT + events_table.user_id + FROM + events_table, users_table + WHERE + events_table.value_2 = users_table.value_2 + GROUP BY + 1 + ORDER BY + 1 + LIMIT 10 +) +SELECT + count(*) +FROM + events_table +WHERE + user_id IN + ( + SELECT + * + FROM + users + ); + count +------- + 101 +(1 row) + +-- CTE in WHERE basic +SELECT + count(*) +FROM + events_table +WHERE + event_type +IN + (WITH events AS ( + SELECT + event_type + FROM + events_table + WHERE user_id < 5 + GROUP BY + 1 + ORDER BY + 1) + SELECT * FROM events LIMIT 10 + ); +ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys +DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. 
+-- CTE with non-colocated join in WHERE +SELECT + count(*) +FROM + events_table +WHERE + user_id IN + (WITH users AS + (SELECT + events_table.user_id + FROM + events_table, users_table + WHERE + events_table.value_2 = users_table.value_2 + GROUP BY + 1 + ORDER BY + 1 + ) + SELECT * FROM users LIMIT 10 + ); +ERROR: cannot pushdown the subquery since all relations are not joined using distribution keys +DETAIL: Each relation should be joined with at least one another relation using distribution keys and equality operator. diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index bef6fb31b..6cd6a6a80 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -83,6 +83,12 @@ test: multi_join_order_tpch_large # ---------- test: multi_large_table_join_planning multi_large_table_pruning multi_large_table_task_assignment +# --------- +# Tests for recursive planning. +# --------- + +test: with_nested with_where with_basics with_prepare with_modifying with_executors with_join with_partitioning + # ---------- # Tests to check our large record loading and shard deletion behavior # ---------- diff --git a/src/test/regress/sql/with_basics.sql b/src/test/regress/sql/with_basics.sql new file mode 100644 index 000000000..5d5e80e0a --- /dev/null +++ b/src/test/regress/sql/with_basics.sql @@ -0,0 +1,471 @@ +-- Test the basic CTE functionality and expected error messages + +CREATE TYPE xy AS (x int, y int); +SELECT run_command_on_workers('CREATE TYPE xy AS (x int, y int)'); + +-- CTEs in FROM should work +WITH cte AS ( + SELECT user_id, value_2 from users_table WHERE user_id IN (1, 2) ORDER BY 1,2 LIMIT 5 +) +SELECT * FROM cte; + +-- CTEs in WHERE should work +WITH cte AS ( + SELECT user_id from users_table ORDER BY user_id DESC LIMIT 10 +) +SELECT + value_2 +FROM + users_table +WHERE + user_id IN (SELECT user_id FROM users_table) +ORDER BY + value_2 +LIMIT + 5; + +-- nested CTEs should work +WITH cte_1 AS ( + WITH cte_1_1 AS ( + 
SELECT user_id, value_2 from users_table WHERE user_id IN (1, 2) ORDER BY 2 LIMIT 5 + ), + cte_1_2 AS ( + SELECT max(user_id) AS user_id FROM cte_1_1 + ) + SELECT user_id FROM cte_1_2 ORDER BY user_id +) +SELECT value_2 FROM users_table WHERE user_id IN (SELECT user_id FROM cte_1) ORDER BY value_2 LIMIT 1; + +-- Mix of FROM/WHERE queries +WITH cte_from AS ( + SELECT max(user_id) AS user_id, value_2, value_1 FROM users_table GROUP BY value_2, value_1 +), +cte_where AS ( + SELECT value_2 FROM events_table +) +SELECT + * +FROM + (SELECT max(user_id), max(value_2) AS value_2 FROM cte_from GROUP BY value_1) f +WHERE + value_2 IN (SELECT * FROM cte_where) +ORDER BY + 1, 2 +LIMIT + 5; + +-- CTE in subquery errors out +SELECT user_id FROM ( + WITH cte AS ( + SELECT user_id, value_2 from users_table WHERE user_id IN (1, 2) ORDER BY 2 LIMIT 5 + ) + SELECT user_id FROM cte WHERE value_2 > 0 +) a; + +-- CTE outside of FROM/WHERE errors out +WITH cte AS ( + SELECT user_id FROM users_table WHERE value_2 IN (1, 2) +) +SELECT (SELECT * FROM cte); + +WITH cte_basic AS ( + SELECT user_id FROM users_table WHERE user_id = 1 +) +SELECT + (SELECT user_id FROM cte_basic), user_id +FROM + users_table; + +-- single-row sublink is acceptable when there is no FROM +WITH cte AS ( + SELECT user_id FROM users_table WHERE value_2 IN (1, 2) +) +SELECT (SELECT * FROM cte ORDER BY 1 LIMIT 1); + +-- group by partition column +WITH series AS ( + SELECT s AS once, s*2 AS twice FROM generate_series(1,10) s +) +SELECT user_id, count(*) +FROM + users_table +JOIN + series ON (user_id = once) +GROUP BY + user_id +ORDER BY + 1, 2 +LIMIT 5; + +-- group by non-partition column +WITH series AS ( + SELECT s AS once, s*2 AS twice FROM generate_series(1,10) s +) +SELECT + twice, min(user_id) +FROM + users_table +JOIN + series ON (user_id = once) +GROUP BY + twice +HAVING + twice > 5 +ORDER BY + 1, 2 +LIMIT 5; + +-- distinct in subquery on CTE +WITH one_user AS ( + SELECT user_id from users_table WHERE user_id = 1 
+) +SELECT + user_id +FROM + users_table +WHERE + value_2 IN (SELECT DISTINCT user_id FROM one_user) +ORDER BY + user_id +LIMIT + 1; + +-- having in subquery on CTE +WITH one_user AS ( + SELECT user_id from users_table WHERE user_id = 1 +) +SELECT + user_id +FROM + users_table +WHERE + value_2 IN (SELECT user_id FROM one_user GROUP BY user_id HAVING count(*) > 0) +ORDER BY + user_id +LIMIT + 1; + +-- aggregate in subquery on CTE +WITH top_users AS ( + SELECT user_id, value_2 FROM users_table ORDER BY user_id DESC LIMIT 10 +) +SELECT + user_id +FROM + (SELECT min(user_id) AS user_id FROM top_users) top_users +JOIN + users_table USING (user_id); + +-- FOR UPDATE in subquery on CTE +WITH top_users AS ( + SELECT user_id, value_2 FROM users_table ORDER BY user_id DESC LIMIT 10 +) +SELECT + user_id +FROM + (SELECT user_id FROM top_users FOR UPDATE) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; + +-- LIMIT in subquery on CTE +WITH top_users AS ( + SELECT user_id, value_2 FROM users_table ORDER BY user_id DESC LIMIT 10 +) +SELECT + user_id +FROM + (SELECT user_id FROM top_users LIMIT 5) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; + +-- OFFSET in subquery on CTE +WITH top_users AS ( + SELECT user_id, value_2 FROM users_table ORDER BY user_id DESC LIMIT 10 +) +SELECT + user_id +FROM + (SELECT user_id FROM top_users OFFSET 5) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; + +-- Unsupported join in CTE +WITH top_users AS ( + SELECT DISTINCT e.user_id FROM users_table u JOIN events_table e ON (u.user_id = e.user_id AND u.value_1 > e.value_2) +) +SELECT + user_id +FROM + (SELECT user_id FROM top_users WHERE user_id > 5) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; + +-- Join can be supported with another CTE +WITH events_table AS ( + SELECT * FROM events_table +), +top_users AS ( + SELECT DISTINCT e.user_id FROM users_table u JOIN events_table e ON 
(u.user_id = e.user_id AND u.value_1 > e.value_2) +) +SELECT + user_id +FROM + (SELECT user_id FROM top_users WHERE user_id > 5) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; + +-- Window functions in CTE +WITH top_users AS ( + SELECT row_number() OVER(), user_id FROM users_table ORDER BY user_id DESC LIMIT 10 +) +SELECT + user_id +FROM + (SELECT user_id FROM top_users WHERE user_id > 5) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; + +-- Window functions that partition by the distribution column in subqueries in CTEs are ok +WITH top_users AS + (SELECT * + FROM + (SELECT row_number() OVER(PARTITION BY user_id) AS row_number, + user_id + FROM users_table) AS foo +) +SELECT user_id +FROM + (SELECT user_id + FROM top_users + WHERE row_number > 5) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; + +-- Unsupported aggregate in CTE +WITH top_users AS ( + SELECT array_agg(user_id ORDER BY value_2) user_ids FROM users_table +) +SELECT + user_id +FROM + (SELECT unnest(user_ids) user_id FROM top_users) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; + +-- array_agg in CTE +WITH top_users AS ( + SELECT array_agg(user_id) user_ids FROM users_table +) +SELECT + user_id +FROM + (SELECT unnest(user_ids) user_id FROM top_users) top_users +JOIN + users_table USING (user_id) +ORDER BY + user_id +LIMIT + 5; + +-- composite type array +WITH top_users AS ( + SELECT array_agg((value_1,value_2)::xy) AS p FROM users_table WHERE user_id % 2 = 0 +) +SELECT + e.user_id, sum(y) +FROM + (SELECT (unnest(p)).* FROM top_users) tops +JOIN + events_table e ON (tops.x = e.user_id) +GROUP BY + e.user_id +ORDER BY + 2 DESC, 1 +LIMIT + 5; + +-- SELECT * FROM (SELECT * FROM cte UNION SELECT * FROM distributed_table) a; should error out +WITH cte AS ( + SELECT * FROM users_table +) +SELECT * FROM ( + SELECT * FROM cte UNION (SELECT * FROM events_table) + ) a +ORDER BY + 1,2,3,4,5,6 
+LIMIT + 10; + +SELECT * FROM ( + SELECT * FROM (WITH cte AS ( + SELECT * FROM users_table + ) + SELECT * FROM cte + )b UNION (SELECT * FROM events_table)) a +ORDER BY +1,2,3,4,5,6 +LIMIT +10; + +-- SELECT * FROM (SELECT * FROM cte UNION SELECT * FROM cte) a; should work +WITH cte AS ( + SELECT * FROM users_table WHERE user_id IN (1, 2) +) +SELECT + * +FROM + (SELECT * FROM cte UNION (SELECT * FROM cte)) a +ORDER BY + 1,2,3,4,5,6 +LIMIT + 5; + +WITH cte AS ( + SELECT * FROM users_table WHERE user_id IN (1, 2) ORDER BY 1,2,3 LIMIT 5 +), +cte_2 AS ( + SELECT * FROM users_table WHERE user_id IN (3, 4) ORDER BY 1,2,3 LIMIT 5 +) +SELECT * FROM cte UNION ALL SELECT * FROM cte_2; + +-- basic recursive CTE which should all error out +WITH RECURSIVE basic_recursive(x) AS ( + VALUES (1) + UNION ALL + SELECT user_id + 1 FROM users_table WHERE user_id < 100 +) +SELECT sum(x) FROM basic_recursive; + +WITH RECURSIVE basic_recursive AS ( + SELECT -1 as user_id, '2017-11-22 20:16:16.614779'::timestamp, -1, -1, -1, -1 + UNION ALL + SELECT * FROM users_table WHERE user_id>1 +) +SELECT * FROM basic_recursive ORDER BY user_id LIMIT 1; + + +-- basic_recursive in FROM should error out +SELECT + * +FROM +(WITH RECURSIVE basic_recursive AS ( + SELECT -1 as user_id, '2017-11-22 20:16:16.614779'::timestamp, -1, -1, -1, -1 + UNION ALL + SELECT * FROM users_table WHERE user_id>1 + ) + SELECT * FROM basic_recursive ORDER BY user_id LIMIT 1) cte_rec; + + +-- basic_recursive in WHERE with UNION ALL +SELECT + * +FROM + users_table +WHERE + user_id in +(WITH RECURSIVE basic_recursive AS ( + SELECT -1 as user_id + UNION ALL + SELECT user_id FROM users_table WHERE user_id>1 + ) + SELECT * FROM basic_recursive ORDER BY user_id LIMIT 1); + + +-- one recursive one regular CTE should error out +WITH RECURSIVE basic_recursive(x) AS( + VALUES (1) + UNION ALL + SELECT user_id + 1 FROM users_table WHERE user_id < 100 +), +basic AS ( + SELECT count(user_id) FROM users_table +) +SELECT x FROM basic, 
basic_recursive; + + +-- one recursive one regular which SELECTs from the recursive CTE under a simple SELECT +WITH RECURSIVE basic_recursive(x) AS( + VALUES (1) + UNION ALL + SELECT user_id + 1 FROM users_table WHERE user_id < 100 +), +basic AS ( + SELECT count(x) FROM basic_recursive +) +SELECT * FROM basic; + + +-- recursive CTE in a NESTED manner +WITH regular_cte AS ( + WITH regular_2 AS ( + WITH RECURSIVE recursive AS ( + VALUES (1) + UNION ALL + SELECT user_id + 1 FROM users_table WHERE user_id < 100 + ) + SELECT * FROM recursive + ) + SELECT * FROM regular_2 +) +SELECT * FROM regular_cte; + +-- CTEs should work with VIEWs as well +CREATE VIEW basic_view AS +SELECT * FROM users_table; + + +CREATE VIEW cte_view AS +WITH cte AS ( + SELECT * FROM users_table +) +SELECT user_id, max(value_1) as value_1 FROM cte GROUP BY 1; + + +WITH cte_user AS ( + SELECT basic_view.user_id,events_table.value_2 FROM basic_view join events_table on (basic_view.user_id = events_table.user_id) +) +SELECT user_id, sum(value_2) FROM cte_user GROUP BY 1 ORDER BY 1, 2; + +SELECT * FROM cte_view; + + +WITH cte_user_with_view AS +( + SELECT * FROM cte_view WHERE user_id < 3 +) +SELECT user_id, value_1 FROM cte_user_with_view ORDER BY 1, 2 LIMIT 10 OFFSET 3; + +DROP VIEW basic_view; +DROP VIEW cte_view; diff --git a/src/test/regress/sql/with_executors.sql b/src/test/regress/sql/with_executors.sql new file mode 100644 index 000000000..c1a89bd4c --- /dev/null +++ b/src/test/regress/sql/with_executors.sql @@ -0,0 +1,316 @@ +-- Confirm we can use local, router, real-time, and task-tracker execution + +CREATE SCHEMA with_executors; +SET search_path TO with_executors, public; +SET citus.enable_repartition_joins TO on; + +CREATE TABLE with_executors.local_table (id int); +INSERT INTO local_table VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); + +-- CTEs should be able to use local queries +WITH cte AS ( + WITH local_cte AS ( + SELECT * FROM local_table + ), + dist_cte AS ( + 
SELECT user_id FROM events_table + ) + SELECT * FROM local_cte join dist_cte on dist_cte.user_id=local_cte.id +) +SELECT count(*) FROM cte; + + +WITH cte AS ( + WITH local_cte AS ( + SELECT * FROM local_table + ), + dist_cte AS ( + SELECT user_id FROM events_table + ), + merger_cte AS ( + SELECT id as user_id FROM local_cte UNION (SELECT * FROM dist_cte) + ) + SELECT * FROM merger_cte WHERE user_id IN (1, 2, 3) +) +SELECT * FROM cte ORDER BY 1; + + +WITH cte AS ( + WITH local_cte AS ( + SELECT * FROM local_table WHERE id < 5 + ), + local_cte_2 AS ( + SELECT * FROM local_table WHERE id > 5 + ) + SELECT local_cte.id as id_1, local_cte_2.id as id_2 FROM local_cte,local_cte_2 +) +SELECT + * +FROM + cte +join + users_table +on + cte.id_1 = users_table.user_id +WHERE + cte.id_1 IN (3, 4, 5) +ORDER BY + 1,2,3,4,5,6,7 +LIMIT + 10; + + +-- CTEs should be able to use router queries +WITH cte AS ( + WITH router_cte AS ( + SELECT user_id, value_2 FROM users_table WHERE user_id = 1 + ), + router_cte_2 AS ( + SELECT user_id, event_type, value_2 FROM events_table WHERE user_id = 1 + ) + SELECT + router_cte.user_id as uid, event_type + FROM + router_cte, router_cte_2 +) +SELECT * FROM cte ORDER BY 2 LIMIT 5; + + +-- CTEs should be able to use real-time queries +WITH real_time_cte AS ( + SELECT * FROM users_table WHERE value_2 IN (1, 2, 3) +) +SELECT * FROM real_time_cte ORDER BY 1, 2, 3, 4, 5, 6 LIMIT 10; + + +-- router & real-time together +WITH cte AS ( + WITH router_cte AS ( + SELECT user_id, value_2 FROM users_table WHERE user_id = 1 + ), + real_time AS ( + SELECT user_id, event_type, value_2 FROM events_table + ) + SELECT + router_cte.user_id as uid, event_type + FROM + router_cte, real_time + WHERE + router_cte.user_id=real_time.user_id +) +SELECT * FROM cte WHERE uid=1 ORDER BY 2 LIMIT 5; + + +-- CTEs should be able to use task-tracker queries +WITH cte AS ( + WITH task_tracker_1 AS ( + SELECT + users_table.user_id as uid_1, users_table.value_2 + FROM + users_table + JOIN + 
events_table + ON + users_table.value_2=events_table.value_2 + ), + task_tracker_2 AS ( + SELECT + users_table.user_id as uid_2, users_table.value_3 + FROM + users_table + JOIN + events_table + ON + users_table.value_3=events_table.value_3 + ) + SELECT + uid_1, uid_2, value_2, value_3 + FROM + task_tracker_1 + JOIN + task_tracker_2 + ON + value_2 = value_3 +) +SELECT + uid_1, uid_2, cte.value_2, cte.value_3 +FROM + cte +JOIN + events_table +ON + cte.value_2 = events_table.event_type +ORDER BY + 1, 2, 3, 4 +LIMIT 10; + + +-- All combined +WITH cte AS ( + WITH task_tracker AS ( + SELECT + users_table.user_id as uid_1, users_table.value_2 as val_2 + FROM + users_table + JOIN + events_table + ON + users_table.value_2=events_table.value_2 + ), + real_time AS ( + SELECT * FROM users_table + ), + router_exec AS ( + SELECT * FROM events_table WHERE user_id = 1 + ), + local_table AS ( + SELECT * FROM local_table + ), + join_first_two AS ( + SELECT uid_1, time, value_3 FROM task_tracker JOIN real_time ON val_2=value_3 + ), + join_last_two AS ( + SELECT + router_exec.user_id, local_table.id + FROM + router_exec + JOIN + local_table + ON + router_exec.user_id=local_table.id + ) + SELECT * FROM join_first_two JOIN join_last_two ON id = value_3 ORDER BY 1,2,3,4,5 LIMIT 10 +) +SELECT DISTINCT uid_1, time, value_3 FROM cte ORDER BY 1, 2, 3 LIMIT 20; + +-- All combined with outer join +WITH cte AS ( + WITH task_tracker AS ( + SELECT + users_table.user_id as uid_1, users_table.value_2 as val_2 + FROM + users_table + JOIN + events_table + ON + users_table.value_2=events_table.value_2 + ), + real_time AS ( + SELECT * FROM users_table + ), + router_exec AS ( + SELECT * FROM events_table WHERE user_id = 1 + ), + local_table AS ( + SELECT * FROM local_table + ), + join_first_two AS ( + SELECT uid_1, time, value_3 FROM task_tracker JOIN real_time ON val_2=value_3 + ), + join_last_two AS ( + SELECT + router_exec.user_id, local_table.id + FROM + router_exec + JOIN + local_table + ON + 
router_exec.user_id=local_table.id + ) + SELECT uid_1, value_3 as val_3 FROM join_first_two JOIN join_last_two ON id = value_3 ORDER BY 1,2 LIMIT 10 +) +SELECT DISTINCT uid_1, val_3 FROM cte join events_table on cte.val_3=events_table.event_type ORDER BY 1, 2; + + +-- CTEs should not be able to terminate (the last SELECT) in a local query +WITH cte AS ( + SELECT * FROM users_table +) +SELECT count(*) FROM cte JOIN local_table ON (user_id = id); + +-- CTEs should be able to terminate a router query +WITH cte AS ( + WITH cte_1 AS ( + SELECT * FROM local_table WHERE id < 7 + ), + cte_2 AS ( + SELECT * FROM local_table WHERE id > 3 + ), + cte_dist AS ( + SELECT count(*) as u_id FROM users_table + ), + cte_merge AS ( + SELECT cte_1.id as id FROM cte_1 join cte_2 on TRUE + ) + SELECT count(*) FROM users_table join cte_merge on id=user_id +) +SELECT + row_number() OVER (), count(*) +FROM + cte, users_table +WHERE + cte.count=user_id and user_id=5; + + +-- CTEs should be able to terminate a real-time query +WITH cte AS ( + WITH cte_1 AS ( + SELECT * FROM local_table WHERE id < 7 + ), + cte_2 AS ( + SELECT * FROM local_table WHERE id > 3 + ), + cte_dist AS ( + SELECT count(*) as u_id FROM users_table + ), + cte_merge AS ( + SELECT cte_1.id as id FROM cte_1 join cte_2 on TRUE + ) + SELECT count(*) FROM users_table join cte_merge on id=user_id +) +SELECT count(*) FROM cte, users_table where cte.count=user_id; + + +SET citus.task_executor_type='task-tracker'; +-- CTEs shouldn't be able to terminate a task-tracker query +WITH cte_1 AS ( + SELECT + u_table.user_id as u_id, e_table.event_type + FROM + users_table as u_table + join + events_table as e_table + on + u_table.value_2=e_table.event_type + WHERE + u_table.user_id < 7 +), +cte_2 AS ( + SELECT + u_table.user_id as u_id, e_table.event_type + FROM + users_table as u_table + join + events_table as e_table + on + u_table.value_2=e_table.event_type + WHERE + u_table.user_id > 3 +), +cte_merge AS ( + SELECT + cte_1.u_id, 
cte_2.event_type + FROM + cte_1 + join + cte_2 + on cte_1.event_type=cte_2.u_id +) +SELECT + count(*) +FROM + users_table, cte_merge +WHERE + users_table.user_id = cte_merge.u_id; + +DROP SCHEMA with_executors CASCADE; diff --git a/src/test/regress/sql/with_join.sql b/src/test/regress/sql/with_join.sql new file mode 100644 index 000000000..f1d72a2df --- /dev/null +++ b/src/test/regress/sql/with_join.sql @@ -0,0 +1,219 @@ +CREATE SCHEMA with_join; +SET search_path TO with_join, public; +SET citus.next_shard_id TO 1501000; + +CREATE TABLE with_join.reference_table(user_id int); +SELECT create_reference_table('with_join.reference_table'); +INSERT INTO reference_table VALUES (6), (7); + +SET citus.enable_repartition_joins TO on; + +-- Two colocated CTE under a non-colocated join +WITH colocated_1 AS ( + SELECT + users_table.user_id, events_table.value_2 + FROM + users_table, events_table + WHERE + users_table.user_id = events_table.user_id AND event_type IN (1, 2, 3) +), +colocated_2 AS ( + SELECT + users_table.user_id, events_table.value_2 + FROM + users_table, events_table + WHERE + users_table.user_id = events_table.user_id AND event_type IN (4, 5, 6) +) +SELECT colocated_1.user_id, count(*) +FROM + colocated_1, colocated_2 +WHERE + colocated_1.value_2 = colocated_2.value_2 +GROUP BY + 1 +ORDER BY + 2 DESC, 1; + +-- Two non-colocated CTE under a co-located join +WITH non_colocated_1 AS ( + SELECT + users_table.user_id + FROM + users_table, events_table + WHERE + users_table.user_id = events_table.value_2 AND event_type IN (1, 2, 3) +), +non_colocated_2 AS ( + SELECT + users_table.user_id + FROM + users_table, events_table + WHERE + users_table.user_id = events_table.value_2 AND event_type IN (4, 5, 6) +) + +SELECT non_colocated_1.user_id, count(*) +FROM + non_colocated_1, non_colocated_2 +WHERE + non_colocated_1.user_id = non_colocated_2.user_id +GROUP BY + 1 +ORDER BY + 2 DESC, 1; + + +-- Subqueries in WHERE and FROM are mixed +-- In this query, only subquery in 
WHERE is not a colocated join +WITH users_events AS ( + WITH colocated_join AS ( + SELECT + users_table.user_id as uid, event_type + FROM + users_table + join + events_table + on (users_table.user_id = events_table.user_id) + WHERE + events_table.event_type IN (1, 2, 3) + ), + colocated_join_2 AS ( + SELECT + users_table.user_id, event_type + FROM + users_table + join + events_table + on (users_table.user_id = events_table.user_id) + WHERE + events_table.event_type IN (4, 5, 6) + ) + SELECT + uid, colocated_join.event_type + FROM + colocated_join, + colocated_join_2 + WHERE + colocated_join.uid = colocated_join_2.user_id AND + colocated_join.event_type IN ( + WITH some_events AS ( + SELECT + event_type + FROM + events_table + WHERE + user_id < 100 + GROUP BY + 1 + ORDER BY + 1 + LIMIT 10 + ) + SELECT + * + FROM + some_events + ) +) +SELECT + * +FROM + users_events +ORDER BY + 1, 2 +LIMIT + 20; + +-- cte LEFT JOIN distributed_table should error out +WITH cte AS ( + SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1 +) +SELECT + cte.user_id, cte.time, events_table.event_type +FROM + cte +LEFT JOIN + events_table ON cte.user_id = events_table.user_id +ORDER BY + 1,2,3 +LIMIT + 5; + +-- cte RIGHT JOIN distributed_table should work +WITH cte AS ( + SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1 +) +SELECT + cte.user_id, cte.time, events_table.event_type +FROM + cte +RIGHT JOIN + events_table ON cte.user_id = events_table.user_id +ORDER BY + 1,2,3 +LIMIT + 5; + +-- distributed_table LEFT JOIN cte should work +WITH cte AS ( + SELECT * FROM users_table WHERE value_1 = 1 ORDER BY value_1 +) +SELECT + cte.user_id, cte.time, events_table.event_type +FROM + events_table +LEFT JOIN + cte ON cte.user_id = events_table.user_id +ORDER BY + 1,2,3 +LIMIT + 5; + +-- distributed_table RIGHT JOIN cte should error out +WITH cte AS ( + SELECT * FROM users_table WHERE value_1 = 1 ORDER BY value_1 +) +SELECT + cte.user_id, cte.time, events_table.event_type +FROM + 
events_table +RIGHT JOIN + cte ON cte.user_id = events_table.user_id +ORDER BY + 1,2,3 +LIMIT + 5; + +-- cte FULL JOIN distributed_table should error out +WITH cte AS ( + SELECT * FROM users_table WHERE user_id = 1 ORDER BY value_1 +) +SELECT + cte.user_id, cte.time, events_table.event_type +FROM + events_table +FULL JOIN + cte ON cte.user_id = events_table.user_id +ORDER BY + 1,2,3 +LIMIT + 5; + +-- Joins with reference tables are planned as router queries +WITH cte AS ( + SELECT value_2, max(user_id) AS user_id FROM users_table WHERE value_2 = 1 GROUP BY value_2 HAVING count(*) > 1 +) +SELECT + row_number() OVER(), cte.user_id +FROM + cte +FULL JOIN + reference_table ON cte.user_id + 1 = reference_table.user_id +ORDER BY + user_id +LIMIT + 5; + +RESET client_min_messages; +DROP SCHEMA with_join CASCADE; diff --git a/src/test/regress/sql/with_modifying.sql b/src/test/regress/sql/with_modifying.sql new file mode 100644 index 000000000..60ced959a --- /dev/null +++ b/src/test/regress/sql/with_modifying.sql @@ -0,0 +1,124 @@ +-- Tests for modifying CTEs and CTEs in modifications +SET citus.next_shard_id TO 1502000; + +CREATE SCHEMA with_modifying; +SET search_path TO with_modifying, public; + +CREATE TABLE with_modifying.modify_table (id int, val int); +SELECT create_distributed_table('modify_table', 'id'); + +CREATE TABLE with_modifying.users_table (LIKE public.users_table INCLUDING ALL); +SELECT create_distributed_table('with_modifying.users_table', 'user_id'); +INSERT INTO with_modifying.users_table SELECT * FROM public.users_table; + +-- basic insert query in CTE +WITH basic_insert AS ( + INSERT INTO users_table VALUES (1), (2), (3) RETURNING * +) +SELECT + * +FROM + basic_insert; + +-- single-shard UPDATE in CTE +WITH basic_update AS ( + UPDATE users_table SET value_3=42 WHERE user_id=0 RETURNING * +) +SELECT + * +FROM + basic_update; + +-- multi-shard UPDATE in CTE +WITH basic_update AS ( + UPDATE users_table SET value_3=42 WHERE value_2=1 RETURNING * +) +SELECT 
+    *
+FROM
+    basic_update;
+
+-- single-shard DELETE in CTE
+WITH basic_delete AS (
+    DELETE FROM users_table WHERE user_id=42 RETURNING *
+)
+SELECT
+    *
+FROM
+    basic_delete;
+
+-- multi-shard DELETE in CTE
+WITH basic_delete AS (
+    DELETE FROM users_table WHERE value_2=42 RETURNING *
+)
+SELECT
+    *
+FROM
+    basic_delete;
+
+-- INSERT...SELECT query in CTE
+WITH copy_table AS (
+    INSERT INTO users_table SELECT * FROM users_table RETURNING *
+)
+SELECT
+    *
+FROM
+    copy_table;
+
+-- CTEs prior to INSERT...SELECT via the coordinator should work
+WITH cte AS (
+    SELECT user_id FROM users_table WHERE value_2 IN (1, 2)
+)
+INSERT INTO modify_table (SELECT * FROM cte);
+
+
+WITH cte_1 AS (
+    SELECT user_id, value_2 FROM users_table WHERE value_2 IN (1, 2, 3, 4)
+),
+cte_2 AS (
+    SELECT user_id, value_2 FROM users_table WHERE value_2 IN (3, 4, 5, 6)
+)
+INSERT INTO modify_table (SELECT cte_1.user_id FROM cte_1 join cte_2 on cte_1.value_2=cte_2.value_2);
+
+
+-- even if this is an INSERT...SELECT, the CTE is under SELECT
+WITH cte AS (
+    SELECT user_id, value_2 FROM users_table WHERE value_2 IN (1, 2)
+)
+INSERT INTO modify_table (SELECT (SELECT value_2 FROM cte GROUP BY value_2));
+
+
+-- CTEs prior to any other modification should error out
+WITH cte AS (
+    SELECT value_2 FROM users_table WHERE user_id IN (1, 2, 3)
+)
+DELETE FROM modify_table WHERE id IN (SELECT value_2 FROM cte);
+
+
+WITH cte AS (
+    SELECT value_2 FROM users_table WHERE user_id IN (1, 2, 3)
+)
+UPDATE modify_table SET val=-1 WHERE val IN (SELECT * FROM cte);
+
+
+WITH cte AS (
+    WITH basic AS (
+        SELECT value_2 FROM users_table WHERE user_id IN (1, 2, 3)
+    )
+    INSERT INTO modify_table (SELECT * FROM basic) RETURNING *
+)
+UPDATE modify_table SET val=-2 WHERE id IN (SELECT id FROM cte);
+
+
+WITH cte AS (
+    WITH basic AS (
+        SELECT * FROM events_table WHERE event_type = 5
+    ),
+    basic_2 AS (
+        SELECT user_id FROM users_table
+    )
+    INSERT INTO modify_table (SELECT user_id FROM events_table) RETURNING *
+)
+SELECT * FROM cte;
+
+DROP SCHEMA with_modifying CASCADE;
diff --git a/src/test/regress/sql/with_nested.sql b/src/test/regress/sql/with_nested.sql
new file mode 100644
index 000000000..4a3a9b392
--- /dev/null
+++ b/src/test/regress/sql/with_nested.sql
@@ -0,0 +1,328 @@
+-- Complex nested CTEs
+CREATE SCHEMA with_nested;
+SET search_path tO with_nested, public;
+
+CREATE TABLE with_nested.local_users (user_id int, event_type int);
+INSERT INTO local_users VALUES (0, 0), (1, 4), (1, 7), (2, 1), (3, 3), (5, 4), (6, 2), (10, 7);
+
+-- Can refer to outer CTE because it is recursively planned first
+WITH cte_1 AS (
+    SELECT DISTINCT user_id FROM users_table
+),
+cte_2 AS (
+    WITH cte_1_1 AS (
+        SELECT * FROM cte_1 WHERE user_id > 1
+    )
+    SELECT * FROM cte_1_1 WHERE user_id < 3
+)
+SELECT user_id FROM cte_2 LIMIT 1;
+
+-- Nested CTEs
+WITH users_events AS (
+    WITH users_events_2 AS (
+        SELECT
+            users_table.user_id as user_id,
+            events_table.event_type as event_type
+        FROM
+            users_table,
+            events_table
+        WHERE
+            users_table.user_id = events_table.user_id
+        GROUP BY
+            users_table.user_id,
+            events_table.event_type
+    )
+    SELECT
+        u_events.user_id, events_table.event_type
+    FROM
+        users_events_2 as u_events,
+        events_table
+    WHERE
+        u_events.user_id = events_table.user_id
+    )
+SELECT
+    *
+FROM
+    users_events
+ORDER BY
+    1, 2
+LIMIT 20;
+
+-- Nested CTEs
+WITH users_events AS (
+    WITH users_events AS (
+        WITH users_events AS (
+            WITH users_events AS (
+                WITH users_events AS (
+                    WITH users_events AS (
+                        WITH users_events AS (
+                            SELECT
+                                users_table.user_id as user_id,
+                                events_table.event_type as event_type
+                            FROM
+                                users_table,
+                                events_table
+                            WHERE
+                                users_table.user_id = events_table.user_id
+                            GROUP BY
+                                users_table.user_id,
+                                events_table.event_type
+
+                        )
+                        SELECT
+                            uid, event_type, value_2, value_3
+                        FROM
+                            (
+                                (
+                                    SELECT
+                                        user_id as uid
+                                    FROM
+                                        users_events
+                                ) users
+                                join
+                                    events_table
+                                on
+                                    users.uid = events_table.event_type
+                            ) a
+                    )
+                    SELECT
+                        *
+                    FROM
+                        users_events
+                    ORDER BY
+                        1, 3, 2, 4
+                    LIMIT 100
+                )
+                SELECT
+                    *
+                FROM
+                    users_events
+                LIMIT 90
+            )
+            SELECT
+                *
+            FROM
+                users_events
+            LIMIT 50
+        )
+        SELECT
+            uid, event_type, value_2, sum(value_3) as sum_3
+        FROM
+            users_events
+        GROUP BY
+            1, 2, 3
+        LIMIT 40
+    )
+    SELECT
+        uid, event_type, sum(value_2) as sum_2, sum(sum_3) as sum_3
+    FROM
+        users_events
+    GROUP BY
+        1, 2
+    LIMIT 30
+)
+SELECT
+    uid, avg(event_type), sum(sum_2), sum(sum_3)
+FROM
+    users_events
+GROUP BY
+    1;
+
+
+-- Nested CTEs
+WITH users_events AS (
+    -- router select query
+    WITH users_events_1 AS (
+        SELECT
+            *
+        FROM
+            users_table
+        WHERE
+            user_id = 1
+    ),
+    -- real-time select query
+    users_events_2_3 AS (
+        SELECT
+            *
+        FROM
+            users_table
+        WHERE
+            user_id = 2
+            OR
+            user_id = 3
+    ),
+    -- router select query
+    -- sub CTE is a real-time executor query but the top level is router select
+    users_events_4 AS (
+        WITH users_events_4_5 AS (
+            SELECT
+                *
+            FROM
+                users_table
+            WHERE
+                user_id = 4
+                OR
+                user_id = 5
+        )
+        SELECT
+            *
+        FROM
+            users_events_4_5
+        WHERE
+            user_id = 4
+    ),
+    -- merge all the results from CTEs
+    merged_users AS (
+        SELECT
+            *
+        FROM
+            users_events_1
+        UNION
+        SELECT
+            *
+        FROM
+            users_events_2_3
+        UNION
+        SELECT
+            *
+        FROM
+            users_events_4
+    )
+    SELECT
+        *
+    FROM
+        merged_users
+)
+SELECT
+    *
+FROM
+    users_events
+ORDER BY
+    1, 2, 3, 4, 5, 6
+LIMIT
+    20;
+
+
+-- Nested CTEs - joined with local table. Not supported yet.
+WITH users_events AS (
+    -- router select query
+    WITH users_events_1 AS (
+        SELECT
+            *
+        FROM
+            users_table
+        WHERE
+            user_id = 1
+    ),
+    -- real-time select query
+    users_events_2_3 AS (
+        SELECT
+            *
+        FROM
+            users_table
+        WHERE
+            user_id = 2
+            OR
+            user_id = 3
+    ),
+    -- router select query
+    -- sub CTE is a real-time executor query but the top level is router select
+    users_events_4 AS (
+        WITH users_events_4_5 AS (
+            SELECT
+                *
+            FROM
+                users_table
+            WHERE
+                user_id = 4
+                OR
+                user_id = 5
+        )
+        SELECT
+            *
+        FROM
+            users_events_4_5
+        WHERE
+            user_id = 4
+    )
+    -- merge all the results from CTEs
+    SELECT
+        *
+    FROM
+        users_events_1
+    UNION
+    SELECT
+        *
+    FROM
+        users_events_2_3
+    UNION
+    SELECT
+        *
+    FROM
+        users_events_4
+)
+SELECT
+    *
+FROM
+    users_events
+ORDER BY
+    1, 2, 3, 4, 5, 6
+LIMIT
+    20;
+
+-- access to uncle, use window function, apply aggregates, use group by, LIMIT/OFFSET
+WITH cte1 AS (
+    WITH cte1_1 AS (
+        WITH cte1_1_1 AS (
+            SELECT user_id, time, value_2 FROM users_table
+        ),
+        cte1_1_2 AS (
+            SELECT
+                user_id, count
+            FROM (
+                SELECT
+                    user_id,
+                    count(value_2) OVER (PARTITION BY user_id)
+                FROM
+                    users_table
+                GROUP BY 1, users_table.value_2
+            )aa
+            GROUP BY
+                1,2
+            ORDER BY
+                1,2
+            LIMIT
+                4
+            OFFSET
+                2
+        )
+        SELECT cte1_1_1.user_id, cte1_1_1.time, cte1_1_2.count FROM cte1_1_1 join cte1_1_2 on cte1_1_1.user_id=cte1_1_2.user_id
+    ),
+    cte1_2 AS (
+        WITH cte1_2_1 AS (
+            SELECT
+                user_id, time, avg(value_1)::real as value_1, min(value_2) as value_2
+            FROM
+                users_table
+            GROUP BY
+                1, 2
+        ),
+        cte1_2_2 AS (
+            SELECT cte1_2_1.user_id, cte1_1.time, cte1_2_1.value_1, cte1_1.count FROM cte1_2_1 join cte1_1 on cte1_2_1.time=cte1_1.time and cte1_2_1.user_id=cte1_1.user_id
+        )
+        SELECT * FROM cte1_2_2
+    )
+    SELECT * FROM cte1_2
+),
+cte2 AS (
+    WITH cte2_1 AS (
+        WITH cte2_1_1 AS (
+            SELECT * FROM cte1
+        )
+        SELECT user_id, time, value_1, min(count) FROM cte2_1_1 GROUP BY 1, 2, 3 ORDER BY 1,2,3
+    )
+    SELECT * FROM cte2_1 LIMIT 3 OFFSET 2
+)
+SELECT * FROM cte2;
+
+DROP SCHEMA with_nested CASCADE;
diff --git a/src/test/regress/sql/with_partitioning.sql b/src/test/regress/sql/with_partitioning.sql
new file mode 100644
index 000000000..d21faeba6
--- /dev/null
+++ b/src/test/regress/sql/with_partitioning.sql
@@ -0,0 +1,73 @@
+CREATE SCHEMA with_partitioning;
+SET search_path TO with_partitioning, public;
+
+SET citus.shard_replication_factor TO 1;
+
+CREATE TABLE with_partitioning.local_users_2 (user_id int, event_type int);
+INSERT INTO local_users_2 VALUES (0, 0), (1, 4), (1, 7), (2, 1), (3, 3), (5, 4), (6, 2), (10, 7);
+
+CREATE TABLE with_partitioning.partitioning_test(id int, time date) PARTITION BY RANGE (time);
+
+-- create its partitions
+CREATE TABLE with_partitioning.partitioning_test_2017 PARTITION OF partitioning_test FOR VALUES FROM ('2017-01-01') TO ('2018-01-01');
+CREATE TABLE with_partitioning.partitioning_test_2010 PARTITION OF partitioning_test FOR VALUES FROM ('2010-01-01') TO ('2011-01-01');
+
+-- load some data and distribute tables
+INSERT INTO partitioning_test VALUES (1, '2017-11-23');
+INSERT INTO partitioning_test VALUES (2, '2010-07-07');
+
+INSERT INTO partitioning_test_2017 VALUES (3, '2017-11-22');
+INSERT INTO partitioning_test_2010 VALUES (4, '2010-03-03');
+
+-- distribute partitioned table
+SELECT create_distributed_table('with_partitioning.partitioning_test', 'id');
+
+
+-- Join of a CTE on distributed table and then join with a partitioned table
+WITH cte AS (
+    SELECT * FROM users_table
+)
+SELECT DISTINCT ON (id) id, cte.time FROM cte join partitioning_test on cte.time::date=partitioning_test.time ORDER BY 1, 2 LIMIT 3;
+
+
+-- Join of a CTE on distributed table and then join with a partitioned table hitting on only one partition
+WITH cte AS (
+    SELECT * FROM users_table
+)
+SELECT DISTINCT ON (id) id, cte.time FROM cte join partitioning_test on cte.time::date=partitioning_test.time WHERE partitioning_test.time >'2017-11-20' ORDER BY 1, 2 LIMIT 3;
+
+
+-- Join with a distributed table and then join of two CTEs
+WITH cte AS (
+    SELECT id, time FROM partitioning_test
+),
+cte_2 AS (
+    SELECT * FROM partitioning_test WHERE id > 2
+),
+cte_joined AS (
+    SELECT user_id, cte_2.time FROM users_table join cte_2 on (users_table.time::date = cte_2.time)
+),
+cte_joined_2 AS (
+    SELECT user_id, cte_joined.time FROM cte_joined join cte on (cte_joined.time = cte.time)
+)
+SELECT DISTINCT ON (event_type) event_type, cte_joined_2.user_id FROM events_table join cte_joined_2 on (cte_joined_2.time=events_table.time::date) ORDER BY 1, 2 LIMIT 10 OFFSET 2;
+
+
+-- Join a partitioned table with a local table (both in CTEs)
+-- and then with a distributed table. After all join with a
+-- partitioned table again
+WITH cte AS (
+    SELECT id, time FROM partitioning_test
+),
+cte_2 AS (
+    SELECT * FROM local_users_2
+),
+cte_joined AS (
+    SELECT user_id, cte.time FROM cte join cte_2 on (cte.id = cte_2.user_id)
+),
+cte_joined_2 AS (
+    SELECT users_table.user_id, cte_joined.time FROM cte_joined join users_table on (cte_joined.time = users_table.time::date)
+)
+SELECT DISTINCT ON (id) id, cte_joined_2.time FROM cte_joined_2 join partitioning_test on (cte_joined_2.time=partitioning_test.time) ORDER BY 1, 2;
+
+DROP SCHEMA with_partitioning CASCADE;
diff --git a/src/test/regress/sql/with_prepare.sql b/src/test/regress/sql/with_prepare.sql
new file mode 100644
index 000000000..ac26508f1
--- /dev/null
+++ b/src/test/regress/sql/with_prepare.sql
@@ -0,0 +1,227 @@
+-- prepared statements
+PREPARE prepared_test_1 AS
+WITH basic AS(
+    SELECT * FROM users_table
+)
+SELECT
+    *
+FROM
+    basic
+WHERE
+    basic.value_2 IN (1, 2, 3)
+ORDER BY
+    1, 2, 3, 4, 5, 6
+LIMIT 10;
+
+
+PREPARE prepared_test_2 AS
+WITH users_events AS(
+    SELECT
+        users_table.user_id as user_id,
+        events_table.event_type as event_type
+    FROM
+        users_table,
+        events_table
+    WHERE
+        users_table.user_id = events_table.user_id
+    GROUP BY
+        users_table.user_id,
+        events_table.event_type
+),
+event_attendee_count AS(
+    SELECT
+        event_type, count(user_id)
+    FROM
+        users_events
+    GROUP BY
+        1
+),
+user_coolness AS(
+    SELECT
+        user_id,
+        sum(count)
+    FROM
+        users_events
+        join
+        event_attendee_count
+        on (users_events.event_type = event_attendee_count.event_type)
+    GROUP BY
+        user_id
+)
+SELECT
+    *
+FROM
+    user_coolness
+ORDER BY
+    2, 1
+LIMIT
+    10;
+
+
+PREPARE prepared_test_3(integer) AS
+WITH users_events AS(
+    -- events 1 and 2 only
+    WITH spec_events AS(
+        SELECT
+            *
+        FROM
+            events_table
+        WHERE
+            event_type IN (1, 2)
+    )
+    -- users who have done 1 or 2
+    SELECT
+        users_table.user_id,
+        spec_events.event_type
+    FROM
+        users_table
+        join
+        spec_events
+        on (users_table.user_id = spec_events.user_id)
+    ORDER BY
+        1,
+        event_type
+),
+event_attendee_count AS(
+    -- distinct attendee count of each event in users_event
+    WITH event_attendee_count AS(
+        SELECT
+            event_type, count(user_id)
+        FROM
+            users_events
+        GROUP BY
+            1
+    )
+    -- distinct attendee count of first 3 events
+    SELECT
+        *
+    FROM
+        event_attendee_count
+    ORDER BY
+        event_type
+    LIMIT 3
+),
+-- measure how cool an attendee is by checking the number of events he attended
+user_coolness AS(
+    SELECT
+        user_id,
+        sum(count)
+    FROM
+        users_events
+        join
+        event_attendee_count
+        on (users_events.event_type = $1)
+    GROUP BY
+        user_id
+)
+SELECT
+    *
+FROM
+    user_coolness
+ORDER BY
+    2, 1
+LIMIT
+    10;
+
+
+PREPARE prepared_test_4(integer, integer, integer) AS
+WITH basic AS(
+    SELECT * FROM users_table WHERE value_2 IN ($1, $2, $3)
+)
+SELECT
+    *
+FROM
+    basic
+ORDER BY
+    1, 2, 3, 4, 5, 6
+LIMIT 10;
+
+
+-- prepared statement which inserts in a CTE should fail
+PREPARE prepared_partition_column_insert(integer) AS
+WITH prepared_insert AS (
+    INSERT INTO users_table VALUES ($1) RETURNING *
+)
+SELECT * FROM prepared_insert;
+
+
+PREPARE prepared_test_5(integer, integer, integer) AS
+-- router select query
+WITH users_events_1 AS (
+    SELECT
+        *
+    FROM
+        users_table
+    WHERE
+        user_id = $1
+),
+-- real-time select query
+users_events_2_3 AS (
+    SELECT
+        *
+    FROM
+        users_table
+    WHERE
+        user_id = $2
+        OR
+        user_id = $3
+),
+merged_users AS (
+    SELECT
+        *
+    FROM
+        users_events_1
+    UNION
+    SELECT
+        *
+    FROM
+        users_events_2_3
+)
+SELECT
+    *
+FROM
+    merged_users
+ORDER BY
+    1, 2, 3, 4, 5, 6
+LIMIT 10;
+
+
+
+EXECUTE prepared_test_1;
+EXECUTE prepared_test_1;
+EXECUTE prepared_test_1;
+EXECUTE prepared_test_1;
+EXECUTE prepared_test_1;
+EXECUTE prepared_test_1;
+
+EXECUTE prepared_test_2;
+EXECUTE prepared_test_2;
+EXECUTE prepared_test_2;
+EXECUTE prepared_test_2;
+EXECUTE prepared_test_2;
+EXECUTE prepared_test_2;
+
+EXECUTE prepared_test_3(1);
+EXECUTE prepared_test_3(2);
+EXECUTE prepared_test_3(3);
+EXECUTE prepared_test_3(4);
+EXECUTE prepared_test_3(5);
+EXECUTE prepared_test_3(6);
+
+EXECUTE prepared_test_4(1, 2, 3);
+EXECUTE prepared_test_4(2, 3, 4);
+EXECUTE prepared_test_4(3, 4, 5);
+EXECUTE prepared_test_4(4, 5, 6);
+EXECUTE prepared_test_4(5, 6, 7);
+EXECUTE prepared_test_4(6, 7, 8);
+
+EXECUTE prepared_test_5(1, 2, 3);
+EXECUTE prepared_test_5(2, 3, 4);
+EXECUTE prepared_test_5(3, 4, 5);
+EXECUTE prepared_test_5(4, 5, 6);
+EXECUTE prepared_test_5(5, 6, 7);
+EXECUTE prepared_test_5(6, 7, 8);
+
+EXECUTE prepared_partition_column_insert(1);
+
+DEALLOCATE ALL;
diff --git a/src/test/regress/sql/with_where.sql b/src/test/regress/sql/with_where.sql
new file mode 100644
index 000000000..930c51474
--- /dev/null
+++ b/src/test/regress/sql/with_where.sql
@@ -0,0 +1,153 @@
+-- More complex CTEs in WHERE
+
+SET citus.enable_repartition_joins TO on;
+
+-- CTE in WHERE basic
+WITH events AS (
+    SELECT
+        event_type
+    FROM
+        events_table
+    WHERE
+        user_id < 5
+    GROUP BY
+        event_type
+    ORDER BY
+        event_type
+    LIMIT 10
+)
+SELECT
+    count(*)
+FROM
+    events_table
+WHERE
+    event_type
+IN
+    (SELECT
+        event_type
+    FROM
+        events);
+
+WITH users AS (
+    SELECT
+        events_table.user_id
+    FROM
+        events_table, users_table
+    WHERE
+        events_table.user_id = users_table.user_id
+    GROUP BY
+        1
+    ORDER BY
+        1
+    LIMIT 10
+)
+SELECT
+    count(*)
+FROM
+    events_table
+WHERE
+    user_id IN
+    (
+        SELECT
+            *
+        FROM
+            users
+    );
+
+
+WITH users AS (
+    SELECT
+        events_table.user_id
+    FROM
+        events_table, users_table
+    WHERE
+        events_table.user_id = users_table.user_id
+    GROUP BY
+        1
+    ORDER BY
+        1
+    LIMIT 10
+)
+SELECT
+    count(*)
+FROM
+    events_table
+WHERE
+    user_id IN
+    (
+        SELECT
+            *
+        FROM
+            users
+    );
+
+
+-- CTE with non-colocated join in WHERE
+WITH users AS (
+    SELECT
+        events_table.user_id
+    FROM
+        events_table, users_table
+    WHERE
+        events_table.value_2 = users_table.value_2
+    GROUP BY
+        1
+    ORDER BY
+        1
+    LIMIT 10
+)
+SELECT
+    count(*)
+FROM
+    events_table
+WHERE
+    user_id IN
+    (
+        SELECT
+            *
+        FROM
+            users
+    );
+
+-- CTE in WHERE basic
+SELECT
+    count(*)
+FROM
+    events_table
+WHERE
+    event_type
+IN
+    (WITH events AS (
+        SELECT
+            event_type
+        FROM
+            events_table
+        WHERE user_id < 5
+        GROUP BY
+            1
+        ORDER BY
+            1)
+    SELECT * FROM events LIMIT 10
+    );
+
+-- CTE with non-colocated join in WHERE
+SELECT
+    count(*)
+FROM
+    events_table
+WHERE
+    user_id IN
+    (WITH users AS
+        (SELECT
+            events_table.user_id
+        FROM
+            events_table, users_table
+        WHERE
+            events_table.value_2 = users_table.value_2
+        GROUP BY
+            1
+        ORDER BY
+            1
+        )
+    SELECT * FROM users LIMIT 10
+    );