citus/src/test/regress/sql/multi_subquery_in_where_cla...

574 lines
13 KiB
SQL

--
-- multi subquery in where queries aims to expand existing subquery pushdown
-- regression tests to cover more cases specifically subqueries in WHERE clause
-- the tables that are used depends to multi_insert_select_behavioral_analytics_create_table.sql
--
-- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests
-- subqueries in WHERE with greater operator
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id AND event_type = 1
GROUP BY
user_id
)
GROUP BY user_id
HAVING count(*) > 2
ORDER BY user_id
LIMIT 5;
-- same query with one additional join on non distribution column
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id AND event_type = 1 AND
users_table.time > events_table.time
GROUP BY
user_id
)
GROUP BY user_id
HAVING count(*) > 1
ORDER BY user_id
LIMIT 5;
-- the other way around is not supported
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_table
WHERE
users_table.user_id > events_table.user_id AND event_type = 1 AND
users_table.time = events_table.time
GROUP BY
user_id
)
GROUP BY user_id
HAVING count(*) > 1
ORDER BY user_id
LIMIT 5;
-- subqueries in where with ALL operator
SELECT
user_id
FROM
users_table
WHERE
value_2 > 1 AND
value_2 < ALL (SELECT avg(value_3) FROM events_table WHERE users_table.user_id = events_table.user_id GROUP BY user_id)
GROUP BY
1
ORDER BY
1 DESC
LIMIT 3;
-- IN operator on non-partition key
SELECT
user_id
FROM
events_table as e1
WHERE
event_type IN
(SELECT
event_type
FROM
events_table as e2
WHERE
value_2 = 1 AND value_3 > 3 AND
e1.user_id = e2.user_id
)
ORDER BY 1;
-- NOT IN on non-partition key
SELECT
user_id
FROM
events_table as e1
WHERE
event_type NOT IN
(SELECT
event_type
FROM
events_table as e2
WHERE
value_2 = 1 AND value_3 > 3 AND
e1.user_id = e2.user_id
)
GROUP BY 1
HAVING count(*) > 2
ORDER BY 1;
-- non-correlated query with =ANY on partition keys
SELECT
user_id, count(*)
FROM
users_table
WHERE
user_id =ANY(SELECT user_id FROM users_table WHERE value_1 >= 1 AND value_1 <= 2) GROUP BY 1 ORDER BY 2 DESC LIMIT 5;
-- users that appeared more than 118 times
SELECT
user_id
FROM
users_table
WHERE 2 <=
(SELECT
count(*)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id
GROUP BY
user_id)
GROUP BY
user_id
ORDER BY
user_id;
-- the following query doesn't have a meaningful result
-- but it is a valid query with an arbitrary subquery in
-- WHERE clause
SELECT user_id, value_2 FROM users_table WHERE
value_1 > 1 AND value_1 < 3
AND value_2 >= 1
AND user_id IN
(
SELECT
e1.user_id
FROM (
-- Get the first time each user viewed the homepage.
SELECT
user_id,
1 AS view_homepage,
min(time) AS view_homepage_time
FROM events_table
WHERE
event_type IN (0, 1)
GROUP BY user_id
) e1 LEFT JOIN LATERAL (
SELECT
user_id,
1 AS use_demo,
time AS use_demo_time
FROM events_table
WHERE
user_id = e1.user_id AND
event_type IN (1, 2)
ORDER BY time
) e2 ON true LEFT JOIN LATERAL (
SELECT
user_id,
1 AS enter_credit_card,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e2.user_id AND
event_type IN (2, 3)
ORDER BY time
) e3 ON true LEFT JOIN LATERAL (
SELECT
1 AS submit_card_info,
user_id,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e3.user_id AND
event_type IN (3, 4)
ORDER BY time
) e4 ON true LEFT JOIN LATERAL (
SELECT
1 AS see_bought_screen
FROM events_table
WHERE
user_id = e4.user_id AND
event_type IN (5, 6)
ORDER BY time
) e5 ON true
group by e1.user_id
HAVING sum(submit_card_info) > 0
)
ORDER BY 1, 2;
-- similar to the above query
-- the following query doesn't have a meaningful result
-- but it is a valid query with an arbitrary subquery in
-- WHERE clause
SELECT
user_id
FROM
users_table
WHERE
user_id IN
(
SELECT
user_id
FROM (
SELECT
subquery_1.user_id, count_pay
FROM
(
(SELECT
users_table.user_id,
'action=>1' AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 1 AND
users_table.user_id <= 3 AND
events_table.event_type > 1 AND events_table.event_type < 3
)
UNION
(SELECT
users_table.user_id,
'action=>2' AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 1 AND
users_table.user_id <= 3 AND
events_table.event_type > 2 AND events_table.event_type < 4
)
) AS subquery_1
LEFT JOIN
(SELECT
user_id,
COUNT(*) AS count_pay
FROM
users_table
WHERE
user_id >= 1 AND
user_id <= 3 AND
users_table.value_1 > 3 AND users_table.value_1 < 5
GROUP BY
user_id
HAVING
COUNT(*) > 1) AS subquery_2
ON
subquery_1.user_id = subquery_2.user_id
GROUP BY
subquery_1.user_id,
count_pay) AS subquery_top
GROUP BY
count_pay, user_id
)
GROUP BY user_id
HAVING count(*) > 1 AND sum(value_2) > 29
ORDER BY 1;
-- the following query doesn't have a meaningful result
-- but it is a valid query with an arbitrary subquery in
-- FROM clause involving a complex query in WHERE clause
SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT
u.user_id, e.event_type::text AS event, e.time
FROM
users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id AND
u.user_id IN
(
SELECT
user_id
FROM
users_table
WHERE value_2 >= 1
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type <= 3 AND value_3 > 1 AND user_id = users_table.user_id)
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 3 AND event_type <= 4 AND value_3 > 1 AND user_id = users_table.user_id)
)
) t
GROUP BY user_id
) q
ORDER BY 2 DESC, 1;
--
-- below tests only aims for cases where all relations
-- are not joined on partition key
--
-- e4 is not joined on the partition key
SELECT user_id, value_2 FROM users_table WHERE
value_1 > 1 AND value_1 < 2
AND value_2 >= 1
AND user_id IN
(
SELECT
e1.user_id
FROM (
-- Get the first time each user viewed the homepage.
SELECT
user_id,
1 AS view_homepage,
min(time) AS view_homepage_time
FROM events_table
WHERE
event_type IN (0, 1)
GROUP BY user_id
) e1 LEFT JOIN LATERAL (
SELECT
user_id,
1 AS use_demo,
time AS use_demo_time
FROM events_table
WHERE
user_id = e1.user_id AND
event_type IN (1, 2)
ORDER BY time
) e2 ON true LEFT JOIN LATERAL (
SELECT
user_id,
1 AS enter_credit_card,
time AS enter_credit_card_time
FROM events_table
WHERE
user_id = e2.user_id AND
event_type IN (2, 3)
ORDER BY time
) e3 ON true LEFT JOIN LATERAL (
SELECT
1 AS submit_card_info,
user_id,
time AS enter_credit_card_time
FROM events_table
WHERE
value_2 = e3.user_id AND
event_type IN (3, 4)
ORDER BY time
) e4 ON true LEFT JOIN LATERAL (
SELECT
1 AS see_bought_screen
FROM events_table
WHERE
user_id = e4.user_id AND
event_type IN (5, 6)
ORDER BY time
) e5 ON true
group by e1.user_id
HAVING sum(submit_card_info) > 0
);
-- left leaf query does not return partition key
SELECT
user_id
FROM
users_table
WHERE
user_id IN
(
SELECT
user_id
FROM (
SELECT
subquery_1.user_id, count_pay
FROM
(
(SELECT
2 * users_table.user_id as user_id,
'action=>1' AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 1 AND
users_table.user_id <= 3 AND
events_table.event_type > 1 AND events_table.event_type < 3
)
UNION
(SELECT
users_table.user_id,
'action=>2' AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 1 AND
users_table.user_id <= 3 AND
events_table.event_type > 2 AND events_table.event_type < 4
)
) AS subquery_1
LEFT JOIN
(SELECT
user_id,
COUNT(*) AS count_pay
FROM
users_table
WHERE
user_id >= 1 AND
user_id <= 3 AND
users_table.value_1 > 3 AND users_table.value_1 < 5
GROUP BY
user_id
HAVING
COUNT(*) > 1) AS subquery_2
ON
subquery_1.user_id = subquery_2.user_id
GROUP BY
subquery_1.user_id,
count_pay) AS subquery_top
GROUP BY
count_pay, user_id
)
GROUP BY user_id
HAVING count(*) > 1 AND sum(value_2) > 29
ORDER BY 1;
-- NOT EXISTS query has non-equi join
SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT
u.user_id, e.event_type::text AS event, e.time
FROM
users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id AND
u.user_id IN
(
SELECT
user_id
FROM
users_table
WHERE value_2 >= 5
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type <= 3 AND value_3 > 1 AND user_id = users_table.user_id)
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 3 AND event_type <= 4 AND value_3 > 1 AND user_id != users_table.user_id)
)
) t
GROUP BY user_id
) q
ORDER BY 2 DESC, 1;
-- subquery in where clause doesn't have a relation, but is constant
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT 1)
ORDER BY 1 ASC
LIMIT 2;
-- subquery in where clause has a volatile function and no relation
-- thus we recursively plan it
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT random()) AND user_id < 0
ORDER BY 1 ASC
LIMIT 2;
-- OFFSET is not supported in the subquey
SELECT
user_id
FROM
users_table
WHERE
value_2 >
(SELECT
max(value_2)
FROM
events_table
WHERE
users_table.user_id = events_table.user_id AND event_type = 2
GROUP BY
user_id
OFFSET 3
);
-- we can detect unsupported subqueries even if they appear
-- in WHERE subquery -> FROM subquery -> WHERE subquery
-- but we can recursively plan that anyway
SELECT DISTINCT user_id
FROM users_table
WHERE user_id
IN (SELECT
f_inner.user_id
FROM
(
SELECT
e1.user_id
FROM
users_table u1, events_table e1
WHERE
e1.user_id = u1.user_id
) as f_inner,
(
SELECT
e1.user_id
FROM
users_table u1, events_table e1
WHERE
e1.user_id = u1.user_id
AND e1.user_id IN (SELECT user_id FROM users_table ORDER BY user_id LIMIT 3)
) as f_outer
WHERE f_inner.user_id = f_outer.user_id
) ORDER BY 1 LIMIT 3;
-- semi join is not on the partition key for the third subquery, and recursively planned
SET client_min_messages TO DEBUG1;
SELECT user_id
FROM users_table
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 1 AND value_1 <= 2)
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 3 AND value_1 <= 4)
AND value_2 IN (SELECT user_id FROM users_table WHERE value_1 >= 5 AND value_1 <= 6) ORDER BY 1 DESC LIMIT 3;
RESET client_min_messages;
CREATE FUNCTION test_join_function(integer, integer) RETURNS bool
AS 'select $1 > $2;'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;
-- we disallow JOINs via functions
SELECT user_id, value_2 FROM users_table WHERE
value_1 = 1
AND value_2 >= 2
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=1 AND value_3 > 1 AND test_join_function(events_table.user_id, users_table.user_id))
ORDER BY 1 DESC, 2 DESC
LIMIT 3;
DROP FUNCTION test_join_function(int,int);