mirror of https://github.com/citusdata/citus.git
574 lines
13 KiB
SQL
574 lines
13 KiB
SQL
--
|
|
-- multi subquery in where queries aims to expand existing subquery pushdown
|
|
-- regression tests to cover more cases specifically subqueries in WHERE clause
|
|
-- the tables that are used depends to multi_insert_select_behavioral_analytics_create_table.sql
|
|
--
|
|
-- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests
|
|
|
|
-- subqueries in WHERE with greater operator
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
value_2 >
|
|
(SELECT
|
|
max(value_2)
|
|
FROM
|
|
events_table
|
|
WHERE
|
|
users_table.user_id = events_table.user_id AND event_type = 1
|
|
GROUP BY
|
|
user_id
|
|
)
|
|
GROUP BY user_id
|
|
HAVING count(*) > 2
|
|
ORDER BY user_id
|
|
LIMIT 5;
|
|
|
|
-- same query with one additional join on non distribution column
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
value_2 >
|
|
(SELECT
|
|
max(value_2)
|
|
FROM
|
|
events_table
|
|
WHERE
|
|
users_table.user_id = events_table.user_id AND event_type = 1 AND
|
|
users_table.time > events_table.time
|
|
GROUP BY
|
|
user_id
|
|
)
|
|
GROUP BY user_id
|
|
HAVING count(*) > 1
|
|
ORDER BY user_id
|
|
LIMIT 5;
|
|
|
|
-- the other way around is not supported
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
value_2 >
|
|
(SELECT
|
|
max(value_2)
|
|
FROM
|
|
events_table
|
|
WHERE
|
|
users_table.user_id > events_table.user_id AND event_type = 1 AND
|
|
users_table.time = events_table.time
|
|
GROUP BY
|
|
user_id
|
|
)
|
|
GROUP BY user_id
|
|
HAVING count(*) > 1
|
|
ORDER BY user_id
|
|
LIMIT 5;
|
|
|
|
-- subqueries in where with ALL operator
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
value_2 > 1 AND
|
|
value_2 < ALL (SELECT avg(value_3) FROM events_table WHERE users_table.user_id = events_table.user_id GROUP BY user_id)
|
|
GROUP BY
|
|
1
|
|
ORDER BY
|
|
1 DESC
|
|
LIMIT 3;
|
|
|
|
-- IN operator on non-partition key
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
events_table as e1
|
|
WHERE
|
|
event_type IN
|
|
(SELECT
|
|
event_type
|
|
FROM
|
|
events_table as e2
|
|
WHERE
|
|
value_2 = 1 AND value_3 > 3 AND
|
|
e1.user_id = e2.user_id
|
|
)
|
|
ORDER BY 1;
|
|
|
|
-- NOT IN on non-partition key
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
events_table as e1
|
|
WHERE
|
|
event_type NOT IN
|
|
(SELECT
|
|
event_type
|
|
FROM
|
|
events_table as e2
|
|
WHERE
|
|
value_2 = 1 AND value_3 > 3 AND
|
|
e1.user_id = e2.user_id
|
|
)
|
|
GROUP BY 1
|
|
|
|
HAVING count(*) > 2
|
|
ORDER BY 1;
|
|
|
|
-- non-correlated query with =ANY on partition keys
|
|
SELECT
|
|
user_id, count(*)
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
user_id =ANY(SELECT user_id FROM users_table WHERE value_1 >= 1 AND value_1 <= 2) GROUP BY 1 ORDER BY 2 DESC LIMIT 5;
|
|
|
|
-- users that appeared more than 118 times
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
WHERE 2 <=
|
|
(SELECT
|
|
count(*)
|
|
FROM
|
|
events_table
|
|
WHERE
|
|
users_table.user_id = events_table.user_id
|
|
GROUP BY
|
|
user_id)
|
|
GROUP BY
|
|
user_id
|
|
ORDER BY
|
|
user_id;
|
|
|
|
|
|
-- the following query doesn't have a meaningful result
|
|
-- but it is a valid query with an arbitrary subquery in
|
|
-- WHERE clause
|
|
SELECT user_id, value_2 FROM users_table WHERE
|
|
value_1 > 1 AND value_1 < 3
|
|
AND value_2 >= 1
|
|
AND user_id IN
|
|
(
|
|
SELECT
|
|
e1.user_id
|
|
FROM (
|
|
-- Get the first time each user viewed the homepage.
|
|
SELECT
|
|
user_id,
|
|
1 AS view_homepage,
|
|
min(time) AS view_homepage_time
|
|
FROM events_table
|
|
WHERE
|
|
event_type IN (0, 1)
|
|
GROUP BY user_id
|
|
) e1 LEFT JOIN LATERAL (
|
|
SELECT
|
|
user_id,
|
|
1 AS use_demo,
|
|
time AS use_demo_time
|
|
FROM events_table
|
|
WHERE
|
|
user_id = e1.user_id AND
|
|
event_type IN (1, 2)
|
|
ORDER BY time
|
|
) e2 ON true LEFT JOIN LATERAL (
|
|
SELECT
|
|
user_id,
|
|
1 AS enter_credit_card,
|
|
time AS enter_credit_card_time
|
|
FROM events_table
|
|
WHERE
|
|
user_id = e2.user_id AND
|
|
event_type IN (2, 3)
|
|
ORDER BY time
|
|
) e3 ON true LEFT JOIN LATERAL (
|
|
SELECT
|
|
1 AS submit_card_info,
|
|
user_id,
|
|
time AS enter_credit_card_time
|
|
FROM events_table
|
|
WHERE
|
|
user_id = e3.user_id AND
|
|
event_type IN (3, 4)
|
|
ORDER BY time
|
|
) e4 ON true LEFT JOIN LATERAL (
|
|
SELECT
|
|
1 AS see_bought_screen
|
|
FROM events_table
|
|
WHERE
|
|
user_id = e4.user_id AND
|
|
event_type IN (5, 6)
|
|
ORDER BY time
|
|
) e5 ON true
|
|
group by e1.user_id
|
|
HAVING sum(submit_card_info) > 0
|
|
)
|
|
ORDER BY 1, 2;
|
|
|
|
|
|
-- similar to the above query
|
|
-- the following query doesn't have a meaningful result
|
|
-- but it is a valid query with an arbitrary subquery in
|
|
-- WHERE clause
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
user_id IN
|
|
(
|
|
SELECT
|
|
user_id
|
|
FROM (
|
|
SELECT
|
|
subquery_1.user_id, count_pay
|
|
FROM
|
|
(
|
|
(SELECT
|
|
users_table.user_id,
|
|
'action=>1' AS event,
|
|
events_table.time
|
|
FROM
|
|
users_table,
|
|
events_table
|
|
WHERE
|
|
users_table.user_id = events_table.user_id AND
|
|
users_table.user_id >= 1 AND
|
|
users_table.user_id <= 3 AND
|
|
events_table.event_type > 1 AND events_table.event_type < 3
|
|
)
|
|
UNION
|
|
(SELECT
|
|
users_table.user_id,
|
|
'action=>2' AS event,
|
|
events_table.time
|
|
FROM
|
|
users_table,
|
|
events_table
|
|
WHERE
|
|
users_table.user_id = events_table.user_id AND
|
|
users_table.user_id >= 1 AND
|
|
users_table.user_id <= 3 AND
|
|
events_table.event_type > 2 AND events_table.event_type < 4
|
|
)
|
|
) AS subquery_1
|
|
LEFT JOIN
|
|
(SELECT
|
|
user_id,
|
|
COUNT(*) AS count_pay
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
user_id >= 1 AND
|
|
user_id <= 3 AND
|
|
users_table.value_1 > 3 AND users_table.value_1 < 5
|
|
GROUP BY
|
|
user_id
|
|
HAVING
|
|
COUNT(*) > 1) AS subquery_2
|
|
ON
|
|
subquery_1.user_id = subquery_2.user_id
|
|
GROUP BY
|
|
subquery_1.user_id,
|
|
count_pay) AS subquery_top
|
|
GROUP BY
|
|
count_pay, user_id
|
|
)
|
|
GROUP BY user_id
|
|
HAVING count(*) > 1 AND sum(value_2) > 29
|
|
ORDER BY 1;
|
|
|
|
|
|
-- the following query doesn't have a meaningful result
|
|
-- but it is a valid query with an arbitrary subquery in
|
|
-- FROM clause involving a complex query in WHERE clause
|
|
SELECT user_id, array_length(events_table, 1)
|
|
FROM (
|
|
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
|
FROM (
|
|
SELECT
|
|
u.user_id, e.event_type::text AS event, e.time
|
|
FROM
|
|
users_table AS u,
|
|
events_table AS e
|
|
WHERE u.user_id = e.user_id AND
|
|
u.user_id IN
|
|
(
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
WHERE value_2 >= 1
|
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type <= 3 AND value_3 > 1 AND user_id = users_table.user_id)
|
|
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 3 AND event_type <= 4 AND value_3 > 1 AND user_id = users_table.user_id)
|
|
)
|
|
) t
|
|
GROUP BY user_id
|
|
) q
|
|
ORDER BY 2 DESC, 1;
|
|
|
|
--
|
|
-- below tests only aims for cases where all relations
|
|
-- are not joined on partition key
|
|
--
|
|
|
|
-- e4 is not joined on the partition key
|
|
SELECT user_id, value_2 FROM users_table WHERE
|
|
value_1 > 1 AND value_1 < 2
|
|
AND value_2 >= 1
|
|
AND user_id IN
|
|
(
|
|
SELECT
|
|
e1.user_id
|
|
FROM (
|
|
-- Get the first time each user viewed the homepage.
|
|
SELECT
|
|
user_id,
|
|
1 AS view_homepage,
|
|
min(time) AS view_homepage_time
|
|
FROM events_table
|
|
WHERE
|
|
event_type IN (0, 1)
|
|
GROUP BY user_id
|
|
) e1 LEFT JOIN LATERAL (
|
|
SELECT
|
|
user_id,
|
|
1 AS use_demo,
|
|
time AS use_demo_time
|
|
FROM events_table
|
|
WHERE
|
|
user_id = e1.user_id AND
|
|
event_type IN (1, 2)
|
|
ORDER BY time
|
|
) e2 ON true LEFT JOIN LATERAL (
|
|
SELECT
|
|
user_id,
|
|
1 AS enter_credit_card,
|
|
time AS enter_credit_card_time
|
|
FROM events_table
|
|
WHERE
|
|
user_id = e2.user_id AND
|
|
event_type IN (2, 3)
|
|
ORDER BY time
|
|
) e3 ON true LEFT JOIN LATERAL (
|
|
SELECT
|
|
1 AS submit_card_info,
|
|
user_id,
|
|
time AS enter_credit_card_time
|
|
FROM events_table
|
|
WHERE
|
|
value_2 = e3.user_id AND
|
|
event_type IN (3, 4)
|
|
ORDER BY time
|
|
) e4 ON true LEFT JOIN LATERAL (
|
|
SELECT
|
|
1 AS see_bought_screen
|
|
FROM events_table
|
|
WHERE
|
|
user_id = e4.user_id AND
|
|
event_type IN (5, 6)
|
|
ORDER BY time
|
|
) e5 ON true
|
|
group by e1.user_id
|
|
HAVING sum(submit_card_info) > 0
|
|
);
|
|
|
|
|
|
-- left leaf query does not return partition key
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
user_id IN
|
|
(
|
|
SELECT
|
|
user_id
|
|
FROM (
|
|
SELECT
|
|
subquery_1.user_id, count_pay
|
|
FROM
|
|
(
|
|
(SELECT
|
|
2 * users_table.user_id as user_id,
|
|
'action=>1' AS event,
|
|
events_table.time
|
|
FROM
|
|
users_table,
|
|
events_table
|
|
WHERE
|
|
users_table.user_id = events_table.user_id AND
|
|
users_table.user_id >= 1 AND
|
|
users_table.user_id <= 3 AND
|
|
events_table.event_type > 1 AND events_table.event_type < 3
|
|
)
|
|
UNION
|
|
(SELECT
|
|
users_table.user_id,
|
|
'action=>2' AS event,
|
|
events_table.time
|
|
FROM
|
|
users_table,
|
|
events_table
|
|
WHERE
|
|
users_table.user_id = events_table.user_id AND
|
|
users_table.user_id >= 1 AND
|
|
users_table.user_id <= 3 AND
|
|
events_table.event_type > 2 AND events_table.event_type < 4
|
|
)
|
|
) AS subquery_1
|
|
LEFT JOIN
|
|
(SELECT
|
|
user_id,
|
|
COUNT(*) AS count_pay
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
user_id >= 1 AND
|
|
user_id <= 3 AND
|
|
users_table.value_1 > 3 AND users_table.value_1 < 5
|
|
GROUP BY
|
|
user_id
|
|
HAVING
|
|
COUNT(*) > 1) AS subquery_2
|
|
ON
|
|
subquery_1.user_id = subquery_2.user_id
|
|
GROUP BY
|
|
subquery_1.user_id,
|
|
count_pay) AS subquery_top
|
|
GROUP BY
|
|
count_pay, user_id
|
|
)
|
|
GROUP BY user_id
|
|
HAVING count(*) > 1 AND sum(value_2) > 29
|
|
ORDER BY 1;
|
|
|
|
-- NOT EXISTS query has non-equi join
|
|
SELECT user_id, array_length(events_table, 1)
|
|
FROM (
|
|
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
|
FROM (
|
|
SELECT
|
|
u.user_id, e.event_type::text AS event, e.time
|
|
FROM
|
|
users_table AS u,
|
|
events_table AS e
|
|
WHERE u.user_id = e.user_id AND
|
|
u.user_id IN
|
|
(
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
WHERE value_2 >= 5
|
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type <= 3 AND value_3 > 1 AND user_id = users_table.user_id)
|
|
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 3 AND event_type <= 4 AND value_3 > 1 AND user_id != users_table.user_id)
|
|
)
|
|
) t
|
|
GROUP BY user_id
|
|
) q
|
|
ORDER BY 2 DESC, 1;
|
|
|
|
-- subquery in where clause doesn't have a relation, but is constant
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
value_2 >
|
|
(SELECT 1)
|
|
ORDER BY 1 ASC
|
|
LIMIT 2;
|
|
|
|
-- subquery in where clause has a volatile function and no relation
|
|
-- thus we recursively plan it
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
value_2 >
|
|
(SELECT random()) AND user_id < 0
|
|
ORDER BY 1 ASC
|
|
LIMIT 2;
|
|
|
|
-- OFFSET is not supported in the subquey
|
|
SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
value_2 >
|
|
(SELECT
|
|
max(value_2)
|
|
FROM
|
|
events_table
|
|
WHERE
|
|
users_table.user_id = events_table.user_id AND event_type = 2
|
|
GROUP BY
|
|
user_id
|
|
OFFSET 3
|
|
);
|
|
|
|
-- we can detect unsupported subqueries even if they appear
|
|
-- in WHERE subquery -> FROM subquery -> WHERE subquery
|
|
-- but we can recursively plan that anyway
|
|
SELECT DISTINCT user_id
|
|
FROM users_table
|
|
WHERE user_id
|
|
IN (SELECT
|
|
f_inner.user_id
|
|
FROM
|
|
(
|
|
SELECT
|
|
e1.user_id
|
|
FROM
|
|
users_table u1, events_table e1
|
|
WHERE
|
|
e1.user_id = u1.user_id
|
|
) as f_inner,
|
|
(
|
|
SELECT
|
|
e1.user_id
|
|
FROM
|
|
users_table u1, events_table e1
|
|
WHERE
|
|
e1.user_id = u1.user_id
|
|
AND e1.user_id IN (SELECT user_id FROM users_table ORDER BY user_id LIMIT 3)
|
|
) as f_outer
|
|
WHERE f_inner.user_id = f_outer.user_id
|
|
) ORDER BY 1 LIMIT 3;
|
|
|
|
-- semi join is not on the partition key for the third subquery, and recursively planned
|
|
SET client_min_messages TO DEBUG1;
|
|
SELECT user_id
|
|
FROM users_table
|
|
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 1 AND value_1 <= 2)
|
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 3 AND value_1 <= 4)
|
|
AND value_2 IN (SELECT user_id FROM users_table WHERE value_1 >= 5 AND value_1 <= 6) ORDER BY 1 DESC LIMIT 3;
|
|
RESET client_min_messages;
|
|
|
|
CREATE FUNCTION test_join_function(integer, integer) RETURNS bool
|
|
AS 'select $1 > $2;'
|
|
LANGUAGE SQL
|
|
IMMUTABLE
|
|
RETURNS NULL ON NULL INPUT;
|
|
|
|
-- we disallow JOINs via functions
|
|
SELECT user_id, value_2 FROM users_table WHERE
|
|
value_1 = 1
|
|
AND value_2 >= 2
|
|
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=1 AND value_3 > 1 AND test_join_function(events_table.user_id, users_table.user_id))
|
|
ORDER BY 1 DESC, 2 DESC
|
|
LIMIT 3;
|
|
|
|
DROP FUNCTION test_join_function(int,int);
|