Add some new tests for complex correlated subqueries in WHERE

pull/4385/head
Marco Slot 2020-12-04 13:23:54 +01:00
parent 707a6554b1
commit 23dccd8941
3 changed files with 319 additions and 2 deletions

View File

@ -282,8 +282,12 @@ SELECT (SELECT value_2 FROM view_1 WHERE user_id = e.user_id GROUP BY value_2)
FROM events_table e
GROUP BY 1
ORDER BY 1 LIMIT 3;
ERROR: cannot push down this subquery
DETAIL: Group by list without partition column is currently unsupported when a subquery references a column from another query
value_2
---------------------------------------------------------------------
3
(2 rows)
-- without view in the outer query FROM
SELECT (SELECT value_2 FROM view_1 WHERE user_id = e.user_id GROUP BY user_id, value_2)
FROM view_1 e

View File

@ -788,6 +788,185 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c
10
(1 row)
-- basic NOT IN correlated subquery
SELECT
count(*)
FROM
events_table e
WHERE
value_2 NOT IN (SELECT value_2 FROM users_table WHERE user_id = e.user_id);
count
---------------------------------------------------------------------
7
(1 row)
-- correlated subquery with limit
SELECT
count(*)
FROM
events_table e
WHERE
value_2 IN (SELECT value_2 FROM users_table WHERE user_id = e.user_id ORDER BY value_2 LIMIT 1);
count
---------------------------------------------------------------------
10
(1 row)
-- correlated subquery with distinct
SELECT
count(*)
FROM
events_table e
WHERE
value_2 IN (SELECT DISTINCT (value_3) FROM users_table WHERE user_id = e.user_id);
count
---------------------------------------------------------------------
90
(1 row)
-- correlated subquery with aggregate
SELECT
count(*)
FROM
events_table e
WHERE
value_2 = (SELECT max(value_2) FROM users_table WHERE user_id = e.user_id);
count
---------------------------------------------------------------------
11
(1 row)
-- correlated subquery with window function
SELECT
count(*)
FROM
events_table e
WHERE
value_2 IN (SELECT row_number() OVER () FROM users_table WHERE user_id = e.user_id);
count
---------------------------------------------------------------------
94
(1 row)
-- correlated subquery with group by
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) FROM users_table WHERE user_id = e.user_id GROUP BY value_2);
count
---------------------------------------------------------------------
72
(1 row)
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) FROM users_table WHERE user_id = e.user_id GROUP BY value_2);
count
---------------------------------------------------------------------
72
(1 row)
-- correlated subquery with group by
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2);
count
---------------------------------------------------------------------
10
(1 row)
-- correlated subquery with having
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2 HAVING min(value_3) > (SELECT 1));
count
---------------------------------------------------------------------
0
(1 row)
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2 HAVING min(value_3) > (SELECT e.value_3));
ERROR: Subqueries in HAVING cannot refer to outer query
-- nested correlated subquery
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN (SELECT * FROM users_table WHERE user_id = e.user_id) u USING (user_id)
WHERE u.value_2 > 3
GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3);
count
---------------------------------------------------------------------
0
(1 row)
-- not co-located correlated subquery
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN (SELECT * FROM users_table WHERE value_2 = e.user_id) u USING (user_id)
WHERE u.value_2 > 3
GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- cartesian correlated subquery
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id)
WHERE u.value_2 > 3
GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- even more subtle cartesian correlated subquery
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id)
WHERE u.value_2 > 3
GROUP BY u.value_2 HAVING min(r.value_3) > e.value_3);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- not a correlated subquery, uses recursive planning
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id)
WHERE u.value_2 > 3
GROUP BY r.value_2 HAVING min(r.value_3) > 0);
DEBUG: generating subplan XXX_1 for subquery SELECT min(r.value_3) AS v FROM (public.users_reference_table r JOIN public.users_table u USING (user_id)) WHERE (u.value_2 OPERATOR(pg_catalog.>) 3) GROUP BY r.value_2 HAVING (min(r.value_3) OPERATOR(pg_catalog.>) (0)::double precision)
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.events_table e WHERE (value_3 OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.v FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v double precision)))
count
---------------------------------------------------------------------
24
(1 row)
SET client_min_messages TO DEFAULT;
DROP TABLE local_table;
DROP SCHEMA subquery_in_where CASCADE;

View File

@ -579,6 +579,140 @@ IN
FROM
local_table);
-- basic NOT IN correlated subquery
-- The inner query is tied to the outer row through user_id = e.user_id.
-- Every statement in this group counts the events_table rows that pass the
-- WHERE filter, so each test produces a single number (or an error).
SELECT
count(*)
FROM
events_table e
WHERE
value_2 NOT IN (SELECT value_2 FROM users_table WHERE user_id = e.user_id);
-- correlated subquery with limit
-- ORDER BY value_2 LIMIT 1 keeps only the first value_2 (ascending) found for
-- the correlated user_id, which makes the single returned row deterministic.
SELECT
count(*)
FROM
events_table e
WHERE
value_2 IN (SELECT value_2 FROM users_table WHERE user_id = e.user_id ORDER BY value_2 LIMIT 1);
-- correlated subquery with distinct
-- NOTE(review): the outer value_2 is matched against the inner value_3, not
-- value_2 -- confirm the column mismatch is intentional.
SELECT
count(*)
FROM
events_table e
WHERE
value_2 IN (SELECT DISTINCT (value_3) FROM users_table WHERE user_id = e.user_id);
-- correlated subquery with aggregate
-- Scalar comparison: the subquery has an aggregate and no GROUP BY, so it
-- yields exactly one row per outer row.
SELECT
count(*)
FROM
events_table e
WHERE
value_2 = (SELECT max(value_2) FROM users_table WHERE user_id = e.user_id);
-- correlated subquery with window function
-- row_number() OVER () has no PARTITION BY / ORDER BY, so it simply numbers
-- the users_table rows matched for e.user_id from 1 upwards.
SELECT
count(*)
FROM
events_table e
WHERE
value_2 IN (SELECT row_number() OVER () FROM users_table WHERE user_id = e.user_id);
-- correlated subquery with group by
-- Groups on the subquery's own value_2 column (users_table.value_2).
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) FROM users_table WHERE user_id = e.user_id GROUP BY value_2);
-- NOTE(review): this statement is identical to the previous one -- confirm
-- whether a different GROUP BY variant was meant to be tested here.
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) FROM users_table WHERE user_id = e.user_id GROUP BY value_2);
-- correlated subquery with group by on an outer-query column
-- Unlike the tests above, the grouping expression is e.value_2, which comes
-- from the outer events_table row rather than from users_table.
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2);
-- correlated subquery with having
-- The HAVING clause compares against an uncorrelated scalar subquery (SELECT 1).
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2 HAVING min(value_3) > (SELECT 1));
-- Same shape, but the subquery inside HAVING references the outer query
-- (e.value_3). Expected to fail with
-- "Subqueries in HAVING cannot refer to outer query".
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2 HAVING min(value_3) > (SELECT e.value_3));
-- nested correlated subquery
-- The correlation (user_id = e.user_id) sits one level deeper, inside the
-- derived table u that is joined to users_reference_table. HAVING references
-- e.value_3 directly rather than through a subquery.
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN (SELECT * FROM users_table WHERE user_id = e.user_id) u USING (user_id)
WHERE u.value_2 > 3
GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3);
-- not co-located correlated subquery
-- Differs from the previous test only in the correlation predicate:
-- value_2 = e.user_id instead of user_id = e.user_id (user_id is presumably
-- the distribution column -- verify against the table setup). Expected to
-- fail with "complex joins are only supported when all distributed tables are
-- co-located and joined on their distribution columns".
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN (SELECT * FROM users_table WHERE value_2 = e.user_id) u USING (user_id)
WHERE u.value_2 > 3
GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3);
-- cartesian correlated subquery
-- No predicate relates r or u to e; the outer row is referenced only in
-- GROUP BY and HAVING. Expected to fail with the same "complex joins" error.
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id)
WHERE u.value_2 > 3
GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3);
-- even more subtle cartesian correlated subquery
-- GROUP BY now uses the inner u.value_2, so HAVING is the only place that
-- still references the outer row. Expected to fail with the same
-- "complex joins" error.
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id)
WHERE u.value_2 > 3
GROUP BY u.value_2 HAVING min(r.value_3) > e.value_3);
-- not a correlated subquery, uses recursive planning
-- Nothing inside the subquery references e, so it can be evaluated once as a
-- separate subplan and its result substituted into the outer query.
SELECT
count(*)
FROM
events_table e
WHERE
value_3 IN (
SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id)
WHERE u.value_2 > 3
GROUP BY r.value_2 HAVING min(r.value_3) > 0);
-- Restore the default client message level and drop the local table used by
-- the earlier local_table tests.
SET client_min_messages TO DEFAULT;
DROP TABLE local_table;