mirror of https://github.com/citusdata/citus.git
1151 lines
45 KiB
Plaintext
1151 lines
45 KiB
Plaintext
-- ===================================================================
|
|
-- test recursive planning functionality with subqueries in WHERE
|
|
-- ===================================================================
|
|
CREATE SCHEMA subquery_in_where;
|
|
SET search_path TO subquery_in_where, public;
|
|
SET client_min_messages TO DEBUG1;
|
|
--CTEs can be used as a recurring tuple with subqueries in WHERE
|
|
WITH event_id
|
|
AS MATERIALIZED (SELECT user_id AS events_user_id,
|
|
time AS events_time,
|
|
event_type
|
|
FROM events_table)
|
|
SELECT Count(*)
|
|
FROM event_id
|
|
WHERE events_user_id IN (SELECT user_id
|
|
FROM users_table);
|
|
DEBUG: generating subplan XXX_1 for CTE event_id: SELECT user_id AS events_user_id, "time" AS events_time, event_type FROM public.events_table
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM public.users_table
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) event_id WHERE (events_user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)))
|
|
count
|
|
---------------------------------------------------------------------
|
|
101
|
|
(1 row)
|
|
|
|
--Correlated subqueries can not be used in WHERE clause
|
|
WITH event_id
|
|
AS(SELECT user_id AS events_user_id,
|
|
time AS events_time,
|
|
event_type
|
|
FROM events_table)
|
|
SELECT Count(*)
|
|
FROM event_id
|
|
WHERE events_user_id IN (SELECT user_id
|
|
FROM users_table
|
|
WHERE users_table.time = events_time);
|
|
DEBUG: CTE event_id is going to be inlined via distributed planning
|
|
DEBUG: generating subplan XXX_1 for CTE event_id: SELECT user_id AS events_user_id, "time" AS events_time, event_type FROM public.events_table
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) event_id WHERE (events_user_id OPERATOR(pg_catalog.=) ANY (SELECT users_table.user_id FROM public.users_table WHERE (users_table."time" OPERATOR(pg_catalog.=) event_id.events_time)))
|
|
ERROR: correlated subqueries are not supported when the FROM clause contains a CTE or subquery
|
|
-- Recurring tuples as empty join tree
|
|
SELECT *
|
|
FROM (SELECT 1 AS id, 2 AS value_1, 3 AS value_3
|
|
UNION ALL SELECT 2 as id, 3 as value_1, 4 as value_3) AS tt1
|
|
WHERE id IN (SELECT user_id
|
|
FROM events_table);
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT 1 AS id, 2 AS value_1, 3 AS value_3 UNION ALL SELECT 2 AS id, 3 AS value_1, 4 AS value_3
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM public.events_table
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id, value_1, value_3 FROM (SELECT intermediate_result.id, intermediate_result.value_1, intermediate_result.value_3 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, value_1 integer, value_3 integer)) tt1 WHERE (id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)))
|
|
id | value_1 | value_3
|
|
---------------------------------------------------------------------
|
|
1 | 2 | 3
|
|
2 | 3 | 4
|
|
(2 rows)
|
|
|
|
-- Recurring tuples in from clause as CTE and SET operation in WHERE clause
|
|
SELECT Count(*)
|
|
FROM (WITH event_id AS
|
|
(SELECT user_id AS events_user_id, time AS events_time, event_type
|
|
FROM events_table)
|
|
SELECT events_user_id, events_time, event_type
|
|
FROM event_id
|
|
ORDER BY 1,2,3
|
|
LIMIT 10) AS sub_table
|
|
WHERE events_user_id IN (
|
|
(SELECT user_id
|
|
FROM users_table
|
|
ORDER BY 1
|
|
LIMIT 10)
|
|
UNION ALL
|
|
(SELECT value_1
|
|
FROM users_table
|
|
ORDER BY 1
|
|
limit 10));
|
|
DEBUG: CTE event_id is going to be inlined via distributed planning
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table ORDER BY user_id LIMIT 10
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT value_1 FROM public.users_table ORDER BY value_1 LIMIT 10
|
|
DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION ALL SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer)
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_4 for subquery SELECT events_user_id, events_time, event_type FROM (SELECT events_table.user_id AS events_user_id, events_table."time" AS events_time, events_table.event_type FROM public.events_table) event_id ORDER BY events_user_id, events_time, event_type LIMIT 10
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) sub_table WHERE (events_user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)))
|
|
count
|
|
---------------------------------------------------------------------
|
|
10
|
|
(1 row)
|
|
|
|
-- Recurring tuples in from clause as SET operation on recursively plannable
|
|
-- queries and CTE in WHERE clause
|
|
SELECT
|
|
*
|
|
FROM
|
|
(
|
|
(SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
ORDER BY
|
|
user_id ASC
|
|
LIMIT
|
|
10
|
|
)
|
|
UNION ALL
|
|
(SELECT
|
|
value_1
|
|
FROM
|
|
users_table
|
|
ORDER BY
|
|
value_1 ASC
|
|
LIMIT
|
|
10
|
|
)
|
|
) as SUB_TABLE
|
|
WHERE
|
|
user_id
|
|
IN
|
|
(
|
|
WITH event_id AS (
|
|
SELECT
|
|
user_id as events_user_id, time as events_time, event_type
|
|
FROM
|
|
events_table
|
|
)
|
|
SELECT
|
|
events_user_id
|
|
FROM
|
|
event_id
|
|
ORDER BY
|
|
events_user_id
|
|
LIMIT
|
|
10
|
|
);
|
|
DEBUG: CTE event_id is going to be inlined via distributed planning
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT events_user_id FROM (SELECT events_table.user_id AS events_user_id, events_table."time" AS events_time, events_table.event_type FROM public.events_table) event_id ORDER BY events_user_id LIMIT 10
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM public.users_table ORDER BY user_id LIMIT 10
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_3 for subquery SELECT value_1 FROM public.users_table ORDER BY value_1 LIMIT 10
|
|
DEBUG: generating subplan XXX_4 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION ALL SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer)
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) sub_table WHERE (user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.events_user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer)))
|
|
user_id
|
|
---------------------------------------------------------------------
|
|
1
|
|
1
|
|
1
|
|
1
|
|
1
|
|
1
|
|
1
|
|
(7 rows)
|
|
|
|
-- Complex target list in WHERE clause
|
|
SELECT
|
|
COUNT(*)
|
|
FROM
|
|
(SELECT
|
|
user_id as events_user_id, time as events_time, event_type
|
|
FROM
|
|
events_table
|
|
ORDER BY
|
|
1,2
|
|
LIMIT
|
|
10
|
|
) as SUB_TABLE
|
|
WHERE
|
|
events_user_id
|
|
<=ANY (
|
|
SELECT
|
|
max(abs(user_id * 1) + mod(user_id, 3)) as val_1
|
|
FROM
|
|
users_table
|
|
GROUP BY
|
|
user_id
|
|
);
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id AS events_user_id, "time" AS events_time, event_type FROM public.events_table ORDER BY user_id, "time" LIMIT 10
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT max((abs((user_id OPERATOR(pg_catalog.*) 1)) OPERATOR(pg_catalog.+) mod(user_id, 3))) AS val_1 FROM public.users_table GROUP BY user_id
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) sub_table WHERE (events_user_id OPERATOR(pg_catalog.<=) ANY (SELECT intermediate_result.val_1 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(val_1 integer)))
|
|
count
|
|
---------------------------------------------------------------------
|
|
10
|
|
(1 row)
|
|
|
|
-- DISTINCT clause in WHERE
|
|
SELECT
|
|
COUNT(*)
|
|
FROM
|
|
(SELECT
|
|
user_id as events_user_id, time as events_time, event_type
|
|
FROM
|
|
events_table
|
|
LIMIT
|
|
10
|
|
) as SUB_TABLE
|
|
WHERE
|
|
events_user_id
|
|
IN (
|
|
SELECT
|
|
distinct user_id
|
|
FROM
|
|
users_table
|
|
GROUP BY
|
|
user_id
|
|
);
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id AS events_user_id, "time" AS events_time, event_type FROM public.events_table LIMIT 10
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT DISTINCT user_id FROM public.users_table GROUP BY user_id
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) sub_table WHERE (events_user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)))
|
|
count
|
|
---------------------------------------------------------------------
|
|
10
|
|
(1 row)
|
|
|
|
-- AND in WHERE clause
|
|
SELECT
|
|
COUNT(*)
|
|
FROM
|
|
(SELECT
|
|
user_id as events_user_id, time as events_time, event_type
|
|
FROM
|
|
events_table
|
|
ORDER BY
|
|
1,2,3
|
|
LIMIT
|
|
10
|
|
) as SUB_TABLE
|
|
WHERE
|
|
events_user_id
|
|
>=ANY (
|
|
SELECT
|
|
min(user_id)
|
|
FROM
|
|
users_table
|
|
GROUP BY
|
|
user_id
|
|
)
|
|
AND
|
|
events_user_id
|
|
<=ANY (
|
|
SELECT
|
|
max(user_id)
|
|
FROM
|
|
users_table
|
|
GROUP BY
|
|
user_id
|
|
);
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id AS events_user_id, "time" AS events_time, event_type FROM public.events_table ORDER BY user_id, "time", event_type LIMIT 10
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT min(user_id) AS min FROM public.users_table GROUP BY user_id
|
|
DEBUG: generating subplan XXX_3 for subquery SELECT max(user_id) AS max FROM public.users_table GROUP BY user_id
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) sub_table WHERE ((events_user_id OPERATOR(pg_catalog.>=) ANY (SELECT intermediate_result.min FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(min integer))) AND (events_user_id OPERATOR(pg_catalog.<=) ANY (SELECT intermediate_result.max FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(max integer))))
|
|
count
|
|
---------------------------------------------------------------------
|
|
10
|
|
(1 row)
|
|
|
|
-- AND in WHERE clause, part of the AND is pushdownable other is not
|
|
SELECT
|
|
COUNT(*)
|
|
FROM
|
|
(SELECT
|
|
user_id as events_user_id, time as events_time, event_type
|
|
FROM
|
|
events_table
|
|
ORDER BY
|
|
1,2,3
|
|
LIMIT
|
|
10
|
|
) as SUB_TABLE
|
|
WHERE
|
|
events_user_id
|
|
>=ANY (
|
|
SELECT
|
|
min(user_id)
|
|
FROM
|
|
users_table
|
|
GROUP BY
|
|
user_id
|
|
)
|
|
AND
|
|
events_user_id
|
|
<=ANY (
|
|
SELECT
|
|
max(value_2)
|
|
FROM
|
|
users_table
|
|
GROUP BY
|
|
user_id
|
|
);
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id AS events_user_id, "time" AS events_time, event_type FROM public.events_table ORDER BY user_id, "time", event_type LIMIT 10
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT min(user_id) AS min FROM public.users_table GROUP BY user_id
|
|
DEBUG: generating subplan XXX_3 for subquery SELECT max(value_2) AS max FROM public.users_table GROUP BY user_id
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) sub_table WHERE ((events_user_id OPERATOR(pg_catalog.>=) ANY (SELECT intermediate_result.min FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(min integer))) AND (events_user_id OPERATOR(pg_catalog.<=) ANY (SELECT intermediate_result.max FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(max integer))))
|
|
count
|
|
---------------------------------------------------------------------
|
|
10
|
|
(1 row)
|
|
|
|
-- Planning subqueries in WHERE clause in CTE recursively
|
|
WITH cte AS (
|
|
SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
*
|
|
FROM
|
|
users_table
|
|
ORDER BY
|
|
user_id ASC,
|
|
value_2 DESC
|
|
LIMIT
|
|
10
|
|
) as sub_table
|
|
WHERE
|
|
user_id
|
|
IN
|
|
(SELECT
|
|
value_2
|
|
FROM
|
|
events_table
|
|
)
|
|
)
|
|
SELECT
|
|
COUNT(*)
|
|
FROM
|
|
cte;
|
|
DEBUG: CTE cte is going to be inlined via distributed planning
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table ORDER BY user_id, value_2 DESC LIMIT 10
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT value_2 FROM public.events_table
|
|
DEBUG: generating subplan XXX_3 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) sub_table WHERE (user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)))
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) cte
|
|
count
|
|
---------------------------------------------------------------------
|
|
10
|
|
(1 row)
|
|
|
|
-- Planing subquery in WHERE clause in FROM clause of a subquery recursively
|
|
SELECT
|
|
COUNT(*)
|
|
FROM
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
*
|
|
FROM
|
|
users_table
|
|
ORDER BY
|
|
user_id ASC,
|
|
value_2 DESC
|
|
LIMIT
|
|
10
|
|
) as sub_table_1
|
|
WHERE
|
|
user_id
|
|
IN
|
|
(SELECT
|
|
value_2
|
|
FROM
|
|
events_table
|
|
)
|
|
) as sub_table_2;
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table ORDER BY user_id, value_2 DESC LIMIT 10
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT value_2 FROM public.events_table
|
|
DEBUG: generating subplan XXX_3 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) sub_table_1 WHERE (user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)))
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) sub_table_2
|
|
count
|
|
---------------------------------------------------------------------
|
|
10
|
|
(1 row)
|
|
|
|
-- Recurring table in the FROM clause of a subquery in the FROM clause
|
|
-- Recurring table is created by joining a two recurrign table
|
|
SELECT
|
|
SUM(user_id)
|
|
FROM
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
ORDER BY
|
|
user_id
|
|
LIMIT 10) as t1
|
|
INNER JOIN
|
|
(SELECT
|
|
user_id as user_id_2
|
|
FROM
|
|
users_table
|
|
ORDER BY
|
|
user_id
|
|
LIMIT
|
|
10) as t2
|
|
ON
|
|
t1.user_id = t2.user_id_2
|
|
WHERE
|
|
t1.user_id
|
|
IN
|
|
(SELECT
|
|
value_2
|
|
FROM
|
|
events_table)
|
|
) as t3
|
|
WHERE
|
|
user_id
|
|
>ANY
|
|
(SELECT
|
|
min(user_id)
|
|
FROM
|
|
events_table
|
|
GROUP BY
|
|
user_id);
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table ORDER BY user_id LIMIT 10
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT user_id AS user_id_2 FROM public.users_table ORDER BY user_id LIMIT 10
|
|
DEBUG: generating subplan XXX_3 for subquery SELECT value_2 FROM public.events_table
|
|
DEBUG: generating subplan XXX_4 for subquery SELECT t1.user_id, t2.user_id_2 FROM ((SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) t1 JOIN (SELECT intermediate_result.user_id_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id_2 integer)) t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id_2))) WHERE (t1.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)))
|
|
DEBUG: generating subplan XXX_5 for subquery SELECT min(user_id) AS min FROM public.events_table GROUP BY user_id
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT sum(user_id) AS sum FROM (SELECT intermediate_result.user_id, intermediate_result.user_id_2 FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, user_id_2 integer)) t3 WHERE (user_id OPERATOR(pg_catalog.>) ANY (SELECT intermediate_result.min FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result(min integer)))
|
|
sum
|
|
---------------------------------------------------------------------
|
|
18
|
|
(1 row)
|
|
|
|
-- Same example with the above query, but now check the rows with EXISTS
|
|
SELECT
|
|
SUM(user_id)
|
|
FROM
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
ORDER BY
|
|
user_id
|
|
LIMIT 10) as t1
|
|
INNER JOIN
|
|
(SELECT
|
|
user_id as user_id_2
|
|
FROM
|
|
users_table
|
|
ORDER BY
|
|
user_id
|
|
LIMIT
|
|
10) as t2
|
|
ON
|
|
t1.user_id = t2.user_id_2
|
|
WHERE
|
|
t1.user_id
|
|
IN
|
|
(SELECT
|
|
value_2
|
|
FROM
|
|
events_table)
|
|
) as t3
|
|
WHERE EXISTS
|
|
(SELECT
|
|
1,2
|
|
FROM
|
|
events_table
|
|
WHERE
|
|
events_table.value_2 = events_table.user_id);
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table ORDER BY user_id LIMIT 10
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT user_id AS user_id_2 FROM public.users_table ORDER BY user_id LIMIT 10
|
|
DEBUG: generating subplan XXX_3 for subquery SELECT value_2 FROM public.events_table
|
|
DEBUG: generating subplan XXX_4 for subquery SELECT t1.user_id, t2.user_id_2 FROM ((SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) t1 JOIN (SELECT intermediate_result.user_id_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id_2 integer)) t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id_2))) WHERE (t1.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)))
|
|
DEBUG: generating subplan XXX_5 for subquery SELECT 1, 2 FROM public.events_table WHERE (value_2 OPERATOR(pg_catalog.=) user_id)
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT sum(user_id) AS sum FROM (SELECT intermediate_result.user_id, intermediate_result.user_id_2 FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, user_id_2 integer)) t3 WHERE (EXISTS (SELECT intermediate_result."?column?", intermediate_result."?column?_1" AS "?column?" FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer, "?column?_1" integer)))
|
|
sum
|
|
---------------------------------------------------------------------
|
|
67
|
|
(1 row)
|
|
|
|
-- Same query with the above one, yet now we check the row's NON-existence
|
|
-- by NOT EXISTS. Note that, max value_2 of events_table is 5
|
|
SELECT
|
|
SUM(user_id)
|
|
FROM
|
|
(SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
ORDER BY
|
|
user_id
|
|
LIMIT 10) as t1
|
|
INNER JOIN
|
|
(SELECT
|
|
user_id as user_id_2
|
|
FROM
|
|
users_table
|
|
ORDER BY
|
|
user_id
|
|
LIMIT
|
|
10) as t2
|
|
ON
|
|
t1.user_id = t2.user_id_2
|
|
WHERE
|
|
t1.user_id
|
|
IN
|
|
(SELECT
|
|
value_2
|
|
FROM
|
|
events_table)
|
|
) as t3
|
|
WHERE NOT EXISTS
|
|
(SELECT
|
|
1,2
|
|
FROM
|
|
events_table
|
|
WHERE
|
|
events_table.value_2 = events_table.user_id + 6);
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table ORDER BY user_id LIMIT 10
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT user_id AS user_id_2 FROM public.users_table ORDER BY user_id LIMIT 10
|
|
DEBUG: generating subplan XXX_3 for subquery SELECT value_2 FROM public.events_table
|
|
DEBUG: generating subplan XXX_4 for subquery SELECT t1.user_id, t2.user_id_2 FROM ((SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) t1 JOIN (SELECT intermediate_result.user_id_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id_2 integer)) t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id_2))) WHERE (t1.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)))
|
|
DEBUG: generating subplan XXX_5 for subquery SELECT 1, 2 FROM public.events_table WHERE (value_2 OPERATOR(pg_catalog.=) (user_id OPERATOR(pg_catalog.+) 6))
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT sum(user_id) AS sum FROM (SELECT intermediate_result.user_id, intermediate_result.user_id_2 FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, user_id_2 integer)) t3 WHERE (NOT (EXISTS (SELECT intermediate_result."?column?", intermediate_result."?column?_1" AS "?column?" FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer, "?column?_1" integer))))
|
|
sum
|
|
---------------------------------------------------------------------
|
|
67
|
|
(1 row)
|
|
|
|
-- Check the existence of row by comparing it with the result of subquery in
|
|
-- WHERE clause. Note that subquery is planned recursively since there is no
|
|
-- distributed table in the from
|
|
SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
user_id, value_1
|
|
FROM
|
|
users_table
|
|
ORDER BY
|
|
user_id ASC,
|
|
value_1 ASC
|
|
LIMIT 10) as t3
|
|
WHERE row(user_id, value_1) =
|
|
(SELECT
|
|
min(user_id) + 1, min(user_id) + 1
|
|
FROM
|
|
events_table);
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT (min(user_id) OPERATOR(pg_catalog.+) 1), (min(user_id) OPERATOR(pg_catalog.+) 1) FROM public.events_table
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT user_id, value_1 FROM public.users_table ORDER BY user_id, value_1 LIMIT 10
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, value_1 FROM (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) t3 WHERE ((user_id, value_1) OPERATOR(pg_catalog.=) (SELECT intermediate_result."?column?", intermediate_result."?column?_1" AS "?column?" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer, "?column?_1" integer)))
|
|
user_id | value_1
|
|
---------------------------------------------------------------------
|
|
(0 rows)
|
|
|
|
-- Recursively plan subquery in WHERE clause when the FROM clause has a subquery
|
|
-- generated by generate_series function
|
|
SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
*
|
|
FROM
|
|
generate_series(1,10)
|
|
) as gst
|
|
WHERE
|
|
generate_series
|
|
IN
|
|
(SELECT
|
|
value_2
|
|
FROM
|
|
events_table
|
|
)
|
|
ORDER BY
|
|
generate_series ASC;
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT value_2 FROM public.events_table
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT generate_series FROM (SELECT generate_series.generate_series FROM generate_series(1, 10) generate_series(generate_series)) gst WHERE (generate_series OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer))) ORDER BY generate_series
|
|
generate_series
|
|
---------------------------------------------------------------------
|
|
1
|
|
2
|
|
3
|
|
4
|
|
5
|
|
(5 rows)
|
|
|
|
-- Similar to the test above, now we also have a generate_series in WHERE clause
|
|
SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
*
|
|
FROM
|
|
generate_series(1,10)
|
|
) as gst
|
|
WHERE
|
|
generate_series
|
|
IN
|
|
(SELECT
|
|
user_id
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
user_id
|
|
IN
|
|
(SELECT
|
|
*
|
|
FROM
|
|
generate_series(1,3)
|
|
)
|
|
)
|
|
ORDER BY
|
|
generate_series ASC;
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table WHERE (user_id OPERATOR(pg_catalog.=) ANY (SELECT generate_series.generate_series FROM generate_series(1, 3) generate_series(generate_series)))
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT generate_series FROM (SELECT generate_series.generate_series FROM generate_series(1, 10) generate_series(generate_series)) gst WHERE (generate_series OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))) ORDER BY generate_series
|
|
generate_series
|
|
---------------------------------------------------------------------
|
|
1
|
|
2
|
|
3
|
|
(3 rows)
|
|
|
|
-- non-colocated subquery in WHERE clause ANDed with false
|
|
SELECT count(*)
|
|
FROM users_Table
|
|
WHERE (FALSE AND EXISTS (SELECT * FROM events_table));
|
|
count
|
|
---------------------------------------------------------------------
|
|
0
|
|
(1 row)
|
|
|
|
-- multiple non-colocated subqueries in WHERE clause ANDed with false
|
|
SELECT count(*)
|
|
FROM users_Table
|
|
WHERE value_1 IN
|
|
(SELECT value_1
|
|
FROM users_Table) OR (FALSE AND EXISTS (SELECT * FROM events_table));
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT value_1 FROM public.users_table
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.users_table WHERE ((value_1 OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer))) OR (false AND (EXISTS (SELECT events_table.user_id, events_table."time", events_table.event_type, events_table.value_2, events_table.value_3, events_table.value_4 FROM public.events_table))))
|
|
count
|
|
---------------------------------------------------------------------
|
|
101
|
|
(1 row)
|
|
|
|
-- multiple non-colocated subqueries in WHERE clause ANDed with false
|
|
SELECT count(*)
|
|
FROM users_Table
|
|
WHERE value_1 IN
|
|
(SELECT value_1
|
|
FROM users_Table) AND (FALSE AND EXISTS (SELECT * FROM events_table));
|
|
count
|
|
---------------------------------------------------------------------
|
|
0
|
|
(1 row)
|
|
|
|
-- non-colocated subquery in WHERE clause ANDed with true
|
|
SELECT count(*)
|
|
FROM users_Table
|
|
WHERE (TRUE AND EXISTS (SELECT * FROM events_table));
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT user_id, "time", event_type, value_2, value_3, value_4 FROM public.events_table
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.users_table WHERE (true AND (EXISTS (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint))))
|
|
count
|
|
---------------------------------------------------------------------
|
|
101
|
|
(1 row)
|
|
|
|
-- multiple non-colocated subqueries in WHERE clause ANDed with true
|
|
SELECT count(*)
|
|
FROM users_Table
|
|
WHERE value_1 IN
|
|
(SELECT value_1
|
|
FROM users_Table) OR (EXISTS (SELECT * FROM events_table));
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT value_1 FROM public.users_table
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT user_id, "time", event_type, value_2, value_3, value_4 FROM public.events_table
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.users_table WHERE ((value_1 OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer))) OR (EXISTS (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint))))
|
|
count
|
|
---------------------------------------------------------------------
|
|
101
|
|
(1 row)
|
|
|
|
-- correlated subquery with aggregate in WHERE
|
|
SELECT
|
|
*
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
user_id IN
|
|
(
|
|
SELECT
|
|
SUM(events_table.user_id)
|
|
FROM
|
|
events_table
|
|
WHERE
|
|
users_table.user_id = events_table.user_id
|
|
)
|
|
;
|
|
user_id | time | value_1 | value_2 | value_3 | value_4
|
|
---------------------------------------------------------------------
|
|
(0 rows)
|
|
|
|
-- correlated subquery with aggregate in HAVING
|
|
SELECT
|
|
*
|
|
FROM
|
|
users_table
|
|
WHERE
|
|
user_id IN
|
|
(
|
|
SELECT
|
|
SUM(events_table.user_id)
|
|
FROM
|
|
events_table
|
|
WHERE
|
|
events_table.user_id = users_table.user_id
|
|
HAVING
|
|
MIN(value_2) > 2
|
|
)
|
|
;
|
|
user_id | time | value_1 | value_2 | value_3 | value_4
|
|
---------------------------------------------------------------------
|
|
(0 rows)
|
|
|
|
-- Local tables also planned recursively, so using it as part of the FROM clause
|
|
-- make the clause recurring
|
|
CREATE TABLE local_table(id int, value_1 int);
|
|
INSERT INTO local_table VALUES(1,1), (2,2);
|
|
SELECT
|
|
*
|
|
FROM
|
|
(SELECT
|
|
*
|
|
FROM
|
|
local_table) as sub_table
|
|
WHERE
|
|
id
|
|
IN
|
|
(SELECT
|
|
user_id
|
|
FROM
|
|
users_table);
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT id, value_1 FROM subquery_in_where.local_table
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM public.users_table
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id, value_1 FROM (SELECT intermediate_result.id, intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, value_1 integer)) sub_table WHERE (id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)))
|
|
id | value_1
|
|
---------------------------------------------------------------------
|
|
1 | 1
|
|
2 | 2
|
|
(2 rows)
|
|
|
|
-- Use local table in WHERE clause
|
|
SELECT
|
|
COUNT(*)
|
|
FROM
|
|
(SELECT
|
|
*
|
|
FROM
|
|
users_table
|
|
ORDER BY
|
|
user_id
|
|
LIMIT
|
|
10) as sub_table
|
|
WHERE
|
|
user_id
|
|
IN
|
|
(SELECT
|
|
id
|
|
FROM
|
|
local_table);
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT id FROM subquery_in_where.local_table
|
|
DEBUG: push down of limit count: 10
|
|
DEBUG: generating subplan XXX_2 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table ORDER BY user_id LIMIT 10
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) sub_table WHERE (user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)))
|
|
count
|
|
---------------------------------------------------------------------
|
|
10
|
|
(1 row)
|
|
|
|
-- basic NOT IN correlated subquery
|
|
SELECT
|
|
count(*)
|
|
FROM
|
|
events_table e
|
|
WHERE
|
|
value_2 NOT IN (SELECT value_2 FROM users_table WHERE user_id = e.user_id);
|
|
count
|
|
---------------------------------------------------------------------
|
|
7
|
|
(1 row)
|
|
|
|
-- correlated subquery with limit
|
|
SELECT
|
|
count(*)
|
|
FROM
|
|
events_table e
|
|
WHERE
|
|
value_2 IN (SELECT value_2 FROM users_table WHERE user_id = e.user_id ORDER BY value_2 LIMIT 1);
|
|
count
|
|
---------------------------------------------------------------------
|
|
10
|
|
(1 row)
|
|
|
|
-- correlated subquery with distinct
|
|
SELECT
|
|
count(*)
|
|
FROM
|
|
events_table e
|
|
WHERE
|
|
value_2 IN (SELECT DISTINCT (value_3) FROM users_table WHERE user_id = e.user_id);
|
|
count
|
|
---------------------------------------------------------------------
|
|
90
|
|
(1 row)
|
|
|
|
-- correlated subquery with aggregate
|
|
SELECT
|
|
count(*)
|
|
FROM
|
|
events_table e
|
|
WHERE
|
|
value_2 = (SELECT max(value_2) FROM users_table WHERE user_id = e.user_id);
|
|
count
|
|
---------------------------------------------------------------------
|
|
11
|
|
(1 row)
|
|
|
|
-- correlated subquery with window function
|
|
SELECT
|
|
count(*)
|
|
FROM
|
|
events_table e
|
|
WHERE
|
|
value_2 IN (SELECT row_number() OVER () FROM users_table WHERE user_id = e.user_id);
|
|
count
|
|
---------------------------------------------------------------------
|
|
94
|
|
(1 row)
|
|
|
|
-- correlated subquery with group by
|
|
SELECT
|
|
count(*)
|
|
FROM
|
|
events_table e
|
|
WHERE
|
|
value_3 IN (SELECT min(value_3) FROM users_table WHERE user_id = e.user_id GROUP BY value_2);
|
|
count
|
|
---------------------------------------------------------------------
|
|
72
|
|
(1 row)
|
|
|
|
SELECT
|
|
count(*)
|
|
FROM
|
|
events_table e
|
|
WHERE
|
|
value_3 IN (SELECT min(value_3) FROM users_table WHERE user_id = e.user_id GROUP BY value_2);
|
|
count
|
|
---------------------------------------------------------------------
|
|
72
|
|
(1 row)
|
|
|
|
-- correlated subquery with group by
|
|
SELECT
|
|
count(*)
|
|
FROM
|
|
events_table e
|
|
WHERE
|
|
value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2);
|
|
count
|
|
---------------------------------------------------------------------
|
|
10
|
|
(1 row)
|
|
|
|
-- correlated subquery with having
|
|
SELECT
|
|
count(*)
|
|
FROM
|
|
events_table e
|
|
WHERE
|
|
value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2 HAVING min(value_3) > (SELECT 1));
|
|
count
|
|
---------------------------------------------------------------------
|
|
0
|
|
(1 row)
|
|
|
|
SELECT
|
|
count(*)
|
|
FROM
|
|
events_table e
|
|
WHERE
|
|
value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2 HAVING min(value_3) > (SELECT e.value_3));
|
|
ERROR: Subqueries in HAVING cannot refer to outer query
|
|
-- nested correlated subquery
|
|
SELECT
|
|
count(*)
|
|
FROM
|
|
events_table e
|
|
WHERE
|
|
value_3 IN (
|
|
SELECT min(r.value_3) v FROM users_reference_table r JOIN (SELECT * FROM users_table WHERE user_id = e.user_id) u USING (user_id)
|
|
WHERE u.value_2 > 3
|
|
GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3);
|
|
count
|
|
---------------------------------------------------------------------
|
|
0
|
|
(1 row)
|
|
|
|
-- not co-located correlated subquery
|
|
SELECT
|
|
count(*)
|
|
FROM
|
|
events_table e
|
|
WHERE
|
|
value_3 IN (
|
|
SELECT min(r.value_3) v FROM users_reference_table r JOIN (SELECT * FROM users_table WHERE value_2 = e.user_id) u USING (user_id)
|
|
WHERE u.value_2 > 3
|
|
GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3);
|
|
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
|
-- cartesian correlated subquery
|
|
SELECT
|
|
count(*)
|
|
FROM
|
|
events_table e
|
|
WHERE
|
|
value_3 IN (
|
|
SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id)
|
|
WHERE u.value_2 > 3
|
|
GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3);
|
|
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
|
-- even more subtle cartesian correlated subquery
|
|
SELECT
|
|
count(*)
|
|
FROM
|
|
events_table e
|
|
WHERE
|
|
value_3 IN (
|
|
SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id)
|
|
WHERE u.value_2 > 3
|
|
GROUP BY u.value_2 HAVING min(r.value_3) > e.value_3);
|
|
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
|
-- not a correlated subquery, uses recursive planning
|
|
SELECT
|
|
count(*)
|
|
FROM
|
|
events_table e
|
|
WHERE
|
|
value_3 IN (
|
|
SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id)
|
|
WHERE u.value_2 > 3
|
|
GROUP BY r.value_2 HAVING min(r.value_3) > 0);
|
|
DEBUG: generating subplan XXX_1 for subquery SELECT min(r.value_3) AS v FROM (public.users_reference_table r JOIN public.users_table u USING (user_id)) WHERE (u.value_2 OPERATOR(pg_catalog.>) 3) GROUP BY r.value_2 HAVING (min(r.value_3) OPERATOR(pg_catalog.>) (0)::double precision)
|
|
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.events_table e WHERE (value_3 OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.v FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v double precision)))
|
|
count
|
|
---------------------------------------------------------------------
|
|
24
|
|
(1 row)
|
|
|
|
-- two levels of correlation should also allow
|
|
-- merge step in the subquery
|
|
SELECT sum(value_1)
|
|
FROM users_table u
|
|
WHERE EXISTS
|
|
(SELECT 1
|
|
FROM events_table e
|
|
WHERE u.user_id = e.user_id AND
|
|
EXISTS
|
|
(SELECT 1
|
|
FROM users_table u2
|
|
WHERE u2.user_id = u.user_id AND u2.value_1 = 5
|
|
LIMIT 1));
|
|
sum
|
|
---------------------------------------------------------------------
|
|
216
|
|
(1 row)
|
|
|
|
-- correlated subquery in WHERE, with a slightly
|
|
-- different syntax that the result of the subquery
|
|
-- is compared with a constant
|
|
SELECT sum(value_1)
|
|
FROM users_table u1
|
|
WHERE (SELECT COUNT(DISTINCT e1.value_2)
|
|
FROM events_table e1
|
|
WHERE e1.user_id = u1.user_id
|
|
) > 115;
|
|
sum
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
-- a correlated subquery which requires merge step
|
|
-- can be pushed down on UPDATE/DELETE queries as well
|
|
-- rollback to keep the rest of the tests unchanged
|
|
BEGIN;
|
|
UPDATE users_table u1
|
|
SET value_1 = (SELECT count(DISTINCT value_2)
|
|
FROM events_table e1
|
|
WHERE e1.user_id = u1.user_id);
|
|
DELETE FROM users_table u1 WHERE (SELECT count(DISTINCT value_2)
|
|
FROM events_table e1
|
|
WHERE e1.user_id = u1.user_id) > 10;
|
|
ROLLBACK;
|
|
-- a correlated anti-join can also be pushed down even if the subquery
|
|
-- has a LIMIT
|
|
SELECT avg(value_1)
|
|
FROM users_table u
|
|
WHERE NOT EXISTS
|
|
(SELECT 'XXX'
|
|
FROM events_table e
|
|
WHERE u.user_id = e.user_id and e.value_2 > 10000 LIMIT 1);
|
|
avg
|
|
---------------------------------------------------------------------
|
|
2.5544554455445545
|
|
(1 row)
|
|
|
|
-- a [correlated] lateral join can also be pushed down even if the subquery
|
|
-- has an aggregate wout a GROUP BY
|
|
SELECT
|
|
max(min_of_val_2), max(u1.value_1)
|
|
FROM
|
|
users_table u1
|
|
LEFT JOIN LATERAL
|
|
(SELECT min(e1.value_2) as min_of_val_2 FROM events_table e1 WHERE e1.user_id = u1.user_id) as foo ON (true);
|
|
max | max
|
|
---------------------------------------------------------------------
|
|
1 | 5
|
|
(1 row)
|
|
|
|
-- a self join is followed by a correlated subquery
|
|
EXPLAIN (COSTS OFF)
|
|
SELECT
|
|
*
|
|
FROM
|
|
users_table u1 JOIN users_table u2 USING (user_id)
|
|
WHERE
|
|
u1.value_1 < u2.value_1 AND
|
|
(SELECT
|
|
count(*)
|
|
FROM
|
|
events_table e1
|
|
WHERE
|
|
e1.user_id = u2.user_id) > 10;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Custom Scan (Citus Adaptive)
|
|
Task Count: 4
|
|
Tasks Shown: One of 4
|
|
-> Task
|
|
Node: host=localhost port=xxxxx dbname=regression
|
|
-> Hash Join
|
|
Hash Cond: (u2.user_id = u1.user_id)
|
|
Join Filter: (u1.value_1 < u2.value_1)
|
|
-> Seq Scan on users_table_1400256 u2
|
|
Filter: ((SubPlan 1) > 10)
|
|
SubPlan 1
|
|
-> Aggregate
|
|
-> Seq Scan on events_table_1400260 e1
|
|
Filter: (user_id = u2.user_id)
|
|
-> Hash
|
|
-> Seq Scan on users_table_1400256 u1
|
|
(16 rows)
|
|
|
|
-- when the colocated join of the FROM clause
|
|
-- entries happen on WHERE clause, Citus cannot
|
|
-- pushdown
|
|
-- Likely that the colocation checks should be
|
|
-- improved
|
|
SELECT
|
|
u1.user_id, u2.user_id
|
|
FROM
|
|
users_table u1, users_table u2
|
|
WHERE
|
|
u1.value_1 < u2.value_1 AND
|
|
(SELECT
|
|
count(*)
|
|
FROM
|
|
events_table e1
|
|
WHERE
|
|
e1.user_id = u2.user_id AND
|
|
u1.user_id = u2.user_id) > 10
|
|
ORDER BY 1,2;
|
|
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
|
-- create a view that contains correlated subquery
|
|
CREATE TEMPORARY VIEW correlated_subquery_view AS
|
|
SELECT u1.user_id
|
|
FROM users_table u1
|
|
WHERE (SELECT COUNT(DISTINCT e1.value_2)
|
|
FROM events_table e1
|
|
WHERE e1.user_id = u1.user_id
|
|
) > 0;
|
|
SELECT sum(user_id) FROM correlated_subquery_view;
|
|
sum
|
|
---------------------------------------------------------------------
|
|
376
|
|
(1 row)
|
|
|
|
-- now, join the view with another correlated subquery
|
|
SELECT
|
|
sum(mx)
|
|
FROM
|
|
correlated_subquery_view
|
|
LEFT JOIN LATERAL
|
|
(SELECT max(value_2) as mx FROM events_table WHERE correlated_subquery_view.user_id = events_table.user_id) as foo ON (true);
|
|
sum
|
|
---------------------------------------------------------------------
|
|
459
|
|
(1 row)
|
|
|
|
-- as an edge case, JOIN is on false
|
|
SELECT
|
|
sum(mx)
|
|
FROM
|
|
correlated_subquery_view
|
|
LEFT JOIN LATERAL
|
|
(SELECT max(value_2) as mx FROM events_table WHERE correlated_subquery_view.user_id = events_table.user_id) as foo ON (false);
|
|
sum
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
SELECT sum(value_1)
|
|
FROM users_table u1
|
|
WHERE (SELECT COUNT(DISTINCT e1.value_2)
|
|
FROM events_table e1
|
|
WHERE e1.user_id = u1.user_id AND false
|
|
) > 115;
|
|
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
|
|
SELECT sum(value_1)
|
|
FROM users_table u1
|
|
WHERE (SELECT COUNT(DISTINCT e1.value_2)
|
|
FROM events_table e1
|
|
WHERE e1.user_id = u1.user_id
|
|
) > 115 AND false;
|
|
sum
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
SET client_min_messages TO DEFAULT;
|
|
DROP TABLE local_table;
|
|
DROP SCHEMA subquery_in_where CASCADE;
|
|
SET search_path TO public;
|