citus/src/test/regress/expected/non_colocated_subquery_join...

1102 lines
88 KiB
Plaintext

-- ===================================================================
-- test recursive planning functionality for non-colocated subqueries
-- We preferred to use EXPLAIN almost all the queries here,
-- otherwise the execution time of so many repartition queries would
-- be too high for the regression tests. Also, note that we're mostly
-- interested in recurive planning side of the things, thus supressing
-- the actual explain output.
-- ===================================================================
SET client_min_messages TO DEBUG1;
CREATE SCHEMA non_colocated_subquery;
DEBUG: switching to sequential query execution mode
DETAIL: A command for a distributed schema is run. To make sure subsequent commands see the schema correctly we need to make sure to use only one connection for all future commands
SET search_path TO non_colocated_subquery, public;
-- we don't use the data anyway
CREATE TABLE users_table_local AS SELECT * FROM users_table LIMIT 0;
DEBUG: push down of limit count: 0
CREATE TABLE events_table_local AS SELECT * FROM events_table LIMIT 0;
DEBUG: push down of limit count: 0
SET citus.enable_repartition_joins TO ON;
\set VERBOSITY terse
-- Function that parses explain output as JSON
-- copied from multi_explain.sql and had to give
-- a different name via postfix to prevent concurrent
-- create/drop etc.
CREATE OR REPLACE FUNCTION explain_json_2(query text)
RETURNS jsonb
AS $BODY$
DECLARE
result jsonb;
BEGIN
EXECUTE format('EXPLAIN (FORMAT JSON) %s', query) INTO result;
RETURN result;
END;
$BODY$ LANGUAGE plpgsql;
DEBUG: switching to sequential query execution mode
-- leaf queries contain colocated joins
-- but not the subquery
SELECT true AS valid FROM explain_json_2($$
SELECT
foo.value_2
FROM
(SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo,
(SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar
WHERE
foo.value_2 = bar.value_2;
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.value_2 FROM (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) bar WHERE (foo.value_2 OPERATOR(pg_catalog.=) bar.value_2)
valid
---------------------------------------------------------------------
t
(1 row)
-- simple non colocated join with subqueries in WHERE clause
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
FROM
events_table
WHERE
event_type
IN
(SELECT event_type FROM events_table WHERE user_id < 100);
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT event_type FROM public.events_table WHERE (user_id OPERATOR(pg_catalog.<) 100)
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.events_table WHERE (event_type OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(event_type integer)))
valid
---------------------------------------------------------------------
t
(1 row)
-- simple non colocated join with subqueries in WHERE clause with NOT IN
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
FROM
events_table
WHERE
user_id
NOT IN
(SELECT user_id FROM events_table WHERE event_type = 2);
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.events_table WHERE (event_type OPERATOR(pg_catalog.=) 2)
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.events_table WHERE (NOT (user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))))
valid
---------------------------------------------------------------------
t
(1 row)
-- Subqueries in WHERE and FROM are mixed
-- In this query, only subquery in WHERE is not a colocated join
SELECT true AS valid FROM explain_json_2($$
SELECT
foo.user_id
FROM
(SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo,
(SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar
WHERE
foo.user_id = bar.user_id AND
foo.event_type IN (SELECT event_type FROM events_table WHERE user_id < 3);
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT event_type FROM public.events_table WHERE (user_id OPERATOR(pg_catalog.<) 3)
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.user_id FROM (SELECT users_table.user_id, events_table.event_type FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar WHERE ((foo.user_id OPERATOR(pg_catalog.=) bar.user_id) AND (foo.event_type OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(event_type integer))))
valid
---------------------------------------------------------------------
t
(1 row)
-- Subqueries in WHERE and FROM are mixed
-- In this query, one of the joins in the FROM clause is not colocated
SELECT true AS valid FROM explain_json_2($$
SELECT
foo.user_id
FROM
(SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo,
(SELECT (users_table.user_id / 2) as user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar
WHERE
foo.user_id = bar.user_id AND
foo.user_id IN (SELECT user_id FROM events_table WHERE user_id < 10);
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT (users_table.user_id OPERATOR(pg_catalog./) 2) AS user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.user_id FROM (SELECT users_table.user_id, events_table.event_type FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE ((foo.user_id OPERATOR(pg_catalog.=) bar.user_id) AND (foo.user_id OPERATOR(pg_catalog.=) ANY (SELECT events_table.user_id FROM public.events_table WHERE (events_table.user_id OPERATOR(pg_catalog.<) 10))))
valid
---------------------------------------------------------------------
t
(1 row)
-- Subqueries in WHERE and FROM are mixed
-- In this query, both the joins in the FROM clause is not colocated
SELECT true AS valid FROM explain_json_2($$
SELECT
foo.user_id
FROM
(SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo,
(SELECT (users_table.user_id / 2) as user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar
WHERE
foo.user_id = bar.user_id AND
foo.user_id NOT IN (SELECT user_id FROM events_table WHERE user_id < 10);
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT (users_table.user_id OPERATOR(pg_catalog./) 2) AS user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))
DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM public.events_table WHERE (user_id OPERATOR(pg_catalog.<) 10)
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.user_id FROM (SELECT users_table.user_id, events_table.event_type FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE ((foo.user_id OPERATOR(pg_catalog.=) bar.user_id) AND (NOT (foo.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)))))
valid
---------------------------------------------------------------------
t
(1 row)
-- Subqueries in WHERE and FROM are mixed
-- In this query, one of the joins in the FROM clause is not colocated and subquery in WHERE clause is not colocated
-- similar to the above, but, this time bar is the anchor subquery
SELECT true AS valid FROM explain_json_2($$
SELECT
foo.user_id
FROM
(SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo,
(SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar
WHERE
foo.user_id = bar.user_id AND
foo.event_type IN (SELECT event_type FROM events_table WHERE user_id < 4);
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id, events_table.event_type FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))
DEBUG: generating subplan XXX_2 for subquery SELECT event_type FROM public.events_table WHERE (user_id OPERATOR(pg_catalog.<) 4)
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.user_id FROM (SELECT intermediate_result.user_id, intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event_type integer)) foo, (SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar WHERE ((foo.user_id OPERATOR(pg_catalog.=) bar.user_id) AND (foo.event_type OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.event_type FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(event_type integer))))
valid
---------------------------------------------------------------------
t
(1 row)
-- The inner subqueries and the subquery in WHERE are non-located joins
SELECT true AS valid FROM explain_json_2($$
SELECT foo_top.*, events_table.user_id FROM
(
SELECT
foo.user_id, random()
FROM
(SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo,
(SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.event_type AND event_type IN (5,6,7,8)) as bar
WHERE
foo.user_id = bar.user_id AND
foo.event_type IN (SELECT event_type FROM events_table WHERE user_id = 5)
) as foo_top, events_table WHERE events_table.user_id = foo_top.user_id;
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id, events_table.event_type FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))
DEBUG: generating subplan XXX_2 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.event_type) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))
DEBUG: generating subplan XXX_3 for subquery SELECT event_type FROM public.events_table WHERE (user_id OPERATOR(pg_catalog.=) 5)
DEBUG: generating subplan XXX_4 for subquery SELECT foo.user_id, random() AS random FROM (SELECT intermediate_result.user_id, intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event_type integer)) foo, (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE ((foo.user_id OPERATOR(pg_catalog.=) bar.user_id) AND (foo.event_type OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.event_type FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(event_type integer))))
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo_top.user_id, foo_top.random, events_table.user_id FROM (SELECT intermediate_result.user_id, intermediate_result.random FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, random double precision)) foo_top, public.events_table WHERE (events_table.user_id OPERATOR(pg_catalog.=) foo_top.user_id)
valid
---------------------------------------------------------------------
t
(1 row)
-- Slightly more complex query where there are 5 joins, 1 of them is non-colocated
SELECT true AS valid FROM explain_json_2($$
SELECT * FROM
(
SELECT
foo1.user_id, random()
FROM
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo1,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as foo2,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5
WHERE
foo1.user_id = foo4.user_id AND
foo1.user_id = foo2.user_id AND
foo1.user_id = foo3.user_id AND
foo1.user_id = foo4.user_id AND
foo1.user_id = foo5.value_1
) as foo_top;
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[17, 18, 19, 20])))
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, random FROM (SELECT foo1.user_id, random() AS random FROM (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo1, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) foo2, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[9, 10, 11, 12])))) foo3, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[13, 14, 15, 16])))) foo4, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) foo5 WHERE ((foo1.user_id OPERATOR(pg_catalog.=) foo4.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo2.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo3.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo4.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo5.value_1))) foo_top
valid
---------------------------------------------------------------------
t
(1 row)
-- Very similar to the above query
-- One of the queries is not joined on partition key, but this time subquery itself
SELECT true AS valid FROM explain_json_2($$
SELECT * FROM
(
SELECT
foo1.user_id, random()
FROM
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo1,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as foo2,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (17,18,19,20)) as foo5
WHERE
foo1.user_id = foo4.user_id AND
foo1.user_id = foo2.user_id AND
foo1.user_id = foo3.user_id AND
foo1.user_id = foo4.user_id AND
foo1.user_id = foo5.user_id
) as foo_top;
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[17, 18, 19, 20])))
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, random FROM (SELECT foo1.user_id, random() AS random FROM (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo1, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) foo2, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[9, 10, 11, 12])))) foo3, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[13, 14, 15, 16])))) foo4, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) foo5 WHERE ((foo1.user_id OPERATOR(pg_catalog.=) foo4.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo2.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo3.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo4.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo5.user_id))) foo_top
valid
---------------------------------------------------------------------
t
(1 row)
-- There are two non colocated joins, one is in the one of the leaf queries,
-- the other is on the top-level subquery
SELECT true AS valid FROM explain_json_2($$
SELECT * FROM
(
SELECT
foo1.user_id, random()
FROM
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo1,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as foo2,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5
WHERE
foo1.user_id = foo4.user_id AND
foo1.user_id = foo2.user_id AND
foo1.user_id = foo3.user_id AND
foo1.user_id = foo4.user_id AND
foo1.user_id = foo5.value_1
) as foo_top;
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))
DEBUG: generating subplan XXX_2 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[17, 18, 19, 20])))
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, random FROM (SELECT foo1.user_id, random() AS random FROM (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo1, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) foo2, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[9, 10, 11, 12])))) foo3, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[13, 14, 15, 16])))) foo4, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) foo5 WHERE ((foo1.user_id OPERATOR(pg_catalog.=) foo4.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo2.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo3.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo4.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo5.value_1))) foo_top
valid
---------------------------------------------------------------------
t
(1 row)
-- a similar query to the above, but, this sime the second
-- non colocated join is on the already recursively planned subquery
-- the results should be the same
SELECT true AS valid FROM explain_json_2($$
SELECT * FROM
(
SELECT
foo1.user_id, random()
FROM
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo1,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as foo2,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4,
(SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5
WHERE
foo1.user_id = foo4.user_id AND
foo1.user_id = foo2.user_id AND
foo1.user_id = foo3.user_id AND
foo1.user_id = foo4.user_id AND
foo2.user_id = foo5.value_1
) as foo_top;
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))
DEBUG: generating subplan XXX_2 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[17, 18, 19, 20])))
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, random FROM (SELECT foo1.user_id, random() AS random FROM (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo1, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) foo2, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[9, 10, 11, 12])))) foo3, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[13, 14, 15, 16])))) foo4, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) foo5 WHERE ((foo1.user_id OPERATOR(pg_catalog.=) foo4.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo2.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo3.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo4.user_id) AND (foo2.user_id OPERATOR(pg_catalog.=) foo5.value_1))) foo_top
valid
---------------------------------------------------------------------
t
(1 row)
-- Deeper subqueries are non-colocated
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
FROM
(
SELECT
foo.user_id
FROM
(SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo,
(SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar
WHERE
foo.user_id = bar.user_id) as foo_top JOIN
(
SELECT
foo.user_id
FROM
(SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo,
(SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar
WHERE
foo.user_id = bar.user_id) as bar_top
ON (foo_top.user_id = bar_top.user_id);
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))
DEBUG: generating subplan XXX_2 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT foo.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo, (SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar WHERE (foo.user_id OPERATOR(pg_catalog.=) bar.user_id)) foo_top JOIN (SELECT foo.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo, (SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar WHERE (foo.user_id OPERATOR(pg_catalog.=) bar.user_id)) bar_top ON ((foo_top.user_id OPERATOR(pg_catalog.=) bar_top.user_id)))
valid
---------------------------------------------------------------------
t
(1 row)
-- Top level Subquery is not colocated
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
FROM
(
SELECT
foo.user_id, foo.value_2
FROM
(SELECT DISTINCT users_table.user_id, users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo,
(SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar
WHERE
foo.user_id = bar.user_id) as foo_top JOIN
(
SELECT
foo.user_id
FROM
(SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo,
(SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as bar
WHERE
foo.user_id = bar.user_id) as bar_top
ON (foo_top.value_2 = bar_top.user_id);
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT foo.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[9, 10, 11, 12])))) foo, (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[13, 14, 15, 16])))) bar WHERE (foo.user_id OPERATOR(pg_catalog.=) bar.user_id)
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT foo.user_id, foo.value_2 FROM (SELECT DISTINCT users_table.user_id, users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar WHERE (foo.user_id OPERATOR(pg_catalog.=) bar.user_id)) foo_top JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar_top ON ((foo_top.value_2 OPERATOR(pg_catalog.=) bar_top.user_id)))
valid
---------------------------------------------------------------------
t
(1 row)
-- Top level Subquery is not colocated as the above
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
FROM
(
SELECT
foo.user_id, foo.value_2
FROM
(SELECT DISTINCT users_table.user_id, users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo,
(SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar
WHERE
foo.user_id = bar.user_id) as foo_top JOIN
(
SELECT
foo.user_id
FROM
(SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo,
(SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (13,14,15,16)) as bar
WHERE
foo.user_id = bar.user_id) as bar_top
ON (foo_top.value_2 = bar_top.user_id);
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[13, 14, 15, 16])))
DEBUG: generating subplan XXX_2 for subquery SELECT foo.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[9, 10, 11, 12])))) foo, (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE (foo.user_id OPERATOR(pg_catalog.=) bar.user_id)
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT foo.user_id, foo.value_2 FROM (SELECT DISTINCT users_table.user_id, users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar WHERE (foo.user_id OPERATOR(pg_catalog.=) bar.user_id)) foo_top JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar_top ON ((foo_top.value_2 OPERATOR(pg_catalog.=) bar_top.user_id)))
valid
---------------------------------------------------------------------
t
(1 row)
-- non colocated joins are deep inside the query
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
FROM
(
SELECT * FROM
(SELECT DISTINCT users_table.user_id FROM users_table,
(SELECT events_table.user_id as my_users FROM events_table, users_table WHERE events_table.event_type = users_table.user_id) as foo
WHERE foo.my_users = users_table.user_id) as mid_level_query
) as bar;
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT events_table.user_id AS my_users FROM public.events_table, public.users_table WHERE (events_table.event_type OPERATOR(pg_catalog.=) users_table.user_id)
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT mid_level_query.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, (SELECT intermediate_result.my_users FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(my_users integer)) foo WHERE (foo.my_users OPERATOR(pg_catalog.=) users_table.user_id)) mid_level_query) bar
valid
---------------------------------------------------------------------
t
(1 row)
-- similar to the above, with relation rtes
-- we're able to recursively plan foo
-- note that if we haven't added random() to the subquery, we'd be able run the query
-- via regular repartitioning since PostgreSQL would pull the query up
SELECT true AS valid FROM explain_json_2($$
SELECT count(*) FROM ( SELECT * FROM
(SELECT DISTINCT users_table.user_id FROM users_table,
(SELECT events_table.event_type as my_users, random() FROM events_table, users_table WHERE events_table.user_id = users_table.user_id) as foo
WHERE foo.my_users = users_table.user_id) as mid_level_query ) as bar;
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT events_table.event_type AS my_users, random() AS random FROM public.events_table, public.users_table WHERE (events_table.user_id OPERATOR(pg_catalog.=) users_table.user_id)
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT mid_level_query.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, (SELECT intermediate_result.my_users, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(my_users integer, random double precision)) foo WHERE (foo.my_users OPERATOR(pg_catalog.=) users_table.user_id)) mid_level_query) bar
valid
---------------------------------------------------------------------
t
(1 row)
-- same as the above query, but, one level deeper subquery
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
FROM
(
SELECT * FROM
(SELECT DISTINCT users_table.user_id FROM users_table,
(SELECT events_table.user_id as my_users FROM events_table,
(SELECT events_table.user_id, random() FROM users_table, events_table WHERE users_table.user_id = events_table.user_id) as selected_users
WHERE events_table.event_type = selected_users.user_id) as foo
WHERE foo.my_users = users_table.user_id) as mid_level_query
) as bar;
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT events_table.user_id, random() AS random FROM public.users_table, public.events_table WHERE (users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id)
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT mid_level_query.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, (SELECT events_table.user_id AS my_users FROM public.events_table, (SELECT intermediate_result.user_id, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, random double precision)) selected_users WHERE (events_table.event_type OPERATOR(pg_catalog.=) selected_users.user_id)) foo WHERE (foo.my_users OPERATOR(pg_catalog.=) users_table.user_id)) mid_level_query) bar
valid
---------------------------------------------------------------------
t
(1 row)
-- deeper query, subquery in WHERE clause
-- this time successfull plan the query since the join on the relation and
-- the subquery on the distribution key
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
FROM
(
SELECT * FROM
(SELECT DISTINCT users_table.user_id FROM users_table,
(SELECT events_table.user_id as my_users FROM events_table,
(SELECT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND
users_table.user_id IN (SELECT value_2 FROM events_table)
) as selected_users
WHERE events_table.user_id = selected_users.user_id) as foo
WHERE foo.my_users = users_table.user_id) as mid_level_query
) as bar;
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT value_2 FROM public.events_table
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT mid_level_query.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, (SELECT events_table.user_id AS my_users FROM public.events_table, (SELECT events_table_1.user_id FROM public.users_table users_table_1, public.events_table events_table_1 WHERE ((users_table_1.user_id OPERATOR(pg_catalog.=) events_table_1.user_id) AND (users_table_1.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer))))) selected_users WHERE (events_table.user_id OPERATOR(pg_catalog.=) selected_users.user_id)) foo WHERE (foo.my_users OPERATOR(pg_catalog.=) users_table.user_id)) mid_level_query) bar
valid
---------------------------------------------------------------------
t
(1 row)
-- should recursively plan the subquery in WHERE clause
SELECT true AS valid FROM explain_json_2($$SELECT
count(*)
FROM
users_table
WHERE
value_1
IN
(SELECT
users_table.user_id
FROM
users_table, events_table
WHERE
users_table.user_id = events_table.value_2 AND event_type IN (5,6));$$);
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6])))
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.users_table WHERE (value_1 OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)))
valid
---------------------------------------------------------------------
t
(1 row)
-- leaf subquery repartitioning should work fine when used with CTEs
SELECT true AS valid FROM explain_json_2($$
WITH q1 AS (SELECT user_id FROM users_table)
SELECT count(*) FROM q1, (SELECT
users_table.user_id, random()
FROM
users_table, events_table
WHERE
users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as bar WHERE bar.user_id = q1.user_id ;$$);
DEBUG: CTE q1 is going to be inlined via distributed planning
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id, random() AS random FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT users_table.user_id FROM public.users_table) q1, (SELECT intermediate_result.user_id, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, random double precision)) bar WHERE (bar.user_id OPERATOR(pg_catalog.=) q1.user_id)
valid
---------------------------------------------------------------------
t
(1 row)
-- subquery joins should work fine when used with CTEs
SELECT true AS valid FROM explain_json_2($$
WITH q1 AS MATERIALIZED (SELECT user_id FROM users_table)
SELECT count(*) FROM q1, (SELECT
users_table.user_id, random()
FROM
users_table, events_table
WHERE
users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as bar WHERE bar.user_id = q1.user_id ;$$);
DEBUG: generating subplan XXX_1 for CTE q1: SELECT user_id FROM public.users_table
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) q1, (SELECT users_table.user_id, random() AS random FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) bar WHERE (bar.user_id OPERATOR(pg_catalog.=) q1.user_id)
valid
---------------------------------------------------------------------
t
(1 row)
-- should work fine within UNIONs
SELECT true AS valid FROM explain_json_2($$
(SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) UNION
(SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8));$$);
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))
DEBUG: generating subplan XXX_2 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)
valid
---------------------------------------------------------------------
t
(1 row)
-- should work fine within leaf queries of deeper subqueries
SELECT true AS valid FROM explain_json_2($$
SELECT event, array_length(events_table, 1)
FROM (
SELECT event, array_agg(t.user_id) AS events_table
FROM (
SELECT
DISTINCT ON(e.event_type::text) e.event_type::text as event, e.time, e.user_id
FROM
users_table AS u,
events_table AS e,
(SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as bar
WHERE u.user_id = e.user_id AND
u.user_id IN
(
SELECT
user_id
FROM
users_table
WHERE value_2 >= 5
AND EXISTS (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4))
LIMIT 5
)
) t, users_table WHERE users_table.value_1 = t.event::int
GROUP BY event
) q
ORDER BY 2 DESC, 1;
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))
DEBUG: push down of limit count: 5
DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM public.users_table WHERE ((value_2 OPERATOR(pg_catalog.>=) 5) AND (EXISTS (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)))) LIMIT 5
DEBUG: generating subplan XXX_3 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))
DEBUG: generating subplan XXX_4 for subquery SELECT DISTINCT ON ((e.event_type)::text) (e.event_type)::text AS event, e."time", e.user_id FROM public.users_table u, public.events_table e, (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))))
DEBUG: generating subplan XXX_5 for subquery SELECT t.event, array_agg(t.user_id) AS events_table FROM (SELECT intermediate_result.event, intermediate_result."time", intermediate_result.user_id FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(event text, "time" timestamp without time zone, user_id integer)) t, public.users_table WHERE (users_table.value_1 OPERATOR(pg_catalog.=) (t.event)::integer) GROUP BY t.event
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT event, array_length(events_table, 1) AS array_length FROM (SELECT intermediate_result.event, intermediate_result.events_table FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result(event text, events_table integer[])) q ORDER BY (array_length(events_table, 1)) DESC, event
valid
---------------------------------------------------------------------
t
(1 row)
-- this is also supported since we can recursively plan relations as well
-- the relations are joined under a join tree with an alias
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
FROM
(users_table u1 JOIN users_table u2 using(value_1)) a JOIN (SELECT value_1, random() FROM users_table) as u3 USING (value_1);
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT value_1, random() AS random FROM public.users_table
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN public.users_table u2 USING (value_1)) a(value_1, user_id, "time", value_2, value_3, value_4, user_id_1, time_1, value_2_1, value_3_1, value_4_1) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1))
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- a very similar query to the above
-- however, this time we users a subquery instead of join alias, and it works
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
FROM
(SELECT * FROM users_table u1 JOIN users_table u2 using(value_1)) a JOIN (SELECT value_1, random() FROM users_table) as u3 USING (value_1);
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT u1.value_1, u1.user_id, u1."time", u1.value_2, u1.value_3, u1.value_4, u2.user_id, u2."time", u2.value_2, u2.value_3, u2.value_4 FROM (public.users_table u1 JOIN public.users_table u2 USING (value_1))
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.value_1, intermediate_result.user_id, intermediate_result."time", intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4, intermediate_result.user_id_1 AS user_id, intermediate_result.time_1 AS "time", intermediate_result.value_2_1 AS value_2, intermediate_result.value_3_1 AS value_3, intermediate_result.value_4_1 AS value_4 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, user_id integer, "time" timestamp without time zone, value_2 integer, value_3 double precision, value_4 bigint, user_id_1 integer, time_1 timestamp without time zone, value_2_1 integer, value_3_1 double precision, value_4_1 bigint)) a(value_1, user_id, "time", value_2, value_3, value_4, user_id_1, time_1, value_2_1, value_3_1, value_4_1) JOIN (SELECT users_table.value_1, random() AS random FROM public.users_table) u3 USING (value_1))
valid
---------------------------------------------------------------------
t
(1 row)
-- a similar query to the above, this time subquery is on the left
-- and the relation is on the right of the join tree
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
FROM
(SELECT value_2, random() FROM users_table) as u1
JOIN
events_table
using (value_2);
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT value_2, random() AS random FROM public.users_table
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u1 JOIN public.events_table USING (value_2))
valid
---------------------------------------------------------------------
t
(1 row)
-- recursive planning should kick in for outer joins as well
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
FROM
(SELECT value_2, random() FROM users_table) as u1
LEFT JOIN
(SELECT value_2, random() FROM users_table) as u2
USING(value_2);
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT value_2, random() AS random FROM public.users_table
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT users_table.value_2, random() AS random FROM public.users_table) u1 LEFT JOIN (SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u2 USING (value_2))
valid
---------------------------------------------------------------------
t
(1 row)
-- recursive planning should kick in for outer joins as well
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
FROM
(SELECT value_2, random() FROM users_table) as u1
RIGHT JOIN
(SELECT value_2, random() FROM users_table) as u2
USING(value_2);
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT value_2, random() AS random FROM public.users_table
DEBUG: recursively planning left side of the right join since the outer side is a recurring rel
DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: generating subplan XXX_2 for subquery SELECT value_2, random() AS random FROM public.users_table
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u1 RIGHT JOIN (SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u2 USING (value_2))
valid
---------------------------------------------------------------------
t
(1 row)
-- set operations may produce not very efficient plans
-- although we could have picked a as our anchor subquery,
-- we pick foo in this case and recursively plan a
SELECT true AS valid FROM explain_json_2($$
SELECT * FROM
(
(
SELECT user_id FROM users_table
UNION
SELECT user_id FROM users_table
) a
JOIN
(SELECT value_1 FROM users_table) as foo ON (a.user_id = foo.value_1)
);
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table
DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM public.users_table
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id FROM public.users_table UNION SELECT users_table.user_id FROM public.users_table
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a.user_id, foo.value_1 FROM ((SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) a JOIN (SELECT users_table.value_1 FROM public.users_table) foo ON ((a.user_id OPERATOR(pg_catalog.=) foo.value_1)))
valid
---------------------------------------------------------------------
t
(1 row)
-- we could do the same with regular tables as well
SELECT true AS valid FROM explain_json_2($$
SELECT * FROM
(
(
SELECT user_id FROM users_table
UNION
SELECT user_id FROM users_table
) a
JOIN
users_table as foo ON (a.user_id = foo.value_1)
);
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table
DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM public.users_table
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id FROM public.users_table UNION SELECT users_table.user_id FROM public.users_table
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a.user_id, foo.user_id, foo."time", foo.value_1, foo.value_2, foo.value_3, foo.value_4 FROM ((SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) a JOIN public.users_table foo ON ((a.user_id OPERATOR(pg_catalog.=) foo.value_1)))
valid
---------------------------------------------------------------------
t
(1 row)
-- this time the plan is optimial, we are
-- able to keep the UNION query given that foo
-- is the anchor
SELECT true AS valid FROM explain_json_2($$
SELECT * FROM
(
(SELECT user_id FROM users_table) as foo
JOIN
(
SELECT user_id FROM users_table WHERE user_id IN (1,2,3,4)
UNION
SELECT user_id FROM users_table WHERE user_id IN (5,6,7,8)
) a
ON (a.user_id = foo.user_id)
JOIN
(SELECT value_1 FROM users_table) as bar
ON(foo.user_id = bar.value_1)
);
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT value_1 FROM public.users_table
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.user_id, a.user_id, bar.value_1 FROM (((SELECT users_table.user_id FROM public.users_table) foo JOIN (SELECT users_table.user_id FROM public.users_table WHERE (users_table.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])) UNION SELECT users_table.user_id FROM public.users_table WHERE (users_table.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) a ON ((a.user_id OPERATOR(pg_catalog.=) foo.user_id))) JOIN (SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer)) bar ON ((foo.user_id OPERATOR(pg_catalog.=) bar.value_1)))
valid
---------------------------------------------------------------------
t
(1 row)
-- it should be safe to recursively plan non colocated subqueries
-- inside a CTE
SELECT true AS valid FROM explain_json_2($$
WITH non_colocated_subquery AS
(
SELECT
foo.value_2
FROM
(SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo,
(SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar
WHERE
foo.value_2 = bar.value_2
),
non_colocated_subquery_2 AS
(
SELECT
count(*) as cnt
FROM
events_table
WHERE
event_type
IN
(SELECT event_type FROM events_table WHERE user_id < 4)
)
SELECT
*
FROM
non_colocated_subquery, non_colocated_subquery_2
WHERE
non_colocated_subquery.value_2 != non_colocated_subquery_2.cnt
$$);
DEBUG: CTE non_colocated_subquery is going to be inlined via distributed planning
DEBUG: CTE non_colocated_subquery_2 is going to be inlined via distributed planning
DEBUG: generating subplan XXX_1 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))
DEBUG: generating subplan XXX_2 for subquery SELECT event_type FROM public.events_table WHERE (user_id OPERATOR(pg_catalog.<) 4)
DEBUG: generating subplan XXX_3 for subquery SELECT count(*) AS cnt FROM public.events_table WHERE (event_type OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.event_type FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(event_type integer)))
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT non_colocated_subquery.value_2, non_colocated_subquery_2.cnt FROM (SELECT foo.value_2 FROM (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) bar WHERE (foo.value_2 OPERATOR(pg_catalog.=) bar.value_2)) non_colocated_subquery, (SELECT intermediate_result.cnt FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(cnt bigint)) non_colocated_subquery_2 WHERE (non_colocated_subquery.value_2 OPERATOR(pg_catalog.<>) non_colocated_subquery_2.cnt)
valid
---------------------------------------------------------------------
t
(1 row)
-- non colocated subquery joins should work fine along with local tables
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
FROM
(SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo,
(SELECT users_table_local.value_2 FROM users_table_local, events_table_local WHERE users_table_local.user_id = events_table_local.user_id AND event_type IN (5,6,7,8)) as bar,
(SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as baz
WHERE
foo.value_2 = bar.value_2
AND
foo.value_2 = baz.value_2
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT users_table_local.value_2 FROM non_colocated_subquery.users_table_local, non_colocated_subquery.events_table_local WHERE ((users_table_local.user_id OPERATOR(pg_catalog.=) events_table_local.user_id) AND (events_table_local.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))
DEBUG: generating subplan XXX_2 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[9, 10, 11, 12])))
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) bar, (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) baz WHERE ((foo.value_2 OPERATOR(pg_catalog.=) bar.value_2) AND (foo.value_2 OPERATOR(pg_catalog.=) baz.value_2))
valid
---------------------------------------------------------------------
t
(1 row)
-- a combination of subqueries in FROM and WHERE clauses
SELECT true AS valid FROM explain_json_2($$
SELECT
count(*)
FROM
(SELECT user_id FROM users_table) as foo
JOIN
(
SELECT user_id FROM users_table WHERE user_id IN (1,2,3,4)
UNION
SELECT user_id FROM users_table WHERE user_id IN (5,6,7,8)
) a
ON (a.user_id = foo.user_id)
JOIN
(SELECT value_1, value_2 FROM users_table) as bar
ON(foo.user_id = bar.value_1)
WHERE
value_2 IN (SELECT value_1 FROM users_table WHERE value_2 < 1)
AND
value_1 IN (SELECT value_2 FROM users_table WHERE value_1 < 2)
AND
foo.user_id IN (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2))
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT value_1, value_2 FROM public.users_table
DEBUG: generating subplan XXX_2 for subquery SELECT value_1 FROM public.users_table WHERE (value_2 OPERATOR(pg_catalog.<) 1)
DEBUG: generating subplan XXX_3 for subquery SELECT value_2 FROM public.users_table WHERE (value_1 OPERATOR(pg_catalog.<) 2)
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (((SELECT users_table.user_id FROM public.users_table) foo JOIN (SELECT users_table.user_id FROM public.users_table WHERE (users_table.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])) UNION SELECT users_table.user_id FROM public.users_table WHERE (users_table.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) a ON ((a.user_id OPERATOR(pg_catalog.=) foo.user_id))) JOIN (SELECT intermediate_result.value_1, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, value_2 integer)) bar ON ((foo.user_id OPERATOR(pg_catalog.=) bar.value_1))) WHERE ((bar.value_2 OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer))) AND (bar.value_1 OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer))) AND (foo.user_id OPERATOR(pg_catalog.=) ANY (SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2]))))))
valid
---------------------------------------------------------------------
t
(1 row)
-- make sure that we don't pick the refeence table as
-- the anchor
SELECT true AS valid FROM explain_json_2($$
SELECT count(*)
FROM
users_reference_table AS users_table_ref,
(SELECT user_id FROM users_Table) AS foo,
(SELECT user_id, value_2 FROM events_Table) AS bar
WHERE
users_table_ref.user_id = foo.user_id
AND foo.user_id = bar.value_2;
$$);
DEBUG: generating subplan XXX_1 for subquery SELECT user_id, value_2 FROM public.events_table
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.users_reference_table users_table_ref, (SELECT users_table.user_id FROM public.users_table) foo, (SELECT intermediate_result.user_id, intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_2 integer)) bar WHERE ((users_table_ref.user_id OPERATOR(pg_catalog.=) foo.user_id) AND (foo.user_id OPERATOR(pg_catalog.=) bar.value_2))
valid
---------------------------------------------------------------------
t
(1 row)
-- make sure to skip calling recursive planning over and over again
-- for already recursively planned subqueries
SET client_min_messages TO DEBUG2;
SELECT *
FROM
(SELECT *
FROM users_table
OFFSET 0) AS users_table
JOIN LATERAL
(SELECT *
FROM
(SELECT *
FROM events_table
WHERE user_id = users_table.user_id) AS bar
LEFT JOIN users_table u2 ON u2.user_id = bar.value_2) AS foo ON TRUE;
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table OFFSET 0
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT users_table.user_id, users_table."time", users_table.value_1, users_table.value_2, users_table.value_3, users_table.value_4, foo.user_id, foo."time", foo.event_type, foo.value_2, foo.value_3, foo.value_4, foo.user_id_1 AS user_id, foo.time_1 AS "time", foo.value_1, foo.value_2_1 AS value_2, foo.value_3_1 AS value_3, foo.value_4_1 AS value_4 FROM ((SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table JOIN LATERAL (SELECT bar.user_id, bar."time", bar.event_type, bar.value_2, bar.value_3, bar.value_4, u2.user_id, u2."time", u2.value_1, u2.value_2, u2.value_3, u2.value_4 FROM ((SELECT events_table.user_id, events_table."time", events_table.event_type, events_table.value_2, events_table.value_3, events_table.value_4 FROM public.events_table WHERE (events_table.user_id OPERATOR(pg_catalog.=) users_table.user_id)) bar LEFT JOIN public.users_table u2 ON ((u2.user_id OPERATOR(pg_catalog.=) bar.value_2)))) foo(user_id, "time", event_type, value_2, value_3, value_4, user_id_1, time_1, value_1, value_2_1, value_3_1, value_4_1) ON (true))
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- similar to the above, make sure that we skip recursive planning when
-- the subquery doesn't have any tables
SELECT true AS valid FROM explain_json_2($$
SELECT *
FROM
(SELECT 1 AS user_id) AS users_table
JOIN LATERAL
(SELECT *
FROM
(SELECT *
FROM events_table
WHERE user_id = users_table.user_id) AS bar
LEFT JOIN users_table u2 ON u2.user_id = bar.value_2) AS foo ON TRUE
$$);
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
DEBUG: skipping recursive planning for the subquery since it contains references to outer queries
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- similar to the above, make sure that we skip recursive planning when
-- the subquery contains only intermediate results
SELECT COUNT(*) = 176
FROM
(
SELECT * FROM(
SELECT *
FROM users_table
EXCEPT
SELECT *
FROM users_table
WHERE value_1 > 2
) AS users_table_union
) AS users_table_limited
JOIN LATERAL
(SELECT *
FROM
(SELECT *
FROM
(SELECT *
FROM events_table WHERE value_3 > 4
INTERSECT
SELECT *
FROM events_table
WHERE value_2 > 2
) AS events_table
WHERE user_id = users_table_limited.user_id) AS bar
LEFT JOIN users_table u2 ON u2.user_id = bar.value_2) AS foo ON TRUE;
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_2 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table WHERE (value_1 OPERATOR(pg_catalog.>) 2)
DEBUG: Creating router plan
DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint) EXCEPT SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_4 for subquery SELECT user_id, "time", event_type, value_2, value_3, value_4 FROM public.events_table WHERE (value_3 OPERATOR(pg_catalog.>) (4)::double precision)
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_5 for subquery SELECT user_id, "time", event_type, value_2, value_3, value_4 FROM public.events_table WHERE (value_2 OPERATOR(pg_catalog.>) 2)
DEBUG: Creating router plan
DEBUG: generating subplan XXX_6 for subquery SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint) INTERSECT SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint)
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
DEBUG: recursively planning distributed relation "users_table" "u2" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "users_table" "u2" to a subquery
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_7 for subquery SELECT user_id FROM public.users_table u2 WHERE true
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT (count(*) OPERATOR(pg_catalog.=) 176) FROM ((SELECT users_table_union.user_id, users_table_union."time", users_table_union.value_1, users_table_union.value_2, users_table_union.value_3, users_table_union.value_4 FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table_union) users_table_limited JOIN LATERAL (SELECT bar.user_id, bar."time", bar.event_type, bar.value_2, bar.value_3, bar.value_4, u2.user_id, u2."time", u2.value_1, u2.value_2, u2.value_3, u2.value_4 FROM ((SELECT events_table.user_id, events_table."time", events_table.event_type, events_table.value_2, events_table.value_3, events_table.value_4 FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_6'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint)) events_table WHERE (events_table.user_id OPERATOR(pg_catalog.=) users_table_limited.user_id)) bar LEFT JOIN (SELECT u2_1.user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_7'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) u2_1) u2 ON ((u2.user_id OPERATOR(pg_catalog.=) bar.value_2)))) foo(user_id, "time", event_type, value_2, value_3, value_4, user_id_1, time_1, value_1, value_2_1, value_3_1, value_4_1) ON (true))
DEBUG: Creating router plan
?column?
---------------------------------------------------------------------
t
(1 row)
-- similar to the above, but this time there are multiple
-- non-colocated subquery joins one of them contains lateral
-- join
SELECT count(*) FROM events_table WHERE user_id NOT IN
(
SELECT users_table_limited.user_id
FROM
(SELECT *
FROM users_table
EXCEPT
SELECT *
FROM users_table
WHERE value_1 > 2
) AS users_table_limited
JOIN LATERAL
(SELECT *
FROM
(SELECT *
FROM
(SELECT *
FROM events_table WHERE value_3 > 4
INTERSECT
SELECT *
FROM events_table
WHERE value_2 > 2
) AS events_table
WHERE user_id = users_table_limited.user_id) AS bar
LEFT JOIN users_table u2 ON u2.user_id = bar.value_2) AS foo ON TRUE
);
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_2 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table WHERE (value_1 OPERATOR(pg_catalog.>) 2)
DEBUG: Creating router plan
DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint) EXCEPT SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_4 for subquery SELECT user_id, "time", event_type, value_2, value_3, value_4 FROM public.events_table WHERE (value_3 OPERATOR(pg_catalog.>) (4)::double precision)
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_5 for subquery SELECT user_id, "time", event_type, value_2, value_3, value_4 FROM public.events_table WHERE (value_2 OPERATOR(pg_catalog.>) 2)
DEBUG: Creating router plan
DEBUG: generating subplan XXX_6 for subquery SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint) INTERSECT SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint)
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
DEBUG: recursively planning distributed relation "users_table" "u2" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "users_table" "u2" to a subquery
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_7 for subquery SELECT user_id FROM public.users_table u2 WHERE true
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.events_table WHERE (NOT (user_id OPERATOR(pg_catalog.=) ANY (SELECT users_table_limited.user_id FROM ((SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table_limited JOIN LATERAL (SELECT bar.user_id, bar."time", bar.event_type, bar.value_2, bar.value_3, bar.value_4, u2.user_id, u2."time", u2.value_1, u2.value_2, u2.value_3, u2.value_4 FROM ((SELECT events_table_1.user_id, events_table_1."time", events_table_1.event_type, events_table_1.value_2, events_table_1.value_3, events_table_1.value_4 FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_6'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint)) events_table_1 WHERE (events_table_1.user_id OPERATOR(pg_catalog.=) users_table_limited.user_id)) bar LEFT JOIN (SELECT u2_1.user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_7'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) u2_1) u2 ON ((u2.user_id OPERATOR(pg_catalog.=) bar.value_2)))) foo(user_id, "time", event_type, value_2, value_3, value_4, user_id_1, time_1, value_1, value_2_1, value_3_1, value_4_1) ON (true)))))
DEBUG: Router planner cannot handle multi-shard select queries
count
---------------------------------------------------------------------
62
(1 row)
-- make sure that non-colocated subquery joins work fine in
-- modifications
CREATE TABLE table1 (id int, tenant_id int);
CREATE TABLE table2 (id int, tenant_id int) partition by range(tenant_id);
CREATE TABLE table2_p1 PARTITION OF table2 FOR VALUES FROM (1) TO (10);
-- modifications on the partitons are only allowed with rep=1
SET citus.shard_replication_factor TO 1;
SELECT create_distributed_table('table2','tenant_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('table1','tenant_id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE VIEW table1_view AS SELECT * from table1 table_name_for_view where id < 100;
-- all of the above queries are non-colocated subquery joins
-- because the views are replaced with subqueries
UPDATE table2 SET id=20 FROM table1_view WHERE table1_view.id=table2.id;
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_1 for subquery SELECT id, tenant_id FROM non_colocated_subquery.table1 table_name_for_view WHERE (id OPERATOR(pg_catalog.<) 100)
DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE non_colocated_subquery.table2 SET id = 20 FROM (SELECT intermediate_result.id, intermediate_result.tenant_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, tenant_id integer)) table1_view WHERE (table1_view.id OPERATOR(pg_catalog.=) table2.id)
DEBUG: Creating router plan
UPDATE table2_p1 SET id=20 FROM table1_view WHERE table1_view.id=table2_p1.id;
DEBUG: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: generating subplan XXX_1 for subquery SELECT id, tenant_id FROM non_colocated_subquery.table1 table_name_for_view WHERE (id OPERATOR(pg_catalog.<) 100)
DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE non_colocated_subquery.table2_p1 SET id = 20 FROM (SELECT intermediate_result.id, intermediate_result.tenant_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, tenant_id integer)) table1_view WHERE (table1_view.id OPERATOR(pg_catalog.=) table2_p1.id)
DEBUG: Creating router plan
RESET client_min_messages;
DROP FUNCTION explain_json_2(text);
SET search_path TO 'public';
DROP SCHEMA non_colocated_subquery CASCADE;
NOTICE: drop cascades to 5 other objects