diff --git a/src/test/regress/expected/non_colocated_subquery_joins_0.out b/src/test/regress/expected/non_colocated_subquery_joins_0.out new file mode 100644 index 000000000..bfa9cfe9c --- /dev/null +++ b/src/test/regress/expected/non_colocated_subquery_joins_0.out @@ -0,0 +1,922 @@ +-- =================================================================== +-- test recursive planning functionality for non-colocated subqueries +-- We prefered to use EXPLAIN almost all the queries here, +-- otherwise the execution time of so many repartition queries would +-- be too high for the regression tests. Also, note that we're mostly +-- interested in recurive planning side of the things, thus supressing +-- the actual explain output. +-- =================================================================== +SET client_min_messages TO DEBUG1; +CREATE SCHEMA non_colocated_subquery; +SET search_path TO non_colocated_subquery, public; +-- we don't use the data anyway +CREATE TABLE users_table_local AS SELECT * FROM users_table LIMIT 0; +DEBUG: push down of limit count: 0 +CREATE TABLE events_table_local AS SELECT * FROM events_table LIMIT 0; +DEBUG: push down of limit count: 0 +SET citus.enable_repartition_joins TO ON; +\set VERBOSITY terse +-- Function that parses explain output as JSON +-- copied from multi_explain.sql and had to give +-- a different name via postfix to prevent concurrent +-- create/drop etc. +CREATE OR REPLACE FUNCTION explain_json_2(query text) +RETURNS jsonb +AS $BODY$ +DECLARE + result jsonb; +BEGIN + EXECUTE format('EXPLAIN (FORMAT JSON) %s', query) INTO result; + RETURN result; +END; +$BODY$ LANGUAGE plpgsql; +-- leaf queries contain colocated joins +-- but not the subquery +SELECT true AS valid FROM explain_json_2($$ + SELECT + foo.value_2 + FROM + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.value_2 = bar.value_2; +$$); +DEBUG: generating subplan 3_1 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: Plan 3 query after replacing subqueries and CTEs: SELECT foo.value_2 FROM (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.value_2 FROM read_intermediate_result('3_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) bar WHERE (foo.value_2 OPERATOR(pg_catalog.=) bar.value_2) + valid +------- + t +(1 row) + +-- simple non colocated join with subqueries in WHERE clause +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + events_table + WHERE + event_type + IN + (SELECT event_type FROM events_table WHERE user_id < 100); + +$$); +DEBUG: generating subplan 5_1 for subquery SELECT event_type FROM public.events_table WHERE (user_id OPERATOR(pg_catalog.<) 100) +DEBUG: Plan 5 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.events_table WHERE (event_type OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.event_type FROM read_intermediate_result('5_1'::text, 'binary'::citus_copy_format) intermediate_result(event_type integer))) + valid +------- + t +(1 row) + +-- simple non colocated join with subqueries in WHERE clause with NOT IN +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + events_table + WHERE + user_id + NOT IN + (SELECT user_id FROM events_table WHERE event_type = 2); +$$); +DEBUG: generating subplan 7_1 for subquery SELECT user_id FROM public.events_table WHERE (event_type OPERATOR(pg_catalog.=) 2) +DEBUG: Plan 7 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.events_table WHERE (NOT (user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('7_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)))) + valid +------- + t +(1 row) + +-- Subqueries in WHERE and FROM are mixed +-- In this query, only subquery in WHERE is not a colocated join +SELECT true AS valid FROM explain_json_2($$ + + SELECT + foo.user_id + FROM + (SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id AND + foo.event_type IN (SELECT event_type FROM events_table WHERE user_id < 3); + +$$); +DEBUG: generating subplan 9_1 for subquery SELECT event_type FROM public.events_table WHERE (user_id OPERATOR(pg_catalog.<) 3) +DEBUG: Plan 9 query after replacing subqueries and CTEs: SELECT foo.user_id FROM (SELECT users_table.user_id, events_table.event_type FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar WHERE ((foo.user_id OPERATOR(pg_catalog.=) bar.user_id) AND (foo.event_type OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.event_type FROM read_intermediate_result('9_1'::text, 'binary'::citus_copy_format) intermediate_result(event_type integer)))) + valid +------- + t +(1 row) + +-- Subqueries in WHERE and FROM are mixed +-- In this query, one of the joins in the FROM clause is not colocated +SELECT true AS valid FROM explain_json_2($$ + + SELECT + foo.user_id + FROM + (SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT (users_table.user_id / 2) as user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id AND + foo.user_id IN (SELECT user_id FROM events_table WHERE user_id < 10); +$$); +DEBUG: generating subplan 11_1 for subquery SELECT (users_table.user_id OPERATOR(pg_catalog./) 2) AS user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: Plan 11 query after replacing subqueries and CTEs: SELECT foo.user_id FROM (SELECT users_table.user_id, events_table.event_type FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.user_id FROM read_intermediate_result('11_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE ((foo.user_id OPERATOR(pg_catalog.=) bar.user_id) AND (foo.user_id OPERATOR(pg_catalog.=) ANY (SELECT events_table.user_id FROM public.events_table WHERE (events_table.user_id OPERATOR(pg_catalog.<) 10)))) + valid +------- + t +(1 row) + +-- Subqueries in WHERE and FROM are mixed +-- In this query, both the joins in the FROM clause is not colocated +SELECT true AS valid FROM explain_json_2($$ + + SELECT + foo.user_id + FROM + (SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT (users_table.user_id / 2) as user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id AND + foo.user_id NOT IN (SELECT user_id FROM events_table WHERE user_id < 10); +$$); +DEBUG: generating subplan 13_1 for subquery SELECT (users_table.user_id OPERATOR(pg_catalog./) 2) AS user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: generating subplan 13_2 for subquery SELECT user_id FROM public.events_table WHERE (user_id OPERATOR(pg_catalog.<) 10) +DEBUG: Plan 13 query after replacing subqueries and CTEs: SELECT foo.user_id FROM (SELECT users_table.user_id, events_table.event_type FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.user_id FROM read_intermediate_result('13_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE ((foo.user_id OPERATOR(pg_catalog.=) bar.user_id) AND (NOT (foo.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('13_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))))) + valid +------- + t +(1 row) + +-- Subqueries in WHERE and FROM are mixed +-- In this query, one of the joins in the FROM clause is not colocated and subquery in WHERE clause is not colocated +-- similar to the above, but, this time bar is the anchor subquery +SELECT true AS valid FROM explain_json_2($$ + SELECT + foo.user_id + FROM + (SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id AND + foo.event_type IN (SELECT event_type FROM events_table WHERE user_id < 4); +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 16_1 for subquery SELECT users_table.user_id, events_table.event_type FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) +DEBUG: generating subplan 16_2 for subquery SELECT event_type FROM public.events_table WHERE (user_id OPERATOR(pg_catalog.<) 4) +DEBUG: Plan 16 query after replacing subqueries and CTEs: SELECT foo.user_id FROM (SELECT intermediate_result.user_id, intermediate_result.event_type FROM read_intermediate_result('16_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event_type integer)) foo, (SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar WHERE ((foo.user_id OPERATOR(pg_catalog.=) bar.user_id) AND (foo.event_type OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.event_type FROM read_intermediate_result('16_2'::text, 'binary'::citus_copy_format) intermediate_result(event_type integer)))) +ERROR: cannot pushdown the subquery +-- The inner subqueries and the subquery in WHERE are non-located joins +SELECT true AS valid FROM explain_json_2($$ + SELECT foo_top.*, events_table.user_id FROM + ( + + SELECT + foo.user_id, random() + FROM + (SELECT users_table.user_id, event_type FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.event_type AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id AND + foo.event_type IN (SELECT event_type FROM events_table WHERE user_id = 5) + + ) as foo_top, events_table WHERE events_table.user_id = foo_top.user_id; +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 19_1 for subquery SELECT users_table.user_id, events_table.event_type FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 19_2 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.event_type) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: generating subplan 19_3 for subquery SELECT event_type FROM public.events_table WHERE (user_id OPERATOR(pg_catalog.=) 5) +DEBUG: generating subplan 19_4 for subquery SELECT foo.user_id, random() AS random FROM (SELECT intermediate_result.user_id, intermediate_result.event_type FROM read_intermediate_result('19_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event_type integer)) foo, (SELECT intermediate_result.user_id FROM read_intermediate_result('19_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE ((foo.user_id OPERATOR(pg_catalog.=) bar.user_id) AND (foo.event_type OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.event_type FROM read_intermediate_result('19_3'::text, 'binary'::citus_copy_format) intermediate_result(event_type integer)))) +DEBUG: Plan 19 query after replacing subqueries and CTEs: SELECT foo_top.user_id, foo_top.random, events_table.user_id FROM (SELECT intermediate_result.user_id, intermediate_result.random FROM read_intermediate_result('19_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, random double precision)) foo_top, public.events_table WHERE (events_table.user_id OPERATOR(pg_catalog.=) foo_top.user_id) + valid +------- + t +(1 row) + +-- Slightly more complex query where there are 5 joins, 1 of them is non-colocated +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + SELECT + foo1.user_id, random() + FROM + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo1, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as foo2, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5 + + WHERE + + foo1.user_id = foo4.user_id AND + foo1.user_id = foo2.user_id AND + foo1.user_id = foo3.user_id AND + foo1.user_id = foo4.user_id AND + foo1.user_id = foo5.value_1 + ) as foo_top; + +$$); +DEBUG: generating subplan 24_1 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[17, 18, 19, 20]))) +DEBUG: Plan 24 query after replacing subqueries and CTEs: SELECT user_id, random FROM (SELECT foo1.user_id, random() AS random FROM (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo1, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) foo2, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[9, 10, 11, 12])))) foo3, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[13, 14, 15, 16])))) foo4, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('24_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) foo5 WHERE ((foo1.user_id OPERATOR(pg_catalog.=) foo4.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo2.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo3.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo4.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo5.value_1))) foo_top + valid +------- + t +(1 row) + +-- Very similar to the above query +-- One of the queries is not joined on partition key, but this time subquery itself +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + SELECT + foo1.user_id, random() + FROM + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo1, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as foo2, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (17,18,19,20)) as foo5 + + WHERE + + foo1.user_id = foo4.user_id AND + foo1.user_id = foo2.user_id AND + foo1.user_id = foo3.user_id AND + foo1.user_id = foo4.user_id AND + foo1.user_id = foo5.user_id + ) as foo_top; +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 26_1 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[17, 18, 19, 20]))) +DEBUG: Plan 26 query after replacing subqueries and CTEs: SELECT user_id, random FROM (SELECT foo1.user_id, random() AS random FROM (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo1, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) foo2, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[9, 10, 11, 12])))) foo3, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[13, 14, 15, 16])))) foo4, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('26_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) foo5 WHERE ((foo1.user_id OPERATOR(pg_catalog.=) foo4.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo2.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo3.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo4.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo5.user_id))) foo_top + valid +------- + t +(1 row) + +-- There are two non colocated joins, one is in the one of the leaf queries, +-- the other is on the top-level subquery +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + SELECT + foo1.user_id, random() + FROM + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo1, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as foo2, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5 + WHERE + foo1.user_id = foo4.user_id AND + foo1.user_id = foo2.user_id AND + foo1.user_id = foo3.user_id AND + foo1.user_id = foo4.user_id AND + foo1.user_id = foo5.value_1 + ) as foo_top; +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 28_1 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: generating subplan 28_2 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[17, 18, 19, 20]))) +DEBUG: Plan 28 query after replacing subqueries and CTEs: SELECT user_id, random FROM (SELECT foo1.user_id, random() AS random FROM (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo1, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('28_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) foo2, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[9, 10, 11, 12])))) foo3, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[13, 14, 15, 16])))) foo4, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('28_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) foo5 WHERE ((foo1.user_id OPERATOR(pg_catalog.=) foo4.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo2.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo3.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo4.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo5.value_1))) foo_top + valid +------- + t +(1 row) + +-- a similar query to the above, but, this sime the second +-- non colocated join is on the already recursively planned subquery +-- the results should be the same +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + SELECT + foo1.user_id, random() + FROM + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo1, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as foo2, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo3, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as foo4, + (SELECT users_table.user_id, users_table.value_1 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (17,18,19,20)) as foo5 + WHERE + foo1.user_id = foo4.user_id AND + foo1.user_id = foo2.user_id AND + foo1.user_id = foo3.user_id AND + foo1.user_id = foo4.user_id AND + foo2.user_id = foo5.value_1 + ) as foo_top; +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 31_1 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: generating subplan 31_2 for subquery SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[17, 18, 19, 20]))) +DEBUG: Plan 31 query after replacing subqueries and CTEs: SELECT user_id, random FROM (SELECT foo1.user_id, random() AS random FROM (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo1, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('31_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) foo2, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[9, 10, 11, 12])))) foo3, (SELECT users_table.user_id, users_table.value_1 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[13, 14, 15, 16])))) foo4, (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('31_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) foo5 WHERE ((foo1.user_id OPERATOR(pg_catalog.=) foo4.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo2.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo3.user_id) AND (foo1.user_id OPERATOR(pg_catalog.=) foo4.user_id) AND (foo2.user_id OPERATOR(pg_catalog.=) foo5.value_1))) foo_top + valid +------- + t +(1 row) + +-- Deeper subqueries are non-colocated +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT + foo.user_id + FROM + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id) as foo_top JOIN + + ( + SELECT + foo.user_id + FROM + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id) as bar_top + ON (foo_top.user_id = bar_top.user_id); +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 34_1 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 34_2 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) +DEBUG: Plan 34 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT foo.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('34_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo, (SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar WHERE (foo.user_id OPERATOR(pg_catalog.=) bar.user_id)) foo_top JOIN (SELECT foo.user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('34_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) foo, (SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar WHERE (foo.user_id OPERATOR(pg_catalog.=) bar.user_id)) bar_top ON ((foo_top.user_id OPERATOR(pg_catalog.=) bar_top.user_id))) + valid +------- + t +(1 row) + +-- Top level Subquery is not colocated +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT + foo.user_id, foo.value_2 + FROM + (SELECT DISTINCT users_table.user_id, users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id) as foo_top JOIN + + ( + SELECT + foo.user_id + FROM + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo, + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (13,14,15,16)) as bar + WHERE + foo.user_id = bar.user_id) as bar_top + ON (foo_top.value_2 = bar_top.user_id); + +$$); +DEBUG: generating subplan 37_1 for subquery SELECT foo.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[9, 10, 11, 12])))) foo, (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[13, 14, 15, 16])))) bar WHERE (foo.user_id OPERATOR(pg_catalog.=) bar.user_id) +DEBUG: Plan 37 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT foo.user_id, foo.value_2 FROM (SELECT DISTINCT users_table.user_id, users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar WHERE (foo.user_id OPERATOR(pg_catalog.=) bar.user_id)) foo_top JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('37_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar_top ON ((foo_top.value_2 OPERATOR(pg_catalog.=) bar_top.user_id))) + valid +------- + t +(1 row) + +-- Top level Subquery is not colocated as the above +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT + foo.user_id, foo.value_2 + FROM + (SELECT DISTINCT users_table.user_id, users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.user_id = bar.user_id) as foo_top JOIN + ( + SELECT + foo.user_id + FROM + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as foo, + (SELECT DISTINCT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (13,14,15,16)) as bar + WHERE + foo.user_id = bar.user_id) as bar_top + ON (foo_top.value_2 = bar_top.user_id); +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 39_1 for subquery SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[13, 14, 15, 16]))) +DEBUG: generating subplan 39_2 for subquery SELECT foo.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[9, 10, 11, 12])))) foo, (SELECT intermediate_result.user_id FROM read_intermediate_result('39_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE (foo.user_id OPERATOR(pg_catalog.=) bar.user_id) +DEBUG: Plan 39 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT foo.user_id, foo.value_2 FROM (SELECT DISTINCT users_table.user_id, users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT DISTINCT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar WHERE (foo.user_id OPERATOR(pg_catalog.=) bar.user_id)) foo_top JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('39_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar_top ON ((foo_top.value_2 OPERATOR(pg_catalog.=) bar_top.user_id))) + valid +------- + t +(1 row) + +-- non colocated joins are deep inside the query +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT * FROM + (SELECT DISTINCT users_table.user_id FROM users_table, + (SELECT events_table.user_id as my_users FROM events_table, users_table WHERE events_table.event_type = users_table.user_id) as foo + WHERE foo.my_users = users_table.user_id) as mid_level_query + ) as bar; +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 42_1 for subquery SELECT events_table.user_id AS my_users FROM public.events_table, public.users_table WHERE (events_table.event_type OPERATOR(pg_catalog.=) users_table.user_id) +DEBUG: Plan 42 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT mid_level_query.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, (SELECT intermediate_result.my_users FROM read_intermediate_result('42_1'::text, 'binary'::citus_copy_format) intermediate_result(my_users integer)) foo WHERE (foo.my_users OPERATOR(pg_catalog.=) users_table.user_id)) mid_level_query) bar + valid +------- + t +(1 row) + +-- similar to the above, with relation rtes +-- we're able to recursively plan foo +-- note that if we haven't added random() to the subquery, we'd be able run the query +-- via regular repartitioning since PostgreSQL would pull the query up +SELECT true AS valid FROM explain_json_2($$ + + SELECT count(*) FROM ( SELECT * FROM + (SELECT DISTINCT users_table.user_id FROM users_table, + (SELECT events_table.event_type as my_users, random() FROM events_table, users_table WHERE events_table.user_id = users_table.user_id) as foo + WHERE foo.my_users = users_table.user_id) as mid_level_query ) as bar; + +$$); +DEBUG: generating subplan 44_1 for subquery SELECT events_table.event_type AS my_users, random() AS random FROM public.events_table, public.users_table WHERE (events_table.user_id OPERATOR(pg_catalog.=) users_table.user_id) +DEBUG: Plan 44 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT mid_level_query.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, (SELECT intermediate_result.my_users, intermediate_result.random FROM read_intermediate_result('44_1'::text, 'binary'::citus_copy_format) intermediate_result(my_users integer, random double precision)) foo WHERE (foo.my_users OPERATOR(pg_catalog.=) users_table.user_id)) mid_level_query) bar + valid +------- + t +(1 row) + +-- same as the above query, but, one level deeper subquery + SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT * FROM + (SELECT DISTINCT users_table.user_id FROM users_table, + (SELECT events_table.user_id as my_users FROM events_table, + (SELECT events_table.user_id, random() FROM users_table, events_table WHERE users_table.user_id = events_table.user_id) as selected_users + WHERE events_table.event_type = selected_users.user_id) as foo + + WHERE foo.my_users = users_table.user_id) as mid_level_query + ) as bar; + $$); +DEBUG: generating subplan 46_1 for subquery SELECT events_table.user_id, random() AS random FROM public.users_table, public.events_table WHERE (users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) +DEBUG: Plan 46 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT mid_level_query.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, (SELECT events_table.user_id AS my_users FROM public.events_table, (SELECT intermediate_result.user_id, intermediate_result.random FROM read_intermediate_result('46_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, random double precision)) selected_users WHERE (events_table.event_type OPERATOR(pg_catalog.=) selected_users.user_id)) foo WHERE (foo.my_users OPERATOR(pg_catalog.=) users_table.user_id)) mid_level_query) bar + valid +------- + t +(1 row) + +-- deeper query, subquery in WHERE clause +-- this time successfull plan the query since the join on the relation and +-- the subquery on the distribution key +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + ( + SELECT * FROM + (SELECT DISTINCT users_table.user_id FROM users_table, + + + (SELECT events_table.user_id as my_users FROM events_table, + (SELECT events_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND + + users_table.user_id IN (SELECT value_2 FROM events_table) + + ) as selected_users + WHERE events_table.user_id = selected_users.user_id) as foo + + WHERE foo.my_users = users_table.user_id) as mid_level_query + + ) as bar; + +$$); +DEBUG: generating subplan 48_1 for subquery SELECT value_2 FROM public.events_table +DEBUG: Plan 48 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT mid_level_query.user_id FROM (SELECT DISTINCT users_table.user_id FROM public.users_table, (SELECT events_table.user_id AS my_users FROM public.events_table, (SELECT events_table_1.user_id FROM public.users_table users_table_1, public.events_table events_table_1 WHERE ((users_table_1.user_id OPERATOR(pg_catalog.=) events_table_1.user_id) AND (users_table_1.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('48_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer))))) selected_users WHERE (events_table.user_id OPERATOR(pg_catalog.=) selected_users.user_id)) foo WHERE (foo.my_users OPERATOR(pg_catalog.=) users_table.user_id)) mid_level_query) bar + valid +------- + t +(1 row) + +-- should recursively plan the subquery in WHERE clause +SELECT true AS valid FROM explain_json_2($$SELECT + count(*) +FROM + users_table +WHERE + value_1 + IN + (SELECT + users_table.user_id + FROM + users_table, events_table + WHERE + users_table.user_id = events_table.value_2 AND event_type IN (5,6));$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 50_1 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6]))) +DEBUG: Plan 50 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.users_table WHERE (value_1 OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('50_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))) + valid +------- + t +(1 row) + +-- leaf subquery repartitioning should work fine when used with CTEs +SELECT true AS valid FROM explain_json_2($$ + WITH q1 AS (SELECT user_id FROM users_table) +SELECT count(*) FROM q1, (SELECT + users_table.user_id, random() + FROM + users_table, events_table + WHERE + users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) as bar WHERE bar.user_id = q1.user_id ;$$); +DEBUG: generating subplan 52_1 for CTE q1: SELECT user_id FROM public.users_table +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 52_2 for subquery SELECT users_table.user_id, random() AS random FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) +DEBUG: Plan 52 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('52_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) q1, (SELECT intermediate_result.user_id, intermediate_result.random FROM read_intermediate_result('52_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, random double precision)) bar WHERE (bar.user_id OPERATOR(pg_catalog.=) q1.user_id) + valid +------- + t +(1 row) + +-- subquery joins should work fine when used with CTEs +SELECT true AS valid FROM explain_json_2($$ + WITH q1 AS (SELECT user_id FROM users_table) + SELECT count(*) FROM q1, (SELECT + users_table.user_id, random() + FROM + users_table, events_table + WHERE + users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as bar WHERE bar.user_id = q1.user_id ;$$); +DEBUG: generating subplan 55_1 for CTE q1: SELECT user_id FROM public.users_table +DEBUG: Plan 55 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('55_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) q1, (SELECT users_table.user_id, random() AS random FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) bar WHERE (bar.user_id OPERATOR(pg_catalog.=) q1.user_id) + valid +------- + t +(1 row) + +-- should work fine within UNIONs +SELECT true AS valid FROM explain_json_2($$ + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) UNION + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8));$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 57_1 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) +DEBUG: generating subplan 57_2 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: Plan 57 query after replacing subqueries and CTEs: SELECT intermediate_result.user_id FROM read_intermediate_result('57_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION SELECT intermediate_result.user_id FROM read_intermediate_result('57_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) + valid +------- + t +(1 row) + +-- should work fine within leaf queries of deeper subqueries +SELECT true AS valid FROM explain_json_2($$ +SELECT event, array_length(events_table, 1) +FROM ( + SELECT event, array_agg(t.user_id) AS events_table + FROM ( + SELECT + DISTINCT ON(e.event_type::text) e.event_type::text as event, e.time, e.user_id + FROM + users_table AS u, + events_table AS e, + (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (5,6,7,8)) as bar + WHERE u.user_id = e.user_id AND + u.user_id IN + ( + SELECT + user_id + FROM + users_table + WHERE value_2 >= 5 + AND EXISTS (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.value_2 AND event_type IN (1,2,3,4)) + LIMIT 5 + ) + ) t, users_table WHERE users_table.value_1 = t.event::int + GROUP BY event +) q +ORDER BY 2 DESC, 1; +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 60_1 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) +DEBUG: push down of limit count: 5 +DEBUG: generating subplan 60_2 for subquery SELECT user_id FROM public.users_table WHERE ((value_2 OPERATOR(pg_catalog.>=) 5) AND (EXISTS (SELECT intermediate_result.user_id FROM read_intermediate_result('60_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)))) LIMIT 5 +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 60_3 for subquery SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.value_2) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: generating subplan 60_4 for subquery SELECT DISTINCT ON ((e.event_type)::text) (e.event_type)::text AS event, e."time", e.user_id FROM public.users_table u, public.events_table e, (SELECT intermediate_result.user_id FROM read_intermediate_result('60_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) bar WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('60_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)))) +DEBUG: generating subplan 60_5 for subquery SELECT t.event, array_agg(t.user_id) AS events_table FROM (SELECT intermediate_result.event, intermediate_result."time", intermediate_result.user_id FROM read_intermediate_result('60_4'::text, 'binary'::citus_copy_format) intermediate_result(event text, "time" timestamp without time zone, user_id integer)) t, public.users_table WHERE (users_table.value_1 OPERATOR(pg_catalog.=) (t.event)::integer) GROUP BY t.event +DEBUG: Plan 60 query after replacing subqueries and CTEs: SELECT event, array_length(events_table, 1) AS array_length FROM (SELECT intermediate_result.event, intermediate_result.events_table FROM read_intermediate_result('60_5'::text, 'binary'::citus_copy_format) intermediate_result(event text, events_table integer[])) q ORDER BY (array_length(events_table, 1)) DESC, event + valid +------- + t +(1 row) + +-- this is also supported since we can recursively plan relations as well +-- the relations are joined under a join tree with an alias +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (users_table u1 JOIN users_table u2 using(value_1)) a JOIN (SELECT value_1, random() FROM users_table) as u3 USING (value_1); +$$); +DEBUG: generating subplan 66_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 66_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 66 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('66_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) a(value_1, user_id, "time", value_2, value_3, value_4, user_id_1, time_1, value_2_1, value_3_1, value_4_1) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('66_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) + valid +------- + t +(1 row) + +-- a very similar query to the above +-- however, this time we users a subquery instead of join alias, and it works +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (SELECT * FROM users_table u1 JOIN users_table u2 using(value_1)) a JOIN (SELECT value_1, random() FROM users_table) as u3 USING (value_1); +$$); +DEBUG: cannot use real time executor with repartition jobs +DEBUG: generating subplan 69_1 for subquery SELECT u1.value_1, u1.user_id, u1."time", u1.value_2, u1.value_3, u1.value_4, u2.user_id, u2."time", u2.value_2, u2.value_3, u2.value_4 FROM (public.users_table u1 JOIN public.users_table u2 USING (value_1)) +DEBUG: Plan 69 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.value_1, intermediate_result.user_id, intermediate_result."time", intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4, intermediate_result.user_id_1 AS user_id, intermediate_result.time_1 AS "time", intermediate_result.value_2_1 AS value_2, intermediate_result.value_3_1 AS value_3, intermediate_result.value_4_1 AS value_4 FROM read_intermediate_result('69_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, user_id integer, "time" timestamp without time zone, value_2 integer, value_3 double precision, value_4 bigint, user_id_1 integer, time_1 timestamp without time zone, value_2_1 integer, value_3_1 double precision, value_4_1 bigint)) a(value_1, user_id, "time", value_2, value_3, value_4, user_id_1, time_1, value_2_1, value_3_1, value_4_1) JOIN (SELECT users_table.value_1, random() AS random FROM public.users_table) u3 USING (value_1)) + valid +------- + t +(1 row) + +-- a similar query to the above, this time subquery is on the left +-- and the relation is on the right of the join tree +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (SELECT value_2, random() FROM users_table) as u1 + JOIN + events_table + using (value_2); +$$); +DEBUG: generating subplan 71_1 for subquery SELECT value_2, random() AS random FROM public.users_table +DEBUG: Plan 71 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('71_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u1 JOIN public.events_table USING (value_2)) + valid +------- + t +(1 row) + +-- recursive planning should kick in for outer joins as well +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (SELECT value_2, random() FROM users_table) as u1 + LEFT JOIN + (SELECT value_2, random() FROM users_table) as u2 + USING(value_2); +$$); +DEBUG: generating subplan 73_1 for subquery SELECT value_2, random() AS random FROM public.users_table +DEBUG: Plan 73 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT users_table.value_2, random() AS random FROM public.users_table) u1 LEFT JOIN (SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('73_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u2 USING (value_2)) + valid +------- + t +(1 row) + +-- recursive planning should kick in for outer joins as well +-- but this time recursive planning might convert the query +-- into a not supported join +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (SELECT value_2, random() FROM users_table) as u1 + RIGHT JOIN + (SELECT value_2, random() FROM users_table) as u2 + USING(value_2); +$$); +DEBUG: generating subplan 75_1 for subquery SELECT value_2, random() AS random FROM public.users_table +DEBUG: Plan 75 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT users_table.value_2, random() AS random FROM public.users_table) u1 RIGHT JOIN (SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('75_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u2 USING (value_2)) +ERROR: cannot pushdown the subquery +-- set operations may produce not very efficient plans +-- although we could have picked a as our anchor subquery, +-- we pick foo in this case and recursively plan a +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + ( + SELECT user_id FROM users_table + UNION + SELECT user_id FROM users_table + ) a + JOIN + (SELECT value_1 FROM users_table) as foo ON (a.user_id = foo.value_1) + ); +$$); +DEBUG: generating subplan 78_1 for subquery SELECT user_id FROM public.users_table +DEBUG: generating subplan 78_2 for subquery SELECT user_id FROM public.users_table +DEBUG: Plan 78 query after replacing subqueries and CTEs: SELECT intermediate_result.user_id FROM read_intermediate_result('78_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION SELECT intermediate_result.user_id FROM read_intermediate_result('78_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: generating subplan 77_1 for subquery SELECT users_table.user_id FROM public.users_table UNION SELECT users_table.user_id FROM public.users_table +DEBUG: Plan 77 query after replacing subqueries and CTEs: SELECT a.user_id, foo.value_1 FROM ((SELECT intermediate_result.user_id FROM read_intermediate_result('77_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) a JOIN (SELECT users_table.value_1 FROM public.users_table) foo ON ((a.user_id OPERATOR(pg_catalog.=) foo.value_1))) + valid +------- + t +(1 row) + +-- we could do the same with regular tables as well +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + ( + SELECT user_id FROM users_table + UNION + SELECT user_id FROM users_table + ) a + JOIN + users_table as foo ON (a.user_id = foo.value_1) + ); +$$); +DEBUG: generating subplan 82_1 for subquery SELECT user_id FROM public.users_table +DEBUG: generating subplan 82_2 for subquery SELECT user_id FROM public.users_table +DEBUG: Plan 82 query after replacing subqueries and CTEs: SELECT intermediate_result.user_id FROM read_intermediate_result('82_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION SELECT intermediate_result.user_id FROM read_intermediate_result('82_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) +DEBUG: generating subplan 81_1 for subquery SELECT users_table.user_id FROM public.users_table UNION SELECT users_table.user_id FROM public.users_table +DEBUG: Plan 81 query after replacing subqueries and CTEs: SELECT a.user_id, foo.user_id, foo."time", foo.value_1, foo.value_2, foo.value_3, foo.value_4 FROM ((SELECT intermediate_result.user_id FROM read_intermediate_result('81_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) a JOIN public.users_table foo ON ((a.user_id OPERATOR(pg_catalog.=) foo.value_1))) + valid +------- + t +(1 row) + +-- this time the the plan is optimial, we are +-- able to keep the UNION query given that foo +-- is the anchor +SELECT true AS valid FROM explain_json_2($$ + + SELECT * FROM + ( + (SELECT user_id FROM users_table) as foo + JOIN + ( + SELECT user_id FROM users_table WHERE user_id IN (1,2,3,4) + UNION + SELECT user_id FROM users_table WHERE user_id IN (5,6,7,8) + ) a + + ON (a.user_id = foo.user_id) + JOIN + + (SELECT value_1 FROM users_table) as bar + + ON(foo.user_id = bar.value_1) + ); +$$); +DEBUG: generating subplan 85_1 for subquery SELECT value_1 FROM public.users_table +DEBUG: Plan 85 query after replacing subqueries and CTEs: SELECT foo.user_id, a.user_id, bar.value_1 FROM (((SELECT users_table.user_id FROM public.users_table) foo JOIN (SELECT users_table.user_id FROM public.users_table WHERE (users_table.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])) UNION SELECT users_table.user_id FROM public.users_table WHERE (users_table.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) a ON ((a.user_id OPERATOR(pg_catalog.=) foo.user_id))) JOIN (SELECT intermediate_result.value_1 FROM read_intermediate_result('85_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer)) bar ON ((foo.user_id OPERATOR(pg_catalog.=) bar.value_1))) + valid +------- + t +(1 row) + +-- it should be safe to recursively plan non colocated subqueries +-- inside a CTE +SELECT true AS valid FROM explain_json_2($$ + + WITH non_colocated_subquery AS + ( + SELECT + foo.value_2 + FROM + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar + WHERE + foo.value_2 = bar.value_2 + ), + non_colocated_subquery_2 AS + ( + SELECT + count(*) as cnt + FROM + events_table + WHERE + event_type + IN + (SELECT event_type FROM events_table WHERE user_id < 4) + ) + SELECT + * + FROM + non_colocated_subquery, non_colocated_subquery_2 + WHERE + non_colocated_subquery.value_2 != non_colocated_subquery_2.cnt +$$); +DEBUG: generating subplan 87_1 for CTE non_colocated_subquery: SELECT foo.value_2 FROM (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar WHERE (foo.value_2 OPERATOR(pg_catalog.=) bar.value_2) +DEBUG: generating subplan 88_1 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: Plan 88 query after replacing subqueries and CTEs: SELECT foo.value_2 FROM (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.value_2 FROM read_intermediate_result('88_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) bar WHERE (foo.value_2 OPERATOR(pg_catalog.=) bar.value_2) +DEBUG: generating subplan 87_2 for CTE non_colocated_subquery_2: SELECT count(*) AS cnt FROM public.events_table WHERE (event_type OPERATOR(pg_catalog.=) ANY (SELECT events_table_1.event_type FROM public.events_table events_table_1 WHERE (events_table_1.user_id OPERATOR(pg_catalog.<) 4))) +DEBUG: generating subplan 90_1 for subquery SELECT event_type FROM public.events_table WHERE (user_id OPERATOR(pg_catalog.<) 4) +DEBUG: Plan 90 query after replacing subqueries and CTEs: SELECT count(*) AS cnt FROM public.events_table WHERE (event_type OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.event_type FROM read_intermediate_result('90_1'::text, 'binary'::citus_copy_format) intermediate_result(event_type integer))) +DEBUG: Plan 87 query after replacing subqueries and CTEs: SELECT non_colocated_subquery.value_2, non_colocated_subquery_2.cnt FROM (SELECT intermediate_result.value_2 FROM read_intermediate_result('87_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) non_colocated_subquery, (SELECT intermediate_result.cnt FROM read_intermediate_result('87_2'::text, 'binary'::citus_copy_format) intermediate_result(cnt bigint)) non_colocated_subquery_2 WHERE (non_colocated_subquery.value_2 OPERATOR(pg_catalog.<>) non_colocated_subquery_2.cnt) + valid +------- + t +(1 row) + +-- non colocated subquery joins should work fine along with local tables +SELECT true AS valid FROM explain_json_2($$ + SELECT + count(*) + FROM + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4)) as foo, + (SELECT users_table_local.value_2 FROM users_table_local, events_table_local WHERE users_table_local.user_id = events_table_local.user_id AND event_type IN (5,6,7,8)) as bar, + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (9,10,11,12)) as baz + WHERE + foo.value_2 = bar.value_2 + AND + foo.value_2 = baz.value_2 +$$); +DEBUG: generating subplan 92_1 for subquery SELECT users_table_local.value_2 FROM non_colocated_subquery.users_table_local, non_colocated_subquery.events_table_local WHERE ((users_table_local.user_id OPERATOR(pg_catalog.=) events_table_local.user_id) AND (events_table_local.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: generating subplan 92_2 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[9, 10, 11, 12]))) +DEBUG: Plan 92 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])))) foo, (SELECT intermediate_result.value_2 FROM read_intermediate_result('92_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) bar, (SELECT intermediate_result.value_2 FROM read_intermediate_result('92_2'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) baz WHERE ((foo.value_2 OPERATOR(pg_catalog.=) bar.value_2) AND (foo.value_2 OPERATOR(pg_catalog.=) baz.value_2)) + valid +------- + t +(1 row) + +-- a combination of subqueries in FROM and WHERE clauses +-- we actually recursively plan non colocated subqueries +-- pretty accurate, however, we hit our join checks, which seems too restrictive +SELECT true AS valid FROM explain_json_2($$ + + SELECT + count(*) + FROM + (SELECT user_id FROM users_table) as foo + JOIN + ( + SELECT user_id FROM users_table WHERE user_id IN (1,2,3,4) + UNION + SELECT user_id FROM users_table WHERE user_id IN (5,6,7,8) + ) a + + ON (a.user_id = foo.user_id) + JOIN + + (SELECT value_1, value_2 FROM users_table) as bar + + ON(foo.user_id = bar.value_1) + WHERE + value_2 IN (SELECT value_1 FROM users_table WHERE value_2 < 1) + AND + value_1 IN (SELECT value_2 FROM users_table WHERE value_1 < 2) + AND + foo.user_id IN (SELECT users_table.user_id FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2)) +$$); +DEBUG: generating subplan 94_1 for subquery SELECT value_1, value_2 FROM public.users_table +DEBUG: generating subplan 94_2 for subquery SELECT value_1 FROM public.users_table WHERE (value_2 OPERATOR(pg_catalog.<) 1) +DEBUG: generating subplan 94_3 for subquery SELECT value_2 FROM public.users_table WHERE (value_1 OPERATOR(pg_catalog.<) 2) +DEBUG: Plan 94 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (((SELECT users_table.user_id FROM public.users_table) foo JOIN (SELECT users_table.user_id FROM public.users_table WHERE (users_table.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4])) UNION SELECT users_table.user_id FROM public.users_table WHERE (users_table.user_id OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) a ON ((a.user_id OPERATOR(pg_catalog.=) foo.user_id))) JOIN (SELECT intermediate_result.value_1, intermediate_result.value_2 FROM read_intermediate_result('94_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, value_2 integer)) bar ON ((foo.user_id OPERATOR(pg_catalog.=) bar.value_1))) WHERE ((bar.value_2 OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_1 FROM read_intermediate_result('94_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer))) AND (bar.value_1 OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('94_3'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer))) AND (foo.user_id OPERATOR(pg_catalog.=) ANY (SELECT users_table.user_id FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2])))))) +ERROR: cannot pushdown the subquery +-- make sure that we don't pick the refeence table as +-- the anchor +SELECT true AS valid FROM explain_json_2($$ + + SELECT count(*) + FROM + users_reference_table AS users_table_ref, + (SELECT user_id FROM users_Table) AS foo, + (SELECT user_id, value_2 FROM events_Table) AS bar + WHERE + users_table_ref.user_id = foo.user_id + AND foo.user_id = bar.value_2; +$$); +DEBUG: generating subplan 98_1 for subquery SELECT user_id, value_2 FROM public.events_table +DEBUG: Plan 98 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.users_reference_table users_table_ref, (SELECT users_table.user_id FROM public.users_table) foo, (SELECT intermediate_result.user_id, intermediate_result.value_2 FROM read_intermediate_result('98_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_2 integer)) bar WHERE ((users_table_ref.user_id OPERATOR(pg_catalog.=) foo.user_id) AND (foo.user_id OPERATOR(pg_catalog.=) bar.value_2)) + valid +------- + t +(1 row) + +RESET client_min_messages; +DROP FUNCTION explain_json_2(text); +SET search_path TO 'public'; +DROP SCHEMA non_colocated_subquery CASCADE; +NOTICE: drop cascades to 2 other objects diff --git a/src/test/regress/expected/recursive_relation_planning_restriction_pushdown_0.out b/src/test/regress/expected/recursive_relation_planning_restriction_pushdown_0.out new file mode 100644 index 000000000..18c6f26a3 --- /dev/null +++ b/src/test/regress/expected/recursive_relation_planning_restriction_pushdown_0.out @@ -0,0 +1,536 @@ +---------------------------------------------------- +-- recursive_relation_planning_restirction_pushdown +-- In this test file, we mosly test whether Citus +-- can successfully pushdown filters to the subquery +-- that is +---------------------------------------------------- +-- all the queries in this file have the +-- same tables/subqueries combination as below +-- because this test aims to hold the query planning +-- steady, but mostly ensure that filters are handled +-- properly. Note that u2 is the relation that is +-- recursively planned +-- Setting the debug level so that filters can be observed +SET client_min_messages TO DEBUG1; +-- no filters on u2 +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1); +DEBUG: generating subplan 1_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 1_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 1 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('1_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('1_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) + count +------- + 38501 +(1 row) + +-- scalar array expressions can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 > ANY(ARRAY[2, 1, 6]); +DEBUG: generating subplan 4_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (value_1 OPERATOR(pg_catalog.>) ANY ('{2,1,6}'::integer[])) +DEBUG: generating subplan 4_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 4 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('4_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('4_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (users_table.value_1 OPERATOR(pg_catalog.>) ANY (ARRAY[2, 1, 6])) + count +------- + 33398 +(1 row) + +-- array operators on the table can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE ARRAY[u2.value_1, u2.value_2] @> (ARRAY[2, 3]); +DEBUG: generating subplan 7_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (ARRAY[value_1, value_2] OPERATOR(pg_catalog.@>) '{2,3}'::integer[]) +DEBUG: generating subplan 7_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 7 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('7_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('7_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (ARRAY[users_table.value_1, users_table.value_2] OPERATOR(pg_catalog.@>) ARRAY[2, 3]) + count +------- + 4704 +(1 row) + +-- array operators on different tables cannot be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE ARRAY[u2.value_1, u1.user_id] @> (ARRAY[2, 3]); +DEBUG: generating subplan 10_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 10_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 10 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('10_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('10_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (ARRAY[users_table.value_1, u1.user_id] OPERATOR(pg_catalog.@>) ARRAY[2, 3]) + count +------- + 3352 +(1 row) + +-- coerced expressions can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1/2.0 > 2)::int::bool::text::bool; +DEBUG: generating subplan 13_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (((((((value_1)::numeric OPERATOR(pg_catalog./) 2.0) OPERATOR(pg_catalog.>) '2'::numeric))::integer)::boolean)::text)::boolean +DEBUG: generating subplan 13_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 13 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('13_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('13_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((((((users_table.value_1)::numeric OPERATOR(pg_catalog./) 2.0) OPERATOR(pg_catalog.>) (2)::numeric))::integer)::boolean)::text)::boolean + count +------- + 729 +(1 row) + +-- case expression on a single table can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (CASE WHEN u2.value_1 > 3 THEN u2.value_1 > 2 ELSE false END); +DEBUG: generating subplan 16_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE CASE WHEN (value_1 OPERATOR(pg_catalog.>) 3) THEN (value_1 OPERATOR(pg_catalog.>) 2) ELSE false END +DEBUG: generating subplan 16_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 16 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('16_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('16_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE CASE WHEN (users_table.value_1 OPERATOR(pg_catalog.>) 3) THEN (users_table.value_1 OPERATOR(pg_catalog.>) 2) ELSE false END + count +------- + 9990 +(1 row) + +-- case expression multiple tables cannot be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (CASE WHEN u1.value_1 > 4000 THEN u2.value_1 / 100 > 1 ELSE false END); +DEBUG: generating subplan 19_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 19_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 19 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('19_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('19_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE CASE WHEN (u1.value_1 OPERATOR(pg_catalog.>) 4000) THEN ((users_table.value_1 OPERATOR(pg_catalog./) 100) OPERATOR(pg_catalog.>) 1) ELSE false END + count +------- + 0 +(1 row) + +-- coalesce expressions can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE COALESCE((u2.user_id/5.0)::int::bool, false); +DEBUG: generating subplan 22_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE COALESCE(((((user_id)::numeric OPERATOR(pg_catalog./) 5.0))::integer)::boolean, false) +DEBUG: generating subplan 22_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 22 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('22_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('22_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE COALESCE(((((users_table.user_id)::numeric OPERATOR(pg_catalog./) 5.0))::integer)::boolean, false) + count +------- + 28198 +(1 row) + +-- nullif expressions can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE NULLIF((u2.value_2/5.0)::int::bool, false); +DEBUG: generating subplan 25_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE NULLIF(((((value_2)::numeric OPERATOR(pg_catalog./) 5.0))::integer)::boolean, false) +DEBUG: generating subplan 25_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 25 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('25_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('25_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE NULLIF(((((users_table.value_2)::numeric OPERATOR(pg_catalog./) 5.0))::integer)::boolean, false) + count +------- + 18895 +(1 row) + +-- null test can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_3 IS NOT NULL; +DEBUG: generating subplan 28_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (value_3 IS NOT NULL) +DEBUG: generating subplan 28_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 28 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('28_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('28_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (users_table.value_3 IS NOT NULL) + count +------- + 38501 +(1 row) + +-- functions can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE isfinite(u2.time); +DEBUG: generating subplan 31_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE isfinite("time") +DEBUG: generating subplan 31_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 31 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('31_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('31_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE isfinite(users_table."time") + count +------- + 38501 +(1 row) + +-- functions with multiple tables cannot be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE int4smaller(u2.value_1, u1.value_1) = 55; +DEBUG: generating subplan 34_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 34_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 34 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('34_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('34_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (int4smaller(users_table.value_1, u1.value_1) OPERATOR(pg_catalog.=) 55) + count +------- + 0 +(1 row) + +-- functions with multiple columns from the same tables can be pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE int4smaller(u2.value_1, u2.value_2) = u2.value_1; +DEBUG: generating subplan 37_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (value_1 OPERATOR(pg_catalog.=) int4smaller(value_1, value_2)) +DEBUG: generating subplan 37_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 37 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('37_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('37_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (int4smaller(users_table.value_1, users_table.value_2) OPERATOR(pg_catalog.=) users_table.value_1) + count +------- + 20686 +(1 row) + +-- row expressions can be pushdown +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE row(u2.value_1, 2, 3) > row(u2.value_2, 2, 3); +DEBUG: generating subplan 40_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (ROW(value_1, 2, 3) OPERATOR(pg_catalog.>) ROW(value_2, 2, 3)) +DEBUG: generating subplan 40_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 40 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('40_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('40_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (ROW(users_table.value_1, 2, 3) OPERATOR(pg_catalog.>) ROW(users_table.value_2, 2, 3)) + count +------- + 17815 +(1 row) + +-- multiple expression from the same table can be pushed down together +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) + WHERE + (u2.user_id/1.0)::int::bool::text::bool AND + CASE WHEN u2.value_1 > 4000 THEN u2.value_2 / 100 > 1 ELSE false END AND + COALESCE((u2.user_id/50000)::bool, false) AND + NULLIF((u2.value_3/50000)::int::bool, false) AND + isfinite(u2.time) AND + u2.value_4 IS DISTINCT FROM 50040 AND + row(u2.value_4, 2, 3) > row(2000, 2, 3); +DEBUG: generating subplan 43_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (((((((user_id)::numeric OPERATOR(pg_catalog./) 1.0))::integer)::boolean)::text)::boolean AND CASE WHEN (value_1 OPERATOR(pg_catalog.>) 4000) THEN ((value_2 OPERATOR(pg_catalog./) 100) OPERATOR(pg_catalog.>) 1) ELSE false END AND COALESCE(((user_id OPERATOR(pg_catalog./) 50000))::boolean, false) AND NULLIF((((value_3 OPERATOR(pg_catalog./) '50000'::double precision))::integer)::boolean, false) AND isfinite("time") AND (value_4 IS DISTINCT FROM 50040) AND (ROW(value_4, 2, 3) OPERATOR(pg_catalog.>) ROW(2000, 2, 3))) +DEBUG: generating subplan 43_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 43 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('43_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('43_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((((((users_table.user_id)::numeric OPERATOR(pg_catalog./) 1.0))::integer)::boolean)::text)::boolean AND CASE WHEN (users_table.value_1 OPERATOR(pg_catalog.>) 4000) THEN ((users_table.value_2 OPERATOR(pg_catalog./) 100) OPERATOR(pg_catalog.>) 1) ELSE false END AND COALESCE(((users_table.user_id OPERATOR(pg_catalog./) 50000))::boolean, false) AND NULLIF((((users_table.value_3 OPERATOR(pg_catalog./) (50000)::double precision))::integer)::boolean, false) AND isfinite(users_table."time") AND (users_table.value_4 IS DISTINCT FROM 50040) AND (ROW(users_table.value_4, 2, 3) OPERATOR(pg_catalog.>) ROW(2000, 2, 3))) + count +------- + 0 +(1 row) + +-- subqueries are not pushdown +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 > + (SELECT avg(user_id) + FROM events_table); +DEBUG: generating subplan 46_1 for subquery SELECT avg(user_id) AS avg FROM public.events_table +DEBUG: generating subplan 46_2 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 46_3 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 46 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('46_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('46_3'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1)::numeric OPERATOR(pg_catalog.>) (SELECT intermediate_result.avg FROM read_intermediate_result('46_1'::text, 'binary'::citus_copy_format) intermediate_result(avg numeric))) + count +------- + 9990 +(1 row) + +-- even subqueries with constant values are not pushdowned +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 > + (SELECT 5); +DEBUG: generating subplan 50_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 50_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 50 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('50_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('50_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (users_table.value_1 OPERATOR(pg_catalog.>) (SELECT 5)) + count +------- + 0 +(1 row) + +-- filters involving multiple tables aren't pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 * u1.user_id > 25; +DEBUG: generating subplan 53_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 53_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 53 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('53_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('53_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.*) u1.user_id) OPERATOR(pg_catalog.>) 25) + count +------- + 162 +(1 row) + +-- filter on other tables can only be pushdown +-- as long as they are equality filters on the +-- joining column +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u1.value_1 = 3; +DEBUG: generating subplan 56_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (value_1 OPERATOR(pg_catalog.=) 3) +DEBUG: generating subplan 56_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 56 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('56_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('56_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (u1.value_1 OPERATOR(pg_catalog.=) 3) + count +------- + 17576 +(1 row) + +-- but not when the filter is gt, lt or any other thing +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u1.value_1 > 3; +DEBUG: generating subplan 59_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 59_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 59 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('59_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('59_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (u1.value_1 OPERATOR(pg_catalog.>) 3) + count +------- + 9990 +(1 row) + +-- when the filter is on another column than the +-- join column, that's obviously not pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u1.value_2 = 3; +DEBUG: generating subplan 62_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 62_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 62 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('62_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('62_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (u1.value_2 OPERATOR(pg_catalog.=) 3) + count +------- + 5618 +(1 row) + +-- or filters on the same table is pushdown +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 > 4 OR u2.value_4 = 4; +DEBUG: generating subplan 65_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE ((value_1 OPERATOR(pg_catalog.>) 4) OR (value_4 OPERATOR(pg_catalog.=) 4)) +DEBUG: generating subplan 65_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 65 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('65_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('65_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.>) 4) OR (users_table.value_4 OPERATOR(pg_catalog.=) 4)) + count +------- + 729 +(1 row) + +-- and filters on the same table is pushdown +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE u2.value_1 > 2 and u2.value_4 IS NULL; +DEBUG: generating subplan 68_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE ((value_1 OPERATOR(pg_catalog.>) 2) AND (value_4 IS NULL)) +DEBUG: generating subplan 68_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 68 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('68_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('68_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.>) 2) AND (users_table.value_4 IS NULL)) + count +------- + 27566 +(1 row) + +-- filters on different tables are pushdown +-- only the ones that are not ANDed +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 OR u2.value_4 IS NULL) AND (u2.user_id > 4 OR u1.user_id > 3); +DEBUG: generating subplan 71_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE ((value_1 OPERATOR(pg_catalog.>) 2) OR (value_4 IS NULL)) +DEBUG: generating subplan 71_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 71 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('71_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('71_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((users_table.value_1 OPERATOR(pg_catalog.>) 2) OR (users_table.value_4 IS NULL)) AND ((users_table.user_id OPERATOR(pg_catalog.>) 4) OR (u1.user_id OPERATOR(pg_catalog.>) 3))) + count +------- + 27405 +(1 row) + +-- see the comment above +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 OR u2.value_4 IS NULL) OR (u2.user_id > 4 AND u1.user_id > 3); +DEBUG: generating subplan 74_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE ((value_1 OPERATOR(pg_catalog.>) 2) OR (value_4 IS NULL) OR (user_id OPERATOR(pg_catalog.>) 4)) +DEBUG: generating subplan 74_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 74 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('74_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('74_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.>) 2) OR (users_table.value_4 IS NULL) OR ((users_table.user_id OPERATOR(pg_catalog.>) 4) AND (u1.user_id OPERATOR(pg_catalog.>) 3))) + count +------- + 38501 +(1 row) + +-- see the comment above +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 OR u1.value_4 IS NULL) AND (u2.user_id > 4 AND u1.user_id > 3); +DEBUG: generating subplan 77_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE (user_id OPERATOR(pg_catalog.>) 4) +DEBUG: generating subplan 77_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 77 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('77_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('77_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((users_table.value_1 OPERATOR(pg_catalog.>) 2) OR (u1.value_4 IS NULL)) AND ((users_table.user_id OPERATOR(pg_catalog.>) 4) AND (u1.user_id OPERATOR(pg_catalog.>) 3))) + count +------- + 7883 +(1 row) + +-- see the comment above +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 OR u1.value_4 IS NULL) AND (u2.user_id > 4 OR u1.user_id > 3); +DEBUG: generating subplan 80_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 80_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 80 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('80_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('80_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((users_table.value_1 OPERATOR(pg_catalog.>) 2) OR (u1.value_4 IS NULL)) AND ((users_table.user_id OPERATOR(pg_catalog.>) 4) OR (u1.user_id OPERATOR(pg_catalog.>) 3))) + count +------- + 27405 +(1 row) + +-- see the comment above +-- but volatile functions are not pushed down +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 OR u1.value_4 IS NULL) AND (u2.user_id = 10000 * random() OR u1.user_id > 3); +DEBUG: generating subplan 83_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 83_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 83 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('83_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('83_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE (((users_table.value_1 OPERATOR(pg_catalog.>) 2) OR (u1.value_4 IS NULL)) AND (((users_table.user_id)::double precision OPERATOR(pg_catalog.=) ((10000)::double precision OPERATOR(pg_catalog.*) random())) OR (u1.user_id OPERATOR(pg_catalog.>) 3))) + count +------- + 22183 +(1 row) + +-- TODO: constant results should be pushed down, but not supported yet +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN + (SELECT value_1, + random() + FROM users_table) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 AND false); +DEBUG: generating subplan 86_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: generating subplan 86_2 for subquery SELECT value_1, random() AS random FROM public.users_table +DEBUG: Plan 86 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('86_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN (SELECT intermediate_result.value_1, intermediate_result.random FROM read_intermediate_result('86_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer, random double precision)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.>) 2) AND false) + count +------- + 0 +(1 row) + +-- TODO: what should the behaviour be? +SELECT count(*) +FROM users_table u1 +JOIN users_table u2 USING(value_1) +JOIN LATERAL + (SELECT value_1, + random() + FROM users_table + WHERE u2.value_2 = 15) AS u3 USING (value_1) +WHERE (u2.value_1 > 2 + AND FALSE); +DEBUG: generating subplan 89_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table u2 WHERE true +DEBUG: Plan 89 query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((public.users_table u1 JOIN (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('89_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table(user_id, "time", value_1, value_2, value_3, value_4) USING (value_1)) JOIN LATERAL (SELECT users_table_1.value_1, random() AS random FROM public.users_table users_table_1 WHERE (users_table.value_2 OPERATOR(pg_catalog.=) 15)) u3 USING (value_1)) WHERE ((users_table.value_1 OPERATOR(pg_catalog.>) 2) AND false) +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator