-- -- multi subquery complex queries aim to expand existing subquery pushdown -- regression tests to cover more cases -- the tables that are used depend on multi_insert_select_behavioral_analytics_create_table.sql -- -- We don't need shard id sequence here, so commented out to prevent conflicts with concurrent tests -- SET citus.next_shard_id TO 1400000; ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000; -- -- UNIONs and JOINs mixed -- SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."user_id", "events"."time", 0 AS event FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 1 AS event FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_2) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 2 AS event FROM events_table as "events" WHERE event_type IN (5, 6) ) events_subquery_3) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 3 AS event FROM events_table as "events" WHERE event_type IN (1, 6)) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; types | sumofeventtype -------+---------------- 0 | 449 1 | 433 2 | 75 3 | 268 (4 rows) -- same query with target entries shuffled inside UNIONs SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS 
"time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."time", 0 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION (SELECT * FROM (SELECT "events"."time", 1 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_2) UNION (SELECT * FROM (SELECT "events"."time", 2 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (5, 6) ) events_subquery_3) UNION (SELECT * FROM (SELECT "events"."time", 3 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (1, 6)) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; types | sumofeventtype -------+---------------- 0 | 449 1 | 433 2 | 75 3 | 268 (4 rows) -- supported through recursive planning since events_subquery_2 doesn't have partition key on the target list -- within the shuffled target list SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."time", 0 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION (SELECT * FROM (SELECT "events"."time", 1 AS event, "events"."user_id" * 2 FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_2) UNION (SELECT * FROM (SELECT "events"."time", 2 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (5, 6) ) events_subquery_3) UNION (SELECT * FROM (SELECT "events"."time", 3 AS 
event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (1, 6)) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; types | sumofeventtype -------+---------------- 0 | 449 1 | 234 2 | 75 3 | 268 (4 rows) -- supported through recursive planning since events_subquery_2 doesn't have partition key on the target list SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."time", 0 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION (SELECT * FROM (SELECT "events"."time", 1 AS event, "events"."value_2" as user_id FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_2) UNION (SELECT * FROM (SELECT "events"."time", 2 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (5, 6) ) events_subquery_3) UNION (SELECT * FROM (SELECT "events"."time", 3 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (1, 6)) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; types | sumofeventtype -------+---------------- 0 | 449 1 | 369 2 | 75 3 | 268 (4 rows) -- we can support arbitrary subqueries within UNIONs SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() FROM (SELECT "t"."user_id", "t"."time", 
unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."time", 0 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION (SELECT * FROM ( SELECT * FROM ( SELECT max("events"."time"), 0 AS event, "events"."user_id" FROM events_table as "events", users_table as "users" WHERE events.user_id = users.user_id AND event_type IN (1, 2) GROUP BY "events"."user_id" ) as events_subquery_5 ) events_subquery_2) UNION (SELECT * FROM (SELECT "events"."time", 2 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_3) UNION (SELECT * FROM (SELECT "events"."time", 3 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (5, 6)) events_subquery_4) ) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; types | sumofeventtype -------+---------------- 0 | 449 2 | 433 3 | 75 (3 rows) SET citus.enable_repartition_joins to ON; SET client_min_messages TO DEBUG1; -- recursively planned since events_subquery_5 is not joined on partition key SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() FROM (SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."time", 0 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION (SELECT * FROM ( SELECT * FROM ( SELECT max("events"."time"), 0 AS event, "events"."user_id" FROM events_table as "events", 
users_table as "users" WHERE events.user_id = users.value_2 AND event_type IN (1, 2) GROUP BY "events"."user_id" ) as events_subquery_5 ) events_subquery_2) UNION (SELECT * FROM (SELECT "events"."time", 2 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_3) UNION (SELECT * FROM (SELECT "events"."time", 3 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (5, 6)) events_subquery_4) ) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; DEBUG: cannot use real time executor with repartition jobs HINT: Since you enabled citus.enable_repartition_joins Citus chose to use task-tracker. DEBUG: generating subplan 16_1 for subquery SELECT max(events."time") AS max, 0 AS event, events.user_id FROM public.events_table events, public.users_table users WHERE ((events.user_id = users.value_2) AND (events.event_type = ANY (ARRAY[1, 2]))) GROUP BY events.user_id DEBUG: generating subplan 16_2 for subquery SELECT "time", event, user_id FROM (SELECT events."time", 0 AS event, events.user_id FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[1, 2]))) events_subquery_1 DEBUG: generating subplan 16_3 for subquery SELECT "time", event, user_id FROM (SELECT events."time", 2 AS event, events.user_id FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[3, 4]))) events_subquery_3 DEBUG: generating subplan 16_4 for subquery SELECT "time", event, user_id FROM (SELECT events."time", 3 AS event, events.user_id FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[5, 6]))) events_subquery_4 DEBUG: generating subplan 16_5 for subquery SELECT intermediate_result."time", intermediate_result.event, intermediate_result.user_id FROM read_intermediate_result('16_2'::text, 'binary'::citus_copy_format) 
intermediate_result("time" timestamp without time zone, event integer, user_id integer) UNION SELECT events_subquery_2.max, events_subquery_2.event, events_subquery_2.user_id FROM (SELECT events_subquery_5.max, events_subquery_5.event, events_subquery_5.user_id FROM (SELECT intermediate_result.max, intermediate_result.event, intermediate_result.user_id FROM read_intermediate_result('16_1'::text, 'binary'::citus_copy_format) intermediate_result(max timestamp without time zone, event integer, user_id integer)) events_subquery_5) events_subquery_2 UNION SELECT intermediate_result."time", intermediate_result.event, intermediate_result.user_id FROM read_intermediate_result('16_3'::text, 'binary'::citus_copy_format) intermediate_result("time" timestamp without time zone, event integer, user_id integer) UNION SELECT intermediate_result."time", intermediate_result.event, intermediate_result.user_id FROM read_intermediate_result('16_4'::text, 'binary'::citus_copy_format) intermediate_result("time" timestamp without time zone, event integer, user_id integer) DEBUG: Plan 16 query after replacing subqueries and CTEs: SELECT event_types AS types, count(*) AS sumofeventtype FROM (SELECT q.user_id, q."time", q.event_types, t.user_id, random() AS random FROM ((SELECT t_1.user_id, t_1."time", unnest(t_1.collected_events) AS event_types FROM (SELECT t1.user_id, min(t1."time") AS "time", array_agg(t1.event ORDER BY t1."time", t1.event DESC) AS collected_events FROM (SELECT intermediate_result."time", intermediate_result.event, intermediate_result.user_id FROM read_intermediate_result('16_5'::text, 'binary'::citus_copy_format) intermediate_result("time" timestamp without time zone, event integer, user_id integer)) t1 GROUP BY t1.user_id) t_1) q JOIN (SELECT users.user_id FROM public.users_table users WHERE ((users.value_1 > 0) AND (users.value_1 < 4))) t ON ((t.user_id = q.user_id)))) final_query(user_id, "time", event_types, user_id_1, random) GROUP BY event_types ORDER BY 
event_types types | sumofeventtype -------+---------------- 0 | 449 2 | 433 3 | 75 (3 rows) RESET client_min_messages; SET citus.enable_repartition_joins to OFF; -- recursively planned since the join is not equi join SET client_min_messages TO DEBUG1; SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."user_id", "events"."time", 0 AS event FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 1 AS event FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_2) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 2 AS event FROM events_table as "events" WHERE event_type IN (5, 6) ) events_subquery_3) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 3 AS event FROM events_table as "events" WHERE event_type IN (4, 5)) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id != q.user_id)) as final_query GROUP BY types ORDER BY types; DEBUG: generating subplan 22_1 for subquery SELECT user_id, "time", unnest(collected_events) AS event_types FROM (SELECT t1.user_id, min(t1."time") AS "time", array_agg(t1.event ORDER BY t1."time", t1.event DESC) AS collected_events FROM (SELECT events_subquery_1.user_id, events_subquery_1."time", events_subquery_1.event FROM (SELECT events.user_id, events."time", 0 AS event FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[1, 2]))) events_subquery_1 UNION SELECT events_subquery_2.user_id, events_subquery_2."time", events_subquery_2.event FROM (SELECT 
events.user_id, events."time", 1 AS event FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[3, 4]))) events_subquery_2 UNION SELECT events_subquery_3.user_id, events_subquery_3."time", events_subquery_3.event FROM (SELECT events.user_id, events."time", 2 AS event FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[5, 6]))) events_subquery_3 UNION SELECT events_subquery_4.user_id, events_subquery_4."time", events_subquery_4.event FROM (SELECT events.user_id, events."time", 3 AS event FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[4, 5]))) events_subquery_4) t1 GROUP BY t1.user_id) t DEBUG: Plan 22 query after replacing subqueries and CTEs: SELECT event_types AS types, count(*) AS sumofeventtype FROM (SELECT q.user_id, q."time", q.event_types, t.user_id, random() AS random FROM ((SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_types FROM read_intermediate_result('22_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_types integer)) q JOIN (SELECT users.user_id FROM public.users_table users WHERE ((users.value_1 > 0) AND (users.value_1 < 4))) t ON ((t.user_id <> q.user_id)))) final_query(user_id, "time", event_types, user_id_1, random) GROUP BY event_types ORDER BY event_types types | sumofeventtype -------+---------------- 0 | 2088 1 | 2163 2 | 397 3 | 1397 (4 rows) RESET client_min_messages; -- not supported since subquery 3 includes a JOIN with non-equi join SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."user_id", "events"."time", 0 AS event FROM events_table as "events" WHERE event_type 
IN (1, 2) ) events_subquery_1) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 1 AS event FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_2) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 2 AS event FROM events_table as "events", users_table as "users" WHERE event_type IN (5, 6) AND users.user_id != events.user_id ) events_subquery_3) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 3 AS event FROM events_table as "events" WHERE event_type IN (4, 5)) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- similar query with more union statements (to enable UNION tree become larger) SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."user_id", "events"."time", 0 AS event FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 1 AS event FROM events_table as "events" WHERE event_type IN (2, 3) ) events_subquery_2) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 2 AS event FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_3) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 3 AS event FROM events_table as "events" WHERE event_type IN (4, 5)) events_subquery_4) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 4 AS 
event FROM events_table as "events" WHERE event_type IN (5, 6)) events_subquery_5) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 5 AS event FROM events_table as "events" WHERE event_type IN (6, 1)) events_subquery_6) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 6 AS event FROM events_table as "events" WHERE event_type IN (2, 5)) events_subquery_6) ) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; types | sumofeventtype -------+---------------- 0 | 449 1 | 434 2 | 433 3 | 255 4 | 75 5 | 268 6 | 256 (7 rows) -- -- UNION ALL Queries -- SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."user_id", "events"."time", 0 AS event FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION ALL (SELECT * FROM (SELECT "events"."user_id", "events"."time", 1 AS event FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_2) UNION ALL (SELECT * FROM (SELECT "events"."user_id", "events"."time", 2 AS event FROM events_table as "events" WHERE event_type IN (5, 6) ) events_subquery_3) UNION ALL (SELECT * FROM (SELECT "events"."user_id", "events"."time", 3 AS event FROM events_table as "events" WHERE event_type IN (6, 1)) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; types | sumofeventtype -------+---------------- 0 | 449 1 | 433 2 | 75 3 | 
268 (4 rows) -- same query target list entries shuffled SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."time", 0 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION ALL (SELECT * FROM (SELECT "events"."time", 1 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_2) UNION ALL (SELECT * FROM (SELECT "events"."time", 2 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (5, 6) ) events_subquery_3) UNION ALL (SELECT * FROM (SELECT "events"."time", 3 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (1, 6)) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; types | sumofeventtype -------+---------------- 0 | 449 1 | 433 2 | 75 3 | 268 (4 rows) -- supported through recursive planning since subquery 3 does not have partition key SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."user_id", "events"."time", 0 AS event FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION ALL (SELECT * FROM (SELECT "events"."user_id", "events"."time", 1 AS event FROM events_table as "events" WHERE 
event_type IN (3, 4) ) events_subquery_2) UNION ALL (SELECT * FROM (SELECT "events"."value_2", "events"."time", 2 AS event FROM events_table as "events" WHERE event_type IN (5, 6) ) events_subquery_3) UNION ALL (SELECT * FROM (SELECT "events"."user_id", "events"."time", 3 AS event FROM events_table as "events" WHERE event_type IN (1, 6)) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; types | sumofeventtype -------+---------------- 0 | 449 1 | 433 2 | 62 3 | 268 (4 rows) -- supported through recursive planning since events_subquery_4 does not have partition key on the -- target list SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."time", 0 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION ALL (SELECT * FROM (SELECT "events"."time", 1 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_2) UNION ALL (SELECT * FROM (SELECT "events"."time", 2 AS event, "events"."user_id" FROM events_table as "events" WHERE event_type IN (5, 6) ) events_subquery_3) UNION ALL (SELECT * FROM (SELECT "events"."time", 3 AS event, "events"."user_id" * 2 FROM events_table as "events" WHERE event_type IN (1, 6)) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; types | sumofeventtype 
-------+---------------- 0 | 449 1 | 433 2 | 75 3 | 124 (4 rows) -- union all with inner and left joins SELECT user_id, count(*) as cnt FROM (SELECT first_query.user_id, random() FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."user_id", "events"."time", 0 AS event FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION ALL (SELECT * FROM (SELECT "events"."user_id", "events"."time", 1 AS event FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_2) UNION ALL (SELECT * FROM (SELECT "events"."user_id", "events"."time", 2 AS event FROM events_table as "events" WHERE event_type IN (5, 6) ) events_subquery_3) UNION ALL (SELECT * FROM (SELECT "events"."user_id", "events"."time", 3 AS event FROM events_table as "events" WHERE event_type IN (1, 6)) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "first_query" INNER JOIN (SELECT "t"."user_id" FROM (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t LEFT OUTER JOIN ( SELECT DISTINCT "events"."user_id" as user_id FROM events_table as "events" WHERE event_type IN (0, 6) GROUP BY user_id ) as t2 ON (t2.user_id = t.user_id) WHERE t2.user_id is NULL) as second_query ON ("first_query".user_id = "second_query".user_id)) as final_query GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; user_id | cnt ---------+----- 3 | 275 6 | 72 (2 rows) -- recursively planned since the join between t and t2 is not equi join -- union all with inner and left joins SET client_min_messages TO DEBUG1; SELECT user_id, count(*) as cnt FROM (SELECT first_query.user_id, random() FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", 
array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."user_id", "events"."time", 0 AS event FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION ALL (SELECT * FROM (SELECT "events"."user_id", "events"."time", 1 AS event FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_2) UNION ALL (SELECT * FROM (SELECT "events"."user_id", "events"."time", 2 AS event FROM events_table as "events" WHERE event_type IN (5, 6) ) events_subquery_3) UNION ALL (SELECT * FROM (SELECT "events"."user_id", "events"."time", 3 AS event FROM events_table as "events" WHERE event_type IN (1, 6)) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "first_query" INNER JOIN (SELECT "t"."user_id" FROM (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t LEFT OUTER JOIN ( SELECT DISTINCT "events"."user_id" as user_id FROM events_table as "events" WHERE event_type IN (0, 6) GROUP BY user_id ) as t2 ON (t2.user_id > t.user_id) WHERE t2.user_id is NULL) as second_query ON ("first_query".user_id = "second_query".user_id)) as final_query GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; DEBUG: generating subplan 42_1 for subquery SELECT DISTINCT user_id FROM public.events_table events WHERE (event_type = ANY (ARRAY[0, 6])) GROUP BY user_id DEBUG: Plan 42 query after replacing subqueries and CTEs: SELECT user_id, count(*) AS cnt FROM (SELECT first_query.user_id, random() AS random FROM ((SELECT t.user_id, t."time", unnest(t.collected_events) AS event_types FROM (SELECT t1.user_id, min(t1."time") AS "time", array_agg(t1.event ORDER BY t1."time", t1.event DESC) AS collected_events FROM (SELECT events_subquery_1.user_id, events_subquery_1."time", events_subquery_1.event FROM (SELECT events.user_id, events."time", 0 AS event FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[1, 2]))) events_subquery_1 UNION ALL SELECT 
events_subquery_2.user_id, events_subquery_2."time", events_subquery_2.event FROM (SELECT events.user_id, events."time", 1 AS event FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[3, 4]))) events_subquery_2 UNION ALL SELECT events_subquery_3.user_id, events_subquery_3."time", events_subquery_3.event FROM (SELECT events.user_id, events."time", 2 AS event FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[5, 6]))) events_subquery_3 UNION ALL SELECT events_subquery_4.user_id, events_subquery_4."time", events_subquery_4.event FROM (SELECT events.user_id, events."time", 3 AS event FROM public.events_table events WHERE (events.event_type = ANY (ARRAY[1, 6]))) events_subquery_4) t1 GROUP BY t1.user_id) t) first_query JOIN (SELECT t.user_id FROM ((SELECT users.user_id FROM public.users_table users WHERE ((users.value_1 > 0) AND (users.value_1 < 4))) t LEFT JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('42_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) t2 ON ((t2.user_id > t.user_id))) WHERE (t2.user_id IS NULL)) second_query ON ((first_query.user_id = second_query.user_id)))) final_query GROUP BY user_id ORDER BY (count(*)) DESC, user_id DESC LIMIT 10 user_id | cnt ---------+----- 5 | 324 6 | 72 (2 rows) RESET client_min_messages; -- -- Union, inner join and left join -- SELECT user_id, count(*) as cnt FROM (SELECT first_query.user_id, random() FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."user_id", "events"."time", 0 AS event FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 1 AS event FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_2) UNION 
(SELECT * FROM (SELECT "events"."user_id", "events"."time", 2 AS event FROM events_table as "events" WHERE event_type IN (5, 6) ) events_subquery_3) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 3 AS event FROM events_table as "events" WHERE event_type IN (1, 6)) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "first_query" INNER JOIN (SELECT "t"."user_id" FROM (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t LEFT OUTER JOIN ( SELECT DISTINCT "events"."user_id" as user_id FROM events_table as "events" WHERE event_type IN (0, 6) GROUP BY user_id ) as t2 ON (t2.user_id = t.user_id) WHERE t2.user_id is NULL) as second_query ON ("first_query".user_id = "second_query".user_id)) as final_query GROUP BY user_id ORDER BY cnt DESC, user_id DESC LIMIT 10; user_id | cnt ---------+----- 3 | 275 6 | 72 (2 rows) -- Simple LATERAL JOINs with GROUP BYs in each side -- need to set subquery_pushdown due to limit for next 2 queries SET citus.subquery_pushdown to ON; SELECT * FROM (SELECT "some_users_data".user_id, lastseen FROM (SELECT user_id, max(time) AS lastseen FROM (SELECT user_id, time FROM (SELECT user_id, time FROM events_table as "events" WHERE user_id > 1 and user_id < 4) "events_1" ORDER BY time DESC LIMIT 1000) "recent_events_1" GROUP BY user_id ORDER BY max(time) DESC) "some_recent_users" JOIN LATERAL (SELECT "users".user_id FROM users_table as "users" WHERE "users"."user_id" = "some_recent_users"."user_id" AND users.value_2 > 1 and users.value_2 < 3 LIMIT 1) "some_users_data" ON TRUE ORDER BY lastseen DESC LIMIT 50) "some_users" order BY user_id LIMIT 50; user_id | lastseen ---------+--------------------------------- 2 | Thu Nov 23 17:26:14.563216 2017 3 | Thu Nov 23 18:08:26.550729 2017 (2 rows) -- same query with subquery joins in topmost select SELECT "some_users_data".user_id, lastseen FROM (SELECT user_id, max(time) AS lastseen FROM (SELECT user_id, time FROM (SELECT user_id, time FROM 
events_table as "events" WHERE user_id > 1 and user_id < 4) "events_1" ORDER BY time DESC LIMIT 1000) "recent_events_1" GROUP BY user_id ORDER BY max(TIME) DESC) "some_recent_users" JOIN LATERAL (SELECT "users".user_id FROM users_table as "users" WHERE "users"."user_id" = "some_recent_users"."user_id" AND users.value_2 > 1 and users.value_2 < 3 LIMIT 1) "some_users_data" ON TRUE ORDER BY user_id limit 50; user_id | lastseen ---------+--------------------------------- 2 | Thu Nov 23 17:26:14.563216 2017 3 | Thu Nov 23 18:08:26.550729 2017 (2 rows) -- reset subquery_pushdown SET citus.subquery_pushdown to OFF; -- we recursively plan recent_events_1 -- but not some_users_data since it has a reference -- from an outer query which is not recursively planned SELECT "some_users_data".user_id, lastseen FROM (SELECT user_id, max(time) AS lastseen FROM (SELECT user_id, time FROM (SELECT user_id, time FROM events_table as "events" WHERE user_id > 1 and user_id < 4) "events_1" ORDER BY time DESC LIMIT 1000) "recent_events_1" GROUP BY user_id ORDER BY max(TIME) DESC) "some_recent_users" JOIN LATERAL (SELECT "users".user_id FROM users_table as "users" WHERE "users"."value_1" = "some_recent_users"."user_id" AND users.value_2 > 1 and users.value_2 < 3 LIMIT 1) "some_users_data" ON TRUE ORDER BY user_id limit 50; ERROR: cannot push down this subquery DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query -- we recursively plan some queries but fail in the end -- since some_users_data has a reference -- from an outer query which is not recursively planned SELECT "some_users_data".user_id, lastseen FROM (SELECT 2 * user_id as user_id, max(time) AS lastseen FROM (SELECT user_id, time FROM (SELECT user_id, time FROM events_table as "events" WHERE user_id > 1 and user_id < 4) "events_1" ORDER BY time DESC LIMIT 1000) "recent_events_1" GROUP BY user_id ORDER BY max(TIME) DESC) "some_recent_users" JOIN LATERAL (SELECT 
"users".user_id FROM users_table as "users" WHERE "users"."user_id" = "some_recent_users"."user_id" AND users.value_2 > 1 and users.value_2 < 3 LIMIT 1) "some_users_data" ON TRUE ORDER BY user_id limit 50; ERROR: cannot push down this subquery DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query -- LATERAL JOINs used with INNER JOINs SET citus.subquery_pushdown to ON; SELECT user_id, lastseen FROM (SELECT "some_users_data".user_id, lastseen FROM (SELECT filter_users_1.user_id, time AS lastseen FROM (SELECT user_where_1_1.user_id FROM (SELECT "users"."user_id" FROM users_table as "users" WHERE user_id > 1 and user_id < 3 and value_1 > 2) user_where_1_1 INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE user_id > 1 and user_id < 3 and value_2 > 3) user_where_1_join_1 ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id)) filter_users_1 JOIN LATERAL (SELECT user_id, time FROM events_table as "events" WHERE user_id > 1 and user_id < 3 AND user_id = filter_users_1.user_id ORDER BY time DESC LIMIT 1) "last_events_1" ON TRUE ORDER BY time DESC LIMIT 10) "some_recent_users" JOIN LATERAL (SELECT "users".user_id FROM users_table as "users" WHERE "users"."user_id" = "some_recent_users"."user_id" AND "users"."value_2" > 4 LIMIT 1) "some_users_data" ON TRUE ORDER BY lastseen DESC LIMIT 10) "some_users" ORDER BY user_id DESC LIMIT 10; user_id | lastseen ---------+--------------------------------- 2 | Thu Nov 23 17:26:14.563216 2017 2 | Thu Nov 23 17:26:14.563216 2017 2 | Thu Nov 23 17:26:14.563216 2017 2 | Thu Nov 23 17:26:14.563216 2017 2 | Thu Nov 23 17:26:14.563216 2017 2 | Thu Nov 23 17:26:14.563216 2017 2 | Thu Nov 23 17:26:14.563216 2017 2 | Thu Nov 23 17:26:14.563216 2017 2 | Thu Nov 23 17:26:14.563216 2017 2 | Thu Nov 23 17:26:14.563216 2017 (10 rows) -- -- A similar query with topmost select is dropped -- and replaced by aggregation. 
Notice the heavy use of limit -- SELECT "some_users_data".user_id, MAX(lastseen), count(*) FROM (SELECT filter_users_1.user_id, time AS lastseen FROM (SELECT user_where_1_1.user_id FROM (SELECT "users"."user_id" FROM users_table as "users" WHERE user_id > 1 and user_id < 3 and value_1 > 2) user_where_1_1 INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE user_id > 1 and user_id < 3 and value_2 > 3) user_where_1_join_1 ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id)) filter_users_1 JOIN LATERAL (SELECT user_id, time FROM events_table as "events" WHERE user_id > 1 and user_id < 3 and user_id = filter_users_1.user_id ORDER BY time DESC LIMIT 1) "last_events_1" ON true ORDER BY time DESC LIMIT 10) "some_recent_users" JOIN LATERAL (SELECT "users".user_id FROM users_table as "users" WHERE "users"."user_id" = "some_recent_users"."user_id" AND "users"."value_2" > 4 LIMIT 1) "some_users_data" ON true GROUP BY 1 ORDER BY 2, 1 DESC LIMIT 10; user_id | max | count ---------+---------------------------------+------- 2 | Thu Nov 23 17:26:14.563216 2017 | 10 (1 row) SET citus.subquery_pushdown to OFF; -- not supported since the inner JOIN is not equi join and LATERAL JOIN prevents recursive planning SET client_min_messages TO DEBUG2; SELECT user_id, lastseen FROM (SELECT "some_users_data".user_id, lastseen FROM (SELECT filter_users_1.user_id, time AS lastseen FROM (SELECT user_where_1_1.user_id FROM (SELECT "users"."user_id" FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and value_1 > 2) user_where_1_1 INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and value_2 > 3) user_where_1_join_1 ON ("user_where_1_1".user_id != "user_where_1_join_1".user_id)) filter_users_1 JOIN LATERAL (SELECT user_id, time FROM events_table as "events" WHERE user_id > 1 and user_id < 4 and user_id = filter_users_1.user_id ORDER BY time DESC LIMIT 1) "last_events_1" ON true ORDER BY time DESC LIMIT 10)
"some_recent_users" JOIN LATERAL (SELECT "users".user_id FROM users_table as "users" WHERE "users"."user_id" = "some_recent_users"."user_id" AND "users"."value_2" > 4 LIMIT 1) "some_users_data" ON true ORDER BY lastseen DESC LIMIT 10) "some_users" ORDER BY user_id DESC LIMIT 10; DEBUG: generating subplan 53_1 for subquery SELECT user_id FROM public.users_table users WHERE ((user_id > 1) AND (user_id < 4) AND (value_2 > 3)) DEBUG: skipping recursive planning for the subquery since it contains references to outer queries DEBUG: skipping recursive planning for the subquery since it contains references to outer queries ERROR: cannot push down this subquery DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query SET citus.enable_repartition_joins to ON; SET client_min_messages TO DEBUG1; -- recursively planned since the inner JOIN is not on the partition key SELECT user_id, lastseen FROM (SELECT "some_users_data".user_id, lastseen FROM (SELECT filter_users_1.user_id, time AS lastseen FROM (SELECT user_where_1_1.user_id FROM (SELECT "users"."user_id" FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and value_1 > 2) user_where_1_1 INNER JOIN (SELECT "users"."user_id", "users"."value_1" FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and value_2 > 3) user_where_1_join_1 ON ("user_where_1_1".user_id = "user_where_1_join_1".value_1)) filter_users_1 JOIN LATERAL (SELECT user_id, time FROM events_table as "events" WHERE user_id > 1 and user_id < 4 and user_id = filter_users_1.user_id ORDER BY time DESC LIMIT 1) "last_events_1" ON true ORDER BY time DESC LIMIT 10) "some_recent_users" JOIN LATERAL (SELECT "users".user_id FROM users_table as "users" WHERE "users"."user_id" = "some_recent_users"."user_id" AND "users"."value_2" > 4 LIMIT 1) "some_users_data" ON true ORDER BY lastseen DESC LIMIT 10) "some_users" ORDER BY user_id DESC LIMIT 10; DEBUG: generating subplan 56_1 for subquery SELECT user_id,
value_1 FROM public.users_table users WHERE ((user_id > 1) AND (user_id < 4) AND (value_2 > 3)) ERROR: cannot push down this subquery DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query SET citus.enable_repartition_joins to OFF; RESET client_min_messages; -- not supported since upper LATERAL JOIN is not equi join SELECT user_id, lastseen FROM (SELECT "some_users_data".user_id, lastseen FROM (SELECT filter_users_1.user_id, time AS lastseen FROM (SELECT user_where_1_1.user_id FROM (SELECT "users"."user_id" FROM users_table as "users" WHERE user_id > 1 and user_id < 3 and value_1 > 2) user_where_1_1 INNER JOIN (SELECT "users"."user_id", "users"."value_1" FROM users_table as "users" WHERE user_id > 1 and user_id < 3 and value_2 > 3) user_where_1_join_1 ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id)) filter_users_1 JOIN LATERAL (SELECT user_id, time FROM events_table as "events" WHERE user_id > 1 and user_id < 3 and user_id != filter_users_1.user_id ORDER BY time DESC LIMIT 1) "last_events_1" ON true ORDER BY time DESC LIMIT 10) "some_recent_users" JOIN LATERAL (SELECT "users".user_id FROM users_table as "users" WHERE "users"."user_id" = "some_recent_users"."user_id" AND "users"."value_2" > 4 LIMIT 1) "some_users_data" ON true ORDER BY lastseen DESC LIMIT 10) "some_users" ORDER BY user_id DESC LIMIT 10; ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- not pushdownable since lower LATERAL JOIN is not on the partition key -- not recursively plannable due to LATERAL join where there is a reference -- from an outer query SELECT user_id, lastseen FROM (SELECT "some_users_data".user_id, lastseen FROM (SELECT filter_users_1.user_id, time AS lastseen FROM (SELECT user_where_1_1.user_id FROM (SELECT "users"."user_id" FROM users_table as "users" WHERE user_id > 1 and user_id < 3 and value_1 > 2) user_where_1_1 INNER JOIN (SELECT 
"users"."user_id", "users"."value_1" FROM users_table as "users" WHERE user_id > 1 and user_id < 3 and value_2 > 3) user_where_1_join_1 ON ("user_where_1_1".user_id = "user_where_1_join_1".user_id)) filter_users_1 JOIN LATERAL (SELECT user_id, time FROM events_table as "events" WHERE user_id > 1 and user_id < 3 and user_id = filter_users_1.user_id ORDER BY time DESC LIMIT 1) "last_events_1" ON true ORDER BY time DESC LIMIT 10) "some_recent_users" JOIN LATERAL (SELECT "users".user_id FROM users_table as "users" WHERE "users"."value_1" = "some_recent_users"."user_id" AND "users"."value_2" > 4 LIMIT 1) "some_users_data" ON true ORDER BY lastseen DESC LIMIT 10) "some_users" ORDER BY user_id DESC LIMIT 10; ERROR: cannot push down this subquery DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query -- NESTED INNER JOINs SELECT count(*) AS value, "generated_group_field" FROM (SELECT DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field" FROM (SELECT "eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field" FROM (SELECT * FROM (SELECT "events"."time", "events"."user_id", "events"."value_2" FROM events_table as "events" WHERE user_id > 1 and user_id < 4 AND event_type IN (4, 5) ) "temp_data_queries" INNER JOIN (SELECT user_where_1_1.real_user_id FROM (SELECT "users"."user_id" as real_user_id FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and value_2 > 3 ) user_where_1_1 INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and value_3 > 3 ) user_where_1_join_1 ON ("user_where_1_1".real_user_id = "user_where_1_join_1".user_id)) "user_filters_1" ON ("temp_data_queries".user_id = "user_filters_1".real_user_id)) "eventQuery") "pushedDownQuery") "pushedDownQuery" GROUP BY "generated_group_field" ORDER BY generated_group_field DESC, value DESC; value | generated_group_field -------+----------------------- 
1 | 5 2 | 2 2 | 1 1 | 0 (4 rows) SET citus.enable_repartition_joins to ON; SET client_min_messages TO DEBUG1; -- recursively planned since the first inner join is not on the partition key SELECT count(*) AS value, "generated_group_field" FROM (SELECT DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field" FROM (SELECT "eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field" FROM (SELECT * FROM (SELECT "events"."time", "events"."user_id", "events"."value_2" FROM events_table as "events" WHERE user_id > 1 and user_id < 4 AND event_type IN (4, 5) ) "temp_data_queries" INNER JOIN (SELECT user_where_1_1.real_user_id FROM (SELECT "users"."user_id" as real_user_id FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and value_2 > 3 ) user_where_1_1 INNER JOIN (SELECT "users"."user_id", "users"."value_2" FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and value_3 > 3 ) user_where_1_join_1 ON ("user_where_1_1".real_user_id = "user_where_1_join_1".value_2)) "user_filters_1" ON ("temp_data_queries".user_id = "user_filters_1".real_user_id)) "eventQuery") "pushedDownQuery") "pushedDownQuery" GROUP BY "generated_group_field" ORDER BY generated_group_field DESC, value DESC; DEBUG: generating subplan 64_1 for subquery SELECT user_id, value_2 FROM public.users_table users WHERE ((user_id > 1) AND (user_id < 4) AND (value_3 > (3)::double precision)) DEBUG: Plan 64 query after replacing subqueries and CTEs: SELECT count(*) AS value, generated_group_field FROM (SELECT DISTINCT "pushedDownQuery_1".real_user_id, "pushedDownQuery_1".generated_group_field FROM (SELECT "eventQuery".real_user_id, "eventQuery"."time", random() AS random, "eventQuery".value_2 AS generated_group_field FROM (SELECT temp_data_queries."time", temp_data_queries.user_id, temp_data_queries.value_2, user_filters_1.real_user_id FROM ((SELECT events."time", events.user_id, events.value_2 FROM public.events_table events WHERE 
((events.user_id > 1) AND (events.user_id < 4) AND (events.event_type = ANY (ARRAY[4, 5])))) temp_data_queries JOIN (SELECT user_where_1_1.real_user_id FROM ((SELECT users.user_id AS real_user_id FROM public.users_table users WHERE ((users.user_id > 1) AND (users.user_id < 4) AND (users.value_2 > 3))) user_where_1_1 JOIN (SELECT intermediate_result.user_id, intermediate_result.value_2 FROM read_intermediate_result('64_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_2 integer)) user_where_1_join_1 ON ((user_where_1_1.real_user_id = user_where_1_join_1.value_2)))) user_filters_1 ON ((temp_data_queries.user_id = user_filters_1.real_user_id)))) "eventQuery") "pushedDownQuery_1") "pushedDownQuery" GROUP BY generated_group_field ORDER BY generated_group_field DESC, (count(*)) DESC value | generated_group_field -------+----------------------- 1 | 5 2 | 2 2 | 1 1 | 0 (4 rows) -- recursive planning kicked-in since the non-equi join is among subqueries SELECT count(*) AS value, "generated_group_field" FROM (SELECT DISTINCT "pushedDownQuery"."real_user_id", "generated_group_field" FROM (SELECT "eventQuery"."real_user_id", "eventQuery"."time", random(), ("eventQuery"."value_2") AS "generated_group_field" FROM (SELECT * FROM (SELECT "events"."time", "events"."user_id", "events"."value_2" FROM events_table as "events" WHERE user_id > 1 and user_id < 4 AND event_type IN (4, 5) ) "temp_data_queries" INNER JOIN (SELECT user_where_1_1.real_user_id FROM (SELECT "users"."user_id" as real_user_id FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and value_2 > 3 ) user_where_1_1 INNER JOIN (SELECT "users"."user_id", "users"."value_2" FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and value_3 > 3 ) user_where_1_join_1 ON ("user_where_1_1".real_user_id >= "user_where_1_join_1".user_id)) "user_filters_1" ON ("temp_data_queries".user_id = "user_filters_1".real_user_id)) "eventQuery") "pushedDownQuery") "pushedDownQuery" GROUP BY 
"generated_group_field" ORDER BY generated_group_field DESC, value DESC; DEBUG: generating subplan 66_1 for subquery SELECT user_id, value_2 FROM public.users_table users WHERE ((user_id > 1) AND (user_id < 4) AND (value_3 > (3)::double precision)) DEBUG: Plan 66 query after replacing subqueries and CTEs: SELECT count(*) AS value, generated_group_field FROM (SELECT DISTINCT "pushedDownQuery_1".real_user_id, "pushedDownQuery_1".generated_group_field FROM (SELECT "eventQuery".real_user_id, "eventQuery"."time", random() AS random, "eventQuery".value_2 AS generated_group_field FROM (SELECT temp_data_queries."time", temp_data_queries.user_id, temp_data_queries.value_2, user_filters_1.real_user_id FROM ((SELECT events."time", events.user_id, events.value_2 FROM public.events_table events WHERE ((events.user_id > 1) AND (events.user_id < 4) AND (events.event_type = ANY (ARRAY[4, 5])))) temp_data_queries JOIN (SELECT user_where_1_1.real_user_id FROM ((SELECT users.user_id AS real_user_id FROM public.users_table users WHERE ((users.user_id > 1) AND (users.user_id < 4) AND (users.value_2 > 3))) user_where_1_1 JOIN (SELECT intermediate_result.user_id, intermediate_result.value_2 FROM read_intermediate_result('66_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_2 integer)) user_where_1_join_1 ON ((user_where_1_1.real_user_id >= user_where_1_join_1.user_id)))) user_filters_1 ON ((temp_data_queries.user_id = user_filters_1.real_user_id)))) "eventQuery") "pushedDownQuery_1") "pushedDownQuery" GROUP BY generated_group_field ORDER BY generated_group_field DESC, (count(*)) DESC value | generated_group_field -------+----------------------- 1 | 5 2 | 2 2 | 1 1 | 0 (4 rows) SET citus.enable_repartition_joins to OFF; RESET client_min_messages; -- single level inner joins SELECT "value_3", count(*) AS cnt FROM (SELECT "value_3", "user_id", random() FROM (SELECT users_in_segment_1.user_id, value_3 FROM (SELECT user_id, value_3 * 2 as value_3 FROM (SELECT 
user_id, value_3 FROM (SELECT "users"."user_id", value_3 FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and value_2 > 2 ) simple_user_where_1 ) all_buckets_1 ) users_in_segment_1 JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and value_2 > 3 ) some_users_data ON ("users_in_segment_1".user_id = "some_users_data".user_id) ) segmentalias_1) "tempQuery" GROUP BY "value_3" ORDER BY cnt, value_3 DESC LIMIT 10; value_3 | cnt ---------+----- 0 | 7 10 | 21 4 | 21 8 | 28 6 | 28 2 | 35 (6 rows) SET citus.enable_repartition_joins to ON; SET client_min_messages TO DEBUG1; -- although there is no column equality at all -- still recursive planning plans "some_users_data" -- and the query becomes OK SELECT "value_3", count(*) AS cnt FROM (SELECT "value_3", "user_id", random() FROM (SELECT users_in_segment_1.user_id, value_3 FROM (SELECT user_id, value_3 * 2 as value_3 FROM (SELECT user_id, value_3 FROM (SELECT "users"."user_id", value_3 FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and value_2 > 2 ) simple_user_where_1 ) all_buckets_1 ) users_in_segment_1 JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and value_2 > 3 ) some_users_data ON (true) ) segmentalias_1) "tempQuery" GROUP BY "value_3" ORDER BY cnt, value_3 DESC LIMIT 10; DEBUG: generating subplan 69_1 for subquery SELECT user_id FROM public.users_table users WHERE ((user_id > 1) AND (user_id < 4) AND (value_2 > 3)) DEBUG: Plan 69 query after replacing subqueries and CTEs: SELECT value_3, count(*) AS cnt FROM (SELECT segmentalias_1.value_3, segmentalias_1.user_id, random() AS random FROM (SELECT users_in_segment_1.user_id, users_in_segment_1.value_3 FROM ((SELECT all_buckets_1.user_id, (all_buckets_1.value_3 * (2)::double precision) AS value_3 FROM (SELECT simple_user_where_1.user_id, simple_user_where_1.value_3 FROM (SELECT users.user_id, users.value_3 FROM public.users_table users WHERE 
((users.user_id > 1) AND (users.user_id < 4) AND (users.value_2 > 2))) simple_user_where_1) all_buckets_1) users_in_segment_1 JOIN (SELECT intermediate_result.user_id FROM read_intermediate_result('69_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) some_users_data ON (true))) segmentalias_1) "tempQuery" GROUP BY value_3 ORDER BY (count(*)), value_3 DESC LIMIT 10 value_3 | cnt ---------+----- 0 | 14 10 | 42 4 | 42 8 | 56 6 | 56 2 | 70 (6 rows) SET citus.enable_repartition_joins to OFF; RESET client_min_messages; -- nested LATERAL JOINs SET citus.subquery_pushdown to ON; SELECT * FROM (SELECT "some_users_data".user_id, "some_recent_users".value_3 FROM (SELECT filter_users_1.user_id, value_3 FROM (SELECT "users"."user_id" FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and users.value_2 = 2) filter_users_1 JOIN LATERAL (SELECT user_id, value_3 FROM events_table as "events" WHERE user_id > 1 and user_id < 4 AND ("events".user_id = "filter_users_1".user_id) ORDER BY value_3 DESC LIMIT 1) "last_events_1" ON true ORDER BY value_3 DESC LIMIT 10) "some_recent_users" JOIN LATERAL (SELECT "users".user_id FROM users_table as "users" WHERE "users"."user_id" = "some_recent_users"."user_id" AND users.value_2 > 2 LIMIT 1) "some_users_data" ON true ORDER BY value_3 DESC LIMIT 10) "some_users" ORDER BY value_3 DESC LIMIT 10; user_id | value_3 ---------+--------- 3 | 5 3 | 5 3 | 5 3 | 5 3 | 5 2 | 5 2 | 5 2 | 5 2 | 5 (9 rows) -- nested lateral join at topmost level SELECT "some_users_data".user_id, "some_recent_users".value_3 FROM (SELECT filter_users_1.user_id, value_3 FROM (SELECT "users"."user_id" FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and users.value_2 = 2 ) filter_users_1 JOIN LATERAL (SELECT user_id, value_3 FROM events_table as "events" WHERE user_id > 1 and user_id < 4 AND ("events".user_id = "filter_users_1".user_id) ORDER BY value_3 DESC LIMIT 1 ) "last_events_1" ON true ORDER BY value_3 DESC LIMIT 10 )
"some_recent_users" JOIN LATERAL (SELECT "users".user_id FROM users_table as "users" WHERE "users"."user_id" = "some_recent_users"."user_id" AND users.value_2 > 2 LIMIT 1 ) "some_users_data" ON true ORDER BY value_3 DESC, user_id ASC LIMIT 10; user_id | value_3 ---------+--------- 2 | 5 2 | 5 2 | 5 2 | 5 3 | 5 3 | 5 3 | 5 3 | 5 3 | 5 (9 rows) -- longer nested lateral joins SELECT * FROM (SELECT "some_users_data".user_id, "some_recent_users".value_3 FROM (SELECT filter_users_1.user_id, value_3 FROM (SELECT "users"."user_id" FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and users.value_2 = 2) filter_users_1 JOIN LATERAL (SELECT user_id, value_3 FROM events_table as "events" WHERE user_id > 1 and user_id < 4 AND ("events".user_id = "filter_users_1".user_id) ORDER BY value_3 DESC LIMIT 1) "last_events_1" ON true ORDER BY value_3 DESC LIMIT 10) "some_recent_users" JOIN LATERAL (SELECT "users".user_id FROM users_table as "users" WHERE "users"."user_id" = "some_recent_users"."user_id" AND users.value_2 > 2 LIMIT 1) "some_users_data" ON true ORDER BY value_3 DESC LIMIT 10) "some_users" ORDER BY value_3 DESC LIMIT 10; user_id | value_3 ---------+--------- 3 | 5 3 | 5 3 | 5 3 | 5 3 | 5 2 | 5 2 | 5 2 | 5 2 | 5 (9 rows) -- longer nested lateral join with top level join SELECT "some_users_data".user_id, "some_recent_users".value_3 FROM (SELECT filter_users_1.user_id, value_3 FROM (SELECT "users"."user_id" FROM users_table as "users" WHERE user_id > 1 and user_id < 4 and users.value_2 = 2 ) filter_users_1 JOIN LATERAL (SELECT user_id, value_3 FROM events_table as "events" WHERE user_id > 1 and user_id < 4 AND ("events".user_id = "filter_users_1".user_id) ORDER BY value_3 DESC LIMIT 1 ) "last_events_1" ON TRUE ORDER BY value_3 DESC LIMIT 10 ) "some_recent_users" JOIN LATERAL (SELECT "users".user_id FROM users_table as "users" WHERE "users"."user_id" = "some_recent_users"."user_id" AND users.value_2 > 2 LIMIT 1 ) "some_users_data" ON TRUE ORDER BY value_3 DESC LIMIT
10; user_id | value_3 ---------+--------- 3 | 5 3 | 5 3 | 5 3 | 5 3 | 5 2 | 5 2 | 5 2 | 5 2 | 5 (9 rows) SET citus.subquery_pushdown to OFF; -- LEFT JOINs used with INNER JOINs SELECT count(*) AS cnt, "generated_group_field" FROM (SELECT "eventQuery"."user_id", random(), generated_group_field FROM (SELECT "multi_group_wrapper_1".*, generated_group_field, random() FROM (SELECT * FROM (SELECT "events"."time", "events"."user_id" as event_user_id FROM events_table as "events" WHERE user_id > 4) "temp_data_queries" INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE user_id > 4 and value_2 = 5) "user_filters_1" ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)) AS "multi_group_wrapper_1" LEFT JOIN (SELECT "users"."user_id" AS "user_id", value_2 AS "generated_group_field" FROM users_table as "users") "left_group_by_1" ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery" group BY "generated_group_field" ORDER BY cnt DESC, generated_group_field ASC LIMIT 10; cnt | generated_group_field -----+----------------------- 336 | 2 210 | 1 210 | 3 126 | 4 126 | 5 84 | 0 (6 rows) -- single table subquery, no JOINS involved SELECT count(*) AS cnt, user_id FROM (SELECT "eventQuery"."user_id", random() FROM (SELECT "events"."user_id" FROM events_table "events" WHERE event_type IN (1, 2)) "eventQuery") "pushedDownQuery" GROUP BY "user_id" ORDER BY cnt DESC, user_id DESC LIMIT 10; cnt | user_id -----+--------- 11 | 3 10 | 2 8 | 4 6 | 5 4 | 6 4 | 1 (6 rows) -- lateral joins in a nested manner SET citus.subquery_pushdown to ON; SELECT * FROM (SELECT "some_users_data".user_id, value_2 FROM (SELECT user_id, max(value_2) AS value_2 FROM (SELECT user_id, value_2 FROM (SELECT user_id, value_2 FROM events_table as "events" WHERE user_id > 1 and user_id < 3) "events_1" ORDER BY value_2 DESC LIMIT 10000) "recent_events_1" GROUP BY user_id ORDER BY max(value_2) DESC) "some_recent_users" JOIN LATERAL (SELECT
"users".user_id FROM users_table as "users" WHERE "users"."user_id" = "some_recent_users"."user_id" AND value_2 > 4 LIMIT 1) "some_users_data" ON true ORDER BY value_2 DESC LIMIT 10) "some_users" ORDER BY value_2 DESC, user_id DESC LIMIT 10; user_id | value_2 ---------+--------- 2 | 5 (1 row) SET citus.subquery_pushdown to OFF; -- not pushdownable since lower LATERAL JOIN is not on the partition key -- not recursively plannable due to LATERAL join where there is a reference -- from an outer query SELECT * FROM (SELECT "some_users_data".user_id, value_2 FROM (SELECT user_id, max(value_2) AS value_2 FROM (SELECT user_id, value_2 FROM (SELECT user_id, value_2 FROM events_table as "events" WHERE user_id > 1 and user_id < 3) "events_1" ORDER BY value_2 DESC LIMIT 10000) "recent_events_1" GROUP BY user_id ORDER BY max(value_2) DESC) "some_recent_users" JOIN LATERAL (SELECT "users".user_id FROM users_table as "users" WHERE "users"."value_2" = "some_recent_users"."user_id" AND value_2 > 4 LIMIT 1) "some_users_data" ON true ORDER BY value_2 DESC LIMIT 10) "some_users" ORDER BY value_2 DESC, user_id DESC LIMIT 10; ERROR: cannot push down this subquery DETAIL: Limit in subquery is currently unsupported when a subquery references a column from another query -- let's test some set operations that used to be unsupported -- INTERSECT is used here; the query still returns rows, presumably through recursive planning SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."user_id", "events"."time", 0 AS event FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 1 AS event FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_2) UNION (SELECT
* FROM (SELECT "events"."user_id", "events"."time", 2 AS event FROM events_table as "events" WHERE event_type IN (5, 6) ) events_subquery_3) INTERSECT (SELECT * FROM (SELECT "events"."user_id", "events"."time", 3 AS event FROM events_table as "events" WHERE event_type IN (4, 5)) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; types | sumofeventtype -------+---------------- 0 | 449 1 | 433 (2 rows) -- supported through recursive planning SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT * FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."user_id", "events"."time", 0 AS event FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 1 AS event FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_2) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 2 AS event FROM events_table as "events" WHERE event_type IN (5, 6) ) events_subquery_3) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 3 AS event FROM events_table as "events" WHERE event_type IN (4, 5)) events_subquery_4) ORDER BY 1, 2 OFFSET 3) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; types | sumofeventtype -------+---------------- 0 | 449 1 | 425 2 | 75 3 | 251 (4 rows) -- one UNION branch is a non relation rte; the query still returns rows, presumably through recursive planning SELECT ("final_query"."event_types") as types, count(*) AS
sumOfEventType FROM ( SELECT *, random() FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."user_id", "events"."time", 0 AS event FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 1 AS event FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_2) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 2 AS event FROM events_table as "events" WHERE event_type IN (5, 6) ) events_subquery_3) UNION (SELECT * FROM (SELECT 1 as user_id, now(), 3 AS event ) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT "users"."user_id" FROM users_table as "users" WHERE value_1 > 0 and value_1 < 4) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; types | sumofeventtype -------+---------------- 0 | 449 1 | 433 2 | 75 3 | 4 (4 rows) -- similar to the above, but constant rte is on the right side of the query SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType FROM ( SELECT *, random() FROM ( SELECT "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" FROM ( SELECT "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events FROM ( (SELECT * FROM (SELECT "events"."user_id", "events"."time", 0 AS event FROM events_table as "events" WHERE event_type IN (1, 2) ) events_subquery_1) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 1 AS event FROM events_table as "events" WHERE event_type IN (3, 4) ) events_subquery_2) UNION (SELECT * FROM (SELECT "events"."user_id", "events"."time", 2 AS event FROM events_table as "events" WHERE event_type IN (5, 6) ) events_subquery_3) UNION (SELECT * 
FROM (SELECT 1 as user_id, now(), 3 AS event ) events_subquery_4)) t1 GROUP BY "t1"."user_id") AS t) "q" INNER JOIN (SELECT 1 as user_id) AS t ON (t.user_id = q.user_id)) as final_query GROUP BY types ORDER BY types; types | sumofeventtype -------+---------------- 0 | 4 1 | 8 2 | 1 3 | 1 (4 rows)