diff --git a/src/backend/distributed/planner/recursive_planning.c b/src/backend/distributed/planner/recursive_planning.c index 28b856b2c..779ee79fa 100644 --- a/src/backend/distributed/planner/recursive_planning.c +++ b/src/backend/distributed/planner/recursive_planning.c @@ -878,12 +878,14 @@ RecursivelyPlanDistributedJoinNode(Node *node, Query *query, else if (distributedRte->rtekind == RTE_SUBQUERY) { /* - * XXX: Similar to JoinExpr, we don't know how to recursively plan distributed - * subqueries within join expressions yet. + * We don't try logging the subquery here because RecursivelyPlanSubquery + * will anyway do so if the query doesn't reference the outer query. */ - ereport(DEBUG4, (errmsg("recursive planner cannot plan distributed " - "subqueries within join expressions yet"))); - return; + ereport(DEBUG1, (errmsg("recursively planning the distributed subquery " + "since it is part of a distributed join node " + "that is outer joined with a recurring rel"))); + + RecursivelyPlanSubquery(distributedRte->subquery, recursivePlanningContext); } else { diff --git a/src/test/regress/expected/cte_inline.out b/src/test/regress/expected/cte_inline.out index 7e46227d2..39d48e915 100644 --- a/src/test/regress/expected/cte_inline.out +++ b/src/test/regress/expected/cte_inline.out @@ -1357,22 +1357,17 @@ DEBUG: Router planner cannot handle multi-shard select queries { "8" : "test18", "8" : "test28", "8" : "test38", "8" : "test48", "8" : "test58", "8" : "test68", "8" : "test78", "8" : "test8", "8" : "test88", "8" : "test98" } (1 row) --- this test can only work if the CTE is recursively --- planned WITH b AS (SELECT * FROM test_table) SELECT count(*) FROM (SELECT key as x FROM test_table OFFSET 0) as ref LEFT JOIN b ON (ref.x = b.key); DEBUG: CTE b is going to be inlined via distributed planning DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for subquery SELECT key AS x FROM cte_inline.test_table OFFSET 0 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) b ON ((ref.x OPERATOR(pg_catalog.=) b.key))) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE b: SELECT key, value, other_value FROM cte_inline.test_table -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_2 for subquery SELECT key AS x FROM cte_inline.test_table OFFSET 0 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((ref.x OPERATOR(pg_catalog.=) b.key))) +DEBUG: generating subplan XXX_2 for subquery SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((ref.x OPERATOR(pg_catalog.=) b.key))) DEBUG: Creating router plan count --------------------------------------------------------------------- @@ -1397,8 +1392,6 @@ DEBUG: Router planner cannot handle multi-shard select queries 480 (1 row) --- cte a has to be recursively planned because of OFFSET 0 --- after that, cte b also requires recursive planning WITH a AS (SELECT * FROM test_table OFFSET 0), b AS (SELECT * FROM test_table) SELECT min(a.key) FROM a LEFT JOIN b ON (a.value = b.value); @@ -1407,13 +1400,10 @@ DEBUG: CTE b is going to be inlined via distributed planning DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM cte_inline.test_table OFFSET 0 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT min(a.key) AS min FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a LEFT JOIN (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) b ON ((a.value OPERATOR(pg_catalog.=) b.value))) -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value, other_value FROM cte_inline.test_table OFFSET 0 -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_2 for CTE b: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for subquery SELECT key, value, other_value FROM cte_inline.test_table DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT min(a.key) AS min FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((a.value OPERATOR(pg_catalog.=) b.value))) DEBUG: Creating router plan min @@ -1450,14 +1440,11 @@ DEBUG: CTE cte_2 is going to be inlined via distributed planning DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for subquery SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT test_table.value FROM cte_inline.test_table WHERE (test_table.key OPERATOR(pg_catalog.>) 1)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3 +DEBUG: recursively planning left side of the full join since the other side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1) -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_2 for CTE cte_2: SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3 +DEBUG: generating subplan XXX_2 for subquery SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3 DEBUG: Creating router plan value --------------------------------------------------------------------- diff --git a/src/test/regress/expected/cte_inline_0.out b/src/test/regress/expected/cte_inline_0.out index ab2b91791..a727d4d21 100644 --- a/src/test/regress/expected/cte_inline_0.out +++ b/src/test/regress/expected/cte_inline_0.out @@ -1357,22 +1357,17 @@ DEBUG: Router planner cannot handle multi-shard select queries { "8" : "test18", "8" : "test28", "8" : "test38", "8" : "test48", "8" : "test58", "8" : "test68", "8" : "test78", "8" : "test8", "8" : "test88", "8" : "test98" } (1 row) --- this test can only work if the CTE is recursively --- planned WITH b AS (SELECT * FROM test_table) SELECT count(*) FROM (SELECT key as x FROM test_table OFFSET 0) as ref LEFT JOIN b ON (ref.x = b.key); DEBUG: CTE b is going to be inlined via distributed planning DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for subquery SELECT key AS x FROM cte_inline.test_table OFFSET 0 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) b ON ((ref.x OPERATOR(pg_catalog.=) b.key))) +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE b: SELECT key, value, other_value FROM cte_inline.test_table -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_2 for subquery SELECT key AS x FROM cte_inline.test_table OFFSET 0 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((ref.x OPERATOR(pg_catalog.=) b.key))) +DEBUG: generating subplan XXX_2 for subquery SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.x FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer)) ref LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((ref.x OPERATOR(pg_catalog.=) b.key))) DEBUG: Creating router plan count --------------------------------------------------------------------- @@ -1397,8 +1392,6 @@ DEBUG: Router planner cannot handle multi-shard select queries 480 (1 row) --- cte a has to be recursively planned because of OFFSET 0 --- after that, cte b also requires recursive planning WITH a AS (SELECT * FROM test_table OFFSET 0), b AS (SELECT * FROM test_table) SELECT min(a.key) FROM a LEFT JOIN b ON (a.value = b.value); @@ -1407,13 +1400,10 @@ DEBUG: CTE b is going to be inlined via distributed planning DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for subquery SELECT key, value, other_value FROM cte_inline.test_table OFFSET 0 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT min(a.key) AS min FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a LEFT JOIN (SELECT test_table.key, test_table.value, test_table.other_value FROM cte_inline.test_table) b ON ((a.value OPERATOR(pg_catalog.=) b.value))) -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE a: SELECT key, value, other_value FROM cte_inline.test_table OFFSET 0 -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_2 for CTE b: SELECT key, value, other_value FROM cte_inline.test_table +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Router planner cannot handle multi-shard select queries +DEBUG: generating subplan XXX_2 for subquery SELECT key, value, other_value FROM cte_inline.test_table DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT min(a.key) AS min FROM ((SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) a LEFT JOIN (SELECT intermediate_result.key, intermediate_result.value, intermediate_result.other_value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(key integer, value text, other_value jsonb)) b ON ((a.value OPERATOR(pg_catalog.=) b.value))) DEBUG: Creating router plan min @@ -1450,14 +1440,11 @@ DEBUG: CTE cte_2 is going to be inlined via distributed planning DEBUG: Router planner cannot handle multi-shard select queries DEBUG: Router planner cannot handle multi-shard select queries DEBUG: generating subplan XXX_1 for subquery SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT test_table.value FROM cte_inline.test_table WHERE (test_table.key OPERATOR(pg_catalog.>) 1)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3 +DEBUG: recursively planning left side of the full join since the other side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1) -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: generating subplan XXX_2 for CTE cte_2: SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 3) -DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3 +DEBUG: generating subplan XXX_2 for subquery SELECT value FROM cte_inline.test_table WHERE (key OPERATOR(pg_catalog.>) 1) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT value FROM ((SELECT intermediate_result.value FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_1 FULL JOIN (SELECT intermediate_result.value FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value text)) cte_2 USING (value)) ORDER BY value DESC LIMIT 3 DEBUG: Creating router plan value --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_insert_select_non_pushable_queries.out b/src/test/regress/expected/multi_insert_select_non_pushable_queries.out index 222157a87..fc3f62385 100644 --- a/src/test/regress/expected/multi_insert_select_non_pushable_queries.out +++ b/src/test/regress/expected/multi_insert_select_non_pushable_queries.out @@ -107,7 +107,7 @@ FROM ( ERROR: the query contains a join that requires repartitioning HINT: Set citus.enable_repartition_joins to on to enable repartitioning -- the LEFT JOIN conditon is not on the partition column (i.e., is it part_key divided by 2) --- still, recursive planning will kick in to plan some part of the query +-- but, we can plan the query thanks to recursive planning SET client_min_messages TO DEBUG1; INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg ) SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event) @@ -145,14 +145,22 @@ FROM ( AND e.event_type IN (106, 107, 108) ) t2 ON (t1.user_id = (t2.user_id)/2) GROUP BY t1.user_id, hasdone_event -) t GROUP BY user_id, hasdone_event; +) t GROUP BY user_id, hasdone_event +RETURNING user_id, value_1_agg, value_2_agg; DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries DEBUG: generating subplan XXX_1 for subquery SELECT u.user_id, 'step=>1'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.>=) 10) AND (u.user_id OPERATOR(pg_catalog.<=) 25) AND (e.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[100, 101, 102]))) DEBUG: generating subplan XXX_2 for subquery SELECT u.user_id, 'step=>2'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.>=) 10) AND (u.user_id OPERATOR(pg_catalog.<=) 25) AND (e.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[103, 104, 105]))) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) UNION SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) DEBUG: generating subplan XXX_1 for subquery SELECT u.user_id, 'step=>1'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.>=) 10) AND (u.user_id OPERATOR(pg_catalog.<=) 25) AND (e.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[100, 101, 102]))) UNION SELECT u.user_id, 'step=>2'::text AS event, e."time" FROM public.users_table u, public.events_table e WHERE ((u.user_id OPERATOR(pg_catalog.=) e.user_id) AND (u.user_id OPERATOR(pg_catalog.>=) 10) AND (u.user_id OPERATOR(pg_catalog.<=) 25) AND (e.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[103, 104, 105]))) -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT DISTINCT user_id, 'Has done event'::text AS hasdone_event FROM public.events_table e WHERE ((user_id OPERATOR(pg_catalog.>=) 10) AND (user_id OPERATOR(pg_catalog.<=) 25) AND (event_type OPERATOR(pg_catalog.=) ANY (ARRAY[106, 107, 108]))) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, int4(sum(array_length(events_table, 1))) AS value_1_agg, length(hasdone_event) AS value_2_agg FROM (SELECT t1.user_id, array_agg(t1.event ORDER BY t1."time") AS events_table, COALESCE(t2.hasdone_event, 'Has not done event'::text) AS hasdone_event FROM ((SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone)) t1 LEFT JOIN (SELECT intermediate_result.user_id, intermediate_result.hasdone_event FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, hasdone_event text)) t2 ON ((t1.user_id OPERATOR(pg_catalog.=) (t2.user_id OPERATOR(pg_catalog./) 2)))) GROUP BY t1.user_id, t2.hasdone_event) t GROUP BY user_id, hasdone_event +DEBUG: Collecting INSERT ... SELECT results on coordinator + user_id | value_1_agg | value_2_agg +--------------------------------------------------------------------- +(0 rows) + RESET client_min_messages; --------------------------------------------------------------------- --------------------------------------------------------------------- @@ -229,7 +237,7 @@ ORDER BY ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- not pushable since the JOIN condition is not equi JOIN -- (subquery_1 JOIN subquery_2) --- still, recursive planning will kick in +-- but, we can plan the query thanks to recursive planning SET client_min_messages TO DEBUG1; INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg) SELECT @@ -295,14 +303,22 @@ WHERE GROUP BY count_pay, user_id ORDER BY - count_pay; + count_pay +RETURNING user_id, value_1_agg, value_2_agg; DEBUG: Set operations are not allowed in distributed INSERT ... SELECT queries DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id, 'action=>1'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (users_table.user_id OPERATOR(pg_catalog.>=) 10) AND (users_table.user_id OPERATOR(pg_catalog.<=) 70) AND (events_table.event_type OPERATOR(pg_catalog.>) 10) AND (events_table.event_type OPERATOR(pg_catalog.<) 12)) DEBUG: generating subplan XXX_2 for subquery SELECT users_table.user_id, 'action=>2'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (users_table.user_id OPERATOR(pg_catalog.>=) 10) AND (users_table.user_id OPERATOR(pg_catalog.<=) 70) AND (events_table.event_type OPERATOR(pg_catalog.>) 12) AND (events_table.event_type OPERATOR(pg_catalog.<) 14)) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) UNION SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone) DEBUG: generating subplan XXX_1 for subquery SELECT users_table.user_id, 'action=>1'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (users_table.user_id OPERATOR(pg_catalog.>=) 10) AND (users_table.user_id OPERATOR(pg_catalog.<=) 70) AND (events_table.event_type OPERATOR(pg_catalog.>) 10) AND (events_table.event_type OPERATOR(pg_catalog.<) 12)) UNION SELECT users_table.user_id, 'action=>2'::text AS event, events_table."time" FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (users_table.user_id OPERATOR(pg_catalog.>=) 10) AND (users_table.user_id OPERATOR(pg_catalog.<=) 70) AND (events_table.event_type OPERATOR(pg_catalog.>) 12) AND (events_table.event_type OPERATOR(pg_catalog.<) 14)) -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT user_id, count(*) AS count_pay FROM public.users_table WHERE ((user_id OPERATOR(pg_catalog.>=) 10) AND (user_id OPERATOR(pg_catalog.<=) 70) AND (value_1 OPERATOR(pg_catalog.>) 15) AND (value_1 OPERATOR(pg_catalog.<) 17)) GROUP BY user_id HAVING (count(*) OPERATOR(pg_catalog.>) 1) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, int4(avg(array_length(events_table, 1))) AS value_1_agg, int4(count_pay) AS value_2_agg FROM (SELECT subquery_1.user_id, array_agg(subquery_1.event ORDER BY subquery_1."time") AS events_table, COALESCE(subquery_2.count_pay, (0)::bigint) AS count_pay FROM ((SELECT intermediate_result.user_id, intermediate_result.event, intermediate_result."time" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, event text, "time" timestamp without time zone)) subquery_1 LEFT JOIN (SELECT intermediate_result.user_id, intermediate_result.count_pay FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, count_pay bigint)) subquery_2 ON ((subquery_1.user_id OPERATOR(pg_catalog.>) subquery_2.user_id))) GROUP BY subquery_1.user_id, subquery_2.count_pay) subquery_top WHERE (array_ndims(events_table) OPERATOR(pg_catalog.>) 0) GROUP BY count_pay, user_id ORDER BY count_pay +DEBUG: Collecting INSERT ... SELECT results on coordinator + user_id | value_1_agg | value_2_agg +--------------------------------------------------------------------- +(0 rows) + RESET client_min_messages; --------------------------------------------------------------------- --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_subquery_complex_reference_clause.out b/src/test/regress/expected/multi_subquery_complex_reference_clause.out index 22a64ccb5..6fe7c1570 100644 --- a/src/test/regress/expected/multi_subquery_complex_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_complex_reference_clause.out @@ -679,8 +679,14 @@ SELECT * FROM WHERE user_id > 2 and value_2 = 1) as foo_in ON (event_val_2 = user_id)) as foo LEFT JOIN (SELECT user_id as user_user_id FROM users_table) as fooo ON (user_id = user_user_id)) as bar; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + user_id +--------------------------------------------------------------------- + + 3 + 5 + 4 +(4 rows) + -- we could even support the following where the subquery -- on the outer part of the left join contains a reference table SELECT max(events_all.cnt), events_all.usr_id @@ -1126,7 +1132,7 @@ count(*) AS cnt, "generated_group_field" 84 | 0 (6 rows) - -- RIGHT JOINs used with INNER JOINs should error out since reference table exist in the + -- RIGHT JOINs used with INNER JOINs should work even if the reference table exist in the -- right side of the RIGHT JOIN. SELECT count(*) AS cnt, "generated_group_field" @@ -1164,8 +1170,16 @@ count(*) AS cnt, "generated_group_field" ORDER BY cnt DESC, generated_group_field ASC LIMIT 10; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + cnt | generated_group_field +--------------------------------------------------------------------- + 1007 | 2 + 952 | 5 + 773 | 1 + 696 | 3 + 433 | 4 + 190 | 0 +(6 rows) + -- right join where the inner part of the join includes a reference table -- joined with hash partitioned table using non-equi join SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event), hasdone_event @@ -1316,8 +1330,27 @@ FROM ORDER BY user_id limit 50; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + user_id | lastseen +--------------------------------------------------------------------- + 1 | + 1 | + 2 | Thu Nov 23 17:26:14.563216 2017 + 2 | Thu Nov 23 17:26:14.563216 2017 + 2 | Thu Nov 23 17:26:14.563216 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 3 | Thu Nov 23 18:08:26.550729 2017 + 4 | + 4 | + 4 | + 5 | + 5 | + 5 | + 5 | + 5 | + 6 | +(17 rows) + -- -- UNIONs and JOINs with reference tables, should error out -- @@ -1553,9 +1586,6 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c 67 (1 row) --- we could not push this query not due to non colocated --- subqueries (i.e., they are recursively planned) --- but due to outer join restrictions SELECT count(*) AS cnt, "generated_group_field" FROM @@ -1593,9 +1623,20 @@ count(*) AS cnt, "generated_group_field" cnt DESC, generated_group_field ASC LIMIT 10; DEBUG: generating subplan XXX_1 for subquery SELECT user_id, value_2 AS generated_group_field FROM public.users_table users -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS cnt, generated_group_field FROM (SELECT "eventQuery".user_id, random() AS random, "eventQuery".generated_group_field FROM (SELECT multi_group_wrapper_1."time", multi_group_wrapper_1.event_user_id, multi_group_wrapper_1.user_id, left_group_by_1.generated_group_field, random() AS random FROM ((SELECT temp_data_queries."time", temp_data_queries.event_user_id, user_filters_1.user_id FROM ((SELECT events."time", events.user_id AS event_user_id FROM public.events_table events WHERE (events.user_id OPERATOR(pg_catalog.>) 2)) temp_data_queries JOIN (SELECT users.user_id FROM public.users_reference_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 2) AND (users.value_2 OPERATOR(pg_catalog.=) 5))) user_filters_1 ON ((temp_data_queries.event_user_id OPERATOR(pg_catalog.<) user_filters_1.user_id)))) multi_group_wrapper_1 RIGHT JOIN (SELECT intermediate_result.user_id, intermediate_result.generated_group_field FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, generated_group_field integer)) left_group_by_1 ON ((left_group_by_1.user_id OPERATOR(pg_catalog.>) multi_group_wrapper_1.event_user_id)))) "eventQuery") "pushedDownQuery" GROUP BY generated_group_field ORDER BY (count(*)) DESC, generated_group_field LIMIT 10 -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT temp_data_queries."time", temp_data_queries.event_user_id, user_filters_1.user_id FROM ((SELECT events."time", events.user_id AS event_user_id FROM public.events_table events WHERE (events.user_id OPERATOR(pg_catalog.>) 2)) temp_data_queries JOIN (SELECT users.user_id FROM public.users_reference_table users WHERE ((users.user_id OPERATOR(pg_catalog.>) 2) AND (users.value_2 OPERATOR(pg_catalog.=) 5))) user_filters_1 ON ((temp_data_queries.event_user_id OPERATOR(pg_catalog.<) user_filters_1.user_id))) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS cnt, generated_group_field FROM (SELECT "eventQuery".user_id, random() AS random, "eventQuery".generated_group_field FROM (SELECT multi_group_wrapper_1."time", multi_group_wrapper_1.event_user_id, multi_group_wrapper_1.user_id, left_group_by_1.generated_group_field, random() AS random FROM ((SELECT intermediate_result."time", intermediate_result.event_user_id, intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result("time" timestamp without time zone, event_user_id integer, user_id integer)) multi_group_wrapper_1 RIGHT JOIN (SELECT intermediate_result.user_id, intermediate_result.generated_group_field FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, generated_group_field integer)) left_group_by_1 ON ((left_group_by_1.user_id OPERATOR(pg_catalog.>) multi_group_wrapper_1.event_user_id)))) "eventQuery") "pushedDownQuery" GROUP BY generated_group_field ORDER BY (count(*)) DESC, generated_group_field LIMIT 10 + cnt | generated_group_field +--------------------------------------------------------------------- + 2042 | 1 + 1675 | 2 + 1470 | 4 + 1259 | 3 + 941 | 0 + 686 | 5 +(6 rows) + RESET client_min_messages; -- two hash partitioned relations are not joined -- on partiton keys although reference table is fine @@ -2132,12 +2173,14 @@ SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INN 6 (1 row) --- one example where unsupported outer join is deep inside a subquery +-- one example where supported outer join is deep inside a subquery SELECT *, random() FROM ( SELECT *,random() FROM user_buy_test_table WHERE user_id > ( SELECT count(*) FROM (SELECT *,random() FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as bar) as foo LEFT JOIN (SELECT *, random() FROM (SELECT *,random() FROM user_buy_test_table d1 JOIN user_buy_test_table d2 USING (user_id)) as bar_inner ) as bar ON true)) as boo; -ERROR: cannot pushdown the subquery -DETAIL: There exist a reference table in the outer part of the outer join + user_id | item_id | buy_count | random | random +--------------------------------------------------------------------- +(0 rows) + SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN user_buy_test_table u1 on ref1.id = u1.user_id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id); count --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_subquery_in_where_clause.out b/src/test/regress/expected/multi_subquery_in_where_clause.out index b314f72d5..834cef505 100644 --- a/src/test/regress/expected/multi_subquery_in_where_clause.out +++ b/src/test/regress/expected/multi_subquery_in_where_clause.out @@ -534,8 +534,13 @@ WHERE GROUP BY user_id HAVING count(*) > 1 AND sum(value_2) > 29 ORDER BY 1; -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + user_id +--------------------------------------------------------------------- + 2 + 3 + 4 +(3 rows) + -- NOT EXISTS query has non-equi join SELECT user_id, array_length(events_table, 1) FROM ( diff --git a/src/test/regress/expected/non_colocated_subquery_joins.out b/src/test/regress/expected/non_colocated_subquery_joins.out index a4d66aba2..d03d4ecf3 100644 --- a/src/test/regress/expected/non_colocated_subquery_joins.out +++ b/src/test/regress/expected/non_colocated_subquery_joins.out @@ -715,8 +715,6 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c (1 row) -- recursive planning should kick in for outer joins as well --- but this time recursive planning might convert the query --- into a not supported join SELECT true AS valid FROM explain_json_2($$ SELECT @@ -729,8 +727,15 @@ SELECT true AS valid FROM explain_json_2($$ $$); DEBUG: function does not have co-located tables DEBUG: generating subplan XXX_1 for subquery SELECT value_2, random() AS random FROM public.users_table -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT users_table.value_2, random() AS random FROM public.users_table) u1 RIGHT JOIN (SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u2 USING (value_2)) -ERROR: cannot pushdown the subquery +DEBUG: recursively planning left side of the right join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT value_2, random() AS random FROM public.users_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u1 RIGHT JOIN (SELECT intermediate_result.value_2, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer, random double precision)) u2 USING (value_2)) + valid +--------------------------------------------------------------------- + t +(1 row) + -- set operations may produce not very efficient plans -- although we could have picked a as our anchor subquery, -- we pick foo in this case and recursively plan a diff --git a/src/test/regress/expected/subqueries_not_supported.out b/src/test/regress/expected/subqueries_not_supported.out index b69256866..1a5895a54 100644 --- a/src/test/regress/expected/subqueries_not_supported.out +++ b/src/test/regress/expected/subqueries_not_supported.out @@ -70,19 +70,34 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM ERROR: cannot handle complex subqueries when the router executor is disabled SET citus.enable_router_execution TO true; -- OUTER JOINs where the outer part is recursively planned and not the other way --- around is not supported +-- around are supported SELECT foo.value_2 +INTO result_table FROM - (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4) LIMIT 5) as foo + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4) ORDER BY users_table.value_2 LIMIT 5) as foo LEFT JOIN (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar ON(foo.value_2 = bar.value_2); DEBUG: push down of limit count: 5 -DEBUG: generating subplan XXX_1 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) LIMIT 5 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.value_2 FROM ((SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo LEFT JOIN (SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8])))) bar ON ((foo.value_2 OPERATOR(pg_catalog.=) bar.value_2))) -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table +DEBUG: generating subplan XXX_1 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2, 3, 4]))) ORDER BY users_table.value_2 LIMIT 5 +DEBUG: recursively planning right side of the left join since the outer side is a recurring rel +DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel +DEBUG: generating subplan XXX_2 for subquery SELECT users_table.value_2 FROM public.users_table, public.events_table WHERE ((users_table.user_id OPERATOR(pg_catalog.=) events_table.user_id) AND (events_table.event_type OPERATOR(pg_catalog.=) ANY (ARRAY[5, 6, 7, 8]))) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT foo.value_2 FROM ((SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) foo LEFT JOIN (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer)) bar ON ((foo.value_2 OPERATOR(pg_catalog.=) bar.value_2))) +SELECT COUNT(*) = 60 FROM result_table WHERE value_2 = 0; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +SELECT COUNT(*) = 0 FROM result_table WHERE value_2 != 0; + ?column? +--------------------------------------------------------------------- + t +(1 row) + +DROP TABLE result_table; -- We do not support GROUPING SETS in subqueries -- This also includes ROLLUP or CUBE clauses SELECT * FROM (SELECT user_id, value_1 FROM users_table GROUP BY GROUPING SETS ((user_id), (value_1))) s; diff --git a/src/test/regress/expected/with_basics.out b/src/test/regress/expected/with_basics.out index 0d0070fd6..4eefb8837 100644 --- a/src/test/regress/expected/with_basics.out +++ b/src/test/regress/expected/with_basics.out @@ -840,8 +840,18 @@ LEFT JOIN WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id ORDER BY 2 DESC, 1 DESC; -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + user_id | user_id +--------------------------------------------------------------------- + 7 | + 6 | + 4 | + 2 | + 8 | 8 + 5 | 5 + 3 | 3 + 1 | 1 +(8 rows) + -- similar query as the above, but this time -- use NOT EXITS, which is pretty common struct WITH distinct_undistribured AS @@ -855,9 +865,20 @@ LEFT JOIN WHERE NOT EXISTS (SELECT NULL FROM distinct_undistribured - WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id; -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id +ORDER BY 1,2; + user_id | user_id +--------------------------------------------------------------------- + 1 | + 2 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | +(8 rows) + -- same NOT EXISTS struct, but with CTE -- so should work WITH distinct_undistribured AS ( @@ -905,8 +926,11 @@ LEFT JOIN (SELECT NULL FROM distinct_undistribured d2 WHERE d1.user_id = d2.user_id )) AS bar USING (user_id); -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + count +--------------------------------------------------------------------- + 8 +(1 row) + -- should work fine with materialized ctes WITH distinct_undistribured AS MATERIALIZED ( SELECT DISTINCT user_id @@ -951,8 +975,18 @@ LEFT JOIN WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id ORDER BY 2 DESC, 1 DESC; -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + user_id | user_id +--------------------------------------------------------------------- + 7 | + 6 | + 4 | + 2 | + 8 | 8 + 5 | 5 + 3 | 3 + 1 | 1 +(8 rows) + WITH distinct_undistribured AS MATERIALIZED (SELECT DISTINCT user_id FROM test_cte) @@ -964,9 +998,20 @@ LEFT JOIN WHERE NOT EXISTS (SELECT NULL FROM distinct_undistribured - WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id; -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id +ORDER BY 1,2; + user_id | user_id +--------------------------------------------------------------------- + 1 | + 2 | + 3 | + 4 | + 5 | + 6 | + 7 | + 8 | +(8 rows) + -- NOT EXISTS struct, with cte inlining disabled WITH distinct_undistribured AS MATERIALIZED( SELECT DISTINCT user_id @@ -1013,8 +1058,11 @@ LEFT JOIN (SELECT NULL FROM distinct_undistribured d2 WHERE d1.user_id = d2.user_id )) AS bar USING (user_id); -ERROR: cannot pushdown the subquery -DETAIL: Complex subqueries, CTEs and local tables cannot be in the outer part of an outer join with a distributed table + count +--------------------------------------------------------------------- + 8 +(1 row) + -- some test with failures WITH a AS MATERIALIZED (SELECT * FROM users_table LIMIT 10) SELECT user_id/0 FROM users_table JOIN a USING (user_id); diff --git a/src/test/regress/sql/cte_inline.sql b/src/test/regress/sql/cte_inline.sql index ab11c3749..862a8510b 100644 --- a/src/test/regress/sql/cte_inline.sql +++ b/src/test/regress/sql/cte_inline.sql @@ -566,8 +566,6 @@ EXECUTE retry_planning(4); EXECUTE retry_planning(5); EXECUTE retry_planning(6); --- this test can only work if the CTE is recursively --- planned WITH b AS (SELECT * FROM test_table) SELECT count(*) FROM (SELECT key as x FROM test_table OFFSET 0) as ref LEFT JOIN b ON (ref.x = b.key); @@ -578,8 +576,6 @@ WITH a AS (SELECT * FROM test_table), b AS (SELECT * FROM test_table) SELECT count(*) FROM a LEFT JOIN b ON (a.value = b.value); --- cte a has to be recursively planned because of OFFSET 0 --- after that, cte b also requires recursive planning WITH a AS (SELECT * FROM test_table OFFSET 0), b AS (SELECT * FROM test_table) SELECT min(a.key) FROM a LEFT JOIN b ON (a.value = b.value); diff --git a/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql b/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql index 399883ed9..b4654144b 100644 --- a/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql +++ b/src/test/regress/sql/multi_insert_select_non_pushable_queries.sql @@ -112,7 +112,7 @@ FROM ( ) t GROUP BY user_id, hasdone_event; -- the LEFT JOIN conditon is not on the partition column (i.e., is it part_key divided by 2) --- still, recursive planning will kick in to plan some part of the query +-- but, we can plan the query thanks to recursive planning SET client_min_messages TO DEBUG1; INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg ) @@ -153,7 +153,8 @@ FROM ( ) t2 ON (t1.user_id = (t2.user_id)/2) GROUP BY t1.user_id, hasdone_event -) t GROUP BY user_id, hasdone_event; +) t GROUP BY user_id, hasdone_event +RETURNING user_id, value_1_agg, value_2_agg; RESET client_min_messages; ------------------------------------ @@ -232,7 +233,7 @@ ORDER BY -- not pushable since the JOIN condition is not equi JOIN -- (subquery_1 JOIN subquery_2) --- still, recursive planning will kick in +-- but, we can plan the query thanks to recursive planning SET client_min_messages TO DEBUG1; INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg) SELECT @@ -298,7 +299,8 @@ WHERE GROUP BY count_pay, user_id ORDER BY - count_pay; + count_pay +RETURNING user_id, value_1_agg, value_2_agg; RESET client_min_messages; ------------------------------------ diff --git a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql index 64ff78c48..b722bd8f9 100644 --- a/src/test/regress/sql/multi_subquery_complex_reference_clause.sql +++ b/src/test/regress/sql/multi_subquery_complex_reference_clause.sql @@ -723,7 +723,7 @@ count(*) AS cnt, "generated_group_field" cnt DESC, generated_group_field ASC LIMIT 10; - -- RIGHT JOINs used with INNER JOINs should error out since reference table exist in the + -- RIGHT JOINs used with INNER JOINs should work even if the reference table exist in the -- right side of the RIGHT JOIN. SELECT count(*) AS cnt, "generated_group_field" @@ -1097,9 +1097,6 @@ SELECT count(*) FROM ON user_buy_test_table.user_id > users_ref_test_table.id) subquery_2 WHERE subquery_1.user_id != subquery_2.user_id ; --- we could not push this query not due to non colocated --- subqueries (i.e., they are recursively planned) --- but due to outer join restrictions SELECT count(*) AS cnt, "generated_group_field" FROM @@ -1449,7 +1446,7 @@ SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INN SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (user_buy_test_table.user_id > 19); SELECT count(*) FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as foo LEFT JOIN user_buy_test_table ON (foo.id = user_buy_test_table.user_id); --- one example where unsupported outer join is deep inside a subquery +-- one example where supported outer join is deep inside a subquery SELECT *, random() FROM ( SELECT *,random() FROM user_buy_test_table WHERE user_id > ( SELECT count(*) FROM (SELECT *,random() FROM (SELECT ref1.*, random() FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table ref2 on ref1.id = ref2.id) as bar) as foo LEFT JOIN (SELECT *, random() FROM (SELECT *,random() FROM user_buy_test_table d1 JOIN user_buy_test_table d2 USING (user_id)) as bar_inner ) as bar ON true)) as boo; diff --git a/src/test/regress/sql/non_colocated_subquery_joins.sql b/src/test/regress/sql/non_colocated_subquery_joins.sql index e1d25a691..bde8f5b0a 100644 --- a/src/test/regress/sql/non_colocated_subquery_joins.sql +++ b/src/test/regress/sql/non_colocated_subquery_joins.sql @@ -532,8 +532,6 @@ $$); -- recursive planning should kick in for outer joins as well --- but this time recursive planning might convert the query --- into a not supported join SELECT true AS valid FROM explain_json_2($$ SELECT diff --git a/src/test/regress/sql/subqueries_not_supported.sql b/src/test/regress/sql/subqueries_not_supported.sql index 242623a3f..1360a9313 100644 --- a/src/test/regress/sql/subqueries_not_supported.sql +++ b/src/test/regress/sql/subqueries_not_supported.sql @@ -68,15 +68,19 @@ FROM SET citus.enable_router_execution TO true; -- OUTER JOINs where the outer part is recursively planned and not the other way --- around is not supported +-- around are supported SELECT foo.value_2 +INTO result_table FROM - (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4) LIMIT 5) as foo + (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (1,2,3,4) ORDER BY users_table.value_2 LIMIT 5) as foo LEFT JOIN (SELECT users_table.value_2 FROM users_table, events_table WHERE users_table.user_id = events_table.user_id AND event_type IN (5,6,7,8)) as bar ON(foo.value_2 = bar.value_2); +SELECT COUNT(*) = 60 FROM result_table WHERE value_2 = 0; +SELECT COUNT(*) = 0 FROM result_table WHERE value_2 != 0; +DROP TABLE result_table; -- We do not support GROUPING SETS in subqueries -- This also includes ROLLUP or CUBE clauses diff --git a/src/test/regress/sql/with_basics.sql b/src/test/regress/sql/with_basics.sql index 8c0bf2823..ff3c8ebce 100644 --- a/src/test/regress/sql/with_basics.sql +++ b/src/test/regress/sql/with_basics.sql @@ -573,7 +573,8 @@ LEFT JOIN WHERE NOT EXISTS (SELECT NULL FROM distinct_undistribured - WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id; + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id +ORDER BY 1,2; -- same NOT EXISTS struct, but with CTE -- so should work @@ -658,7 +659,8 @@ LEFT JOIN WHERE NOT EXISTS (SELECT NULL FROM distinct_undistribured - WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id; + WHERE distinct_undistribured.user_id = test_cte_distributed.user_id)) exsist_in_distributed ON distinct_undistribured.user_id = exsist_in_distributed.user_id +ORDER BY 1,2; -- NOT EXISTS struct, with cte inlining disabled WITH distinct_undistribured AS MATERIALIZED(