From ad5ff1d01ae66792a43a3b499619de07f086a089 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Wed, 13 Mar 2019 17:43:49 +0300 Subject: [PATCH] Some queries lead to infinite recursion with recursive planning The rule for infinite recursion is the following: - If the query contains a subquery which is recursively planned, and no other subqueries can be recursively planned due to correlation (e.g., LATERAL joins), the planner keeps recursing again and again. One interesting thing here is that even if a subquery contains only intermediate result(s), we recursively plan it again. In the end, the logic in the code does the following: - Try recursively planning any of the subqueries in the query tree - If any subquery is recursively planned, call the planner again where the subquery is replaced with the intermediate result. - Try recursively planning any of the queries - If any subquery is recursively planned, call the planner again where the subquery (in this case it is already intermediate result) is replaced with the intermediate result. - Try recursively planning any of the queries - If any subquery is recursively planned, call the planner again where the subquery (in this case it is already intermediate result) is replaced with the intermediate result. - Try recursively planning any of the queries - If any subquery is recursively planned, call the planner again where the subquery (in this case it is already intermediate result) is replaced with the intermediate result. ...... 
--- .../planner/query_colocation_checker.c | 13 ++ .../expected/non_colocated_subquery_joins.out | 122 ++++++++++++++++++ .../sql/non_colocated_subquery_joins.sql | 91 +++++++++++++ 3 files changed, 226 insertions(+) diff --git a/src/backend/distributed/planner/query_colocation_checker.c b/src/backend/distributed/planner/query_colocation_checker.c index f3154e168..83fbd6a9c 100644 --- a/src/backend/distributed/planner/query_colocation_checker.c +++ b/src/backend/distributed/planner/query_colocation_checker.c @@ -25,6 +25,7 @@ #include "distributed/query_colocation_checker.h" #include "distributed/pg_dist_partition.h" #include "distributed/relation_restriction_equivalence.h" +#include "distributed/multi_logical_planner.h" /* only to access utility functions */ #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "parser/parsetree.h" @@ -194,6 +195,18 @@ SubqueryColocated(Query *subquery, ColocatedJoinChecker *checker) RelationRestrictionContext *unionedRelationRestrictionContext = NULL; PlannerRestrictionContext *unionedPlannerRestrictionContext = NULL; + /* + * There are no relations in the input subquery, such as a subquery + * that consists of only intermediate results or has no FROM + * clause. + */ + if (list_length(filteredRestrictionList) == 0) + { + Assert(!QueryContainsDistributedTableRTE(subquery)); + + return true; + } + /* * We merge the relation restrictions of the input subquery and the anchor * restrictions to form a temporary relation restriction context. 
The aim of diff --git a/src/test/regress/expected/non_colocated_subquery_joins.out b/src/test/regress/expected/non_colocated_subquery_joins.out index 44c74dfa0..dbac68871 100644 --- a/src/test/regress/expected/non_colocated_subquery_joins.out +++ b/src/test/regress/expected/non_colocated_subquery_joins.out @@ -916,6 +916,128 @@ DEBUG: Plan 97 query after replacing subqueries and CTEs: SELECT count(*) AS co t (1 row) +-- make sure to skip calling recursive planning over and over again +-- for already recursively planned subqueries +SET client_min_messages TO DEBUG2; +SELECT * +FROM + (SELECT * + FROM users_table + OFFSET 0) AS users_table +JOIN LATERAL + (SELECT * + FROM + (SELECT * + FROM events_table + WHERE user_id = users_table.user_id) AS bar + LEFT JOIN users_table u2 ON u2.user_id = bar.value_2) AS foo ON TRUE; +DEBUG: generating subplan 99_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table OFFSET 0 +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: Plan 99 query after replacing subqueries and CTEs: SELECT users_table.user_id, users_table."time", users_table.value_1, users_table.value_2, users_table.value_3, users_table.value_4, foo.user_id, foo."time", foo.event_type, foo.value_2, foo.value_3, foo.value_4, foo.user_id_1 AS user_id, foo.time_1 AS "time", foo.value_1, foo.value_2_1 AS value_2, foo.value_3_1 AS value_3, foo.value_4_1 AS value_4 FROM ((SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('99_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time 
zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table JOIN LATERAL (SELECT bar.user_id, bar."time", bar.event_type, bar.value_2, bar.value_3, bar.value_4, u2.user_id, u2."time", u2.value_1, u2.value_2, u2.value_3, u2.value_4 FROM ((SELECT events_table.user_id, events_table."time", events_table.event_type, events_table.value_2, events_table.value_3, events_table.value_4 FROM public.events_table WHERE (events_table.user_id OPERATOR(pg_catalog.=) users_table.user_id)) bar LEFT JOIN public.users_table u2 ON ((u2.user_id OPERATOR(pg_catalog.=) bar.value_2)))) foo(user_id, "time", event_type, value_2, value_3, value_4, user_id_1, time_1, value_1, value_2_1, value_3_1, value_4_1) ON (true)) +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +-- similar to the above, make sure that we skip recursive plannig when +-- the subquery doesn't have any tables +SELECT true AS valid FROM explain_json_2($$ +SELECT * +FROM + (SELECT 1 AS user_id) AS users_table +JOIN LATERAL + (SELECT * + FROM + (SELECT * + FROM events_table + WHERE user_id = users_table.user_id) AS bar + LEFT JOIN users_table u2 ON u2.user_id = bar.value_2) AS foo ON TRUE +$$); +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +DEBUG: skipping recursive planning for the subquery since it contains references to outer queries +ERROR: cannot perform distributed planning on this query +-- similar to the above, make sure that we skip recursive plannig when +-- the 
subquery contains only intermediate results +SELECT * +FROM + ( + SELECT * FROM( + SELECT * + FROM users_table + EXCEPT + SELECT * + FROM users_table + WHERE value_1 > 2 + ) AS users_table_union + ) AS users_table_limited +JOIN LATERAL + (SELECT * + FROM + (SELECT * + FROM + (SELECT * + FROM events_table WHERE value_3 > 4 + INTERSECT + SELECT * + FROM events_table + WHERE value_2 > 2 + ) AS events_table + WHERE user_id = users_table_limited.user_id) AS bar + LEFT JOIN users_table u2 ON u2.user_id = bar.value_2) AS foo ON TRUE; +DEBUG: generating subplan 102_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table +DEBUG: generating subplan 102_2 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table WHERE (value_1 OPERATOR(pg_catalog.>) 2) +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 102_3 for subquery SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('102_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint) EXCEPT SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('102_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint) +DEBUG: generating subplan 102_4 for subquery SELECT user_id, "time", event_type, value_2, value_3, value_4 FROM public.events_table WHERE (value_3 OPERATOR(pg_catalog.>) (4)::double precision) +DEBUG: generating subplan 102_5 for subquery SELECT 
user_id, "time", event_type, value_2, value_3, value_4 FROM public.events_table WHERE (value_2 OPERATOR(pg_catalog.>) 2) +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 102_6 for subquery SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('102_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint) INTERSECT SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('102_5'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint) +DEBUG: Plan 102 query after replacing subqueries and CTEs: SELECT users_table_limited.user_id, users_table_limited."time", users_table_limited.value_1, users_table_limited.value_2, users_table_limited.value_3, users_table_limited.value_4, foo.user_id, foo."time", foo.event_type, foo.value_2, foo.value_3, foo.value_4, foo.user_id_1 AS user_id, foo.time_1 AS "time", foo.value_1, foo.value_2_1 AS value_2, foo.value_3_1 AS value_3, foo.value_4_1 AS value_4 FROM ((SELECT users_table_union.user_id, users_table_union."time", users_table_union.value_1, users_table_union.value_2, users_table_union.value_3, users_table_union.value_4 FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('102_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" 
timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) users_table_union) users_table_limited JOIN LATERAL (SELECT bar.user_id, bar."time", bar.event_type, bar.value_2, bar.value_3, bar.value_4, u2.user_id, u2."time", u2.value_1, u2.value_2, u2.value_3, u2.value_4 FROM ((SELECT events_table.user_id, events_table."time", events_table.event_type, events_table.value_2, events_table.value_3, events_table.value_4 FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('102_6'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint)) events_table WHERE (events_table.user_id OPERATOR(pg_catalog.=) users_table_limited.user_id)) bar LEFT JOIN public.users_table u2 ON ((u2.user_id OPERATOR(pg_catalog.=) bar.value_2)))) foo(user_id, "time", event_type, value_2, value_3, value_4, user_id_1, time_1, value_1, value_2_1, value_3_1, value_4_1) ON (true)) +ERROR: cannot pushdown the subquery +-- similar to the above, but this time there are multiple +-- non-colocated subquery joins one of them contains lateral +-- join +SELECT count(*) FROM events_table WHERE user_id NOT IN +( + SELECT users_table_limited.user_id + FROM + (SELECT * + FROM users_table + EXCEPT + SELECT * + FROM users_table + WHERE value_1 > 2 + ) AS users_table_limited + JOIN LATERAL + (SELECT * + FROM + (SELECT * + FROM + (SELECT * + FROM events_table WHERE value_3 > 4 + INTERSECT + SELECT * + FROM events_table + WHERE value_2 > 2 + ) AS events_table + WHERE user_id = users_table_limited.user_id) AS bar + LEFT JOIN users_table u2 ON u2.user_id = bar.value_2) AS foo ON TRUE + ); +DEBUG: generating subplan 109_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, 
value_4 FROM public.users_table +DEBUG: generating subplan 109_2 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table WHERE (value_1 OPERATOR(pg_catalog.>) 2) +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 109_3 for subquery SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('109_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint) EXCEPT SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('109_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint) +DEBUG: generating subplan 109_4 for subquery SELECT user_id, "time", event_type, value_2, value_3, value_4 FROM public.events_table WHERE (value_3 OPERATOR(pg_catalog.>) (4)::double precision) +DEBUG: generating subplan 109_5 for subquery SELECT user_id, "time", event_type, value_2, value_3, value_4 FROM public.events_table WHERE (value_2 OPERATOR(pg_catalog.>) 2) +DEBUG: Creating router plan +DEBUG: Plan is router executable +DEBUG: generating subplan 109_6 for subquery SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('109_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 
bigint) INTERSECT SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('109_5'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint) +ERROR: cannot pushdown the subquery RESET client_min_messages; DROP FUNCTION explain_json_2(text); SET search_path TO 'public'; diff --git a/src/test/regress/sql/non_colocated_subquery_joins.sql b/src/test/regress/sql/non_colocated_subquery_joins.sql index 4fe86503f..03e233395 100644 --- a/src/test/regress/sql/non_colocated_subquery_joins.sql +++ b/src/test/regress/sql/non_colocated_subquery_joins.sql @@ -691,6 +691,97 @@ SELECT true AS valid FROM explain_json_2($$ AND foo.user_id = bar.value_2; $$); +-- make sure to skip calling recursive planning over and over again +-- for already recursively planned subqueries +SET client_min_messages TO DEBUG2; +SELECT * +FROM + (SELECT * + FROM users_table + OFFSET 0) AS users_table +JOIN LATERAL + (SELECT * + FROM + (SELECT * + FROM events_table + WHERE user_id = users_table.user_id) AS bar + LEFT JOIN users_table u2 ON u2.user_id = bar.value_2) AS foo ON TRUE; + +-- similar to the above, make sure that we skip recursive plannig when +-- the subquery doesn't have any tables +SELECT true AS valid FROM explain_json_2($$ +SELECT * +FROM + (SELECT 1 AS user_id) AS users_table +JOIN LATERAL + (SELECT * + FROM + (SELECT * + FROM events_table + WHERE user_id = users_table.user_id) AS bar + LEFT JOIN users_table u2 ON u2.user_id = bar.value_2) AS foo ON TRUE +$$); + +-- similar to the above, make sure that we skip recursive plannig when +-- the subquery contains only intermediate results +SELECT * +FROM + ( + SELECT * FROM( + SELECT * + FROM users_table + EXCEPT + SELECT * + FROM users_table + WHERE value_1 > 2 + 
) AS users_table_union + ) AS users_table_limited +JOIN LATERAL + (SELECT * + FROM + (SELECT * + FROM + (SELECT * + FROM events_table WHERE value_3 > 4 + INTERSECT + SELECT * + FROM events_table + WHERE value_2 > 2 + ) AS events_table + WHERE user_id = users_table_limited.user_id) AS bar + LEFT JOIN users_table u2 ON u2.user_id = bar.value_2) AS foo ON TRUE; + +-- similar to the above, but this time there are multiple +-- non-colocated subquery joins one of them contains lateral +-- join +SELECT count(*) FROM events_table WHERE user_id NOT IN +( + SELECT users_table_limited.user_id + FROM + (SELECT * + FROM users_table + EXCEPT + SELECT * + FROM users_table + WHERE value_1 > 2 + ) AS users_table_limited + JOIN LATERAL + (SELECT * + FROM + (SELECT * + FROM + (SELECT * + FROM events_table WHERE value_3 > 4 + INTERSECT + SELECT * + FROM events_table + WHERE value_2 > 2 + ) AS events_table + WHERE user_id = users_table_limited.user_id) AS bar + LEFT JOIN users_table u2 ON u2.user_id = bar.value_2) AS foo ON TRUE + ); + + RESET client_min_messages; DROP FUNCTION explain_json_2(text);