From c660d01ae35a1b40b1361a59853941f5add69459 Mon Sep 17 00:00:00 2001 From: aykutbozkurt Date: Mon, 24 Apr 2023 17:11:28 +0300 Subject: [PATCH 1/4] preserve modified query and planner context for inlined planning --- .../distributed/planner/distributed_planner.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 38962b333..60eb426fc 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -809,8 +809,8 @@ InlineCtesAndCreateDistributedPlannedStmt(uint64 planId, { /* * We'll inline the CTEs and try distributed planning, preserve the original - * query in case the planning fails and we fallback to recursive planning of - * CTEs. + * query ,the modified query, and planner context in case the planning fails + * and we fallback to recursive planning of CTEs. */ Query *copyOfOriginalQuery = copyObject(planContext->originalQuery); @@ -819,13 +819,21 @@ InlineCtesAndCreateDistributedPlannedStmt(uint64 planId, /* after inlining, we shouldn't have any inlinable CTEs */ Assert(!QueryTreeContainsInlinableCTE(copyOfOriginalQuery)); + /* recompute modified query and planner context after we inlined the query */ + PlannerRestrictionContext *plannerContextForInlinedQuery = + CreateAndPushPlannerRestrictionContext(); + Query *copyOfInlinedOriginalQuery = copyObject(copyOfOriginalQuery); + standard_planner(copyOfInlinedOriginalQuery, NULL, 0, planContext->boundParams); + Query *modifiedInlinedQuery = copyOfInlinedOriginalQuery; + /* simply recurse into CreateDistributedPlannedStmt() in a PG_TRY() block */ PlannedStmt *result = TryCreateDistributedPlannedStmt(planContext->plan, copyOfOriginalQuery, - planContext->query, + modifiedInlinedQuery, planContext->boundParams, - planContext-> - plannerRestrictionContext); + plannerContextForInlinedQuery); + + 
PopPlannerRestrictionContext(); return result; } From d682452424551c828c39f2c71da8c14117d8d83a Mon Sep 17 00:00:00 2001 From: aykutbozkurt Date: Mon, 24 Apr 2023 17:50:42 +0300 Subject: [PATCH 2/4] refactor --- .../distributed/planner/distributed_planner.c | 83 ++++--------------- 1 file changed, 18 insertions(+), 65 deletions(-) diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 60eb426fc..67b0dc790 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -92,15 +92,8 @@ static bool ListContainsDistributedTableRTE(List *rangeTableList, bool *maybeHasForeignDistributedTable); static PlannedStmt * CreateDistributedPlannedStmt( DistributedPlanningContext *planContext); -static PlannedStmt * InlineCtesAndCreateDistributedPlannedStmt(uint64 planId, - DistributedPlanningContext - *planContext); -static PlannedStmt * TryCreateDistributedPlannedStmt(PlannedStmt *localPlan, - Query *originalQuery, - Query *query, ParamListInfo - boundParams, - PlannerRestrictionContext * - plannerRestrictionContext); +static PlannedStmt * TryInlineCtesAndCreateDistributedPlannedStmt( + DistributedPlanningContext *planContext); static DeferredErrorMessage * DeferErrorIfPartitionTableNotSingleReplicated(Oid relationId); @@ -711,7 +704,7 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext) * if the distributed planning fails with inlined CTEs, because recursively * planning CTEs can provide full SQL coverage, although it might be slow. 
*/ - resultPlan = InlineCtesAndCreateDistributedPlannedStmt(planId, planContext); + resultPlan = TryInlineCtesAndCreateDistributedPlannedStmt(planContext); if (resultPlan != NULL) { return resultPlan; @@ -796,7 +789,7 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext) /* - * InlineCtesAndCreateDistributedPlannedStmt gets all the parameters required + * TryInlineCtesAndCreateDistributedPlannedStmt gets all the parameters required * for creating a distributed planned statement. The function is primarily a * wrapper on top of CreateDistributedPlannedStmt(), by first inlining the * CTEs and calling CreateDistributedPlannedStmt() in PG_TRY() block. The @@ -804,68 +797,28 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext) * CTEs are inlined. */ static PlannedStmt * -InlineCtesAndCreateDistributedPlannedStmt(uint64 planId, - DistributedPlanningContext *planContext) -{ - /* - * We'll inline the CTEs and try distributed planning, preserve the original - * query ,the modified query, and planner context in case the planning fails - * and we fallback to recursive planning of CTEs. 
- */ - Query *copyOfOriginalQuery = copyObject(planContext->originalQuery); - - RecursivelyInlineCtesInQueryTree(copyOfOriginalQuery); - - /* after inlining, we shouldn't have any inlinable CTEs */ - Assert(!QueryTreeContainsInlinableCTE(copyOfOriginalQuery)); - - /* recompute modified query and planner context after we inlined the query */ - PlannerRestrictionContext *plannerContextForInlinedQuery = - CreateAndPushPlannerRestrictionContext(); - Query *copyOfInlinedOriginalQuery = copyObject(copyOfOriginalQuery); - standard_planner(copyOfInlinedOriginalQuery, NULL, 0, planContext->boundParams); - Query *modifiedInlinedQuery = copyOfInlinedOriginalQuery; - - /* simply recurse into CreateDistributedPlannedStmt() in a PG_TRY() block */ - PlannedStmt *result = TryCreateDistributedPlannedStmt(planContext->plan, - copyOfOriginalQuery, - modifiedInlinedQuery, - planContext->boundParams, - plannerContextForInlinedQuery); - - PopPlannerRestrictionContext(); - - return result; -} - - -/* - * TryCreateDistributedPlannedStmt is a wrapper around CreateDistributedPlannedStmt, simply - * calling it in PG_TRY()/PG_CATCH() block. The function returns a PlannedStmt if the input - * query can be planned by Citus. If not, the function returns NULL and generates a DEBUG4 - * message with the reason for the failure. 
- */ -static PlannedStmt * -TryCreateDistributedPlannedStmt(PlannedStmt *localPlan, - Query *originalQuery, - Query *query, ParamListInfo boundParams, - PlannerRestrictionContext *plannerRestrictionContext) +TryInlineCtesAndCreateDistributedPlannedStmt(DistributedPlanningContext *planContext) { MemoryContext savedContext = CurrentMemoryContext; PlannedStmt *result = NULL; - DistributedPlanningContext *planContext = palloc0(sizeof(DistributedPlanningContext)); - - planContext->plan = localPlan; - planContext->boundParams = boundParams; - planContext->originalQuery = originalQuery; - planContext->query = query; - planContext->plannerRestrictionContext = plannerRestrictionContext; + /* + * We'll inline the CTEs and try distributed planning, preserve the original + * query, the modified query, and planner context in case the planning fails + * and we fall back to recursive planning of CTEs. + */ + Query *inlinedQuery = copyObject(planContext->originalQuery); + RecursivelyInlineCtesInQueryTree(inlinedQuery); + /* after inlining, we shouldn't have any inlinable CTEs */ + Assert(!QueryTreeContainsInlinableCTE(inlinedQuery)); PG_TRY(); { - result = CreateDistributedPlannedStmt(planContext); + /* replan after we inlined the query */ + result = planner(inlinedQuery, + NULL, 0, + planContext->boundParams); } PG_CATCH(); { From a27557c4e157ff556014eb68404afdfcde753e70 Mon Sep 17 00:00:00 2001 From: aykutbozkurt Date: Mon, 24 Apr 2023 19:46:37 +0300 Subject: [PATCH 3/4] add test --- src/test/regress/expected/issue_6872.out | 105 +++++++++++++++++++++++ src/test/regress/multi_schedule | 2 +- src/test/regress/sql/issue_6872.sql | 85 ++++++++++++++++++ 3 files changed, 191 insertions(+), 1 deletion(-) create mode 100644 src/test/regress/expected/issue_6872.out create mode 100644 src/test/regress/sql/issue_6872.sql diff --git a/src/test/regress/expected/issue_6872.out b/src/test/regress/expected/issue_6872.out new file mode 100644 index 000000000..54e16aed9 --- /dev/null +++ 
b/src/test/regress/expected/issue_6872.out @@ -0,0 +1,105 @@ +CREATE SCHEMA issue_6872; +SET search_path to 'issue_6872'; +CREATE TABLE ref0(id int); +SELECT create_reference_table('ref0'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE ref1(id int); +SELECT create_reference_table('ref1'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist0(id int); +SELECT create_distributed_table('dist0','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE dist1(id int); +SELECT create_distributed_table('dist1','id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO dist0 SELECT i FROM generate_series(0,10) i; +INSERT INTO dist0 SELECT i FROM generate_series(1005,1025) i; +INSERT INTO dist0 SELECT * FROM dist0 ORDER BY id LIMIT 1; +INSERT INTO ref0 SELECT i FROM generate_series(0,10) i; +INSERT INTO ref0 SELECT i FROM generate_series(1010,1030) i; +INSERT INTO ref0 SELECT NULL FROM generate_series(0,1) i; +INSERT INTO ref0 SELECT * FROM ref0 ORDER BY id LIMIT 1; +INSERT INTO dist1 SELECT i FROM generate_series(0,10) i; +INSERT INTO dist1 SELECT i FROM generate_series(1015,1035) i; +INSERT INTO dist1 SELECT * FROM dist1 ORDER BY id LIMIT 1; +INSERT INTO ref1 SELECT i FROM generate_series(0,10) i; +INSERT INTO ref1 SELECT i FROM generate_series(1020,1040) i; +INSERT INTO ref1 SELECT NULL FROM generate_series(0,1) i; +INSERT INTO ref1 SELECT * FROM ref1 ORDER BY id LIMIT 1; +-- added to verify we fixed the issue https://github.com/citusdata/citus/issues/6872 +-- generated by Citus query generator tool +WITH cte_0 AS ( + SELECT + table_0.id + FROM + dist0 AS table_0 + INNER JOIN ref0 AS table_1 USING (id) + ORDER BY + id +) +SELECT + avg(avgsub.id) +FROM + ( + SELECT + table_2.id + 
FROM + ( + SELECT + table_3.id + FROM + cte_0 AS table_3 + ORDER BY + id + ) AS table_2 + WHERE + table_2.id IN ( + SELECT + table_4.id + FROM + dist0 AS table_4 + RIGHT JOIN ( + SELECT + table_6.id + FROM + dist0 AS table_6 + INNER JOIN ref1 AS table_7 USING (id) + ORDER BY + id + LIMIT + 28 + ) AS table_5 USING (id) + RIGHT JOIN dist1 AS table_8 USING (id) + WHERE + table_8.id = 6 + ORDER BY + id + ) + ) AS avgsub; + avg +--------------------------------------------------------------------- + 6.0000000000000000 +(1 row) + +DROP SCHEMA issue_6872 CASCADE; +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table ref0 +drop cascades to table ref1 +drop cascades to table dist0 +drop cascades to table dist1 diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index a78ee6088..1b8e7849c 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -95,7 +95,7 @@ test: multi_dropped_column_aliases foreign_key_restriction_enforcement test: binary_protocol test: alter_table_set_access_method test: alter_distributed_table -test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758 +test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758 issue_6872 test: object_propagation_debug test: undistribute_table test: run_command_on_all_nodes diff --git a/src/test/regress/sql/issue_6872.sql b/src/test/regress/sql/issue_6872.sql new file mode 100644 index 000000000..7934b6c45 --- /dev/null +++ b/src/test/regress/sql/issue_6872.sql @@ -0,0 +1,85 @@ +CREATE SCHEMA issue_6872; +SET search_path to 'issue_6872'; + +CREATE TABLE ref0(id int); +SELECT create_reference_table('ref0'); + +CREATE TABLE ref1(id int); +SELECT create_reference_table('ref1'); + +CREATE TABLE dist0(id int); +SELECT create_distributed_table('dist0','id'); + +CREATE TABLE dist1(id int); +SELECT create_distributed_table('dist1','id'); + +INSERT INTO dist0 SELECT i FROM generate_series(0,10) i; +INSERT INTO dist0 SELECT i FROM 
generate_series(1005,1025) i; +INSERT INTO dist0 SELECT * FROM dist0 ORDER BY id LIMIT 1; + +INSERT INTO ref0 SELECT i FROM generate_series(0,10) i; +INSERT INTO ref0 SELECT i FROM generate_series(1010,1030) i; +INSERT INTO ref0 SELECT NULL FROM generate_series(0,1) i; +INSERT INTO ref0 SELECT * FROM ref0 ORDER BY id LIMIT 1; + +INSERT INTO dist1 SELECT i FROM generate_series(0,10) i; +INSERT INTO dist1 SELECT i FROM generate_series(1015,1035) i; +INSERT INTO dist1 SELECT * FROM dist1 ORDER BY id LIMIT 1; + +INSERT INTO ref1 SELECT i FROM generate_series(0,10) i; +INSERT INTO ref1 SELECT i FROM generate_series(1020,1040) i; +INSERT INTO ref1 SELECT NULL FROM generate_series(0,1) i; +INSERT INTO ref1 SELECT * FROM ref1 ORDER BY id LIMIT 1; + +-- added to verify we fixed the issue https://github.com/citusdata/citus/issues/6872 +-- generated by Citus query generator tool +WITH cte_0 AS ( + SELECT + table_0.id + FROM + dist0 AS table_0 + INNER JOIN ref0 AS table_1 USING (id) + ORDER BY + id +) +SELECT + avg(avgsub.id) +FROM + ( + SELECT + table_2.id + FROM + ( + SELECT + table_3.id + FROM + cte_0 AS table_3 + ORDER BY + id + ) AS table_2 + WHERE + table_2.id IN ( + SELECT + table_4.id + FROM + dist0 AS table_4 + RIGHT JOIN ( + SELECT + table_6.id + FROM + dist0 AS table_6 + INNER JOIN ref1 AS table_7 USING (id) + ORDER BY + id + LIMIT + 28 + ) AS table_5 USING (id) + RIGHT JOIN dist1 AS table_8 USING (id) + WHERE + table_8.id = 6 + ORDER BY + id + ) + ) AS avgsub; + +DROP SCHEMA issue_6872 CASCADE; From 614df2a812aad130d71dc562716241331fedefe8 Mon Sep 17 00:00:00 2001 From: aykutbozkurt Date: Mon, 24 Apr 2023 22:26:12 +0300 Subject: [PATCH 4/4] fix test --- .../multi_level_recursive_queries.out | 55 +++++++------------ 1 file changed, 21 insertions(+), 34 deletions(-) diff --git a/src/test/regress/expected/multi_level_recursive_queries.out b/src/test/regress/expected/multi_level_recursive_queries.out index b2bf0a49c..5420c4ec2 100644 --- 
a/src/test/regress/expected/multi_level_recursive_queries.out +++ b/src/test/regress/expected/multi_level_recursive_queries.out @@ -82,17 +82,14 @@ DEBUG: Wrapping relation "tbl_dist1" to a subquery DEBUG: generating subplan XXX_1 for subquery SELECT id FROM multi_recursive.tbl_dist1 WHERE true DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id FROM multi_recursive.tbl_dist1 WHERE (id OPERATOR(pg_catalog.=) ANY (SELECT tbl_ref1.id FROM (multi_recursive.tbl_ref1 LEFT JOIN (SELECT tbl_dist1_2.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) tbl_dist1_2) tbl_dist1_1 USING (id)))) DEBUG: generating subplan XXX_2 for subquery SELECT id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) cte_0 LIMIT 0 -DEBUG: generating subplan XXX_3 for subquery SELECT table_5.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_5 RIGHT JOIN multi_recursive.tbl_dist1 USING (id)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(table_4.id) AS count FROM (multi_recursive.tbl_dist1 RIGHT JOIN (SELECT table_5.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_5 RIGHT JOIN multi_recursive.tbl_dist1 tbl_dist1_1 USING (id))) table_4 USING (id)) +DEBUG: generating subplan XXX_1 for subquery SELECT table_5.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_5 RIGHT JOIN multi_recursive.tbl_dist1 USING (id)) DEBUG: recursively planning left side of the right join since the outer side is a recurring rel DEBUG: recursively planning distributed relation "tbl_dist1" since it is 
part of a distributed join node that is outer joined with a recurring rel DEBUG: Wrapping relation "tbl_dist1" to a subquery -DEBUG: generating subplan XXX_4 for subquery SELECT id FROM multi_recursive.tbl_dist1 WHERE true -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(table_4.id) AS count FROM ((SELECT tbl_dist1_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) tbl_dist1_1) tbl_dist1 RIGHT JOIN (SELECT intermediate_result.id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 USING (id)) - count ---------------------------------------------------------------------- - 0 -(1 row) - +DEBUG: generating subplan XXX_2 for subquery SELECT id FROM multi_recursive.tbl_dist1 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(table_4.id) AS count FROM ((SELECT tbl_dist1_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) tbl_dist1_1) tbl_dist1 RIGHT JOIN (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 USING (id)) +ERROR: recursive complex joins are only supported when all distributed tables are co-located and joined on their distribution columns RESET client_min_messages; DROP TABLE IF EXISTS dist0; NOTICE: table "dist0" does not exist, skipping @@ -143,19 +140,15 @@ DEBUG: Wrapping relation "dist0" to a subquery DEBUG: generating subplan XXX_2 for subquery SELECT id FROM multi_recursive.dist0 WHERE true DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_2.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_2 FULL JOIN 
(SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub DEBUG: generating subplan XXX_1 for CTE cte_0: SELECT table_1.id FROM (multi_recursive.dist0 RIGHT JOIN multi_recursive.dist0 table_1 USING (id)) ORDER BY table_1.id -DEBUG: generating subplan XXX_1 for subquery SELECT table_5.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_5, multi_recursive.dist1 -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT table_3.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_3 JOIN multi_recursive.dist1 USING (id)) -DEBUG: generating subplan XXX_2 for subquery SELECT table_3.id FROM ((SELECT table_5.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_5, multi_recursive.dist1 dist1_1) table_3 JOIN multi_recursive.dist1 USING (id)) +DEBUG: generating subplan XXX_2 for subquery SELECT table_5.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_5, multi_recursive.dist1 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_2.id FROM ((SELECT table_3.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_3 JOIN multi_recursive.dist1 USING (id))) table_2 FULL JOIN multi_recursive.dist0 USING (id))) avgsub +DEBUG: generating subplan XXX_1 for subquery SELECT table_3.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) 
intermediate_result(id integer)) table_3 JOIN multi_recursive.dist1 USING (id)) DEBUG: recursively planning right side of the full join since the other side is a recurring rel DEBUG: recursively planning distributed relation "dist0" since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Wrapping relation "dist0" to a subquery -DEBUG: generating subplan XXX_3 for subquery SELECT id FROM multi_recursive.dist0 WHERE true -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_2.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_2 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub - avg ---------------------------------------------------------------------- - 1020.0000000000000000 -(1 row) - +DEBUG: generating subplan XXX_2 for subquery SELECT id FROM multi_recursive.dist0 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_2.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_2 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub +ERROR: recursive complex joins are only supported when all distributed tables are co-located and joined on their distribution columns RESET client_min_messages; DROP TABLE IF EXISTS dist0; CREATE TABLE dist0(id int); @@ -229,17 +222,14 @@ DEBUG: generating subplan XXX_2 for subquery SELECT id FROM multi_recursive.dis DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg 
FROM (SELECT table_4.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub DEBUG: generating subplan XXX_1 for CTE cte_0: SELECT table_0.id FROM (multi_recursive.dist1 table_0 LEFT JOIN multi_recursive.dist1 table_1 USING (id)) ORDER BY table_0.id LIMIT 41 DEBUG: push down of limit count: 41 -DEBUG: generating subplan XXX_2 for subquery SELECT table_5.id FROM (((SELECT table_6.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_6) table_5 JOIN multi_recursive.dist0 USING (id)) JOIN multi_recursive.dist1 table_9 USING (id)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_4.id FROM ((SELECT table_5.id FROM (((SELECT table_6.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_6) table_5 JOIN multi_recursive.dist0 dist0_1 USING (id)) JOIN multi_recursive.dist1 table_9 USING (id))) table_4 FULL JOIN multi_recursive.dist0 USING (id))) avgsub +DEBUG: generating subplan XXX_1 for subquery SELECT table_5.id FROM (((SELECT table_6.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_6) table_5 JOIN multi_recursive.dist0 USING (id)) JOIN multi_recursive.dist1 table_9 USING (id)) DEBUG: recursively planning right side of the full join since the other side is a recurring rel DEBUG: recursively planning distributed relation "dist0" since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Wrapping 
relation "dist0" to a subquery -DEBUG: generating subplan XXX_3 for subquery SELECT id FROM multi_recursive.dist0 WHERE true -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_4.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub - avg ---------------------------------------------------------------------- - 1.3095238095238095 -(1 row) - +DEBUG: generating subplan XXX_2 for subquery SELECT id FROM multi_recursive.dist0 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_4.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub +ERROR: recursive complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- QUERY6 -- recursive planner multipass the query with inlined cte and fails. Then, cte is planned without inlining and it succeeds. -- Why inlined query failed? 
@@ -268,17 +258,14 @@ DEBUG: generating subplan XXX_2 for subquery SELECT id FROM multi_recursive.dis DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_4.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub DEBUG: generating subplan XXX_1 for CTE cte_0: SELECT table_0.id FROM (multi_recursive.dist1 table_0 LEFT JOIN multi_recursive.dist1 table_1 USING (id)) ORDER BY table_0.id LIMIT 41 DEBUG: push down of limit count: 41 -DEBUG: generating subplan XXX_2 for subquery SELECT table_5.id FROM (((SELECT table_6.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_6) table_5 JOIN multi_recursive.dist0 USING (id)) JOIN multi_recursive.dist1 table_9 USING (id)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_4.id FROM ((SELECT table_5.id FROM (((SELECT table_6.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_6) table_5 JOIN multi_recursive.dist0 dist0_1 USING (id)) JOIN multi_recursive.dist1 table_9 USING (id))) table_4 FULL JOIN multi_recursive.dist0 USING (id))) avgsub +DEBUG: generating subplan XXX_1 for subquery SELECT table_5.id FROM (((SELECT table_6.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_6) table_5 JOIN multi_recursive.dist0 USING (id)) JOIN multi_recursive.dist1 table_9 USING (id)) DEBUG: recursively planning right side of the full join since the other 
side is a recurring rel DEBUG: recursively planning distributed relation "dist0" since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: Wrapping relation "dist0" to a subquery -DEBUG: generating subplan XXX_3 for subquery SELECT id FROM multi_recursive.dist0 WHERE true -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_4.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub - avg ---------------------------------------------------------------------- - 1.3095238095238095 -(1 row) - +DEBUG: generating subplan XXX_2 for subquery SELECT id FROM multi_recursive.dist0 WHERE true +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_4.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub +ERROR: recursive complex joins are only supported when all distributed tables are co-located and joined on their distribution columns -- QUERY7 -- recursive planner multipass the query and fails. Note that cte is not used in the query. -- Why inlined query failed?