pull/6876/merge
aykut-bozkurt 2024-03-08 08:24:20 -08:00 committed by GitHub
commit 5265df0a7a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 230 additions and 92 deletions

View File

@ -97,15 +97,8 @@ static bool ListContainsDistributedTableRTE(List *rangeTableList,
bool *maybeHasForeignDistributedTable); bool *maybeHasForeignDistributedTable);
static PlannedStmt * CreateDistributedPlannedStmt( static PlannedStmt * CreateDistributedPlannedStmt(
DistributedPlanningContext *planContext); DistributedPlanningContext *planContext);
static PlannedStmt * InlineCtesAndCreateDistributedPlannedStmt(uint64 planId, static PlannedStmt * TryInlineCtesAndCreateDistributedPlannedStmt(
DistributedPlanningContext DistributedPlanningContext *planContext);
*planContext);
static PlannedStmt * TryCreateDistributedPlannedStmt(PlannedStmt *localPlan,
Query *originalQuery,
Query *query, ParamListInfo
boundParams,
PlannerRestrictionContext *
plannerRestrictionContext);
static DeferredErrorMessage * DeferErrorIfPartitionTableNotSingleReplicated(Oid static DeferredErrorMessage * DeferErrorIfPartitionTableNotSingleReplicated(Oid
relationId); relationId);
@ -734,7 +727,7 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext)
* if the distributed planning fails with inlined CTEs, because recursively * if the distributed planning fails with inlined CTEs, because recursively
* planning CTEs can provide full SQL coverage, although it might be slow. * planning CTEs can provide full SQL coverage, although it might be slow.
*/ */
resultPlan = InlineCtesAndCreateDistributedPlannedStmt(planId, planContext); resultPlan = TryInlineCtesAndCreateDistributedPlannedStmt(planContext);
if (resultPlan != NULL) if (resultPlan != NULL)
{ {
return resultPlan; return resultPlan;
@ -819,7 +812,7 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext)
/* /*
* InlineCtesAndCreateDistributedPlannedStmt gets all the parameters required * TryInlineCtesAndCreateDistributedPlannedStmt gets all the parameters required
* for creating a distributed planned statement. The function is primarily a * for creating a distributed planned statement. The function is primarily a
* wrapper on top of CreateDistributedPlannedStmt(), by first inlining the * wrapper on top of CreateDistributedPlannedStmt(), by first inlining the
* CTEs and calling CreateDistributedPlannedStmt() in PG_TRY() block. The * CTEs and calling CreateDistributedPlannedStmt() in PG_TRY() block. The
@ -827,60 +820,28 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext)
* CTEs are inlined. * CTEs are inlined.
*/ */
static PlannedStmt * static PlannedStmt *
InlineCtesAndCreateDistributedPlannedStmt(uint64 planId, TryInlineCtesAndCreateDistributedPlannedStmt(DistributedPlanningContext *planContext)
DistributedPlanningContext *planContext)
{
/*
* We'll inline the CTEs and try distributed planning, preserve the original
* query in case the planning fails and we fallback to recursive planning of
* CTEs.
*/
Query *copyOfOriginalQuery = copyObject(planContext->originalQuery);
RecursivelyInlineCtesInQueryTree(copyOfOriginalQuery);
/* after inlining, we shouldn't have any inlinable CTEs */
Assert(!QueryTreeContainsInlinableCTE(copyOfOriginalQuery));
/* simply recurse into CreateDistributedPlannedStmt() in a PG_TRY() block */
PlannedStmt *result = TryCreateDistributedPlannedStmt(planContext->plan,
copyOfOriginalQuery,
planContext->query,
planContext->boundParams,
planContext->
plannerRestrictionContext);
return result;
}
/*
* TryCreateDistributedPlannedStmt is a wrapper around CreateDistributedPlannedStmt, simply
* calling it in PG_TRY()/PG_CATCH() block. The function returns a PlannedStmt if the input
* query can be planned by Citus. If not, the function returns NULL and generates a DEBUG4
* message with the reason for the failure.
*/
static PlannedStmt *
TryCreateDistributedPlannedStmt(PlannedStmt *localPlan,
Query *originalQuery,
Query *query, ParamListInfo boundParams,
PlannerRestrictionContext *plannerRestrictionContext)
{ {
MemoryContext savedContext = CurrentMemoryContext; MemoryContext savedContext = CurrentMemoryContext;
PlannedStmt *result = NULL; PlannedStmt *result = NULL;
DistributedPlanningContext *planContext = palloc0(sizeof(DistributedPlanningContext)); /*
* We'll inline the CTEs and try distributed planning, preserve the original
planContext->plan = localPlan; * query, the modified query, and planner context in case the planning fails
planContext->boundParams = boundParams; * and we fallback to recursive planning of CTEs.
planContext->originalQuery = originalQuery; */
planContext->query = query; Query *inlinedQuery = copyObject(planContext->originalQuery);
planContext->plannerRestrictionContext = plannerRestrictionContext; RecursivelyInlineCtesInQueryTree(inlinedQuery);
/* after inlining, we shouldn't have any inlinable CTEs */
Assert(!QueryTreeContainsInlinableCTE(inlinedQuery));
PG_TRY(); PG_TRY();
{ {
result = CreateDistributedPlannedStmt(planContext); /* replan after we inlined the query */
result = planner(inlinedQuery,
NULL, 0,
planContext->boundParams);
} }
PG_CATCH(); PG_CATCH();
{ {

View File

@ -0,0 +1,105 @@
CREATE SCHEMA issue_6872;
SET search_path to 'issue_6872';
CREATE TABLE ref0(id int);
SELECT create_reference_table('ref0');
create_reference_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE ref1(id int);
SELECT create_reference_table('ref1');
create_reference_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE dist0(id int);
SELECT create_distributed_table('dist0','id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
CREATE TABLE dist1(id int);
SELECT create_distributed_table('dist1','id');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO dist0 SELECT i FROM generate_series(0,10) i;
INSERT INTO dist0 SELECT i FROM generate_series(1005,1025) i;
INSERT INTO dist0 SELECT * FROM dist0 ORDER BY id LIMIT 1;
INSERT INTO ref0 SELECT i FROM generate_series(0,10) i;
INSERT INTO ref0 SELECT i FROM generate_series(1010,1030) i;
INSERT INTO ref0 SELECT NULL FROM generate_series(0,1) i;
INSERT INTO ref0 SELECT * FROM ref0 ORDER BY id LIMIT 1;
INSERT INTO dist1 SELECT i FROM generate_series(0,10) i;
INSERT INTO dist1 SELECT i FROM generate_series(1015,1035) i;
INSERT INTO dist1 SELECT * FROM dist1 ORDER BY id LIMIT 1;
INSERT INTO ref1 SELECT i FROM generate_series(0,10) i;
INSERT INTO ref1 SELECT i FROM generate_series(1020,1040) i;
INSERT INTO ref1 SELECT NULL FROM generate_series(0,1) i;
INSERT INTO ref1 SELECT * FROM ref1 ORDER BY id LIMIT 1;
-- added to verify we fixed the issue https://github.com/citusdata/citus/issues/6872
-- generated by Citus query generator tool
WITH cte_0 AS (
SELECT
table_0.id
FROM
dist0 AS table_0
INNER JOIN ref0 AS table_1 USING (id)
ORDER BY
id
)
SELECT
avg(avgsub.id)
FROM
(
SELECT
table_2.id
FROM
(
SELECT
table_3.id
FROM
cte_0 AS table_3
ORDER BY
id
) AS table_2
WHERE
table_2.id IN (
SELECT
table_4.id
FROM
dist0 AS table_4
RIGHT JOIN (
SELECT
table_6.id
FROM
dist0 AS table_6
INNER JOIN ref1 AS table_7 USING (id)
ORDER BY
id
LIMIT
28
) AS table_5 USING (id)
RIGHT JOIN dist1 AS table_8 USING (id)
WHERE
table_8.id = 6
ORDER BY
id
)
) AS avgsub;
avg
---------------------------------------------------------------------
6.0000000000000000
(1 row)
DROP SCHEMA issue_6872 CASCADE;
NOTICE: drop cascades to 4 other objects
DETAIL: drop cascades to table ref0
drop cascades to table ref1
drop cascades to table dist0
drop cascades to table dist1

View File

@ -82,17 +82,14 @@ DEBUG: Wrapping relation "tbl_dist1" to a subquery
DEBUG: generating subplan XXX_1 for subquery SELECT id FROM multi_recursive.tbl_dist1 WHERE true DEBUG: generating subplan XXX_1 for subquery SELECT id FROM multi_recursive.tbl_dist1 WHERE true
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id FROM multi_recursive.tbl_dist1 WHERE (id OPERATOR(pg_catalog.=) ANY (SELECT tbl_ref1.id FROM (multi_recursive.tbl_ref1 LEFT JOIN (SELECT tbl_dist1_2.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) tbl_dist1_2) tbl_dist1_1 USING (id)))) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id FROM multi_recursive.tbl_dist1 WHERE (id OPERATOR(pg_catalog.=) ANY (SELECT tbl_ref1.id FROM (multi_recursive.tbl_ref1 LEFT JOIN (SELECT tbl_dist1_2.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) tbl_dist1_2) tbl_dist1_1 USING (id))))
DEBUG: generating subplan XXX_2 for subquery SELECT id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) cte_0 LIMIT 0 DEBUG: generating subplan XXX_2 for subquery SELECT id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) cte_0 LIMIT 0
DEBUG: generating subplan XXX_3 for subquery SELECT table_5.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_5 RIGHT JOIN multi_recursive.tbl_dist1 USING (id)) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(table_4.id) AS count FROM (multi_recursive.tbl_dist1 RIGHT JOIN (SELECT table_5.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_5 RIGHT JOIN multi_recursive.tbl_dist1 tbl_dist1_1 USING (id))) table_4 USING (id))
DEBUG: generating subplan XXX_1 for subquery SELECT table_5.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_5 RIGHT JOIN multi_recursive.tbl_dist1 USING (id))
DEBUG: recursively planning left side of the right join since the outer side is a recurring rel DEBUG: recursively planning left side of the right join since the outer side is a recurring rel
DEBUG: recursively planning distributed relation "tbl_dist1" since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: recursively planning distributed relation "tbl_dist1" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "tbl_dist1" to a subquery DEBUG: Wrapping relation "tbl_dist1" to a subquery
DEBUG: generating subplan XXX_4 for subquery SELECT id FROM multi_recursive.tbl_dist1 WHERE true DEBUG: generating subplan XXX_2 for subquery SELECT id FROM multi_recursive.tbl_dist1 WHERE true
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(table_4.id) AS count FROM ((SELECT tbl_dist1_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) tbl_dist1_1) tbl_dist1 RIGHT JOIN (SELECT intermediate_result.id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 USING (id)) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(table_4.id) AS count FROM ((SELECT tbl_dist1_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) tbl_dist1_1) tbl_dist1 RIGHT JOIN (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 USING (id))
count ERROR: recursive complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
---------------------------------------------------------------------
0
(1 row)
RESET client_min_messages; RESET client_min_messages;
DROP TABLE IF EXISTS dist0; DROP TABLE IF EXISTS dist0;
NOTICE: table "dist0" does not exist, skipping NOTICE: table "dist0" does not exist, skipping
@ -143,19 +140,15 @@ DEBUG: Wrapping relation "dist0" to a subquery
DEBUG: generating subplan XXX_2 for subquery SELECT id FROM multi_recursive.dist0 WHERE true DEBUG: generating subplan XXX_2 for subquery SELECT id FROM multi_recursive.dist0 WHERE true
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_2.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_2 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_2.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_2 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub
DEBUG: generating subplan XXX_1 for CTE cte_0: SELECT table_1.id FROM (multi_recursive.dist0 RIGHT JOIN multi_recursive.dist0 table_1 USING (id)) ORDER BY table_1.id DEBUG: generating subplan XXX_1 for CTE cte_0: SELECT table_1.id FROM (multi_recursive.dist0 RIGHT JOIN multi_recursive.dist0 table_1 USING (id)) ORDER BY table_1.id
DEBUG: generating subplan XXX_1 for subquery SELECT table_5.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_5, multi_recursive.dist1 DEBUG: generating subplan XXX_2 for subquery SELECT table_5.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_5, multi_recursive.dist1
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT table_3.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_3 JOIN multi_recursive.dist1 USING (id)) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_2.id FROM ((SELECT table_3.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_3 JOIN multi_recursive.dist1 USING (id))) table_2 FULL JOIN multi_recursive.dist0 USING (id))) avgsub
DEBUG: generating subplan XXX_2 for subquery SELECT table_3.id FROM ((SELECT table_5.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_5, multi_recursive.dist1 dist1_1) table_3 JOIN multi_recursive.dist1 USING (id)) DEBUG: generating subplan XXX_1 for subquery SELECT table_3.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_3 JOIN multi_recursive.dist1 USING (id))
DEBUG: recursively planning right side of the full join since the other side is a recurring rel DEBUG: recursively planning right side of the full join since the other side is a recurring rel
DEBUG: recursively planning distributed relation "dist0" since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: recursively planning distributed relation "dist0" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "dist0" to a subquery DEBUG: Wrapping relation "dist0" to a subquery
DEBUG: generating subplan XXX_3 for subquery SELECT id FROM multi_recursive.dist0 WHERE true DEBUG: generating subplan XXX_2 for subquery SELECT id FROM multi_recursive.dist0 WHERE true
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_2.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_2 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_2.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_2 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub
avg ERROR: recursive complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
---------------------------------------------------------------------
1020.0000000000000000
(1 row)
RESET client_min_messages; RESET client_min_messages;
DROP TABLE IF EXISTS dist0; DROP TABLE IF EXISTS dist0;
CREATE TABLE dist0(id int); CREATE TABLE dist0(id int);
@ -229,17 +222,14 @@ DEBUG: generating subplan XXX_2 for subquery SELECT id FROM multi_recursive.dis
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_4.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_4.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub
DEBUG: generating subplan XXX_1 for CTE cte_0: SELECT table_0.id FROM (multi_recursive.dist1 table_0 LEFT JOIN multi_recursive.dist1 table_1 USING (id)) ORDER BY table_0.id LIMIT 41 DEBUG: generating subplan XXX_1 for CTE cte_0: SELECT table_0.id FROM (multi_recursive.dist1 table_0 LEFT JOIN multi_recursive.dist1 table_1 USING (id)) ORDER BY table_0.id LIMIT 41
DEBUG: push down of limit count: 41 DEBUG: push down of limit count: 41
DEBUG: generating subplan XXX_2 for subquery SELECT table_5.id FROM (((SELECT table_6.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_6) table_5 JOIN multi_recursive.dist0 USING (id)) JOIN multi_recursive.dist1 table_9 USING (id)) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_4.id FROM ((SELECT table_5.id FROM (((SELECT table_6.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_6) table_5 JOIN multi_recursive.dist0 dist0_1 USING (id)) JOIN multi_recursive.dist1 table_9 USING (id))) table_4 FULL JOIN multi_recursive.dist0 USING (id))) avgsub
DEBUG: generating subplan XXX_1 for subquery SELECT table_5.id FROM (((SELECT table_6.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_6) table_5 JOIN multi_recursive.dist0 USING (id)) JOIN multi_recursive.dist1 table_9 USING (id))
DEBUG: recursively planning right side of the full join since the other side is a recurring rel DEBUG: recursively planning right side of the full join since the other side is a recurring rel
DEBUG: recursively planning distributed relation "dist0" since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: recursively planning distributed relation "dist0" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "dist0" to a subquery DEBUG: Wrapping relation "dist0" to a subquery
DEBUG: generating subplan XXX_3 for subquery SELECT id FROM multi_recursive.dist0 WHERE true DEBUG: generating subplan XXX_2 for subquery SELECT id FROM multi_recursive.dist0 WHERE true
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_4.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_4.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub
avg ERROR: recursive complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
---------------------------------------------------------------------
1.3095238095238095
(1 row)
-- QUERY6 -- QUERY6
-- recursive planner multipass the query with inlined cte and fails. Then, cte is planned without inlining and it succeeds. -- recursive planner multipass the query with inlined cte and fails. Then, cte is planned without inlining and it succeeds.
-- Why inlined query failed? -- Why inlined query failed?
@ -268,17 +258,14 @@ DEBUG: generating subplan XXX_2 for subquery SELECT id FROM multi_recursive.dis
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_4.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_4.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub
DEBUG: generating subplan XXX_1 for CTE cte_0: SELECT table_0.id FROM (multi_recursive.dist1 table_0 LEFT JOIN multi_recursive.dist1 table_1 USING (id)) ORDER BY table_0.id LIMIT 41 DEBUG: generating subplan XXX_1 for CTE cte_0: SELECT table_0.id FROM (multi_recursive.dist1 table_0 LEFT JOIN multi_recursive.dist1 table_1 USING (id)) ORDER BY table_0.id LIMIT 41
DEBUG: push down of limit count: 41 DEBUG: push down of limit count: 41
DEBUG: generating subplan XXX_2 for subquery SELECT table_5.id FROM (((SELECT table_6.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_6) table_5 JOIN multi_recursive.dist0 USING (id)) JOIN multi_recursive.dist1 table_9 USING (id)) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_4.id FROM ((SELECT table_5.id FROM (((SELECT table_6.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_6) table_5 JOIN multi_recursive.dist0 dist0_1 USING (id)) JOIN multi_recursive.dist1 table_9 USING (id))) table_4 FULL JOIN multi_recursive.dist0 USING (id))) avgsub
DEBUG: generating subplan XXX_1 for subquery SELECT table_5.id FROM (((SELECT table_6.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_6) table_5 JOIN multi_recursive.dist0 USING (id)) JOIN multi_recursive.dist1 table_9 USING (id))
DEBUG: recursively planning right side of the full join since the other side is a recurring rel DEBUG: recursively planning right side of the full join since the other side is a recurring rel
DEBUG: recursively planning distributed relation "dist0" since it is part of a distributed join node that is outer joined with a recurring rel DEBUG: recursively planning distributed relation "dist0" since it is part of a distributed join node that is outer joined with a recurring rel
DEBUG: Wrapping relation "dist0" to a subquery DEBUG: Wrapping relation "dist0" to a subquery
DEBUG: generating subplan XXX_3 for subquery SELECT id FROM multi_recursive.dist0 WHERE true DEBUG: generating subplan XXX_2 for subquery SELECT id FROM multi_recursive.dist0 WHERE true
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_4.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT avg(id) AS avg FROM (SELECT table_4.id FROM ((SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) table_4 FULL JOIN (SELECT dist0_1.id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) dist0_1) dist0 USING (id))) avgsub
avg ERROR: recursive complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
---------------------------------------------------------------------
1.3095238095238095
(1 row)
-- QUERY7 -- QUERY7
-- recursive planner multipass the query and fails. Note that cte is not used in the query. -- recursive planner multipass the query and fails. Note that cte is not used in the query.
-- Why inlined query failed? -- Why inlined query failed?

View File

@ -103,7 +103,7 @@ test: multi_dropped_column_aliases foreign_key_restriction_enforcement
test: binary_protocol test: binary_protocol
test: alter_table_set_access_method test: alter_table_set_access_method
test: alter_distributed_table test: alter_distributed_table
test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758 test: issue_5248 issue_5099 issue_5763 issue_6543 issue_6758 issue_6872
test: object_propagation_debug test: object_propagation_debug
test: undistribute_table test: undistribute_table
test: run_command_on_all_nodes test: run_command_on_all_nodes

View File

@ -0,0 +1,85 @@
CREATE SCHEMA issue_6872;
SET search_path to 'issue_6872';
CREATE TABLE ref0(id int);
SELECT create_reference_table('ref0');
CREATE TABLE ref1(id int);
SELECT create_reference_table('ref1');
CREATE TABLE dist0(id int);
SELECT create_distributed_table('dist0','id');
CREATE TABLE dist1(id int);
SELECT create_distributed_table('dist1','id');
INSERT INTO dist0 SELECT i FROM generate_series(0,10) i;
INSERT INTO dist0 SELECT i FROM generate_series(1005,1025) i;
INSERT INTO dist0 SELECT * FROM dist0 ORDER BY id LIMIT 1;
INSERT INTO ref0 SELECT i FROM generate_series(0,10) i;
INSERT INTO ref0 SELECT i FROM generate_series(1010,1030) i;
INSERT INTO ref0 SELECT NULL FROM generate_series(0,1) i;
INSERT INTO ref0 SELECT * FROM ref0 ORDER BY id LIMIT 1;
INSERT INTO dist1 SELECT i FROM generate_series(0,10) i;
INSERT INTO dist1 SELECT i FROM generate_series(1015,1035) i;
INSERT INTO dist1 SELECT * FROM dist1 ORDER BY id LIMIT 1;
INSERT INTO ref1 SELECT i FROM generate_series(0,10) i;
INSERT INTO ref1 SELECT i FROM generate_series(1020,1040) i;
INSERT INTO ref1 SELECT NULL FROM generate_series(0,1) i;
INSERT INTO ref1 SELECT * FROM ref1 ORDER BY id LIMIT 1;
-- added to verify we fixed the issue https://github.com/citusdata/citus/issues/6872
-- generated by Citus query generator tool
WITH cte_0 AS (
SELECT
table_0.id
FROM
dist0 AS table_0
INNER JOIN ref0 AS table_1 USING (id)
ORDER BY
id
)
SELECT
avg(avgsub.id)
FROM
(
SELECT
table_2.id
FROM
(
SELECT
table_3.id
FROM
cte_0 AS table_3
ORDER BY
id
) AS table_2
WHERE
table_2.id IN (
SELECT
table_4.id
FROM
dist0 AS table_4
RIGHT JOIN (
SELECT
table_6.id
FROM
dist0 AS table_6
INNER JOIN ref1 AS table_7 USING (id)
ORDER BY
id
LIMIT
28
) AS table_5 USING (id)
RIGHT JOIN dist1 AS table_8 USING (id)
WHERE
table_8.id = 6
ORDER BY
id
)
) AS avgsub;
DROP SCHEMA issue_6872 CASCADE;