diff --git a/src/backend/distributed/planner/recursive_planning.c b/src/backend/distributed/planner/recursive_planning.c index a6761a7f4..bb8591a8c 100644 --- a/src/backend/distributed/planner/recursive_planning.c +++ b/src/backend/distributed/planner/recursive_planning.c @@ -159,7 +159,7 @@ static void RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query, recursivePlanningContext); static List * SublinkListFromWhere(Query *originalQuery); static bool ExtractSublinkWalker(Node *node, List **sublinkList); -static bool ShouldRecursivelyPlanAllSubqueriesInWhere(Query *query); +static bool ShouldRecursivelyPlanSublinks(Query *query); static bool RecursivelyPlanAllSubqueries(Node *node, RecursivePlanningContext *planningContext); static DeferredErrorMessage * RecursivelyPlanCTEs(Query *query, @@ -321,12 +321,18 @@ RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext *context /* * If the FROM clause is recurring (does not contain a distributed table), * then we cannot have any distributed tables appearing in subqueries in - * the WHERE clause. + * the SELECT and WHERE clauses. */ - if (ShouldRecursivelyPlanAllSubqueriesInWhere(query)) + if (ShouldRecursivelyPlanSublinks(query)) { /* replace all subqueries in the WHERE clause */ - RecursivelyPlanAllSubqueries((Node *) query->jointree->quals, context); + if (query->jointree && query->jointree->quals) + { + RecursivelyPlanAllSubqueries((Node *) query->jointree->quals, context); + } + + /* replace all subqueries in the SELECT clause */ + RecursivelyPlanAllSubqueries((Node *) query->targetList, context); } if (query->havingQual != NULL) @@ -652,27 +658,12 @@ ExtractSublinkWalker(Node *node, List **sublinkList) /* - * ShouldRecursivelyPlanAllSubqueriesInWhere returns true if the query has - * a WHERE clause and a recurring FROM clause (does not contain a distributed - * table). + * ShouldRecursivelyPlanSublinks returns true if the query has a recurring + * FROM clause. */ static bool -ShouldRecursivelyPlanAllSubqueriesInWhere(Query *query) +ShouldRecursivelyPlanSublinks(Query *query) { - FromExpr *joinTree = query->jointree; - if (joinTree == NULL) - { - /* there is no FROM clause */ - return false; - } - - Node *whereClause = joinTree->quals; - if (whereClause == NULL) - { - /* there is no WHERE clause */ - return false; - } - if (FindNodeMatchingCheckFunctionInRangeTableList(query->rtable, IsDistributedTableRTE)) { diff --git a/src/test/regress/expected/subquery_in_targetlist.out b/src/test/regress/expected/subquery_in_targetlist.out index 395b4dc96..a32b480f1 100644 --- a/src/test/regress/expected/subquery_in_targetlist.out +++ b/src/test/regress/expected/subquery_in_targetlist.out @@ -446,9 +446,96 @@ GROUP BY user_id 1 | 15 (6 rows) --- make sure that we don't pushdown subqueries in the target list if no FROM clause +-- FROM is empty join tree, sublink can be recursively planned SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table )); + user_id +--------------------------------------------------------------------- + 6 +(1 row) + +-- FROM is subquery with empty join tree, sublink can be recursively planned +SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table )) +FROM (SELECT 1) a; + user_id +--------------------------------------------------------------------- + 6 +(1 row) + +-- correlated subquery with recurring from clause (prevents recursive planning of outer sublink) +SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table) AND value_2 = a) +FROM (SELECT 1 AS a) r; ERROR: correlated subqueries are not supported when the FROM clause contains a subquery without FROM +SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table) AND value_2 = r.user_id) +FROM users_reference_table r; +ERROR: correlated subqueries are not supported when the FROM clause contains a reference table +-- correlated subquery with recurring from clause (prevents recursive planning of inner sublink) +SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table WHERE user_id = a)) +FROM (SELECT 1 AS a) r; +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +-- recurring from clause containing a subquery with sublink on distributed table, recursive planning saves the day +SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table )) +FROM (SELECT * FROM users_reference_table WHERE user_id IN (SELECT user_id FROM events_table)) r +ORDER BY 1 LIMIT 3; + user_id +--------------------------------------------------------------------- + 6 + 6 + 6 +(3 rows) + +-- recurring from clause containing a subquery with correlated sublink on distributed table +SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table )) +FROM (SELECT * FROM users_reference_table WHERE value_2 IN (SELECT value_2 FROM events_table WHERE events_table.user_id = users_reference_table.user_id)) r +ORDER BY 1 LIMIT 3; +ERROR: correlated subqueries are not supported when the FROM clause contains a reference table +-- recurring from clause with sublink with distributed table in sublink in where +SELECT (SELECT DISTINCT user_id FROM users_reference_table WHERE user_id IN (SELECT user_id FROM users_table) AND user_id < 2), (SELECT 2), 3 +FROM users_reference_table r +ORDER BY 1 LIMIT 3; + user_id | ?column? | ?column? +--------------------------------------------------------------------- + 1 | 2 | 3 + 1 | 2 | 3 + 1 | 2 | 3 +(3 rows) + +-- recurring from clause with sublink with distributed table in sublink in target list +SELECT (SELECT 1), (SELECT (SELECT user_id FROM users_table WHERE user_id < 2 GROUP BY user_id) + FROM users_reference_table WHERE user_id < 2 GROUP BY user_id) +FROM users_reference_table r +ORDER BY 1 LIMIT 3; + ?column? | user_id +--------------------------------------------------------------------- + 1 | 1 + 1 | 1 + 1 | 1 +(3 rows) + +-- recurring from clause with correlated sublink with distributed table in sublink in target list +SELECT (SELECT (SELECT user_id FROM users_table WHERE user_id = users_reference_table.user_id GROUP BY user_id) + FROM users_reference_table WHERE user_id < 2 GROUP BY user_id) +FROM users_reference_table r +ORDER BY 1 LIMIT 3; +ERROR: correlated subqueries are not supported when the FROM clause contains a reference table +-- recurring from clause with correlated sublink with a recurring from clause and a distributed table in sublink +SELECT (SELECT DISTINCT user_id FROM users_reference_table WHERE user_id IN (SELECT user_id FROM users_reference_table) AND value_2 = r.value_2 AND user_id < 2) +FROM users_reference_table r +ORDER BY 1 LIMIT 3; + user_id +--------------------------------------------------------------------- + 1 + 1 + 1 +(3 rows) + +-- correlated subquery with recursively planned subquery in FROM (outer sublink) +SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table WHERE user_id = r.user_id)) +FROM (SELECT user_id FROM users_table ORDER BY 1 LIMIT 3) r; +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +-- correlated subquery with recursively planned subquery in FROM (inner sublink) +SELECT (SELECT (SELECT max(user_id) FROM users_table) FROM users_table WHERE user_id = r.user_id) +FROM (SELECT user_id FROM users_table ORDER BY 1 LIMIT 3) r; +ERROR: correlated subqueries are not supported when the FROM clause contains a CTE or subquery -- not meaningful SELECT FOR UPDATE query that should fail SELECT count(*) FROM (SELECT (SELECT user_id FROM users_table WHERE user_id = u1.user_id FOR UPDATE) diff --git a/src/test/regress/sql/subquery_in_targetlist.sql b/src/test/regress/sql/subquery_in_targetlist.sql index a434e50f3..7972db2f4 100644 --- a/src/test/regress/sql/subquery_in_targetlist.sql +++ b/src/test/regress/sql/subquery_in_targetlist.sql @@ -288,9 +288,64 @@ GROUP BY user_id FROM users_table u1 GROUP BY user_id) as foo) ORDER BY 1 DESC; --- make sure that we don't pushdown subqueries in the target list if no FROM clause +-- FROM is empty join tree, sublink can be recursively planned SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table )); +-- FROM is subquery with empty join tree, sublink can be recursively planned +SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table )) +FROM (SELECT 1) a; + +-- correlated subquery with recurring from clause (prevents recursive planning of outer sublink) +SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table) AND value_2 = a) +FROM (SELECT 1 AS a) r; + +SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table) AND value_2 = r.user_id) +FROM users_reference_table r; + +-- correlated subquery with recurring from clause (prevents recursive planning of inner sublink) +SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table WHERE user_id = a)) +FROM (SELECT 1 AS a) r; + +-- recurring from clause containing a subquery with sublink on distributed table, recursive planning saves the day +SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table )) +FROM (SELECT * FROM users_reference_table WHERE user_id IN (SELECT user_id FROM events_table)) r +ORDER BY 1 LIMIT 3; + +-- recurring from clause containing a subquery with correlated sublink on distributed table +SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table )) +FROM (SELECT * FROM users_reference_table WHERE value_2 IN (SELECT value_2 FROM events_table WHERE events_table.user_id = users_reference_table.user_id)) r +ORDER BY 1 LIMIT 3; + +-- recurring from clause with sublink with distributed table in sublink in where +SELECT (SELECT DISTINCT user_id FROM users_reference_table WHERE user_id IN (SELECT user_id FROM users_table) AND user_id < 2), (SELECT 2), 3 +FROM users_reference_table r +ORDER BY 1 LIMIT 3; + +-- recurring from clause with sublink with distributed table in sublink in target list +SELECT (SELECT 1), (SELECT (SELECT user_id FROM users_table WHERE user_id < 2 GROUP BY user_id) + FROM users_reference_table WHERE user_id < 2 GROUP BY user_id) +FROM users_reference_table r +ORDER BY 1 LIMIT 3; + +-- recurring from clause with correlated sublink with distributed table in sublink in target list +SELECT (SELECT (SELECT user_id FROM users_table WHERE user_id = users_reference_table.user_id GROUP BY user_id) + FROM users_reference_table WHERE user_id < 2 GROUP BY user_id) +FROM users_reference_table r +ORDER BY 1 LIMIT 3; + +-- recurring from clause with correlated sublink with a recurring from clause and a distributed table in sublink +SELECT (SELECT DISTINCT user_id FROM users_reference_table WHERE user_id IN (SELECT user_id FROM users_reference_table) AND value_2 = r.value_2 AND user_id < 2) +FROM users_reference_table r +ORDER BY 1 LIMIT 3; + +-- correlated subquery with recursively planned subquery in FROM (outer sublink) +SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table WHERE user_id = r.user_id)) +FROM (SELECT user_id FROM users_table ORDER BY 1 LIMIT 3) r; + +-- correlated subquery with recursively planned subquery in FROM (inner sublink) +SELECT (SELECT (SELECT max(user_id) FROM users_table) FROM users_table WHERE user_id = r.user_id) +FROM (SELECT user_id FROM users_table ORDER BY 1 LIMIT 3) r; + -- not meaningful SELECT FOR UPDATE query that should fail SELECT count(*) FROM (SELECT (SELECT user_id FROM users_table WHERE user_id = u1.user_id FOR UPDATE)