Expand support for subqueries in target list through recursive planning

pull/4421/head
Marco Slot 2020-12-16 19:25:42 +01:00
parent bfc98e01d1
commit de6aaaa648
3 changed files with 157 additions and 24 deletions

View File

@ -159,7 +159,7 @@ static void RecursivelyPlanNonColocatedSubqueriesInWhere(Query *query,
recursivePlanningContext);
static List * SublinkListFromWhere(Query *originalQuery);
static bool ExtractSublinkWalker(Node *node, List **sublinkList);
static bool ShouldRecursivelyPlanAllSubqueriesInWhere(Query *query);
static bool ShouldRecursivelyPlanSublinks(Query *query);
static bool RecursivelyPlanAllSubqueries(Node *node,
RecursivePlanningContext *planningContext);
static DeferredErrorMessage * RecursivelyPlanCTEs(Query *query,
@ -321,12 +321,18 @@ RecursivelyPlanSubqueriesAndCTEs(Query *query, RecursivePlanningContext *context
/*
* If the FROM clause is recurring (does not contain a distributed table),
* then we cannot have any distributed tables appearing in subqueries in
* the WHERE clause.
* the SELECT and WHERE clauses.
*/
if (ShouldRecursivelyPlanAllSubqueriesInWhere(query))
if (ShouldRecursivelyPlanSublinks(query))
{
/* replace all subqueries in the WHERE clause */
RecursivelyPlanAllSubqueries((Node *) query->jointree->quals, context);
if (query->jointree && query->jointree->quals)
{
RecursivelyPlanAllSubqueries((Node *) query->jointree->quals, context);
}
/* replace all subqueries in the SELECT clause */
RecursivelyPlanAllSubqueries((Node *) query->targetList, context);
}
if (query->havingQual != NULL)
@ -652,27 +658,12 @@ ExtractSublinkWalker(Node *node, List **sublinkList)
/*
* ShouldRecursivelyPlanAllSubqueriesInWhere returns true if the query has
* a WHERE clause and a recurring FROM clause (does not contain a distributed
* table).
* ShouldRecursivelyPlanSublinks returns true if the query has a recurring
* FROM clause.
*/
static bool
ShouldRecursivelyPlanAllSubqueriesInWhere(Query *query)
ShouldRecursivelyPlanSublinks(Query *query)
{
FromExpr *joinTree = query->jointree;
if (joinTree == NULL)
{
/* there is no FROM clause */
return false;
}
Node *whereClause = joinTree->quals;
if (whereClause == NULL)
{
/* there is no WHERE clause */
return false;
}
if (FindNodeMatchingCheckFunctionInRangeTableList(query->rtable,
IsDistributedTableRTE))
{

View File

@ -446,9 +446,96 @@ GROUP BY user_id
1 | 15
(6 rows)
-- make sure that we don't pushdown subqueries in the target list if no FROM clause
-- FROM is empty join tree, sublink can be recursively planned
SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table ));
user_id
---------------------------------------------------------------------
6
(1 row)
-- FROM is subquery with empty join tree, sublink can be recursively planned
SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table ))
FROM (SELECT 1) a;
user_id
---------------------------------------------------------------------
6
(1 row)
-- correlated subquery with recurring from clause (prevents recursive planning of outer sublink)
SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table) AND value_2 = a)
FROM (SELECT 1 AS a) r;
ERROR: correlated subqueries are not supported when the FROM clause contains a subquery without FROM
SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table) AND value_2 = r.user_id)
FROM users_reference_table r;
ERROR: correlated subqueries are not supported when the FROM clause contains a reference table
-- correlated subquery with recurring from clause (prevents recursive planning of inner sublink)
SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table WHERE user_id = a))
FROM (SELECT 1 AS a) r;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- recurring from clause containing a subquery with sublink on distributed table, recursive planning saves the day
SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table ))
FROM (SELECT * FROM users_reference_table WHERE user_id IN (SELECT user_id FROM events_table)) r
ORDER BY 1 LIMIT 3;
user_id
---------------------------------------------------------------------
6
6
6
(3 rows)
-- recurring from clause containing a subquery with correlated sublink on distributed table
SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table ))
FROM (SELECT * FROM users_reference_table WHERE value_2 IN (SELECT value_2 FROM events_table WHERE events_table.user_id = users_reference_table.user_id)) r
ORDER BY 1 LIMIT 3;
ERROR: correlated subqueries are not supported when the FROM clause contains a reference table
-- recurring from clause with sublink with distributed table in sublink in where
SELECT (SELECT DISTINCT user_id FROM users_reference_table WHERE user_id IN (SELECT user_id FROM users_table) AND user_id < 2), (SELECT 2), 3
FROM users_reference_table r
ORDER BY 1 LIMIT 3;
user_id | ?column? | ?column?
---------------------------------------------------------------------
1 | 2 | 3
1 | 2 | 3
1 | 2 | 3
(3 rows)
-- recurring from clause with sublink with distributed table in sublink in target list
SELECT (SELECT 1), (SELECT (SELECT user_id FROM users_table WHERE user_id < 2 GROUP BY user_id)
FROM users_reference_table WHERE user_id < 2 GROUP BY user_id)
FROM users_reference_table r
ORDER BY 1 LIMIT 3;
?column? | user_id
---------------------------------------------------------------------
1 | 1
1 | 1
1 | 1
(3 rows)
-- recurring from clause with correlated sublink with distributed table in sublink in target list
SELECT (SELECT (SELECT user_id FROM users_table WHERE user_id = users_reference_table.user_id GROUP BY user_id)
FROM users_reference_table WHERE user_id < 2 GROUP BY user_id)
FROM users_reference_table r
ORDER BY 1 LIMIT 3;
ERROR: correlated subqueries are not supported when the FROM clause contains a reference table
-- recurring from clause with correlated sublink with a recurring from clause and a distributed table in sublink
SELECT (SELECT DISTINCT user_id FROM users_reference_table WHERE user_id IN (SELECT user_id FROM users_reference_table) AND value_2 = r.value_2 AND user_id < 2)
FROM users_reference_table r
ORDER BY 1 LIMIT 3;
user_id
---------------------------------------------------------------------
1
1
1
(3 rows)
-- correlated subquery with recursively planned subquery in FROM (outer sublink)
SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table WHERE user_id = r.user_id))
FROM (SELECT user_id FROM users_table ORDER BY 1 LIMIT 3) r;
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
-- correlated subquery with recursively planned subquery in FROM (inner sublink)
SELECT (SELECT (SELECT max(user_id) FROM users_table) FROM users_table WHERE user_id = r.user_id)
FROM (SELECT user_id FROM users_table ORDER BY 1 LIMIT 3) r;
ERROR: correlated subqueries are not supported when the FROM clause contains a CTE or subquery
-- not meaningful SELECT FOR UPDATE query that should fail
SELECT count(*) FROM (SELECT
(SELECT user_id FROM users_table WHERE user_id = u1.user_id FOR UPDATE)

View File

@ -288,9 +288,64 @@ GROUP BY user_id
FROM users_table u1
GROUP BY user_id) as foo) ORDER BY 1 DESC;
-- make sure that we don't pushdown subqueries in the target list if no FROM clause
-- FROM is empty join tree, sublink can be recursively planned
SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table ));
-- FROM is subquery with empty join tree, sublink can be recursively planned
SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table ))
FROM (SELECT 1) a;
-- correlated subquery with recurring from clause (prevents recursive planning of outer sublink)
SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table) AND value_2 = a)
FROM (SELECT 1 AS a) r;
SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table) AND value_2 = r.user_id)
FROM users_reference_table r;
-- correlated subquery with recurring from clause (prevents recursive planning of inner sublink)
SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table WHERE user_id = a))
FROM (SELECT 1 AS a) r;
-- recurring from clause containing a subquery with sublink on distributed table, recursive planning saves the day
SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table ))
FROM (SELECT * FROM users_reference_table WHERE user_id IN (SELECT user_id FROM events_table)) r
ORDER BY 1 LIMIT 3;
-- recurring from clause containing a subquery with correlated sublink on distributed table
SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table ))
FROM (SELECT * FROM users_reference_table WHERE value_2 IN (SELECT value_2 FROM events_table WHERE events_table.user_id = users_reference_table.user_id)) r
ORDER BY 1 LIMIT 3;
-- recurring from clause with sublink with distributed table in sublink in where
SELECT (SELECT DISTINCT user_id FROM users_reference_table WHERE user_id IN (SELECT user_id FROM users_table) AND user_id < 2), (SELECT 2), 3
FROM users_reference_table r
ORDER BY 1 LIMIT 3;
-- recurring from clause with sublink with distributed table in sublink in target list
SELECT (SELECT 1), (SELECT (SELECT user_id FROM users_table WHERE user_id < 2 GROUP BY user_id)
FROM users_reference_table WHERE user_id < 2 GROUP BY user_id)
FROM users_reference_table r
ORDER BY 1 LIMIT 3;
-- recurring from clause with correlated sublink with distributed table in sublink in target list
SELECT (SELECT (SELECT user_id FROM users_table WHERE user_id = users_reference_table.user_id GROUP BY user_id)
FROM users_reference_table WHERE user_id < 2 GROUP BY user_id)
FROM users_reference_table r
ORDER BY 1 LIMIT 3;
-- recurring from clause with correlated sublink with a recurring from clause and a distributed table in sublink
SELECT (SELECT DISTINCT user_id FROM users_reference_table WHERE user_id IN (SELECT user_id FROM users_reference_table) AND value_2 = r.value_2 AND user_id < 2)
FROM users_reference_table r
ORDER BY 1 LIMIT 3;
-- correlated subquery with recursively planned subquery in FROM (outer sublink)
SELECT (SELECT DISTINCT user_id FROM users_table WHERE user_id = (SELECT max(user_id) FROM users_table WHERE user_id = r.user_id))
FROM (SELECT user_id FROM users_table ORDER BY 1 LIMIT 3) r;
-- correlated subquery with recursively planned subquery in FROM (inner sublink)
SELECT (SELECT (SELECT max(user_id) FROM users_table) FROM users_table WHERE user_id = r.user_id)
FROM (SELECT user_id FROM users_table ORDER BY 1 LIMIT 3) r;
-- not meaningful SELECT FOR UPDATE query that should fail
SELECT count(*) FROM (SELECT
(SELECT user_id FROM users_table WHERE user_id = u1.user_id FOR UPDATE)