Ensure Citus never tries to access an unplanned subquery

PostgreSQL might remove some of the subqueries when they do not
contribute to the query result at all. Citus should not try to
access such subqueries during planning.
pull/2105/head
Onder Kalaci 2018-04-13 15:15:03 +03:00
parent b0b130f064
commit 814f0e3acc
3 changed files with 209 additions and 1 deletions

View File

@ -1179,6 +1179,22 @@ AddRteSubqueryToAttributeEquivalenceClass(AttributeEquivalenceClass
TargetEntry *subqueryTargetEntry = NULL; TargetEntry *subqueryTargetEntry = NULL;
Query *targetSubquery = GetTargetSubquery(root, rangeTableEntry, varToBeAdded); Query *targetSubquery = GetTargetSubquery(root, rangeTableEntry, varToBeAdded);
/*
* We might not always get the subquery because the subquery might
* refer to a RELOPT_DEADREL, i.e., the corresponding join was
* removed via join_is_removable().
*
* Returning here implies that PostgreSQL doesn't need to plan the
* subquery because it doesn't contribute to the query result at all.
* Since the relations in the subquery do not appear in the query
* plan either, Citus simply ignores the subquery and treats it
* as a safe-to-pushdown subquery.
*/
if (targetSubquery == NULL)
{
return;
}
subqueryTargetEntry = get_tle_by_resno(targetSubquery->targetList, subqueryTargetEntry = get_tle_by_resno(targetSubquery->targetList,
varToBeAdded->varattno); varToBeAdded->varattno);
@ -1252,7 +1268,7 @@ GetTargetSubquery(PlannerInfo *root, RangeTblEntry *rangeTableEntry, Var *varToB
{ {
RelOptInfo *baseRelOptInfo = find_base_rel(root, varToBeAdded->varno); RelOptInfo *baseRelOptInfo = find_base_rel(root, varToBeAdded->varno);
/* If the targetSubquery hasn't been planned yet, we have to punt */ /* If the targetSubquery was not planned, we have to punt */
if (baseRelOptInfo->subroot == NULL) if (baseRelOptInfo->subroot == NULL)
{ {
return NULL; return NULL;

View File

@ -276,5 +276,112 @@ ERROR: could not create distributed plan
DETAIL: Possibly this is caused by the use of parameters in SQL functions, which is not supported in Citus. DETAIL: Possibly this is caused by the use of parameters in SQL functions, which is not supported in Citus.
HINT: Consider using PL/pgSQL functions instead. HINT: Consider using PL/pgSQL functions instead.
CONTEXT: SQL function "sql_subquery_test" statement 1 CONTEXT: SQL function "sql_subquery_test" statement 1
-- the joins are actually removed since they are
-- not needed by PostgreSQL planner (e.g., target list
-- doesn't contain anything from there)
-- but Citus can still pushdown this query
SELECT
t1.user_id, count(*)
FROM users_table t1
LEFT JOIN (
SELECT
user_id
FROM
users_table
UNION
SELECT
user_id
FROM
events_table
) t2 ON t1.user_id = t2.user_id
INNER JOIN (
SELECT
user_id
FROM
users_table
) t3 ON t1.user_id = t3.user_id
GROUP BY 1
ORDER BY 2 DESC;
user_id | count
---------+-------
5 | 676
4 | 529
2 | 324
3 | 289
6 | 100
1 | 49
(6 rows)
-- the joins are actually removed since they are
-- not needed by PostgreSQL planner (e.g., target list
-- doesn't contain anything from there)
-- but Citus can still plan this query even though the query
-- is not safe to pushdown
SELECT
t1.user_id, count(*)
FROM users_table t1
LEFT JOIN (
SELECT
user_id
FROM
users_table
UNION
SELECT
value_2
FROM
events_table
) t2 ON t1.user_id = t2.user_id
INNER JOIN (
SELECT
user_id
FROM
users_table
) t3 ON t1.user_id = t3.user_id
GROUP BY 1
ORDER BY 2 DESC;
user_id | count
---------+-------
5 | 676
4 | 529
2 | 324
3 | 289
6 | 100
1 | 49
(6 rows)
-- Similar to the above queries, but
-- this time the joins are not removed because
-- target list contains all the entries
SELECT
*
FROM users_table t1
LEFT JOIN (
SELECT
user_id
FROM
users_table
UNION
SELECT
user_id
FROM
events_table
) t2 ON t1.user_id = t2.user_id
INNER JOIN (
SELECT
user_id
FROM
users_table
) t3 ON t1.user_id = t3.user_id
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC, 5 DESC, 6 DESC, 7 DESC, 8 DESC
LIMIT 5;
user_id | time | value_1 | value_2 | value_3 | value_4 | user_id | user_id
---------+---------------------------------+---------+---------+---------+---------+---------+---------
6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6
6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6
6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6
6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6
6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6
(5 rows)
DROP FUNCTION plpgsql_subquery_test(int, int); DROP FUNCTION plpgsql_subquery_test(int, int);
DROP FUNCTION sql_subquery_test(int, int); DROP FUNCTION sql_subquery_test(int, int);

View File

@ -151,5 +151,90 @@ $$ LANGUAGE SQL;
-- should error out -- should error out
SELECT sql_subquery_test(1,1); SELECT sql_subquery_test(1,1);
-- the joins are actually removed since they are
-- not needed by PostgreSQL planner (e.g., target list
-- doesn't contain anything from there)
-- but Citus can still push down this query
--
-- The ORDER BY tie-breaks on user_id so that the row order stays
-- deterministic even if two users end up with the same count.
SELECT
    t1.user_id,
    count(*)
FROM users_table AS t1
LEFT JOIN (
    SELECT
        user_id
    FROM
        users_table
    UNION
    SELECT
        user_id
    FROM
        events_table
) AS t2 ON t1.user_id = t2.user_id
INNER JOIN (
    SELECT
        user_id
    FROM
        users_table
) AS t3 ON t1.user_id = t3.user_id
GROUP BY 1
ORDER BY 2 DESC, 1 DESC;
-- the joins are actually removed since they are
-- not needed by PostgreSQL planner (e.g., target list
-- doesn't contain anything from there)
-- but Citus can still plan this query even though the query
-- is not safe to push down
--
-- The ORDER BY tie-breaks on user_id so that the row order stays
-- deterministic even if two users end up with the same count.
SELECT
    t1.user_id,
    count(*)
FROM users_table AS t1
LEFT JOIN (
    SELECT
        user_id
    FROM
        users_table
    UNION
    SELECT
        value_2
    FROM
        events_table
) AS t2 ON t1.user_id = t2.user_id
INNER JOIN (
    SELECT
        user_id
    FROM
        users_table
) AS t3 ON t1.user_id = t3.user_id
GROUP BY 1
ORDER BY 2 DESC, 1 DESC;
-- Same shape as the queries above, except that the target list
-- selects every column, so this time the joins are not removed.
SELECT
    *
FROM users_table AS t1
LEFT JOIN (
    SELECT
        user_id
    FROM
        users_table
    UNION
    SELECT
        user_id
    FROM
        events_table
) AS t2 ON t1.user_id = t2.user_id
INNER JOIN (
    SELECT
        user_id
    FROM
        users_table
) AS t3 ON t1.user_id = t3.user_id
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC, 5 DESC, 6 DESC, 7 DESC, 8 DESC
LIMIT 5;
DROP FUNCTION plpgsql_subquery_test(int, int); DROP FUNCTION plpgsql_subquery_test(int, int);
DROP FUNCTION sql_subquery_test(int, int); DROP FUNCTION sql_subquery_test(int, int);