From 814f0e3acc3e50ec2e47be90f2df3ac0cf1f3e90 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Fri, 13 Apr 2018 15:15:03 +0300 Subject: [PATCH] Ensure Citus never try to access a not planned subquery PostgreSQL might remove some of the subqueries when they do not contribute to the query result at all. Citus should not try to access such subqueries during planning. --- .../relation_restriction_equivalence.c | 18 ++- .../regress/expected/multi_subquery_misc.out | 107 ++++++++++++++++++ src/test/regress/sql/multi_subquery_misc.sql | 85 ++++++++++++++ 3 files changed, 209 insertions(+), 1 deletion(-) diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 631152b8e..23a7a3f47 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -1179,6 +1179,22 @@ AddRteSubqueryToAttributeEquivalenceClass(AttributeEquivalenceClass TargetEntry *subqueryTargetEntry = NULL; Query *targetSubquery = GetTargetSubquery(root, rangeTableEntry, varToBeAdded); + /* + * We might not always get the subquery because the subquery might be a + * referencing to RELOPT_DEADREL such that the corresponding join is + * removed via join_is_removable(). + * + * Returning here implies that PostgreSQL doesn't need to plan the + * subquery because it doesn't contribute to the query result at all. + * Since the relations in the subquery does not appear in the query + * plan as well, Citus would simply ignore the subquery and treat that + * as a safe-to-pushdown subquery. + */ + if (targetSubquery == NULL) + { + return; + } + subqueryTargetEntry = get_tle_by_resno(targetSubquery->targetList, varToBeAdded->varattno); @@ -1252,7 +1268,7 @@ GetTargetSubquery(PlannerInfo *root, RangeTblEntry *rangeTableEntry, Var *varToB { RelOptInfo *baseRelOptInfo = find_base_rel(root, varToBeAdded->varno); - /* If the targetSubquery hasn't been planned yet, we have to punt */ + /* If the targetSubquery was not planned, we have to punt */ if (baseRelOptInfo->subroot == NULL) { return NULL; diff --git a/src/test/regress/expected/multi_subquery_misc.out b/src/test/regress/expected/multi_subquery_misc.out index f867f3b97..bae9595dd 100644 --- a/src/test/regress/expected/multi_subquery_misc.out +++ b/src/test/regress/expected/multi_subquery_misc.out @@ -276,5 +276,112 @@ ERROR: could not create distributed plan DETAIL: Possibly this is caused by the use of parameters in SQL functions, which is not supported in Citus. HINT: Consider using PL/pgSQL functions instead. CONTEXT: SQL function "sql_subquery_test" statement 1 +-- the joins are actually removed since they are +-- not needed by PostgreSQL planner (e.g., target list +-- doesn't contain anything from there) +-- but Citus can still pushdown this query +SELECT + t1.user_id, count(*) +FROM users_table t1 +LEFT JOIN ( + SELECT + user_id + FROM + users_table + UNION + SELECT + user_id + FROM + events_table +) t2 ON t1.user_id = t2.user_id +INNER JOIN ( + SELECT + user_id + FROM + users_table +) t3 ON t1.user_id = t3.user_id +GROUP BY 1 +ORDER BY 2 DESC; + user_id | count +---------+------- + 5 | 676 + 4 | 529 + 2 | 324 + 3 | 289 + 6 | 100 + 1 | 49 +(6 rows) + +-- the joins are actually removed since they are +-- not needed by PostgreSQL planner (e.g., target list +-- doesn't contain anything from there) +-- but Citus can still plan this query even though the query +-- is not safe to pushdown +SELECT + t1.user_id, count(*) +FROM users_table t1 +LEFT JOIN ( + SELECT + user_id + FROM + users_table + UNION + SELECT + value_2 + FROM + events_table +) t2 ON t1.user_id = t2.user_id +INNER JOIN ( + SELECT + user_id + FROM + users_table +) t3 ON t1.user_id = t3.user_id +GROUP BY 1 +ORDER BY 2 DESC; + user_id | count +---------+------- + 5 | 676 + 4 | 529 + 2 | 324 + 3 | 289 + 6 | 100 + 1 | 49 +(6 rows) + +-- Similar to the above queries, but +-- this time the joins are not removed because +-- target list contains all the entries +SELECT + * +FROM users_table t1 +LEFT JOIN ( + SELECT + user_id + FROM + users_table + UNION + SELECT + user_id + FROM + events_table +) t2 ON t1.user_id = t2.user_id +INNER JOIN ( + SELECT + user_id + FROM + users_table +) t3 ON t1.user_id = t3.user_id +ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC, 5 DESC, 6 DESC, 7 DESC, 8 DESC +LIMIT 5; + user_id | time | value_1 | value_2 | value_3 | value_4 | user_id | user_id +---------+---------------------------------+---------+---------+---------+---------+---------+--------- + 6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6 + 6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6 + 6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6 + 6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6 + 6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6 +(5 rows) + DROP FUNCTION plpgsql_subquery_test(int, int); DROP FUNCTION sql_subquery_test(int, int); diff --git a/src/test/regress/sql/multi_subquery_misc.sql b/src/test/regress/sql/multi_subquery_misc.sql index ca2662c44..c4b897227 100644 --- a/src/test/regress/sql/multi_subquery_misc.sql +++ b/src/test/regress/sql/multi_subquery_misc.sql @@ -151,5 +151,90 @@ $$ LANGUAGE SQL; -- should error out SELECT sql_subquery_test(1,1); + + +-- the joins are actually removed since they are +-- not needed by PostgreSQL planner (e.g., target list +-- doesn't contain anything from there) +-- but Citus can still pushdown this query +SELECT + t1.user_id, count(*) +FROM users_table t1 +LEFT JOIN ( + SELECT + user_id + FROM + users_table + UNION + SELECT + user_id + FROM + events_table +) t2 ON t1.user_id = t2.user_id +INNER JOIN ( + SELECT + user_id + FROM + users_table +) t3 ON t1.user_id = t3.user_id +GROUP BY 1 +ORDER BY 2 DESC; + + +-- the joins are actually removed since they are +-- not needed by PostgreSQL planner (e.g., target list +-- doesn't contain anything from there) +-- but Citus can still plan this query even though the query +-- is not safe to pushdown +SELECT + t1.user_id, count(*) +FROM users_table t1 +LEFT JOIN ( + SELECT + user_id + FROM + users_table + UNION + SELECT + value_2 + FROM + events_table +) t2 ON t1.user_id = t2.user_id +INNER JOIN ( + SELECT + user_id + FROM + users_table +) t3 ON t1.user_id = t3.user_id +GROUP BY 1 +ORDER BY 2 DESC; + + +-- Similar to the above queries, but +-- this time the joins are not removed because +-- target list contains all the entries +SELECT + * +FROM users_table t1 +LEFT JOIN ( + SELECT + user_id + FROM + users_table + UNION + SELECT + user_id + FROM + events_table +) t2 ON t1.user_id = t2.user_id +INNER JOIN ( + SELECT + user_id + FROM + users_table +) t3 ON t1.user_id = t3.user_id +ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC, 5 DESC, 6 DESC, 7 DESC, 8 DESC +LIMIT 5; + DROP FUNCTION plpgsql_subquery_test(int, int); DROP FUNCTION sql_subquery_test(int, int);