diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 631152b8e..23a7a3f47 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -1179,6 +1179,22 @@ AddRteSubqueryToAttributeEquivalenceClass(AttributeEquivalenceClass TargetEntry *subqueryTargetEntry = NULL; Query *targetSubquery = GetTargetSubquery(root, rangeTableEntry, varToBeAdded); + /* + * We might not always get the subquery because the subquery might be a + * referencing to RELOPT_DEADREL such that the corresponding join is + * removed via join_is_removable(). + * + * Returning here implies that PostgreSQL doesn't need to plan the + * subquery because it doesn't contribute to the query result at all. + * Since the relations in the subquery does not appear in the query + * plan as well, Citus would simply ignore the subquery and treat that + * as a safe-to-pushdown subquery. + */ + if (targetSubquery == NULL) + { + return; + } + subqueryTargetEntry = get_tle_by_resno(targetSubquery->targetList, varToBeAdded->varattno); @@ -1252,7 +1268,7 @@ GetTargetSubquery(PlannerInfo *root, RangeTblEntry *rangeTableEntry, Var *varToB { RelOptInfo *baseRelOptInfo = find_base_rel(root, varToBeAdded->varno); - /* If the targetSubquery hasn't been planned yet, we have to punt */ + /* If the targetSubquery was not planned, we have to punt */ if (baseRelOptInfo->subroot == NULL) { return NULL; diff --git a/src/test/regress/expected/multi_subquery_misc.out b/src/test/regress/expected/multi_subquery_misc.out index f867f3b97..bae9595dd 100644 --- a/src/test/regress/expected/multi_subquery_misc.out +++ b/src/test/regress/expected/multi_subquery_misc.out @@ -276,5 +276,112 @@ ERROR: could not create distributed plan DETAIL: Possibly this is caused by the use of parameters in SQL functions, which is not supported in Citus. HINT: Consider using PL/pgSQL functions instead. CONTEXT: SQL function "sql_subquery_test" statement 1 +-- the joins are actually removed since they are +-- not needed by PostgreSQL planner (e.g., target list +-- doesn't contain anything from there) +-- but Citus can still pushdown this query +SELECT + t1.user_id, count(*) +FROM users_table t1 +LEFT JOIN ( + SELECT + user_id + FROM + users_table + UNION + SELECT + user_id + FROM + events_table +) t2 ON t1.user_id = t2.user_id +INNER JOIN ( + SELECT + user_id + FROM + users_table +) t3 ON t1.user_id = t3.user_id +GROUP BY 1 +ORDER BY 2 DESC; + user_id | count +---------+------- + 5 | 676 + 4 | 529 + 2 | 324 + 3 | 289 + 6 | 100 + 1 | 49 +(6 rows) + +-- the joins are actually removed since they are +-- not needed by PostgreSQL planner (e.g., target list +-- doesn't contain anything from there) +-- but Citus can still plan this query even though the query +-- is not safe to pushdown +SELECT + t1.user_id, count(*) +FROM users_table t1 +LEFT JOIN ( + SELECT + user_id + FROM + users_table + UNION + SELECT + value_2 + FROM + events_table +) t2 ON t1.user_id = t2.user_id +INNER JOIN ( + SELECT + user_id + FROM + users_table +) t3 ON t1.user_id = t3.user_id +GROUP BY 1 +ORDER BY 2 DESC; + user_id | count +---------+------- + 5 | 676 + 4 | 529 + 2 | 324 + 3 | 289 + 6 | 100 + 1 | 49 +(6 rows) + +-- Similar to the above queries, but +-- this time the joins are not removed because +-- target list contains all the entries +SELECT + * +FROM users_table t1 +LEFT JOIN ( + SELECT + user_id + FROM + users_table + UNION + SELECT + user_id + FROM + events_table +) t2 ON t1.user_id = t2.user_id +INNER JOIN ( + SELECT + user_id + FROM + users_table +) t3 ON t1.user_id = t3.user_id +ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC, 5 DESC, 6 DESC, 7 DESC, 8 DESC +LIMIT 5; + user_id | time | value_1 | value_2 | value_3 | value_4 | user_id | user_id +---------+---------------------------------+---------+---------+---------+---------+---------+--------- + 6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6 + 6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6 + 6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6 + 6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6 + 6 | Thu Nov 23 14:43:18.024104 2017 | 3 | 2 | 5 | | 6 | 6 +(5 rows) + DROP FUNCTION plpgsql_subquery_test(int, int); DROP FUNCTION sql_subquery_test(int, int); diff --git a/src/test/regress/sql/multi_subquery_misc.sql b/src/test/regress/sql/multi_subquery_misc.sql index ca2662c44..c4b897227 100644 --- a/src/test/regress/sql/multi_subquery_misc.sql +++ b/src/test/regress/sql/multi_subquery_misc.sql @@ -151,5 +151,90 @@ $$ LANGUAGE SQL; -- should error out SELECT sql_subquery_test(1,1); + + +-- the joins are actually removed since they are +-- not needed by PostgreSQL planner (e.g., target list +-- doesn't contain anything from there) +-- but Citus can still pushdown this query +SELECT + t1.user_id, count(*) +FROM users_table t1 +LEFT JOIN ( + SELECT + user_id + FROM + users_table + UNION + SELECT + user_id + FROM + events_table +) t2 ON t1.user_id = t2.user_id +INNER JOIN ( + SELECT + user_id + FROM + users_table +) t3 ON t1.user_id = t3.user_id +GROUP BY 1 +ORDER BY 2 DESC; + + +-- the joins are actually removed since they are +-- not needed by PostgreSQL planner (e.g., target list +-- doesn't contain anything from there) +-- but Citus can still plan this query even though the query +-- is not safe to pushdown +SELECT + t1.user_id, count(*) +FROM users_table t1 +LEFT JOIN ( + SELECT + user_id + FROM + users_table + UNION + SELECT + value_2 + FROM + events_table +) t2 ON t1.user_id = t2.user_id +INNER JOIN ( + SELECT + user_id + FROM + users_table +) t3 ON t1.user_id = t3.user_id +GROUP BY 1 +ORDER BY 2 DESC; + + +-- Similar to the above queries, but +-- this time the joins are not removed because +-- target list contains all the entries +SELECT + * +FROM users_table t1 +LEFT JOIN ( + SELECT + user_id + FROM + users_table + UNION + SELECT + user_id + FROM + events_table +) t2 ON t1.user_id = t2.user_id +INNER JOIN ( + SELECT + user_id + FROM + users_table +) t3 ON t1.user_id = t3.user_id +ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC, 5 DESC, 6 DESC, 7 DESC, 8 DESC +LIMIT 5; + DROP FUNCTION plpgsql_subquery_test(int, int); DROP FUNCTION sql_subquery_test(int, int);