From b5ced403d86f1647529616f3108608fdb2362095 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philip=20Dub=C3=A9?= Date: Fri, 31 May 2019 12:08:26 -0700 Subject: [PATCH] Also check rewrittenQuery jointree for outer join --- .../planner/query_pushdown_planning.c | 13 +++++++++++++ src/test/regress/expected/multi_subquery.out | 19 +++++++++++++++++++ .../expected/non_colocated_subquery_joins.out | 2 +- src/test/regress/sql/multi_subquery.sql | 11 +++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index a0cc7ace2..5aef6fde5 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -142,6 +142,19 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery) return true; } + /* + * Original query may not have an outer join while rewritten query does. + * We should push down in this case. + * An example of this is https://github.com/citusdata/citus/issues/2739 + * where postgres pulls up the outer join in the subquery. + */ + if (FindNodeCheck((Node *) rewrittenQuery->jointree, IsOuterJoinExpr)) + { + /* Assert what _should_ be the only situation this occurs in. */ + Assert(JoinTreeContainsSubquery(originalQuery)); + return true; + } + /* * Some unsupported join clauses in logical planner * may be supported by subquery pushdown planner. 
diff --git a/src/test/regress/expected/multi_subquery.out b/src/test/regress/expected/multi_subquery.out index e93cf0623..92ca1f226 100644 --- a/src/test/regress/expected/multi_subquery.out +++ b/src/test/regress/expected/multi_subquery.out @@ -462,6 +462,25 @@ SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3 ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC LIMIT 5; ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +-- outer joins as subqueries should work +-- https://github.com/citusdata/citus/issues/2739 +SELECT user_id, value_1, event_type +FROM ( + SELECT a.user_id, a.value_1, b.event_type + FROM users_table a + LEFT JOIN events_table b ON a.user_id = b.user_id +) lo +ORDER BY 1, 2, 3 +LIMIT 5; + user_id | value_1 | event_type +---------+---------+------------ + 1 | 1 | 0 + 1 | 1 | 0 + 1 | 1 | 1 + 1 | 1 | 1 + 1 | 1 | 2 +(5 rows) + -- inner joins on reference tables with functions works SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3 FROM events_table t1 diff --git a/src/test/regress/expected/non_colocated_subquery_joins.out b/src/test/regress/expected/non_colocated_subquery_joins.out index dbac68871..7e2e0dd6e 100644 --- a/src/test/regress/expected/non_colocated_subquery_joins.out +++ b/src/test/regress/expected/non_colocated_subquery_joins.out @@ -957,7 +957,7 @@ $$); DEBUG: skipping recursive planning for the subquery since it contains references to outer queries DEBUG: skipping recursive planning for the subquery since it contains references to outer queries DEBUG: skipping recursive planning for the subquery since it contains references to outer queries -ERROR: cannot perform distributed planning on this query +ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator -- similar to the above, make sure that we skip recursive plannig when -- the subquery 
contains only intermediate results SELECT * diff --git a/src/test/regress/sql/multi_subquery.sql b/src/test/regress/sql/multi_subquery.sql index 2665a1141..d65453486 100644 --- a/src/test/regress/sql/multi_subquery.sql +++ b/src/test/regress/sql/multi_subquery.sql @@ -314,6 +314,17 @@ SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3 ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC LIMIT 5; +-- outer joins as subqueries should work +-- https://github.com/citusdata/citus/issues/2739 +SELECT user_id, value_1, event_type +FROM ( + SELECT a.user_id, a.value_1, b.event_type + FROM users_table a + LEFT JOIN events_table b ON a.user_id = b.user_id +) lo +ORDER BY 1, 2, 3 +LIMIT 5; + -- inner joins on reference tables with functions works SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3 FROM events_table t1