From cbf0c5c020473b4c994a98fce0046b3a98b66c7f Mon Sep 17 00:00:00 2001 From: aykutbozkurt Date: Fri, 3 Feb 2023 17:14:19 +0300 Subject: [PATCH] join order planner does not depend on original tree so we should not check whether original tree contains any outer join or not for query pushdown --- .../planner/query_pushdown_planning.c | 9 ----- .../regress/expected/multi_hash_pruning.out | 10 ++--- .../regress/expected/multi_outer_join.out | 2 + .../expected/multi_outer_join_reference.out | 2 + .../expected/non_colocated_outer_joins.out | 37 ++++++++++++++++--- 5 files changed, 40 insertions(+), 20 deletions(-) diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index 5cae19497..c3451b8f7 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -184,15 +184,6 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery, return true; } - /* - * We handle outer joins as subqueries, since the join order planner - * does not know how to handle them. - */ - if (FindNodeMatchingCheckFunction((Node *) originalQuery->jointree, IsOuterJoinExpr)) - { - return true; - } - /* * Original query may not have an outer join while rewritten query does. * We should push down in this case. diff --git a/src/test/regress/expected/multi_hash_pruning.out b/src/test/regress/expected/multi_hash_pruning.out index 0a113c5f8..a69528129 100644 --- a/src/test/regress/expected/multi_hash_pruning.out +++ b/src/test/regress/expected/multi_hash_pruning.out @@ -1242,19 +1242,19 @@ WHERE o_orderkey IN (1, 2) --------------------------------------------------------------------- Aggregate -> Custom Scan (Citus Adaptive) - Task Count: 3 - Tasks Shown: One of 3 + Task Count: 1 + Tasks Shown: All -> Task Node: host=localhost port=xxxxx dbname=regression -> Aggregate -> Nested Loop Join Filter: (orders_hash_partitioned.o_orderkey = lineitem_hash_partitioned.l_orderkey) - -> Seq Scan on orders_hash_partitioned_630000 orders_hash_partitioned + -> Seq Scan on orders_hash_partitioned_630003 orders_hash_partitioned Filter: (o_orderkey = ANY ('{1,2}'::integer[])) -> Materialize - -> Bitmap Heap Scan on lineitem_hash_partitioned_630004 lineitem_hash_partitioned + -> Bitmap Heap Scan on lineitem_hash_partitioned_630007 lineitem_hash_partitioned Recheck Cond: (l_orderkey = ANY ('{2,3}'::integer[])) - -> Bitmap Index Scan on lineitem_hash_partitioned_pkey_630004 + -> Bitmap Index Scan on lineitem_hash_partitioned_pkey_630007 Index Cond: (l_orderkey = ANY ('{2,3}'::integer[])) (16 rows) diff --git a/src/test/regress/expected/multi_outer_join.out b/src/test/regress/expected/multi_outer_join.out index 5e56c143b..67040a159 100644 --- a/src/test/regress/expected/multi_outer_join.out +++ b/src/test/regress/expected/multi_outer_join.out @@ -174,6 +174,7 @@ FROM multi_outer_join_left a LEFT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey) WHERE r_custkey = 5 or r_custkey > 15; +LOG: join order: [ "multi_outer_join_left" ][ reference join "multi_outer_join_right_reference" ] min | max --------------------------------------------------------------------- 5 | 5 @@ -314,6 +315,7 @@ FROM multi_outer_join_left a LEFT JOIN multi_outer_join_right b ON (l_custkey = r_custkey) WHERE r_custkey = 21 or r_custkey < 10; +LOG: join order: [ "multi_outer_join_left" ][ local partition join "multi_outer_join_right" ] min | max --------------------------------------------------------------------- 21 | 21 diff --git a/src/test/regress/expected/multi_outer_join_reference.out b/src/test/regress/expected/multi_outer_join_reference.out index 5c05820eb..8000cc4d4 100644 --- a/src/test/regress/expected/multi_outer_join_reference.out +++ b/src/test/regress/expected/multi_outer_join_reference.out @@ -170,6 +170,7 @@ FROM multi_outer_join_left_hash a LEFT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey) WHERE r_custkey = 5 or r_custkey > 15; +LOG: join order: [ "multi_outer_join_left_hash" ][ reference join "multi_outer_join_right_reference" ] min | max --------------------------------------------------------------------- 5 | 5 @@ -312,6 +313,7 @@ FROM multi_outer_join_left_hash a LEFT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey) WHERE r_custkey = 21 or r_custkey < 10; +LOG: join order: [ "multi_outer_join_left_hash" ][ reference join "multi_outer_join_right_reference" ] min | max --------------------------------------------------------------------- 21 | 21 diff --git a/src/test/regress/expected/non_colocated_outer_joins.out b/src/test/regress/expected/non_colocated_outer_joins.out index c5bd6b7c3..30dbda8dd 100644 --- a/src/test/regress/expected/non_colocated_outer_joins.out +++ b/src/test/regress/expected/non_colocated_outer_joins.out @@ -266,7 +266,11 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT t1.col1, t1.c -- join order planner can handle queries with multi joins consisting of outer joins with simple join clause SELECT t1.*, t2.*, t3.* FROM test_hash1 t1 RIGHT JOIN test_hash2 t2 ON (t1.col1 = t2.col1) INNER JOIN test_hash3 t3 ON (t3.col1 = t1.col1) ORDER BY 1,2,3,4,5,6; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +LOG: join order: [ "test_hash1" ][ local partition join "test_hash3" ][ dual partition join "test_hash2" ] + col1 | col2 | col1 | col2 | col1 | col2 +--------------------------------------------------------------------- +(0 rows) + SELECT t1.*, t2.*, t3.* FROM test_hash1 t1 RIGHT JOIN test_hash2 t2 ON (t1.col1 = t2.col1) INNER JOIN test_hash3 t3 ON (t3.col1 = t2.col1) ORDER BY 1,2,3,4,5,6; DEBUG: recursively planning noncolocated relation DEBUG: Wrapping relation "test_hash2" "t2" to a subquery @@ -292,7 +296,11 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT t1.col1, t1.c (5 rows) SELECT t1.*, t2.*, t3.* FROM test_hash1 t1 RIGHT JOIN test_hash2 t2 ON (t1.col2 = t2.col2) INNER JOIN test_hash3 t3 ON (t3.col2 = t1.col2) ORDER BY 1,2,3,4,5,6; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +LOG: join order: [ "test_hash1" ][ dual partition join "test_hash2" ][ dual partition join "test_hash3" ] + col1 | col2 | col1 | col2 | col1 | col2 +--------------------------------------------------------------------- +(0 rows) + SELECT t1.*, t2.*, t3.* FROM test_hash1 t1 RIGHT JOIN test_hash2 t2 ON (t1.col2 = t2.col2) INNER JOIN test_hash3 t3 ON (t3.col2 = t2.col2) ORDER BY 1,2,3,4,5,6; DEBUG: recursively planning noncolocated relation DEBUG: Wrapping relation "test_hash2" "t2" to a subquery @@ -318,7 +326,11 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT t1.col1, t1.c (5 rows) SELECT t1.*, t2.*, t3.* FROM test_hash1 t1 RIGHT JOIN test_hash2 t2 ON (t1.col2 = t2.col1) INNER JOIN test_hash3 t3 ON (t3.col2 = t1.col1) ORDER BY 1,2,3,4,5,6; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +LOG: join order: [ "test_hash1" ][ dual partition join "test_hash2" ][ dual partition join "test_hash3" ] + col1 | col2 | col1 | col2 | col1 | col2 +--------------------------------------------------------------------- +(0 rows) + SELECT t1.*, t2.*, t3.* FROM test_hash1 t1 RIGHT JOIN test_hash2 t2 ON (t1.col2 = t2.col1) INNER JOIN test_hash3 t3 ON (t3.col2 = t2.col1) ORDER BY 1,2,3,4,5,6; DEBUG: recursively planning noncolocated relation DEBUG: Wrapping relation "test_hash2" "t2" to a subquery @@ -344,7 +356,11 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT t1.col1, t1.c (5 rows) SELECT t1.*, t2.*, t3.* FROM test_hash2 t2 LEFT JOIN test_hash1 t1 ON (t1.col1 = t2.col1) INNER JOIN test_hash3 t3 ON (t3.col1 = t1.col1) ORDER BY 1,2,3,4,5,6; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +LOG: join order: [ "test_hash1" ][ local partition join "test_hash3" ][ dual partition join "test_hash2" ] + col1 | col2 | col1 | col2 | col1 | col2 +--------------------------------------------------------------------- +(0 rows) + SELECT t1.*, t2.*, t3.* FROM test_hash2 t2 LEFT JOIN test_hash1 t1 ON (t1.col1 = t2.col1) INNER JOIN test_hash3 t3 ON (t3.col1 = t2.col1) ORDER BY 1,2,3,4,5,6; DEBUG: recursively planning noncolocated relation DEBUG: Wrapping relation "test_hash1" "t1" to a subquery @@ -365,7 +381,11 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT t1.col1, t1.c (5 rows) SELECT t1.*, t2.*, t3.* FROM test_hash2 t2 LEFT JOIN test_hash1 t1 ON (t1.col2 = t2.col2) INNER JOIN test_hash3 t3 ON (t3.col2 = t1.col2) ORDER BY 1,2,3,4,5,6; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +LOG: join order: [ "test_hash2" ][ dual partition join "test_hash1" ][ dual partition join "test_hash3" ] + col1 | col2 | col1 | col2 | col1 | col2 +--------------------------------------------------------------------- +(0 rows) + SELECT t1.*, t2.*, t3.* FROM test_hash2 t2 LEFT JOIN test_hash1 t1 ON (t1.col2 = t2.col2) INNER JOIN test_hash3 t3 ON (t3.col2 = t2.col2) ORDER BY 1,2,3,4,5,6; DEBUG: recursively planning noncolocated relation DEBUG: Wrapping relation "test_hash1" "t1" to a subquery @@ -386,7 +406,11 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT t1.col1, t1.c (5 rows) SELECT t1.*, t2.*, t3.* FROM test_hash2 t2 LEFT JOIN test_hash1 t1 ON (t1.col2 = t2.col1) INNER JOIN test_hash3 t3 ON (t3.col2 = t1.col1) ORDER BY 1,2,3,4,5,6; -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +LOG: join order: [ "test_hash2" ][ dual partition join "test_hash1" ][ dual partition join "test_hash3" ] + col1 | col2 | col1 | col2 | col1 | col2 +--------------------------------------------------------------------- +(0 rows) + SELECT t1.*, t2.*, t3.* FROM test_hash2 t2 LEFT JOIN test_hash1 t1 ON (t1.col2 = t2.col1) INNER JOIN test_hash3 t3 ON (t3.col2 = t2.col1) ORDER BY 1,2,3,4,5,6; DEBUG: recursively planning noncolocated relation DEBUG: Wrapping relation "test_hash1" "t1" to a subquery @@ -1778,6 +1802,7 @@ SELECT * FROM dist1 LEFT JOIN dist2 ON (dist1.x = dist2.x AND dist2.x >2) ORDER -- single join condition and dist2.x >2 is regular filter and applied after join SELECT * FROM dist1 LEFT JOIN dist2 ON (dist1.x = dist2.x) WHERE dist2.x >2 ORDER BY 1,2,3,4; +LOG: join order: [ "dist1" ][ local partition join "dist2" ] x | y | x | y --------------------------------------------------------------------- (0 rows)