From 427b2054ee754241a3036d12986c2fbabdafd0a4 Mon Sep 17 00:00:00 2001 From: aykutbozkurt Date: Tue, 20 Dec 2022 14:26:07 +0300 Subject: [PATCH] use pushdown planner if restriction keys are equivalent --- .../planner/query_pushdown_planning.c | 28 +++++++++++++++---- src/test/regress/expected/cross_join.out | 21 ++++++++++---- src/test/regress/expected/cte_inline.out | 12 -------- src/test/regress/expected/join_pushdown.out | 3 +- .../regress/expected/multi_outer_join.out | 6 +++- .../expected/multi_outer_join_reference.out | 6 +++- .../multi_subquery_behavioral_analytics.out | 6 ++-- ...lti_subquery_in_where_reference_clause.out | 8 ++---- src/test/regress/expected/multi_view.out | 4 +-- .../regress/expected/recurring_outer_join.out | 7 ++--- .../regress/expected/subquery_in_where.out | 2 +- src/test/regress/sql/recurring_outer_join.sql | 4 +-- 12 files changed, 61 insertions(+), 46 deletions(-) diff --git a/src/backend/distributed/planner/query_pushdown_planning.c b/src/backend/distributed/planner/query_pushdown_planning.c index 5caeb49c2..6c7f4ebc4 100644 --- a/src/backend/distributed/planner/query_pushdown_planning.c +++ b/src/backend/distributed/planner/query_pushdown_planning.c @@ -183,12 +183,6 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery, return true; } - /* if there is right recursive join, fix join order can not handle it */ - if (HasRightRecursiveJoin(rewrittenQuery->jointree)) - { - return true; - } - /* * Some unsupported join clauses in logical planner * may be supported by subquery pushdown planner. @@ -199,6 +193,28 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery, return true; } + /* + * some unsupported outer joins in logical planner + * may be supported by pushdown planner. + */ + if (FindNodeMatchingCheckFunction((Node *) rewrittenQuery->jointree, IsOuterJoinExpr)) + { + /* we can pushdown outer joins if all restrictions are on partition columns */ + if (RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext)) + { + return true; + } + + /* + * join order planner only handles left recursive join trees (except inner joins, + * which are commutative) + */ + if (HasRightRecursiveJoin(rewrittenQuery->jointree)) + { + return true; + } + } + /* check if the query has a window function and it is safe to pushdown */ if (originalQuery->hasWindowFuncs && SafeToPushdownWindowFunction(originalQuery, NULL)) diff --git a/src/test/regress/expected/cross_join.out b/src/test/regress/expected/cross_join.out index c1101e482..9fc5b1684 100644 --- a/src/test/regress/expected/cross_join.out +++ b/src/test/regress/expected/cross_join.out @@ -4,8 +4,11 @@ -- a distributed table can be cross joined with a reference table -- and the CROSS JOIN can be in the outer part of an outer JOIN SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 LEFT JOIN users_table u ON (e2.user_id = u.user_id); -ERROR: cannot perform distributed planning on this query -DETAIL: Cartesian products are currently unsupported + count +--------------------------------------------------------------------- + 176649 +(1 row) + -- two distributed tables cannot be cross joined -- as it lacks distribution key equality SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u; @@ -30,11 +33,17 @@ SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table r (1 row) SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table; -ERROR: cannot perform distributed planning on this query -DETAIL: Cartesian products are currently unsupported + count +--------------------------------------------------------------------- + 606 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 RIGHT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table; -ERROR: cannot perform distributed planning on this query -DETAIL: Cartesian products are currently unsupported + count +--------------------------------------------------------------------- + 606 +(1 row) + SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 CROSS JOIN users_table; count --------------------------------------------------------------------- diff --git a/src/test/regress/expected/cte_inline.out b/src/test/regress/expected/cte_inline.out index b479ad7ac..39d48e915 100644 --- a/src/test/regress/expected/cte_inline.out +++ b/src/test/regress/expected/cte_inline.out @@ -779,18 +779,6 @@ FROM cte LEFT JOIN test_table USING (key); DEBUG: CTE cte is going to be inlined via distributed planning DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] -DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] -DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823] -DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647] -DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1] -DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647] -DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825] -DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1] -DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823] count --------------------------------------------------------------------- 1021 diff --git a/src/test/regress/expected/join_pushdown.out b/src/test/regress/expected/join_pushdown.out index 3bb4f0981..c71478d30 100644 --- a/src/test/regress/expected/join_pushdown.out +++ b/src/test/regress/expected/join_pushdown.out @@ -151,7 +151,8 @@ ORDER BY 1; -- Full outer join with different distribution column types, should error out SELECT * FROM test_table_1 full join test_table_2 using(id); -ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator +ERROR: cannot push down this subquery +DETAIL: Shards of relations in subquery need to have 1-to-1 shard partitioning -- Test when the non-distributed column has the value of NULL INSERT INTO test_table_1 VALUES(7, NULL); INSERT INTO test_table_2 VALUES(7, NULL); diff --git a/src/test/regress/expected/multi_outer_join.out b/src/test/regress/expected/multi_outer_join.out index 17b13773d..c8fe61a95 100644 --- a/src/test/regress/expected/multi_outer_join.out +++ b/src/test/regress/expected/multi_outer_join.out @@ -174,6 +174,7 @@ FROM multi_outer_join_left a LEFT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey) WHERE r_custkey = 5 or r_custkey > 15; +LOG: join order: [ "multi_outer_join_left" ][ reference join "multi_outer_join_right_reference" ] min | max --------------------------------------------------------------------- 5 | 5 @@ -276,7 +277,9 @@ SELECT count(*) FROM multi_outer_join_left a LEFT JOIN multi_outer_join_right b ON (l_nationkey = r_nationkey); -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +LOG: join order: [ "multi_outer_join_left" ][ dual partition join "multi_outer_join_right" ] +ERROR: the query contains a join that requires repartitioning +HINT: Set citus.enable_repartition_joins to on to enable repartitioning -- Anti-join should return customers for which there is no row in the right table SELECT min(l_custkey), max(l_custkey) @@ -309,6 +312,7 @@ FROM multi_outer_join_left a LEFT JOIN multi_outer_join_right b ON (l_custkey = r_custkey) WHERE r_custkey = 21 or r_custkey < 10; +LOG: join order: [ "multi_outer_join_left" ][ local partition join "multi_outer_join_right" ] min | max --------------------------------------------------------------------- 21 | 21 diff --git a/src/test/regress/expected/multi_outer_join_reference.out b/src/test/regress/expected/multi_outer_join_reference.out index aca91bda7..83e8d33f9 100644 --- a/src/test/regress/expected/multi_outer_join_reference.out +++ b/src/test/regress/expected/multi_outer_join_reference.out @@ -170,6 +170,7 @@ FROM multi_outer_join_left_hash a LEFT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey) WHERE r_custkey = 5 or r_custkey > 15; +LOG: join order: [ "multi_outer_join_left_hash" ][ reference join "multi_outer_join_right_reference" ] min | max --------------------------------------------------------------------- 5 | 5 @@ -274,7 +275,9 @@ SELECT count(*) FROM multi_outer_join_left_hash a LEFT JOIN multi_outer_join_right_hash b ON (l_nationkey = r_nationkey); -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +LOG: join order: [ "multi_outer_join_left_hash" ][ dual partition join "multi_outer_join_right_hash" ] +ERROR: the query contains a join that requires repartitioning +HINT: Set citus.enable_repartition_joins to on to enable repartitioning -- Anti-join should return customers for which there is no row in the right table SELECT min(l_custkey), max(l_custkey) @@ -307,6 +310,7 @@ FROM multi_outer_join_left_hash a LEFT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey) WHERE r_custkey = 21 or r_custkey < 10; +LOG: join order: [ "multi_outer_join_left_hash" ][ reference join "multi_outer_join_right_reference" ] min | max --------------------------------------------------------------------- 21 | 21 diff --git a/src/test/regress/expected/multi_subquery_behavioral_analytics.out b/src/test/regress/expected/multi_subquery_behavioral_analytics.out index 171dc731d..9284b7f92 100644 --- a/src/test/regress/expected/multi_subquery_behavioral_analytics.out +++ b/src/test/regress/expected/multi_subquery_behavioral_analytics.out @@ -561,9 +561,9 @@ ORDER BY 1 DESC, 2 DESC LIMIT 3; user_id | value_2 --------------------------------------------------------------------- - 6 | 4 - 6 | 4 - 6 | 4 + 5 | 5 + 5 | 5 + 5 | 2 (3 rows) --------------------------------------------------------------------- diff --git a/src/test/regress/expected/multi_subquery_in_where_reference_clause.out b/src/test/regress/expected/multi_subquery_in_where_reference_clause.out index 7bf59629d..52cbe3917 100644 --- a/src/test/regress/expected/multi_subquery_in_where_reference_clause.out +++ b/src/test/regress/expected/multi_subquery_in_where_reference_clause.out @@ -44,10 +44,7 @@ ORDER BY user_id LIMIT 3; user_id --------------------------------------------------------------------- - 1 - 2 - 3 -(3 rows) +(0 rows) -- subqueries in WHERE with NOT EXISTS operator, should not work since -- there is a correlated subquery in WHERE clause @@ -65,8 +62,7 @@ WHERE users_reference_table.user_id = events_table.user_id ) LIMIT 3; -ERROR: the query contains a join that requires repartitioning -HINT: Set citus.enable_repartition_joins to on to enable repartitioning +ERROR: correlated subqueries are not supported when the FROM clause contains a reference table -- immutable functions are also treated as reference tables, query should not -- work since there is a correlated subquery in the WHERE clause SELECT diff --git a/src/test/regress/expected/multi_view.out b/src/test/regress/expected/multi_view.out index 1bedf32c8..11f78ea34 100644 --- a/src/test/regress/expected/multi_view.out +++ b/src/test/regress/expected/multi_view.out @@ -143,7 +143,7 @@ SELECT distinct(o_orderkey) FROM priority_orders join air_shipped_lineitems SELECT o_orderkey, l_orderkey FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey) ORDER BY o_orderkey LIMIT 1; o_orderkey | l_orderkey --------------------------------------------------------------------- - 32 | 32 + 2 | (1 row) -- however, this works @@ -157,7 +157,7 @@ SELECT count(*) FROM priority_orders left join lineitem_hash_part ON (o_orderkey SELECT count(*) FROM priority_orders right join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR'; count --------------------------------------------------------------------- - 700 + 1706 (1 row) -- view on the outer side is supported diff --git a/src/test/regress/expected/recurring_outer_join.out b/src/test/regress/expected/recurring_outer_join.out index 3cd7cc6dc..4829cae47 100644 --- a/src/test/regress/expected/recurring_outer_join.out +++ b/src/test/regress/expected/recurring_outer_join.out @@ -385,15 +385,14 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c 18 (1 row) - -- not supported because we join t3 (inner rel of the anti join) with a column - -- of reference table, not with the distribution column of the other distributed - -- table (t2) + -- supported by join order planner SELECT COUNT(*) FROM ref_1 t1 JOIN dist_1 t2 ON (t1.a = t2.a) WHERE NOT EXISTS (SELECT * FROM dist_1 t3 WHERE t1.a = a); -ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +ERROR: the query contains a join that requires repartitioning +HINT: Set citus.enable_repartition_joins to on to enable repartitioning -- supported because the semi join is performed based on distribution keys -- of the distributed tables SELECT COUNT(*) FROM diff --git a/src/test/regress/expected/subquery_in_where.out b/src/test/regress/expected/subquery_in_where.out index 7e1360db1..eb56acd87 100644 --- a/src/test/regress/expected/subquery_in_where.out +++ b/src/test/regress/expected/subquery_in_where.out @@ -1018,7 +1018,7 @@ WHERE NOT EXISTS WHERE u.user_id = e.user_id and e.value_2 > 10000 LIMIT 1); avg --------------------------------------------------------------------- - 2.5385934819897084 + 2.5544554455445545 (1 row) -- a [correlated] lateral join can also be pushed down even if the subquery diff --git a/src/test/regress/sql/recurring_outer_join.sql b/src/test/regress/sql/recurring_outer_join.sql index e26df4b86..cb2baa80c 100644 --- a/src/test/regress/sql/recurring_outer_join.sql +++ b/src/test/regress/sql/recurring_outer_join.sql @@ -160,9 +160,7 @@ SELECT COUNT(*) FROM ref_1 LEFT JOIN (dist_1 t1 LEFT JOIN dist_1 t2 USING (a)) q ON (t1.a = t2.a) WHERE t1.a IN (SELECT a FROM dist_1 t3); - -- not supported because we join t3 (inner rel of the anti join) with a column - -- of reference table, not with the distribution column of the other distributed - -- table (t2) + -- supported by join order planner SELECT COUNT(*) FROM ref_1 t1 JOIN dist_1 t2