use pushdown planner if restriction keys are equivalent

outer-join-noncolocated-dist-tables
aykutbozkurt 2022-12-20 14:26:07 +03:00
parent 3cc9536bef
commit 427b2054ee
12 changed files with 61 additions and 46 deletions

View File

@ -183,12 +183,6 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery,
return true;
}
/* if there is right recursive join, fix join order can not handle it */
if (HasRightRecursiveJoin(rewrittenQuery->jointree))
{
return true;
}
/*
* Some unsupported join clauses in logical planner
* may be supported by subquery pushdown planner.
@ -199,6 +193,28 @@ ShouldUseSubqueryPushDown(Query *originalQuery, Query *rewrittenQuery,
return true;
}
/*
* some unsupported outer joins in logical planner
* may be supported by pushdown planner.
*/
if (FindNodeMatchingCheckFunction((Node *) rewrittenQuery->jointree, IsOuterJoinExpr))
{
/* we can pushdown outer joins if all restrictions are on partition columns */
if (RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext))
{
return true;
}
/*
* join order planner only handles left recursive join trees (except inner joins,
* which are commutative)
*/
if (HasRightRecursiveJoin(rewrittenQuery->jointree))
{
return true;
}
}
/* check if the query has a window function and it is safe to pushdown */
if (originalQuery->hasWindowFuncs &&
SafeToPushdownWindowFunction(originalQuery, NULL))

View File

@ -4,8 +4,11 @@
-- a distributed table can be cross joined with a reference table
-- and the CROSS JOIN can be in the outer part of an outer JOIN
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 LEFT JOIN users_table u ON (e2.user_id = u.user_id);
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
count
---------------------------------------------------------------------
176649
(1 row)
-- two distributed tables cannot be cross joined
-- as it lacks distribution key equality
SELECT count(*) FROM events_reference_table e1 CROSS JOIN events_table e2 CROSS JOIN users_table u;
@ -30,11 +33,17 @@ SELECT count(*) FROM users_ref_test_table ref1 INNER JOIN users_ref_test_table r
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 LEFT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
count
---------------------------------------------------------------------
606
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 RIGHT JOIN users_ref_test_table ref2 on ref1.id = ref2.id CROSS JOIN users_table;
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
count
---------------------------------------------------------------------
606
(1 row)
SELECT count(*) FROM users_ref_test_table ref1 CROSS JOIN users_ref_test_table ref2 CROSS JOIN users_table;
count
---------------------------------------------------------------------

View File

@ -779,18 +779,6 @@ FROM
cte LEFT JOIN test_table USING (key);
DEBUG: CTE cte is going to be inlined via distributed planning
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647]
DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823]
DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647]
DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1]
DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647]
DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1]
DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823]
count
---------------------------------------------------------------------
1021

View File

@ -151,7 +151,8 @@ ORDER BY 1;
-- Full outer join with different distribution column types, should error out
SELECT * FROM test_table_1 full join test_table_2 using(id);
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot push down this subquery
DETAIL: Shards of relations in subquery need to have 1-to-1 shard partitioning
-- Test when the non-distributed column has the value of NULL
INSERT INTO test_table_1 VALUES(7, NULL);
INSERT INTO test_table_2 VALUES(7, NULL);

View File

@ -174,6 +174,7 @@ FROM
multi_outer_join_left a LEFT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey)
WHERE
r_custkey = 5 or r_custkey > 15;
LOG: join order: [ "multi_outer_join_left" ][ reference join "multi_outer_join_right_reference" ]
min | max
---------------------------------------------------------------------
5 | 5
@ -276,7 +277,9 @@ SELECT
count(*)
FROM
multi_outer_join_left a LEFT JOIN multi_outer_join_right b ON (l_nationkey = r_nationkey);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
LOG: join order: [ "multi_outer_join_left" ][ dual partition join "multi_outer_join_right" ]
ERROR: the query contains a join that requires repartitioning
HINT: Set citus.enable_repartition_joins to on to enable repartitioning
-- Anti-join should return customers for which there is no row in the right table
SELECT
min(l_custkey), max(l_custkey)
@ -309,6 +312,7 @@ FROM
multi_outer_join_left a LEFT JOIN multi_outer_join_right b ON (l_custkey = r_custkey)
WHERE
r_custkey = 21 or r_custkey < 10;
LOG: join order: [ "multi_outer_join_left" ][ local partition join "multi_outer_join_right" ]
min | max
---------------------------------------------------------------------
21 | 21

View File

@ -170,6 +170,7 @@ FROM
multi_outer_join_left_hash a LEFT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey)
WHERE
r_custkey = 5 or r_custkey > 15;
LOG: join order: [ "multi_outer_join_left_hash" ][ reference join "multi_outer_join_right_reference" ]
min | max
---------------------------------------------------------------------
5 | 5
@ -274,7 +275,9 @@ SELECT
count(*)
FROM
multi_outer_join_left_hash a LEFT JOIN multi_outer_join_right_hash b ON (l_nationkey = r_nationkey);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
LOG: join order: [ "multi_outer_join_left_hash" ][ dual partition join "multi_outer_join_right_hash" ]
ERROR: the query contains a join that requires repartitioning
HINT: Set citus.enable_repartition_joins to on to enable repartitioning
-- Anti-join should return customers for which there is no row in the right table
SELECT
min(l_custkey), max(l_custkey)
@ -307,6 +310,7 @@ FROM
multi_outer_join_left_hash a LEFT JOIN multi_outer_join_right_reference b ON (l_custkey = r_custkey)
WHERE
r_custkey = 21 or r_custkey < 10;
LOG: join order: [ "multi_outer_join_left_hash" ][ reference join "multi_outer_join_right_reference" ]
min | max
---------------------------------------------------------------------
21 | 21

View File

@ -561,9 +561,9 @@ ORDER BY 1 DESC, 2 DESC
LIMIT 3;
user_id | value_2
---------------------------------------------------------------------
6 | 4
6 | 4
6 | 4
5 | 5
5 | 5
5 | 2
(3 rows)
---------------------------------------------------------------------

View File

@ -44,10 +44,7 @@ ORDER BY user_id
LIMIT 3;
user_id
---------------------------------------------------------------------
1
2
3
(3 rows)
(0 rows)
-- subqueries in WHERE with NOT EXISTS operator, should not work since
-- there is a correlated subquery in WHERE clause
@ -65,8 +62,7 @@ WHERE
users_reference_table.user_id = events_table.user_id
)
LIMIT 3;
ERROR: the query contains a join that requires repartitioning
HINT: Set citus.enable_repartition_joins to on to enable repartitioning
ERROR: correlated subqueries are not supported when the FROM clause contains a reference table
-- immutable functions are also treated as reference tables, query should not
-- work since there is a correlated subquery in the WHERE clause
SELECT

View File

@ -143,7 +143,7 @@ SELECT distinct(o_orderkey) FROM priority_orders join air_shipped_lineitems
SELECT o_orderkey, l_orderkey FROM priority_orders left join air_shipped_lineitems ON (o_orderkey = l_orderkey) ORDER BY o_orderkey LIMIT 1;
o_orderkey | l_orderkey
---------------------------------------------------------------------
32 | 32
2 |
(1 row)
-- however, this works
@ -157,7 +157,7 @@ SELECT count(*) FROM priority_orders left join lineitem_hash_part ON (o_orderkey
SELECT count(*) FROM priority_orders right join lineitem_hash_part ON (o_orderkey = l_orderkey) WHERE l_shipmode ='AIR';
count
---------------------------------------------------------------------
700
1706
(1 row)
-- view on the outer side is supported

View File

@ -385,15 +385,14 @@ DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS c
18
(1 row)
-- not supported because we join t3 (inner rel of the anti join) with a column
-- of reference table, not with the distribution column of the other distributed
-- table (t2)
-- supported by join order planner
SELECT COUNT(*) FROM
ref_1 t1
JOIN dist_1 t2
ON (t1.a = t2.a)
WHERE NOT EXISTS (SELECT * FROM dist_1 t3 WHERE t1.a = a);
ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns
ERROR: the query contains a join that requires repartitioning
HINT: Set citus.enable_repartition_joins to on to enable repartitioning
-- supported because the semi join is performed based on distribution keys
-- of the distributed tables
SELECT COUNT(*) FROM

View File

@ -1018,7 +1018,7 @@ WHERE NOT EXISTS
WHERE u.user_id = e.user_id and e.value_2 > 10000 LIMIT 1);
avg
---------------------------------------------------------------------
2.5385934819897084
2.5544554455445545
(1 row)
-- a [correlated] lateral join can also be pushed down even if the subquery

View File

@ -160,9 +160,7 @@ SELECT COUNT(*) FROM ref_1 LEFT JOIN (dist_1 t1 LEFT JOIN dist_1 t2 USING (a)) q
ON (t1.a = t2.a)
WHERE t1.a IN (SELECT a FROM dist_1 t3);
-- not supported because we join t3 (inner rel of the anti join) with a column
-- of reference table, not with the distribution column of the other distributed
-- table (t2)
-- supported by join order planner
SELECT COUNT(*) FROM
ref_1 t1
JOIN dist_1 t2