Consider all applicable join clauses instead of returning after finding the first one

outer-join-noncolocated-dist-tables
aykutbozkurt 2023-01-21 04:47:14 +03:00
parent 02411a4410
commit d637814e46
15 changed files with 133 additions and 81 deletions

View File

@ -386,7 +386,7 @@ ExtractPushdownJoinRestrictInfos(List *restrictInfoListOfJoin,
foreach_ptr(restrictInfo, restrictInfoListOfJoin)
{
if (!restrictInfo->can_join &&
(!IS_OUTER_JOIN(joinType) || RINFO_IS_PUSHED_DOWN(restrictInfo, joinRelids)))
RINFO_IS_PUSHED_DOWN(restrictInfo, joinRelids))
{
joinFilterRestrictInfoList = lappend(joinFilterRestrictInfoList,
restrictInfo);
@ -405,6 +405,8 @@ static List *
FindJoinClauseForTables(List *joinRestrictInfoListList, List *generatedEcJoinClauseList,
List *lhsTableIdList, uint32 rhsTableId, JoinType joinType)
{
List *applicableJoinClauseListList = NIL;
List *joinRestrictInfoList = NIL;
foreach_ptr(joinRestrictInfoList, joinRestrictInfoListList)
{
@ -423,7 +425,8 @@ FindJoinClauseForTables(List *joinRestrictInfoListList, List *generatedEcJoinCla
pushdownableJoinRestrictInfoList);
List *nonPushdownableJoinRestrictClauseList =
get_all_actual_clauses(nonPushdownableJoinRestrictInfoList);
return nonPushdownableJoinRestrictClauseList;
applicableJoinClauseListList = lappend(applicableJoinClauseListList,
nonPushdownableJoinRestrictClauseList);
}
}
}
@ -436,12 +439,13 @@ FindJoinClauseForTables(List *joinRestrictInfoListList, List *generatedEcJoinCla
{
if (IsApplicableJoinClause(lhsTableIdList, rhsTableId, ecClause))
{
return list_make1(ecClause);
applicableJoinClauseListList = lappend(applicableJoinClauseListList,
list_make1(ecClause));
}
}
}
return NIL;
return applicableJoinClauseListList;
}
@ -985,11 +989,13 @@ EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode,
*/
List *joinedTableIdList = RangeTableIdList(joinedTableList);
uint32 candidateTableId = candidateTable->rangeTableId;
List *applicableJoinClauseList = FindJoinClauseForTables(joinRestrictInfoListList,
generatedEcJoinClauseList,
joinedTableIdList,
candidateTableId,
joinType);
List *applicableJoinClauseListList = FindJoinClauseForTables(joinRestrictInfoListList,
generatedEcJoinClauseList,
joinedTableIdList,
candidateTableId,
joinType);
List *emptyClauseList = NIL;
applicableJoinClauseListList = lappend(applicableJoinClauseListList, emptyClauseList);
/* we then evaluate all join rules in order */
for (uint32 ruleIndex = lowestValidIndex; ruleIndex <= highestValidIndex; ruleIndex++)
@ -997,27 +1003,26 @@ EvaluateJoinRules(List *joinedTableList, JoinOrderNode *currentJoinNode,
JoinRuleType ruleType = (JoinRuleType) ruleIndex;
RuleEvalFunction ruleEvalFunction = JoinRuleEvalFunction(ruleType);
nextJoinNode = (*ruleEvalFunction)(currentJoinNode,
candidateTable,
applicableJoinClauseList,
joinType);
/* break after finding the first join rule that applies */
if (nextJoinNode != NULL)
List *applicableJoinClauseList = NIL;
foreach_ptr(applicableJoinClauseList, applicableJoinClauseListList)
{
break;
nextJoinNode = (*ruleEvalFunction)(currentJoinNode,
candidateTable,
applicableJoinClauseList,
joinType);
/* return after finding the first join rule that applies */
if (nextJoinNode != NULL)
{
nextJoinNode->joinType = joinType;
nextJoinNode->joinClauseList = applicableJoinClauseList;
return nextJoinNode;
}
}
}
if (nextJoinNode == NULL)
{
return NULL;
}
Assert(nextJoinNode != NULL);
nextJoinNode->joinType = joinType;
nextJoinNode->joinClauseList = applicableJoinClauseList;
return nextJoinNode;
Assert(nextJoinNode == NULL);
return NULL;
}

View File

@ -1003,7 +1003,7 @@ WHERE i_id = s_i_id
AND i_id = ol_i_id
GROUP BY extract(YEAR FROM o_entry_d)
ORDER BY l_year;
LOG: join order: [ "order_line" ][ reference join(INNER) "item" ][ local partition join(INNER) "oorder" ][ local partition join(INNER) "customer" ][ reference join(INNER) "nation" ][ reference join(INNER) "region" ][ dual partition join(INNER) "stock" ][ reference join(INNER) "supplier" ][ reference join(INNER) "nation" ]
LOG: join order: [ "order_line" ][ reference join(INNER) "item" ][ local partition join(INNER) "oorder" ][ local partition join(INNER) "customer" ][ reference join(INNER) "nation" ][ reference join(INNER) "region" ][ single hash partition join(INNER) "stock" ][ reference join(INNER) "supplier" ][ reference join(INNER) "nation" ]
l_year | mkt_share
---------------------------------------------------------------------
2008 | 0.50000000000000000000
@ -1036,7 +1036,7 @@ GROUP BY
ORDER BY
n_name,
l_year DESC;
LOG: join order: [ "order_line" ][ reference join(INNER) "item" ][ local partition join(INNER) "oorder" ][ dual partition join(INNER) "stock" ][ reference join(INNER) "supplier" ][ reference join(INNER) "nation" ]
LOG: join order: [ "order_line" ][ reference join(INNER) "item" ][ local partition join(INNER) "oorder" ][ single hash partition join(INNER) "stock" ][ reference join(INNER) "supplier" ][ reference join(INNER) "nation" ]
n_name | l_year | sum_profit
---------------------------------------------------------------------
Germany | 2008 | 3.00

View File

@ -72,6 +72,7 @@ FROM
test b
WHERE t2.y - a.x - b.x = 0
ORDER BY 1,2,3;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
SET client_min_messages TO WARNING;
DROP SCHEMA expression_reference_join CASCADE;

View File

@ -343,7 +343,8 @@ SELECT count(*) FROM lineitem JOIN orders ON l_orderkey = o_orderkey
-- Check that we make sure local joins are between columns only.
SELECT count(*) FROM lineitem, orders WHERE l_orderkey + 1 = o_orderkey;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- Check that we can issue limit/offset queries
-- the subquery is recursively planned since it contains OFFSET, which is not pushdownable
SELECT * FROM (SELECT o_custkey FROM orders GROUP BY o_custkey ORDER BY o_custkey OFFSET 20) sq ORDER BY 1 LIMIT 5;

View File

@ -1104,7 +1104,8 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4,
raw_events_second
WHERE raw_events_first.user_id != raw_events_second.user_id
GROUP BY raw_events_second.user_id) AS foo;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
SET client_min_messages TO DEBUG2;
-- INSERT returns NULL partition key value via coordinator
INSERT INTO agg_events

View File

@ -1104,7 +1104,8 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4,
raw_events_second
WHERE raw_events_first.user_id != raw_events_second.user_id
GROUP BY raw_events_second.user_id) AS foo;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
SET client_min_messages TO DEBUG2;
-- INSERT returns NULL partition key value via coordinator
INSERT INTO agg_events

View File

@ -19,7 +19,8 @@ FROM (
) t
GROUP BY user_id
) q;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Funnel grouped by whether or not a user has done an event
@ -64,7 +65,8 @@ FROM (
) t2 ON (t1.user_id = t2.user_id)
GROUP BY t1.user_id, hasdone_event
) t GROUP BY user_id, hasdone_event;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- not pushable since the JOIN is not an equi join right part of the UNION
-- is not joined on the partition key
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg )
@ -234,7 +236,8 @@ GROUP BY
count_pay, user_id
ORDER BY
count_pay;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- not pushable since the JOIN condition is not equi JOIN
-- (subquery_1 JOIN subquery_2)
-- but, we can plan the query thanks to recursive planning
@ -763,7 +766,8 @@ FROM
) temp
ON users_table.user_id = temp.user_id
WHERE users_table.value_1 < 50;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- not pushable since the join is not on the partition key
INSERT INTO agg_results_third(user_id, value_1_agg, value_3_agg)
SELECT

View File

@ -899,7 +899,7 @@ FROM
WHERE
colocated_table_test.value_1 = reference_table_test.value_1 AND colocated_table_test_2.value_1 = reference_table_test.value_1
ORDER BY 1;
LOG: join order: [ "colocated_table_test" ][ reference join(INNER) "reference_table_test" ][ dual partition join(INNER) "colocated_table_test_2" ]
LOG: join order: [ "colocated_table_test" ][ reference join(INNER) "reference_table_test" ][ local partition join(INNER) "colocated_table_test_2" ]
value_2
---------------------------------------------------------------------
1

View File

@ -1111,7 +1111,7 @@ FROM
WHERE
colocated_table_test.value_1 = reference_table_test.value_1 AND colocated_table_test_2.value_1 = reference_table_test.value_1
ORDER BY reference_table_test.value_2;
LOG: join order: [ "colocated_table_test" ][ reference join(INNER) "reference_table_test" ][ dual partition join(INNER) "colocated_table_test_2" ]
LOG: join order: [ "colocated_table_test" ][ reference join(INNER) "reference_table_test" ][ local partition join(INNER) "colocated_table_test_2" ]
value_2
---------------------------------------------------------------------
1

View File

@ -267,7 +267,8 @@ WHERE
(o_orderkey < l_quantity + 3)
ORDER BY l_orderkey DESC
LIMIT 10;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- implicit typecast supported in equi-join
SELECT l_orderkey
FROM
@ -300,7 +301,8 @@ JOIN
ON (l_orderkey::int4 = o_orderkey::int8)
ORDER BY l_orderkey DESC
LIMIT 10;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- type casts in filters are supported as long as
-- a valid equi-join exists
SELECT l_orderkey
@ -364,7 +366,8 @@ WHERE
(o_orderkey < l_quantity)
ORDER BY l_orderkey DESC
LIMIT 10;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- query is not supported if there is a single
-- join clause with arithmetic expression. It fails
-- with a different error message
@ -376,7 +379,8 @@ JOIN
ON (l_orderkey = o_orderkey + 1)
ORDER BY l_orderkey DESC
LIMIT 10;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- query is not supported if does not have equi-join clause
SELECT l_orderkey
FROM
@ -388,7 +392,8 @@ WHERE
(o_orderkey < l_quantity)
ORDER BY l_orderkey DESC
LIMIT 10;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- outer joins on reference tables with functions works
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM events_table t1
@ -425,7 +430,8 @@ FROM events_table t1
LEFT JOIN users_table t2 ON t1.user_id > t2.user_id
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- outer joins on reference tables with expressions should work
SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
FROM events_table t1
@ -466,7 +472,8 @@ SELECT DISTINCT ON (t1.user_id) t1.user_id, t2.value_1, t2.value_2, t2.value_3
LEFT JOIN users_reference_table t2 ON t1.user_id = trunc(t2.user_id)
ORDER BY 1 DESC, 2 DESC, 3 DESC, 4 DESC
LIMIT 5;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- outer joins as subqueries should work
-- https://github.com/citusdata/citus/issues/2739
SELECT user_id, value_1, event_type

View File

@ -572,7 +572,8 @@ GROUP BY
types
ORDER BY
types;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- similar query with more union statements (to enable UNION tree become larger)
SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType
FROM

View File

@ -857,7 +857,8 @@ FROM (
SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1
) user_id
GROUP BY user_id;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- non-equi join also not supported for UNION ALL
SELECT user_id, sum(counter)
FROM (
@ -866,7 +867,8 @@ FROM (
SELECT events_table.user_id, sum(events_table.value_2) AS counter FROM events_table, users_table WHERE users_table.user_id > events_table.user_id GROUP BY 1
) user_id
GROUP BY user_id;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- joins inside unions are supported -- slightly more comlex than the above
SELECT * FROM
(

View File

@ -1135,9 +1135,9 @@ SELECT * FROM dist1 LEFT JOIN dist2 ON (dist1.x = dist2.x) WHERE dist1.x >2 ORDE
-- inner table will be converted to empty result. Constant filter will be applied before join but will not be pushdowned.
SELECT * FROM dist1 LEFT JOIN dist2 ON (dist1.y = dist2.y AND false) ORDER BY 1,2,3,4;
LOG: join order: [ "dist1" ][ cartesian product(LEFT) "dist2" ]
x | y | x | y
---------------------------------------------------------------------
(0 rows)
1 | 2 | |
3 | 4 | |
(2 rows)
--- constant false filter as base filter for left join.
-- both tables will be converted to empty result .e.g RTE_RESULT

View File

@ -566,7 +566,8 @@ SELECT count(*)
FROM numeric_repartition_first f,
numeric_repartition_second s
WHERE f.id = s.numeric_column;
ERROR: complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- show that the same query works if we use an int column instead of a numeric on the filter clause
SELECT count(*)
FROM numeric_repartition_first f,

View File

@ -98,7 +98,7 @@ FROM
WHERE
r1.id = t1.id AND t2.sum = t1.id;
DEBUG: Router planner cannot handle multi-shard select queries
LOG: join order: [ "single_hash_repartition_first" ][ reference join(INNER) "ref_table" ][ single hash partition join(INNER) "single_hash_repartition_second" ]
LOG: join order: [ "single_hash_repartition_second" ][ reference join(INNER) "ref_table" ][ single hash partition join(INNER) "single_hash_repartition_first" ]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647]
@ -172,7 +172,19 @@ FROM
WHERE
t1.sum = t2.sum AND t1.sum = t3.id;
DEBUG: Router planner cannot handle multi-shard select queries
LOG: join order: [ "single_hash_repartition_first" ][ single hash partition join(INNER) "single_hash_repartition_second" ][ dual partition join(INNER) "single_hash_repartition_first" ]
LOG: join order: [ "single_hash_repartition_first" ][ single hash partition join(INNER) "single_hash_repartition_second" ][ single hash partition join(INNER) "single_hash_repartition_first" ]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647]
DEBUG: join prunable for intervals [-1073741824,-1] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [-1073741824,-1] and [0,1073741823]
DEBUG: join prunable for intervals [-1073741824,-1] and [1073741824,2147483647]
DEBUG: join prunable for intervals [0,1073741823] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [0,1073741823] and [-1073741824,-1]
DEBUG: join prunable for intervals [0,1073741823] and [1073741824,2147483647]
DEBUG: join prunable for intervals [1073741824,2147483647] and [-2147483648,-1073741825]
DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1]
DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823]
DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647]
@ -187,38 +199,18 @@ DEBUG: join prunable for intervals [1073741824,2147483647] and [-1073741824,-1]
DEBUG: join prunable for intervals [1073741824,2147483647] and [0,1073741823]
DEBUG: pruning merge fetch taskId 1
DETAIL: Creating dependency on merge taskId 5
DEBUG: pruning merge fetch taskId 3
DETAIL: Creating dependency on merge taskId 10
DEBUG: pruning merge fetch taskId 5
DETAIL: Creating dependency on merge taskId 15
DEBUG: pruning merge fetch taskId 7
DETAIL: Creating dependency on merge taskId 20
DEBUG: join prunable for task partitionId 0 and 1
DEBUG: join prunable for task partitionId 0 and 2
DEBUG: join prunable for task partitionId 0 and 3
DEBUG: join prunable for task partitionId 1 and 0
DEBUG: join prunable for task partitionId 1 and 2
DEBUG: join prunable for task partitionId 1 and 3
DEBUG: join prunable for task partitionId 2 and 0
DEBUG: join prunable for task partitionId 2 and 1
DEBUG: join prunable for task partitionId 2 and 3
DEBUG: join prunable for task partitionId 3 and 0
DEBUG: join prunable for task partitionId 3 and 1
DEBUG: join prunable for task partitionId 3 and 2
DEBUG: pruning merge fetch taskId 1
DETAIL: Creating dependency on merge taskId 9
DEBUG: pruning merge fetch taskId 2
DETAIL: Creating dependency on merge taskId 5
DEBUG: pruning merge fetch taskId 4
DETAIL: Creating dependency on merge taskId 14
DETAIL: Creating dependency on merge taskId 10
DEBUG: pruning merge fetch taskId 5
DETAIL: Creating dependency on merge taskId 10
DEBUG: pruning merge fetch taskId 7
DETAIL: Creating dependency on merge taskId 19
DETAIL: Creating dependency on merge taskId 15
DEBUG: pruning merge fetch taskId 8
DETAIL: Creating dependency on merge taskId 15
DEBUG: pruning merge fetch taskId 10
DETAIL: Creating dependency on merge taskId 24
DETAIL: Creating dependency on merge taskId 20
DEBUG: pruning merge fetch taskId 11
DETAIL: Creating dependency on merge taskId 20
ERROR: the query contains a join that requires repartitioning
@ -233,17 +225,53 @@ WHERE
DEBUG: Router planner cannot handle multi-shard select queries
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: dual partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: dual partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: dual partition column types do not match
DEBUG: dual partition column types do not match
DEBUG: dual partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: dual partition column types do not match
LOG: join order: [ "single_hash_repartition_second" ][ cartesian product(INNER) "single_hash_repartition_first" ][ dual partition join(INNER) "single_hash_repartition_first" ]
DEBUG: dual partition column types do not match
DEBUG: dual partition column types do not match
DEBUG: dual partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: dual partition column types do not match
DEBUG: dual partition column types do not match
DEBUG: dual partition column types do not match
DEBUG: dual partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: single partition column types do not match
DEBUG: dual partition column types do not match
DEBUG: dual partition column types do not match
DEBUG: dual partition column types do not match
DEBUG: dual partition column types do not match
LOG: join order: [ "single_hash_repartition_first" ][ local partition join(INNER) "single_hash_repartition_first" ][ cartesian product(INNER) "single_hash_repartition_second" ]
ERROR: cannot perform distributed planning on this query
DETAIL: Cartesian products are currently unsupported
-- single repartition query in CTE