mirror of https://github.com/citusdata/citus.git
In the MERGE join clause, there is a datatype mismatch between target's distribution column
and the expression originating from the source. If the types are different, Citus uses different hash functions for the two column types, which might lead to incorrect repartitioning of the result data.
pull/7077/head
parent
186804c119
commit
846cbc3a39
|
@ -1193,17 +1193,28 @@ SourceResultPartitionColumnIndex(Query *mergeQuery, List *sourceTargetList,
|
|||
List *mergeJoinConditionList = WhereClauseList(mergeQuery->jointree);
|
||||
Var *targetColumn = targetRelation->partitionColumn;
|
||||
Var *sourceRepartitionVar = NULL;
|
||||
bool foundTypeMismatch = false;
|
||||
|
||||
OpExpr *validJoinClause =
|
||||
SinglePartitionJoinClause(list_make1(targetColumn), mergeJoinConditionList);
|
||||
SinglePartitionJoinClause(list_make1(targetColumn), mergeJoinConditionList,
|
||||
&foundTypeMismatch);
|
||||
if (!validJoinClause)
|
||||
{
|
||||
if (foundTypeMismatch)
|
||||
{
|
||||
ereport(ERROR, (errmsg("In the MERGE ON clause, there is a datatype mismatch "
|
||||
"between target's distribution "
|
||||
"column and the expression originating from the source."),
|
||||
errdetail(
|
||||
"If the types are different, Citus uses different hash "
|
||||
"functions for the two column types, which might "
|
||||
"lead to incorrect repartitioning of the result data")));
|
||||
}
|
||||
|
||||
ereport(ERROR, (errmsg("The required join operation is missing between "
|
||||
"the target's distribution column and any "
|
||||
"expression originating from the source. The "
|
||||
"issue may arise from either a non-equi-join or "
|
||||
"a mismatch in the datatypes of the columns being "
|
||||
"joined."),
|
||||
"issue may arise from a non-equi-join."),
|
||||
errdetail("Without a equi-join condition on the target's "
|
||||
"distribution column, the source rows "
|
||||
"cannot be efficiently redistributed, and "
|
||||
|
|
|
@ -999,7 +999,8 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
|||
}
|
||||
|
||||
OpExpr *joinClause =
|
||||
SinglePartitionJoinClause(currentPartitionColumnList, applicableJoinClauses);
|
||||
SinglePartitionJoinClause(currentPartitionColumnList, applicableJoinClauses,
|
||||
NULL);
|
||||
if (joinClause != NULL)
|
||||
{
|
||||
if (currentPartitionMethod == DISTRIBUTE_BY_HASH)
|
||||
|
@ -1037,7 +1038,8 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
|||
*/
|
||||
List *candidatePartitionColumnList = list_make1(candidatePartitionColumn);
|
||||
joinClause = SinglePartitionJoinClause(candidatePartitionColumnList,
|
||||
applicableJoinClauses);
|
||||
applicableJoinClauses,
|
||||
NULL);
|
||||
if (joinClause != NULL)
|
||||
{
|
||||
if (candidatePartitionMethod == DISTRIBUTE_BY_HASH)
|
||||
|
@ -1078,8 +1080,14 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
|||
* clause exists, the function returns NULL.
|
||||
*/
|
||||
OpExpr *
|
||||
SinglePartitionJoinClause(List *partitionColumnList, List *applicableJoinClauses)
|
||||
SinglePartitionJoinClause(List *partitionColumnList, List *applicableJoinClauses, bool
|
||||
*foundTypeMismatch)
|
||||
{
|
||||
if (foundTypeMismatch)
|
||||
{
|
||||
*foundTypeMismatch = false;
|
||||
}
|
||||
|
||||
if (list_length(partitionColumnList) == 0)
|
||||
{
|
||||
return NULL;
|
||||
|
@ -1121,6 +1129,10 @@ SinglePartitionJoinClause(List *partitionColumnList, List *applicableJoinClauses
|
|||
{
|
||||
ereport(DEBUG1, (errmsg("single partition column types do not "
|
||||
"match")));
|
||||
if (foundTypeMismatch)
|
||||
{
|
||||
*foundTypeMismatch = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2140,7 +2140,8 @@ ApplySinglePartitionJoin(MultiNode *leftNode, MultiNode *rightNode,
|
|||
* we introduce a (re-)partition operator for the other column.
|
||||
*/
|
||||
OpExpr *joinClause = SinglePartitionJoinClause(partitionColumnList,
|
||||
applicableJoinClauses);
|
||||
applicableJoinClauses,
|
||||
NULL);
|
||||
Assert(joinClause != NULL);
|
||||
|
||||
/* both are verified in SinglePartitionJoinClause to not be NULL, assert is to guard */
|
||||
|
|
|
@ -99,7 +99,8 @@ extern bool NodeIsEqualsOpExpr(Node *node);
|
|||
extern bool IsSupportedReferenceJoin(JoinType joinType, bool leftIsReferenceTable,
|
||||
bool rightIsReferenceTable);
|
||||
extern OpExpr * SinglePartitionJoinClause(List *partitionColumnList,
|
||||
List *applicableJoinClauses);
|
||||
List *applicableJoinClauses,
|
||||
bool *foundTypeMismatch);
|
||||
extern OpExpr * DualPartitionJoinClause(List *applicableJoinClauses);
|
||||
extern Var * LeftColumnOrNULL(OpExpr *joinClause);
|
||||
extern Var * RightColumnOrNULL(OpExpr *joinClause);
|
||||
|
|
|
@ -3065,7 +3065,7 @@ WHEN MATCHED AND t.customer_id = 200 THEN
|
|||
DELETE
|
||||
WHEN NOT MATCHED THEN
|
||||
INSERT VALUES(s.customer_id, s.order_id, s.order_center, 1, s.order_time);
|
||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
|
||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
|
||||
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
||||
SELECT * FROM target_filter ORDER BY 1, 2;
|
||||
customer_id | last_order_id | order_center | order_count | last_order
|
||||
|
@ -3414,7 +3414,7 @@ MERGE INTO t1
|
|||
UPDATE SET val = t1.val + 1
|
||||
WHEN NOT MATCHED THEN
|
||||
INSERT (id, val) VALUES (s1_res.id, s1_res.val);
|
||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
|
||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
|
||||
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
||||
-- Join condition without target distribution column
|
||||
WITH s1_res AS (
|
||||
|
@ -3424,7 +3424,7 @@ WITH s1_res AS (
|
|||
WHEN MATCHED THEN DELETE
|
||||
WHEN NOT MATCHED THEN
|
||||
INSERT (id, val) VALUES (s1_res.id, s1_res.val);
|
||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
|
||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
|
||||
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
||||
--
|
||||
-- Reference tables
|
||||
|
@ -3842,7 +3842,7 @@ EXPLAIN MERGE INTO demo_distributed t
|
|||
USING demo_source_table s
|
||||
ON (s.id2 + 1 = t.id1)
|
||||
WHEN MATCHED THEN UPDATE SET val1 = 15;
|
||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
|
||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
|
||||
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
||||
-- Sub-queries and CTEs are not allowed in actions and ON clause
|
||||
CREATE TABLE target_1 (a int, b int, c int);
|
||||
|
@ -3947,6 +3947,14 @@ WHEN MATCHED THEN
|
|||
DELETE;
|
||||
ERROR: Sub-queries and CTEs are not allowed in ON clause for MERGE with repartitioning
|
||||
HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query
|
||||
-- Datatype mismatch between target and source join column
|
||||
WITH src AS (SELECT FLOOR(b) AS a FROM source_2)
|
||||
MERGE INTO target_1 t
|
||||
USING src
|
||||
ON t.a = src.a
|
||||
WHEN MATCHED THEN DELETE;
|
||||
ERROR: In the MERGE ON clause, there is a datatype mismatch between target's distribution column and the expression originating from the source.
|
||||
DETAIL: If the types are different, Citus uses different hash functions for the two column types, which might lead to incorrect repartitioning of the result data
|
||||
RESET client_min_messages;
|
||||
DROP SERVER foreign_server CASCADE;
|
||||
NOTICE: drop cascades to 3 other objects
|
||||
|
|
|
@ -406,7 +406,7 @@ SELECT create_distributed_table('tbl2', 'x');
|
|||
|
||||
MERGE INTO tbl1 USING tbl2 ON (true)
|
||||
WHEN MATCHED THEN DELETE;
|
||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
|
||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
|
||||
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
||||
-- also, inside subqueries & ctes
|
||||
WITH targq AS (
|
||||
|
@ -414,7 +414,7 @@ WITH targq AS (
|
|||
)
|
||||
MERGE INTO tbl1 USING targq ON (true)
|
||||
WHEN MATCHED THEN DELETE;
|
||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
|
||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
|
||||
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
||||
WITH foo AS (
|
||||
MERGE INTO tbl1 USING tbl2 ON (true)
|
||||
|
@ -431,7 +431,7 @@ USING tbl2
|
|||
ON (true)
|
||||
WHEN MATCHED THEN
|
||||
DO NOTHING;
|
||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
|
||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
|
||||
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
||||
MERGE INTO tbl1 t
|
||||
USING tbl2
|
||||
|
|
|
@ -2474,6 +2474,13 @@ ON (t1.a = t2.a AND (SELECT max(a) > 55 FROM cte_2))
|
|||
WHEN MATCHED THEN
|
||||
DELETE;
|
||||
|
||||
-- Datatype mismatch between target and source join column
|
||||
WITH src AS (SELECT FLOOR(b) AS a FROM source_2)
|
||||
MERGE INTO target_1 t
|
||||
USING src
|
||||
ON t.a = src.a
|
||||
WHEN MATCHED THEN DELETE;
|
||||
|
||||
RESET client_min_messages;
|
||||
DROP SERVER foreign_server CASCADE;
|
||||
DROP FUNCTION merge_when_and_write();
|
||||
|
|
Loading…
Reference in New Issue