In the MERGE join clause, there is a datatype mismatch between target's distribution column

and the expression originating from the source. If the types are different, Citus uses
different hash functions for the two column types, which might lead to incorrect repartitioning
of the result data
pull/7077/head
Teja Mupparti 2023-07-25 20:42:28 -07:00 committed by Teja Mupparti
parent 186804c119
commit 846cbc3a39
7 changed files with 56 additions and 16 deletions

View File

@ -1193,17 +1193,28 @@ SourceResultPartitionColumnIndex(Query *mergeQuery, List *sourceTargetList,
List *mergeJoinConditionList = WhereClauseList(mergeQuery->jointree); List *mergeJoinConditionList = WhereClauseList(mergeQuery->jointree);
Var *targetColumn = targetRelation->partitionColumn; Var *targetColumn = targetRelation->partitionColumn;
Var *sourceRepartitionVar = NULL; Var *sourceRepartitionVar = NULL;
bool foundTypeMismatch = false;
OpExpr *validJoinClause = OpExpr *validJoinClause =
SinglePartitionJoinClause(list_make1(targetColumn), mergeJoinConditionList); SinglePartitionJoinClause(list_make1(targetColumn), mergeJoinConditionList,
&foundTypeMismatch);
if (!validJoinClause) if (!validJoinClause)
{ {
if (foundTypeMismatch)
{
ereport(ERROR, (errmsg("In the MERGE ON clause, there is a datatype mismatch "
"between target's distribution "
"column and the expression originating from the source."),
errdetail(
"If the types are different, Citus uses different hash "
"functions for the two column types, which might "
"lead to incorrect repartitioning of the result data")));
}
ereport(ERROR, (errmsg("The required join operation is missing between " ereport(ERROR, (errmsg("The required join operation is missing between "
"the target's distribution column and any " "the target's distribution column and any "
"expression originating from the source. The " "expression originating from the source. The "
"issue may arise from either a non-equi-join or " "issue may arise from a non-equi-join."),
"a mismatch in the datatypes of the columns being "
"joined."),
errdetail("Without a equi-join condition on the target's " errdetail("Without a equi-join condition on the target's "
"distribution column, the source rows " "distribution column, the source rows "
"cannot be efficiently redistributed, and " "cannot be efficiently redistributed, and "

View File

@ -999,7 +999,8 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
} }
OpExpr *joinClause = OpExpr *joinClause =
SinglePartitionJoinClause(currentPartitionColumnList, applicableJoinClauses); SinglePartitionJoinClause(currentPartitionColumnList, applicableJoinClauses,
NULL);
if (joinClause != NULL) if (joinClause != NULL)
{ {
if (currentPartitionMethod == DISTRIBUTE_BY_HASH) if (currentPartitionMethod == DISTRIBUTE_BY_HASH)
@ -1037,7 +1038,8 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
*/ */
List *candidatePartitionColumnList = list_make1(candidatePartitionColumn); List *candidatePartitionColumnList = list_make1(candidatePartitionColumn);
joinClause = SinglePartitionJoinClause(candidatePartitionColumnList, joinClause = SinglePartitionJoinClause(candidatePartitionColumnList,
applicableJoinClauses); applicableJoinClauses,
NULL);
if (joinClause != NULL) if (joinClause != NULL)
{ {
if (candidatePartitionMethod == DISTRIBUTE_BY_HASH) if (candidatePartitionMethod == DISTRIBUTE_BY_HASH)
@ -1078,8 +1080,14 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
* clause exists, the function returns NULL. * clause exists, the function returns NULL.
*/ */
OpExpr * OpExpr *
SinglePartitionJoinClause(List *partitionColumnList, List *applicableJoinClauses) SinglePartitionJoinClause(List *partitionColumnList, List *applicableJoinClauses, bool
*foundTypeMismatch)
{ {
if (foundTypeMismatch)
{
*foundTypeMismatch = false;
}
if (list_length(partitionColumnList) == 0) if (list_length(partitionColumnList) == 0)
{ {
return NULL; return NULL;
@ -1121,6 +1129,10 @@ SinglePartitionJoinClause(List *partitionColumnList, List *applicableJoinClauses
{ {
ereport(DEBUG1, (errmsg("single partition column types do not " ereport(DEBUG1, (errmsg("single partition column types do not "
"match"))); "match")));
if (foundTypeMismatch)
{
*foundTypeMismatch = true;
}
} }
} }
} }

View File

@ -2140,7 +2140,8 @@ ApplySinglePartitionJoin(MultiNode *leftNode, MultiNode *rightNode,
* we introduce a (re-)partition operator for the other column. * we introduce a (re-)partition operator for the other column.
*/ */
OpExpr *joinClause = SinglePartitionJoinClause(partitionColumnList, OpExpr *joinClause = SinglePartitionJoinClause(partitionColumnList,
applicableJoinClauses); applicableJoinClauses,
NULL);
Assert(joinClause != NULL); Assert(joinClause != NULL);
/* both are verified in SinglePartitionJoinClause to not be NULL, assert is to guard */ /* both are verified in SinglePartitionJoinClause to not be NULL, assert is to guard */

View File

@ -99,7 +99,8 @@ extern bool NodeIsEqualsOpExpr(Node *node);
extern bool IsSupportedReferenceJoin(JoinType joinType, bool leftIsReferenceTable, extern bool IsSupportedReferenceJoin(JoinType joinType, bool leftIsReferenceTable,
bool rightIsReferenceTable); bool rightIsReferenceTable);
extern OpExpr * SinglePartitionJoinClause(List *partitionColumnList, extern OpExpr * SinglePartitionJoinClause(List *partitionColumnList,
List *applicableJoinClauses); List *applicableJoinClauses,
bool *foundTypeMismatch);
extern OpExpr * DualPartitionJoinClause(List *applicableJoinClauses); extern OpExpr * DualPartitionJoinClause(List *applicableJoinClauses);
extern Var * LeftColumnOrNULL(OpExpr *joinClause); extern Var * LeftColumnOrNULL(OpExpr *joinClause);
extern Var * RightColumnOrNULL(OpExpr *joinClause); extern Var * RightColumnOrNULL(OpExpr *joinClause);

View File

@ -3065,7 +3065,7 @@ WHEN MATCHED AND t.customer_id = 200 THEN
DELETE DELETE
WHEN NOT MATCHED THEN WHEN NOT MATCHED THEN
INSERT VALUES(s.customer_id, s.order_id, s.order_center, 1, s.order_time); INSERT VALUES(s.customer_id, s.order_id, s.order_center, 1, s.order_time);
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined. ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
SELECT * FROM target_filter ORDER BY 1, 2; SELECT * FROM target_filter ORDER BY 1, 2;
customer_id | last_order_id | order_center | order_count | last_order customer_id | last_order_id | order_center | order_count | last_order
@ -3414,7 +3414,7 @@ MERGE INTO t1
UPDATE SET val = t1.val + 1 UPDATE SET val = t1.val + 1
WHEN NOT MATCHED THEN WHEN NOT MATCHED THEN
INSERT (id, val) VALUES (s1_res.id, s1_res.val); INSERT (id, val) VALUES (s1_res.id, s1_res.val);
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined. ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
-- Join condition without target distribution column -- Join condition without target distribution column
WITH s1_res AS ( WITH s1_res AS (
@ -3424,7 +3424,7 @@ WITH s1_res AS (
WHEN MATCHED THEN DELETE WHEN MATCHED THEN DELETE
WHEN NOT MATCHED THEN WHEN NOT MATCHED THEN
INSERT (id, val) VALUES (s1_res.id, s1_res.val); INSERT (id, val) VALUES (s1_res.id, s1_res.val);
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined. ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
-- --
-- Reference tables -- Reference tables
@ -3842,7 +3842,7 @@ EXPLAIN MERGE INTO demo_distributed t
USING demo_source_table s USING demo_source_table s
ON (s.id2 + 1 = t.id1) ON (s.id2 + 1 = t.id1)
WHEN MATCHED THEN UPDATE SET val1 = 15; WHEN MATCHED THEN UPDATE SET val1 = 15;
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined. ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
-- Sub-queries and CTEs are not allowed in actions and ON clause -- Sub-queries and CTEs are not allowed in actions and ON clause
CREATE TABLE target_1 (a int, b int, c int); CREATE TABLE target_1 (a int, b int, c int);
@ -3947,6 +3947,14 @@ WHEN MATCHED THEN
DELETE; DELETE;
ERROR: Sub-queries and CTEs are not allowed in ON clause for MERGE with repartitioning ERROR: Sub-queries and CTEs are not allowed in ON clause for MERGE with repartitioning
HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query
-- Datatype mismatch between target and source join column
WITH src AS (SELECT FLOOR(b) AS a FROM source_2)
MERGE INTO target_1 t
USING src
ON t.a = src.a
WHEN MATCHED THEN DELETE;
ERROR: In the MERGE ON clause, there is a datatype mismatch between target's distribution column and the expression originating from the source.
DETAIL: If the types are different, Citus uses different hash functions for the two column types, which might lead to incorrect repartitioning of the result data
RESET client_min_messages; RESET client_min_messages;
DROP SERVER foreign_server CASCADE; DROP SERVER foreign_server CASCADE;
NOTICE: drop cascades to 3 other objects NOTICE: drop cascades to 3 other objects

View File

@ -406,7 +406,7 @@ SELECT create_distributed_table('tbl2', 'x');
MERGE INTO tbl1 USING tbl2 ON (true) MERGE INTO tbl1 USING tbl2 ON (true)
WHEN MATCHED THEN DELETE; WHEN MATCHED THEN DELETE;
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined. ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
-- also, inside subqueries & ctes -- also, inside subqueries & ctes
WITH targq AS ( WITH targq AS (
@ -414,7 +414,7 @@ WITH targq AS (
) )
MERGE INTO tbl1 USING targq ON (true) MERGE INTO tbl1 USING targq ON (true)
WHEN MATCHED THEN DELETE; WHEN MATCHED THEN DELETE;
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined. ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
WITH foo AS ( WITH foo AS (
MERGE INTO tbl1 USING tbl2 ON (true) MERGE INTO tbl1 USING tbl2 ON (true)
@ -431,7 +431,7 @@ USING tbl2
ON (true) ON (true)
WHEN MATCHED THEN WHEN MATCHED THEN
DO NOTHING; DO NOTHING;
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined. ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
MERGE INTO tbl1 t MERGE INTO tbl1 t
USING tbl2 USING tbl2

View File

@ -2474,6 +2474,13 @@ ON (t1.a = t2.a AND (SELECT max(a) > 55 FROM cte_2))
WHEN MATCHED THEN WHEN MATCHED THEN
DELETE; DELETE;
-- Datatype mismatch between target and source join column
WITH src AS (SELECT FLOOR(b) AS a FROM source_2)
MERGE INTO target_1 t
USING src
ON t.a = src.a
WHEN MATCHED THEN DELETE;
RESET client_min_messages; RESET client_min_messages;
DROP SERVER foreign_server CASCADE; DROP SERVER foreign_server CASCADE;
DROP FUNCTION merge_when_and_write(); DROP FUNCTION merge_when_and_write();