diff --git a/src/backend/distributed/planner/merge_planner.c b/src/backend/distributed/planner/merge_planner.c index 213e914d9..f6af545cb 100644 --- a/src/backend/distributed/planner/merge_planner.c +++ b/src/backend/distributed/planner/merge_planner.c @@ -1193,17 +1193,28 @@ SourceResultPartitionColumnIndex(Query *mergeQuery, List *sourceTargetList, List *mergeJoinConditionList = WhereClauseList(mergeQuery->jointree); Var *targetColumn = targetRelation->partitionColumn; Var *sourceRepartitionVar = NULL; + bool foundTypeMismatch = false; OpExpr *validJoinClause = - SinglePartitionJoinClause(list_make1(targetColumn), mergeJoinConditionList); + SinglePartitionJoinClause(list_make1(targetColumn), mergeJoinConditionList, + &foundTypeMismatch); if (!validJoinClause) { + if (foundTypeMismatch) + { + ereport(ERROR, (errmsg("In the MERGE ON clause, there is a datatype mismatch " + "between target's distribution " + "column and the expression originating from the source."), + errdetail( + "If the types are different, Citus uses different hash " + "functions for the two column types, which might " + "lead to incorrect repartitioning of the result data"))); + } + ereport(ERROR, (errmsg("The required join operation is missing between " "the target's distribution column and any " "expression originating from the source. The " - "issue may arise from either a non-equi-join or " - "a mismatch in the datatypes of the columns being " - "joined."), + "issue may arise from a non-equi-join."), errdetail("Without a equi-join condition on the target's " "distribution column, the source rows " "cannot be efficiently redistributed, and " diff --git a/src/backend/distributed/planner/multi_join_order.c b/src/backend/distributed/planner/multi_join_order.c index 79007b70d..7714a1e08 100644 --- a/src/backend/distributed/planner/multi_join_order.c +++ b/src/backend/distributed/planner/multi_join_order.c @@ -999,7 +999,8 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable, } OpExpr *joinClause = - SinglePartitionJoinClause(currentPartitionColumnList, applicableJoinClauses); + SinglePartitionJoinClause(currentPartitionColumnList, applicableJoinClauses, + NULL); if (joinClause != NULL) { if (currentPartitionMethod == DISTRIBUTE_BY_HASH) @@ -1037,7 +1038,8 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable, */ List *candidatePartitionColumnList = list_make1(candidatePartitionColumn); joinClause = SinglePartitionJoinClause(candidatePartitionColumnList, - applicableJoinClauses); + applicableJoinClauses, + NULL); if (joinClause != NULL) { if (candidatePartitionMethod == DISTRIBUTE_BY_HASH) @@ -1078,8 +1080,14 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable, * clause exists, the function returns NULL. */ OpExpr * -SinglePartitionJoinClause(List *partitionColumnList, List *applicableJoinClauses) +SinglePartitionJoinClause(List *partitionColumnList, List *applicableJoinClauses, bool + *foundTypeMismatch) { + if (foundTypeMismatch) + { + *foundTypeMismatch = false; + } + if (list_length(partitionColumnList) == 0) { return NULL; @@ -1121,6 +1129,10 @@ SinglePartitionJoinClause(List *partitionColumnList, List *applicableJoinClauses { ereport(DEBUG1, (errmsg("single partition column types do not " "match"))); + if (foundTypeMismatch) + { + *foundTypeMismatch = true; + } } } } diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index fa9e5bb61..0969e0c7c 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -2140,7 +2140,8 @@ ApplySinglePartitionJoin(MultiNode *leftNode, MultiNode *rightNode, * we introduce a (re-)partition operator for the other column. */ OpExpr *joinClause = SinglePartitionJoinClause(partitionColumnList, - applicableJoinClauses); + applicableJoinClauses, + NULL); Assert(joinClause != NULL); /* both are verified in SinglePartitionJoinClause to not be NULL, assert is to guard */ diff --git a/src/include/distributed/multi_join_order.h b/src/include/distributed/multi_join_order.h index 4e4ba1dd2..5eff90506 100644 --- a/src/include/distributed/multi_join_order.h +++ b/src/include/distributed/multi_join_order.h @@ -99,7 +99,8 @@ extern bool NodeIsEqualsOpExpr(Node *node); extern bool IsSupportedReferenceJoin(JoinType joinType, bool leftIsReferenceTable, bool rightIsReferenceTable); extern OpExpr * SinglePartitionJoinClause(List *partitionColumnList, - List *applicableJoinClauses); + List *applicableJoinClauses, + bool *foundTypeMismatch); extern OpExpr * DualPartitionJoinClause(List *applicableJoinClauses); extern Var * LeftColumnOrNULL(OpExpr *joinClause); extern Var * RightColumnOrNULL(OpExpr *joinClause); diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out index 0fd5ba3c3..a73467e81 100644 --- a/src/test/regress/expected/merge.out +++ b/src/test/regress/expected/merge.out @@ -3065,7 +3065,7 @@ WHEN MATCHED AND t.customer_id = 200 THEN DELETE WHEN NOT MATCHED THEN INSERT VALUES(s.customer_id, s.order_id, s.order_center, 1, s.order_time); -ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined. +ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join. DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting SELECT * FROM target_filter ORDER BY 1, 2; customer_id | last_order_id | order_center | order_count | last_order @@ -3414,7 +3414,7 @@ MERGE INTO t1 UPDATE SET val = t1.val + 1 WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined. +ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join. DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting -- Join condition without target distribution column WITH s1_res AS ( @@ -3424,7 +3424,7 @@ WITH s1_res AS ( WHEN MATCHED THEN DELETE WHEN NOT MATCHED THEN INSERT (id, val) VALUES (s1_res.id, s1_res.val); -ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined. +ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join. DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting -- -- Reference tables @@ -3842,7 +3842,7 @@ EXPLAIN MERGE INTO demo_distributed t USING demo_source_table s ON (s.id2 + 1 = t.id1) WHEN MATCHED THEN UPDATE SET val1 = 15; -ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined. +ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join. DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting -- Sub-queries and CTEs are not allowed in actions and ON clause CREATE TABLE target_1 (a int, b int, c int); @@ -3947,6 +3947,14 @@ WHEN MATCHED THEN DELETE; ERROR: Sub-queries and CTEs are not allowed in ON clause for MERGE with repartitioning HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query +-- Datatype mismatch between target and source join column +WITH src AS (SELECT FLOOR(b) AS a FROM source_2) +MERGE INTO target_1 t +USING src +ON t.a = src.a +WHEN MATCHED THEN DELETE; +ERROR: In the MERGE ON clause, there is a datatype mismatch between target's distribution column and the expression originating from the source. +DETAIL: If the types are different, Citus uses different hash functions for the two column types, which might lead to incorrect repartitioning of the result data RESET client_min_messages; DROP SERVER foreign_server CASCADE; NOTICE: drop cascades to 3 other objects diff --git a/src/test/regress/expected/pg15.out b/src/test/regress/expected/pg15.out index 036f8371f..cb346ff57 100644 --- a/src/test/regress/expected/pg15.out +++ b/src/test/regress/expected/pg15.out @@ -406,7 +406,7 @@ SELECT create_distributed_table('tbl2', 'x'); MERGE INTO tbl1 USING tbl2 ON (true) WHEN MATCHED THEN DELETE; -ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined. +ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join. DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting -- also, inside subqueries & ctes WITH targq AS ( @@ -414,7 +414,7 @@ WITH targq AS ( ) MERGE INTO tbl1 USING targq ON (true) WHEN MATCHED THEN DELETE; -ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined. +ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join. DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting WITH foo AS ( MERGE INTO tbl1 USING tbl2 ON (true) @@ -431,7 +431,7 @@ USING tbl2 ON (true) WHEN MATCHED THEN DO NOTHING; -ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined. +ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join. DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting MERGE INTO tbl1 t USING tbl2 diff --git a/src/test/regress/sql/merge.sql b/src/test/regress/sql/merge.sql index 62722c6c8..a41e80841 100644 --- a/src/test/regress/sql/merge.sql +++ b/src/test/regress/sql/merge.sql @@ -2474,6 +2474,13 @@ ON (t1.a = t2.a AND (SELECT max(a) > 55 FROM cte_2)) WHEN MATCHED THEN DELETE; +-- Datatype mismatch between target and source join column +WITH src AS (SELECT FLOOR(b) AS a FROM source_2) +MERGE INTO target_1 t +USING src +ON t.a = src.a +WHEN MATCHED THEN DELETE; + RESET client_min_messages; DROP SERVER foreign_server CASCADE; DROP FUNCTION merge_when_and_write();