In the MERGE join clause, there is a datatype mismatch between target's distribution column

and the expression originating from the source. If the types are different, Citus uses
different hash functions for the two column types, which might lead to incorrect repartitioning
of the result data
pull/7077/head
Teja Mupparti 2023-07-25 20:42:28 -07:00 committed by Teja Mupparti
parent 186804c119
commit 846cbc3a39
7 changed files with 56 additions and 16 deletions

View File

@ -1193,17 +1193,28 @@ SourceResultPartitionColumnIndex(Query *mergeQuery, List *sourceTargetList,
List *mergeJoinConditionList = WhereClauseList(mergeQuery->jointree);
Var *targetColumn = targetRelation->partitionColumn;
Var *sourceRepartitionVar = NULL;
bool foundTypeMismatch = false;
OpExpr *validJoinClause =
SinglePartitionJoinClause(list_make1(targetColumn), mergeJoinConditionList);
SinglePartitionJoinClause(list_make1(targetColumn), mergeJoinConditionList,
&foundTypeMismatch);
if (!validJoinClause)
{
if (foundTypeMismatch)
{
ereport(ERROR, (errmsg("In the MERGE ON clause, there is a datatype mismatch "
"between target's distribution "
"column and the expression originating from the source."),
errdetail(
"If the types are different, Citus uses different hash "
"functions for the two column types, which might "
"lead to incorrect repartitioning of the result data")));
}
ereport(ERROR, (errmsg("The required join operation is missing between "
"the target's distribution column and any "
"expression originating from the source. The "
"issue may arise from either a non-equi-join or "
"a mismatch in the datatypes of the columns being "
"joined."),
"issue may arise from a non-equi-join."),
errdetail("Without a equi-join condition on the target's "
"distribution column, the source rows "
"cannot be efficiently redistributed, and "

View File

@ -999,7 +999,8 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
}
OpExpr *joinClause =
SinglePartitionJoinClause(currentPartitionColumnList, applicableJoinClauses);
SinglePartitionJoinClause(currentPartitionColumnList, applicableJoinClauses,
NULL);
if (joinClause != NULL)
{
if (currentPartitionMethod == DISTRIBUTE_BY_HASH)
@ -1037,7 +1038,8 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
*/
List *candidatePartitionColumnList = list_make1(candidatePartitionColumn);
joinClause = SinglePartitionJoinClause(candidatePartitionColumnList,
applicableJoinClauses);
applicableJoinClauses,
NULL);
if (joinClause != NULL)
{
if (candidatePartitionMethod == DISTRIBUTE_BY_HASH)
@ -1078,8 +1080,14 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
* clause exists, the function returns NULL.
*/
OpExpr *
SinglePartitionJoinClause(List *partitionColumnList, List *applicableJoinClauses)
SinglePartitionJoinClause(List *partitionColumnList, List *applicableJoinClauses, bool
*foundTypeMismatch)
{
if (foundTypeMismatch)
{
*foundTypeMismatch = false;
}
if (list_length(partitionColumnList) == 0)
{
return NULL;
@ -1121,6 +1129,10 @@ SinglePartitionJoinClause(List *partitionColumnList, List *applicableJoinClauses
{
ereport(DEBUG1, (errmsg("single partition column types do not "
"match")));
if (foundTypeMismatch)
{
*foundTypeMismatch = true;
}
}
}
}

View File

@ -2140,7 +2140,8 @@ ApplySinglePartitionJoin(MultiNode *leftNode, MultiNode *rightNode,
* we introduce a (re-)partition operator for the other column.
*/
OpExpr *joinClause = SinglePartitionJoinClause(partitionColumnList,
applicableJoinClauses);
applicableJoinClauses,
NULL);
Assert(joinClause != NULL);
/* both are verified in SinglePartitionJoinClause to not be NULL, assert is to guard */

View File

@ -99,7 +99,8 @@ extern bool NodeIsEqualsOpExpr(Node *node);
extern bool IsSupportedReferenceJoin(JoinType joinType, bool leftIsReferenceTable,
bool rightIsReferenceTable);
extern OpExpr * SinglePartitionJoinClause(List *partitionColumnList,
List *applicableJoinClauses);
List *applicableJoinClauses,
bool *foundTypeMismatch);
extern OpExpr * DualPartitionJoinClause(List *applicableJoinClauses);
extern Var * LeftColumnOrNULL(OpExpr *joinClause);
extern Var * RightColumnOrNULL(OpExpr *joinClause);

View File

@ -3065,7 +3065,7 @@ WHEN MATCHED AND t.customer_id = 200 THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.customer_id, s.order_id, s.order_center, 1, s.order_time);
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
SELECT * FROM target_filter ORDER BY 1, 2;
customer_id | last_order_id | order_center | order_count | last_order
@ -3414,7 +3414,7 @@ MERGE INTO t1
UPDATE SET val = t1.val + 1
WHEN NOT MATCHED THEN
INSERT (id, val) VALUES (s1_res.id, s1_res.val);
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
-- Join condition without target distribution column
WITH s1_res AS (
@ -3424,7 +3424,7 @@ WITH s1_res AS (
WHEN MATCHED THEN DELETE
WHEN NOT MATCHED THEN
INSERT (id, val) VALUES (s1_res.id, s1_res.val);
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
--
-- Reference tables
@ -3842,7 +3842,7 @@ EXPLAIN MERGE INTO demo_distributed t
USING demo_source_table s
ON (s.id2 + 1 = t.id1)
WHEN MATCHED THEN UPDATE SET val1 = 15;
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
-- Sub-queries and CTEs are not allowed in actions and ON clause
CREATE TABLE target_1 (a int, b int, c int);
@ -3947,6 +3947,14 @@ WHEN MATCHED THEN
DELETE;
ERROR: Sub-queries and CTEs are not allowed in ON clause for MERGE with repartitioning
HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query
-- Datatype mismatch between target and source join column
WITH src AS (SELECT FLOOR(b) AS a FROM source_2)
MERGE INTO target_1 t
USING src
ON t.a = src.a
WHEN MATCHED THEN DELETE;
ERROR: In the MERGE ON clause, there is a datatype mismatch between target's distribution column and the expression originating from the source.
DETAIL: If the types are different, Citus uses different hash functions for the two column types, which might lead to incorrect repartitioning of the result data
RESET client_min_messages;
DROP SERVER foreign_server CASCADE;
NOTICE: drop cascades to 3 other objects

View File

@ -406,7 +406,7 @@ SELECT create_distributed_table('tbl2', 'x');
MERGE INTO tbl1 USING tbl2 ON (true)
WHEN MATCHED THEN DELETE;
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
-- also, inside subqueries & ctes
WITH targq AS (
@ -414,7 +414,7 @@ WITH targq AS (
)
MERGE INTO tbl1 USING targq ON (true)
WHEN MATCHED THEN DELETE;
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
WITH foo AS (
MERGE INTO tbl1 USING tbl2 ON (true)
@ -431,7 +431,7 @@ USING tbl2
ON (true)
WHEN MATCHED THEN
DO NOTHING;
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
MERGE INTO tbl1 t
USING tbl2

View File

@ -2474,6 +2474,13 @@ ON (t1.a = t2.a AND (SELECT max(a) > 55 FROM cte_2))
WHEN MATCHED THEN
DELETE;
-- Datatype mismatch between target and source join column
WITH src AS (SELECT FLOOR(b) AS a FROM source_2)
MERGE INTO target_1 t
USING src
ON t.a = src.a
WHEN MATCHED THEN DELETE;
RESET client_min_messages;
DROP SERVER foreign_server CASCADE;
DROP FUNCTION merge_when_and_write();