mirror of https://github.com/citusdata/citus.git
In the MERGE join clause, there is a datatype mismatch between target's distribution column
and the expression originating from the source. If the types are different, Citus uses different hash functions for the two column types, which might lead to incorrect repartitioning of the result datapull/7077/head
parent
186804c119
commit
846cbc3a39
|
@ -1193,17 +1193,28 @@ SourceResultPartitionColumnIndex(Query *mergeQuery, List *sourceTargetList,
|
||||||
List *mergeJoinConditionList = WhereClauseList(mergeQuery->jointree);
|
List *mergeJoinConditionList = WhereClauseList(mergeQuery->jointree);
|
||||||
Var *targetColumn = targetRelation->partitionColumn;
|
Var *targetColumn = targetRelation->partitionColumn;
|
||||||
Var *sourceRepartitionVar = NULL;
|
Var *sourceRepartitionVar = NULL;
|
||||||
|
bool foundTypeMismatch = false;
|
||||||
|
|
||||||
OpExpr *validJoinClause =
|
OpExpr *validJoinClause =
|
||||||
SinglePartitionJoinClause(list_make1(targetColumn), mergeJoinConditionList);
|
SinglePartitionJoinClause(list_make1(targetColumn), mergeJoinConditionList,
|
||||||
|
&foundTypeMismatch);
|
||||||
if (!validJoinClause)
|
if (!validJoinClause)
|
||||||
{
|
{
|
||||||
|
if (foundTypeMismatch)
|
||||||
|
{
|
||||||
|
ereport(ERROR, (errmsg("In the MERGE ON clause, there is a datatype mismatch "
|
||||||
|
"between target's distribution "
|
||||||
|
"column and the expression originating from the source."),
|
||||||
|
errdetail(
|
||||||
|
"If the types are different, Citus uses different hash "
|
||||||
|
"functions for the two column types, which might "
|
||||||
|
"lead to incorrect repartitioning of the result data")));
|
||||||
|
}
|
||||||
|
|
||||||
ereport(ERROR, (errmsg("The required join operation is missing between "
|
ereport(ERROR, (errmsg("The required join operation is missing between "
|
||||||
"the target's distribution column and any "
|
"the target's distribution column and any "
|
||||||
"expression originating from the source. The "
|
"expression originating from the source. The "
|
||||||
"issue may arise from either a non-equi-join or "
|
"issue may arise from a non-equi-join."),
|
||||||
"a mismatch in the datatypes of the columns being "
|
|
||||||
"joined."),
|
|
||||||
errdetail("Without a equi-join condition on the target's "
|
errdetail("Without a equi-join condition on the target's "
|
||||||
"distribution column, the source rows "
|
"distribution column, the source rows "
|
||||||
"cannot be efficiently redistributed, and "
|
"cannot be efficiently redistributed, and "
|
||||||
|
|
|
@ -999,7 +999,8 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
||||||
}
|
}
|
||||||
|
|
||||||
OpExpr *joinClause =
|
OpExpr *joinClause =
|
||||||
SinglePartitionJoinClause(currentPartitionColumnList, applicableJoinClauses);
|
SinglePartitionJoinClause(currentPartitionColumnList, applicableJoinClauses,
|
||||||
|
NULL);
|
||||||
if (joinClause != NULL)
|
if (joinClause != NULL)
|
||||||
{
|
{
|
||||||
if (currentPartitionMethod == DISTRIBUTE_BY_HASH)
|
if (currentPartitionMethod == DISTRIBUTE_BY_HASH)
|
||||||
|
@ -1037,7 +1038,8 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
||||||
*/
|
*/
|
||||||
List *candidatePartitionColumnList = list_make1(candidatePartitionColumn);
|
List *candidatePartitionColumnList = list_make1(candidatePartitionColumn);
|
||||||
joinClause = SinglePartitionJoinClause(candidatePartitionColumnList,
|
joinClause = SinglePartitionJoinClause(candidatePartitionColumnList,
|
||||||
applicableJoinClauses);
|
applicableJoinClauses,
|
||||||
|
NULL);
|
||||||
if (joinClause != NULL)
|
if (joinClause != NULL)
|
||||||
{
|
{
|
||||||
if (candidatePartitionMethod == DISTRIBUTE_BY_HASH)
|
if (candidatePartitionMethod == DISTRIBUTE_BY_HASH)
|
||||||
|
@ -1078,8 +1080,14 @@ SinglePartitionJoin(JoinOrderNode *currentJoinNode, TableEntry *candidateTable,
|
||||||
* clause exists, the function returns NULL.
|
* clause exists, the function returns NULL.
|
||||||
*/
|
*/
|
||||||
OpExpr *
|
OpExpr *
|
||||||
SinglePartitionJoinClause(List *partitionColumnList, List *applicableJoinClauses)
|
SinglePartitionJoinClause(List *partitionColumnList, List *applicableJoinClauses, bool
|
||||||
|
*foundTypeMismatch)
|
||||||
{
|
{
|
||||||
|
if (foundTypeMismatch)
|
||||||
|
{
|
||||||
|
*foundTypeMismatch = false;
|
||||||
|
}
|
||||||
|
|
||||||
if (list_length(partitionColumnList) == 0)
|
if (list_length(partitionColumnList) == 0)
|
||||||
{
|
{
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -1121,6 +1129,10 @@ SinglePartitionJoinClause(List *partitionColumnList, List *applicableJoinClauses
|
||||||
{
|
{
|
||||||
ereport(DEBUG1, (errmsg("single partition column types do not "
|
ereport(DEBUG1, (errmsg("single partition column types do not "
|
||||||
"match")));
|
"match")));
|
||||||
|
if (foundTypeMismatch)
|
||||||
|
{
|
||||||
|
*foundTypeMismatch = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2140,7 +2140,8 @@ ApplySinglePartitionJoin(MultiNode *leftNode, MultiNode *rightNode,
|
||||||
* we introduce a (re-)partition operator for the other column.
|
* we introduce a (re-)partition operator for the other column.
|
||||||
*/
|
*/
|
||||||
OpExpr *joinClause = SinglePartitionJoinClause(partitionColumnList,
|
OpExpr *joinClause = SinglePartitionJoinClause(partitionColumnList,
|
||||||
applicableJoinClauses);
|
applicableJoinClauses,
|
||||||
|
NULL);
|
||||||
Assert(joinClause != NULL);
|
Assert(joinClause != NULL);
|
||||||
|
|
||||||
/* both are verified in SinglePartitionJoinClause to not be NULL, assert is to guard */
|
/* both are verified in SinglePartitionJoinClause to not be NULL, assert is to guard */
|
||||||
|
|
|
@ -99,7 +99,8 @@ extern bool NodeIsEqualsOpExpr(Node *node);
|
||||||
extern bool IsSupportedReferenceJoin(JoinType joinType, bool leftIsReferenceTable,
|
extern bool IsSupportedReferenceJoin(JoinType joinType, bool leftIsReferenceTable,
|
||||||
bool rightIsReferenceTable);
|
bool rightIsReferenceTable);
|
||||||
extern OpExpr * SinglePartitionJoinClause(List *partitionColumnList,
|
extern OpExpr * SinglePartitionJoinClause(List *partitionColumnList,
|
||||||
List *applicableJoinClauses);
|
List *applicableJoinClauses,
|
||||||
|
bool *foundTypeMismatch);
|
||||||
extern OpExpr * DualPartitionJoinClause(List *applicableJoinClauses);
|
extern OpExpr * DualPartitionJoinClause(List *applicableJoinClauses);
|
||||||
extern Var * LeftColumnOrNULL(OpExpr *joinClause);
|
extern Var * LeftColumnOrNULL(OpExpr *joinClause);
|
||||||
extern Var * RightColumnOrNULL(OpExpr *joinClause);
|
extern Var * RightColumnOrNULL(OpExpr *joinClause);
|
||||||
|
|
|
@ -3065,7 +3065,7 @@ WHEN MATCHED AND t.customer_id = 200 THEN
|
||||||
DELETE
|
DELETE
|
||||||
WHEN NOT MATCHED THEN
|
WHEN NOT MATCHED THEN
|
||||||
INSERT VALUES(s.customer_id, s.order_id, s.order_center, 1, s.order_time);
|
INSERT VALUES(s.customer_id, s.order_id, s.order_center, 1, s.order_time);
|
||||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
|
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
|
||||||
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
||||||
SELECT * FROM target_filter ORDER BY 1, 2;
|
SELECT * FROM target_filter ORDER BY 1, 2;
|
||||||
customer_id | last_order_id | order_center | order_count | last_order
|
customer_id | last_order_id | order_center | order_count | last_order
|
||||||
|
@ -3414,7 +3414,7 @@ MERGE INTO t1
|
||||||
UPDATE SET val = t1.val + 1
|
UPDATE SET val = t1.val + 1
|
||||||
WHEN NOT MATCHED THEN
|
WHEN NOT MATCHED THEN
|
||||||
INSERT (id, val) VALUES (s1_res.id, s1_res.val);
|
INSERT (id, val) VALUES (s1_res.id, s1_res.val);
|
||||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
|
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
|
||||||
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
||||||
-- Join condition without target distribution column
|
-- Join condition without target distribution column
|
||||||
WITH s1_res AS (
|
WITH s1_res AS (
|
||||||
|
@ -3424,7 +3424,7 @@ WITH s1_res AS (
|
||||||
WHEN MATCHED THEN DELETE
|
WHEN MATCHED THEN DELETE
|
||||||
WHEN NOT MATCHED THEN
|
WHEN NOT MATCHED THEN
|
||||||
INSERT (id, val) VALUES (s1_res.id, s1_res.val);
|
INSERT (id, val) VALUES (s1_res.id, s1_res.val);
|
||||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
|
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
|
||||||
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
||||||
--
|
--
|
||||||
-- Reference tables
|
-- Reference tables
|
||||||
|
@ -3842,7 +3842,7 @@ EXPLAIN MERGE INTO demo_distributed t
|
||||||
USING demo_source_table s
|
USING demo_source_table s
|
||||||
ON (s.id2 + 1 = t.id1)
|
ON (s.id2 + 1 = t.id1)
|
||||||
WHEN MATCHED THEN UPDATE SET val1 = 15;
|
WHEN MATCHED THEN UPDATE SET val1 = 15;
|
||||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
|
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
|
||||||
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
||||||
-- Sub-queries and CTEs are not allowed in actions and ON clause
|
-- Sub-queries and CTEs are not allowed in actions and ON clause
|
||||||
CREATE TABLE target_1 (a int, b int, c int);
|
CREATE TABLE target_1 (a int, b int, c int);
|
||||||
|
@ -3947,6 +3947,14 @@ WHEN MATCHED THEN
|
||||||
DELETE;
|
DELETE;
|
||||||
ERROR: Sub-queries and CTEs are not allowed in ON clause for MERGE with repartitioning
|
ERROR: Sub-queries and CTEs are not allowed in ON clause for MERGE with repartitioning
|
||||||
HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query
|
HINT: Consider making the source and target colocated and joined on the distribution column to make it a routable query
|
||||||
|
-- Datatype mismatch between target and source join column
|
||||||
|
WITH src AS (SELECT FLOOR(b) AS a FROM source_2)
|
||||||
|
MERGE INTO target_1 t
|
||||||
|
USING src
|
||||||
|
ON t.a = src.a
|
||||||
|
WHEN MATCHED THEN DELETE;
|
||||||
|
ERROR: In the MERGE ON clause, there is a datatype mismatch between target's distribution column and the expression originating from the source.
|
||||||
|
DETAIL: If the types are different, Citus uses different hash functions for the two column types, which might lead to incorrect repartitioning of the result data
|
||||||
RESET client_min_messages;
|
RESET client_min_messages;
|
||||||
DROP SERVER foreign_server CASCADE;
|
DROP SERVER foreign_server CASCADE;
|
||||||
NOTICE: drop cascades to 3 other objects
|
NOTICE: drop cascades to 3 other objects
|
||||||
|
|
|
@ -406,7 +406,7 @@ SELECT create_distributed_table('tbl2', 'x');
|
||||||
|
|
||||||
MERGE INTO tbl1 USING tbl2 ON (true)
|
MERGE INTO tbl1 USING tbl2 ON (true)
|
||||||
WHEN MATCHED THEN DELETE;
|
WHEN MATCHED THEN DELETE;
|
||||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
|
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
|
||||||
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
||||||
-- also, inside subqueries & ctes
|
-- also, inside subqueries & ctes
|
||||||
WITH targq AS (
|
WITH targq AS (
|
||||||
|
@ -414,7 +414,7 @@ WITH targq AS (
|
||||||
)
|
)
|
||||||
MERGE INTO tbl1 USING targq ON (true)
|
MERGE INTO tbl1 USING targq ON (true)
|
||||||
WHEN MATCHED THEN DELETE;
|
WHEN MATCHED THEN DELETE;
|
||||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
|
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
|
||||||
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
||||||
WITH foo AS (
|
WITH foo AS (
|
||||||
MERGE INTO tbl1 USING tbl2 ON (true)
|
MERGE INTO tbl1 USING tbl2 ON (true)
|
||||||
|
@ -431,7 +431,7 @@ USING tbl2
|
||||||
ON (true)
|
ON (true)
|
||||||
WHEN MATCHED THEN
|
WHEN MATCHED THEN
|
||||||
DO NOTHING;
|
DO NOTHING;
|
||||||
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from either a non-equi-join or a mismatch in the datatypes of the columns being joined.
|
ERROR: The required join operation is missing between the target's distribution column and any expression originating from the source. The issue may arise from a non-equi-join.
|
||||||
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
DETAIL: Without a equi-join condition on the target's distribution column, the source rows cannot be efficiently redistributed, and the NOT-MATCHED condition cannot be evaluated unambiguously. This can result in incorrect or unexpected results when attempting to merge tables in a distributed setting
|
||||||
MERGE INTO tbl1 t
|
MERGE INTO tbl1 t
|
||||||
USING tbl2
|
USING tbl2
|
||||||
|
|
|
@ -2474,6 +2474,13 @@ ON (t1.a = t2.a AND (SELECT max(a) > 55 FROM cte_2))
|
||||||
WHEN MATCHED THEN
|
WHEN MATCHED THEN
|
||||||
DELETE;
|
DELETE;
|
||||||
|
|
||||||
|
-- Datatype mismatch between target and source join column
|
||||||
|
WITH src AS (SELECT FLOOR(b) AS a FROM source_2)
|
||||||
|
MERGE INTO target_1 t
|
||||||
|
USING src
|
||||||
|
ON t.a = src.a
|
||||||
|
WHEN MATCHED THEN DELETE;
|
||||||
|
|
||||||
RESET client_min_messages;
|
RESET client_min_messages;
|
||||||
DROP SERVER foreign_server CASCADE;
|
DROP SERVER foreign_server CASCADE;
|
||||||
DROP FUNCTION merge_when_and_write();
|
DROP FUNCTION merge_when_and_write();
|
||||||
|
|
Loading…
Reference in New Issue