From 5bad6c6a1def94e78dafe1583c5ac4487353ba15 Mon Sep 17 00:00:00 2001 From: Parag Jain <40451840+paragikjain@users.noreply.github.com> Date: Fri, 13 Sep 2024 09:46:39 +0530 Subject: [PATCH] [Bug Fix] : writing incorrect data to target Merge repartition Command (#7659) We were writing incorrect data to target collection in some cases of merge command. In case of repartition when source query is RELATION. We were referring to incorrect attribute number that was resulting into this incorrect behavior. Example : ![image](https://github.com/user-attachments/assets/a101cb36-7976-459c-befb-96a55a5b3dc1) ![image](https://github.com/user-attachments/assets/e5c83b7b-5b8e-4d79-a927-95684dc9ba49) I have added fixed tests as part of this PR , Thanks. --- .../distributed/planner/merge_planner.c | 2 +- .../planner/query_colocation_checker.c | 4 +- .../distributed/query_colocation_checker.h | 2 + src/test/regress/expected/merge_vcore.out | 106 ++++++++++++++++++ src/test/regress/sql/merge_vcore.sql | 84 ++++++++++++++ 5 files changed, 194 insertions(+), 4 deletions(-) diff --git a/src/backend/distributed/planner/merge_planner.c b/src/backend/distributed/planner/merge_planner.c index 1f9d17c43..f8a181546 100644 --- a/src/backend/distributed/planner/merge_planner.c +++ b/src/backend/distributed/planner/merge_planner.c @@ -858,7 +858,7 @@ ConvertRelationRTEIntoSubquery(Query *mergeQuery, RangeTblEntry *sourceRte, newRangeTableRef->rtindex = SINGLE_RTE_INDEX; sourceResultsQuery->jointree = makeFromExpr(list_make1(newRangeTableRef), NULL); sourceResultsQuery->targetList = - CreateAllTargetListForRelation(sourceRte->relid, requiredAttributes); + CreateFilteredTargetListForRelation(sourceRte->relid, requiredAttributes); List *restrictionList = GetRestrictInfoListForRelation(sourceRte, plannerRestrictionContext); List *copyRestrictionList = copyObject(restrictionList); diff --git a/src/backend/distributed/planner/query_colocation_checker.c b/src/backend/distributed/planner/query_colocation_checker.c index bef91618e..d298b0f46 100644 --- a/src/backend/distributed/planner/query_colocation_checker.c +++ b/src/backend/distributed/planner/query_colocation_checker.c @@ -45,8 +45,6 @@ static RangeTblEntry * AnchorRte(Query *subquery); static List * UnionRelationRestrictionLists(List *firstRelationList, List *secondRelationList); -static List * CreateFilteredTargetListForRelation(Oid relationId, - List *requiredAttributes); static List * CreateDummyTargetList(Oid relationId, List *requiredAttributes); static TargetEntry * CreateTargetEntryForColumn(Form_pg_attribute attributeTuple, Index rteIndex, @@ -378,7 +376,7 @@ CreateAllTargetListForRelation(Oid relationId, List *requiredAttributes) * only the required columns of the given relation. If there is not required * columns then a dummy NULL column is put as the only entry. */ -static List * +List * CreateFilteredTargetListForRelation(Oid relationId, List *requiredAttributes) { Relation relation = relation_open(relationId, AccessShareLock); diff --git a/src/include/distributed/query_colocation_checker.h b/src/include/distributed/query_colocation_checker.h index 2a46d364c..485e4a033 100644 --- a/src/include/distributed/query_colocation_checker.h +++ b/src/include/distributed/query_colocation_checker.h @@ -39,5 +39,7 @@ extern Query * WrapRteRelationIntoSubquery(RangeTblEntry *rteRelation, List *requiredAttributes, RTEPermissionInfo *perminfo); extern List * CreateAllTargetListForRelation(Oid relationId, List *requiredAttributes); +extern List * CreateFilteredTargetListForRelation(Oid relationId, + List *requiredAttributes); #endif /* QUERY_COLOCATION_CHECKER_H */ diff --git a/src/test/regress/expected/merge_vcore.out b/src/test/regress/expected/merge_vcore.out index 03f6f8820..0eccb811b 100644 --- a/src/test/regress/expected/merge_vcore.out +++ b/src/test/regress/expected/merge_vcore.out @@ -476,6 +476,112 @@ WHEN MATCHED THEN DO NOTHING; Filter: ('2'::bigint = id) (11 rows) +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS target; +-- Bug Fix Test as part of this PR +-- Test 1 +CREATE TABLE source ( + id int, + age int, + salary int +); +CREATE TABLE target ( + id int, + age int, + salary int +); +SELECT create_distributed_table('source', 'id', colocate_with=>'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('target', 'id', colocate_with=>'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO source (id, age, salary) VALUES (1,30, 100000); +MERGE INTO ONLY target USING source ON (source.id = target.id) +WHEN NOT MATCHED THEN +INSERT (id, salary) VALUES (source.id, source.salary); +SELECT * FROM TARGET; + id | age | salary +--------------------------------------------------------------------- + 1 | | 100000 +(1 row) + +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS target; +-- Test 2 +CREATE TABLE source ( + id int, + age int, + salary int +); +CREATE TABLE target ( + id int, + age int, + salary int +); +SELECT create_distributed_table('source', 'id', colocate_with=>'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('target', 'id', colocate_with=>'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO source (id, age, salary) VALUES (1,30, 100000); +MERGE INTO ONLY target USING source ON (source.id = target.id) +WHEN NOT MATCHED THEN +INSERT (salary, id) VALUES (source.salary, source.id); +SELECT * FROM TARGET; + id | age | salary +--------------------------------------------------------------------- + 1 | | 100000 +(1 row) + +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS target; +-- Test 3 +CREATE TABLE source ( + id int, + age int, + salary int +); +CREATE TABLE target ( + id int, + age int, + salary int +); +SELECT create_distributed_table('source', 'id', colocate_with=>'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('target', 'id', colocate_with=>'none'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO source (id, age, salary) VALUES (1,30, 100000); +MERGE INTO ONLY target USING source ON (source.id = target.id) +WHEN NOT MATCHED THEN +INSERT (salary, id, age) VALUES (source.age, source.id, source.salary); +SELECT * FROM TARGET; + id | age | salary +--------------------------------------------------------------------- + 1 | 100000 | 30 +(1 row) + DROP TABLE IF EXISTS source; DROP TABLE IF EXISTS target; DROP SCHEMA IF EXISTS merge_vcore_schema CASCADE; diff --git a/src/test/regress/sql/merge_vcore.sql b/src/test/regress/sql/merge_vcore.sql index 472bbfe91..2ab95e874 100644 --- a/src/test/regress/sql/merge_vcore.sql +++ b/src/test/regress/sql/merge_vcore.sql @@ -312,6 +312,90 @@ WHEN MATCHED THEN DO NOTHING; DROP TABLE IF EXISTS source; DROP TABLE IF EXISTS target; + +-- Bug Fix Test as part of this PR +-- Test 1 +CREATE TABLE source ( + id int, + age int, + salary int +); + +CREATE TABLE target ( + id int, + age int, + salary int +); + +SELECT create_distributed_table('source', 'id', colocate_with=>'none'); +SELECT create_distributed_table('target', 'id', colocate_with=>'none'); + +INSERT INTO source (id, age, salary) VALUES (1,30, 100000); + +MERGE INTO ONLY target USING source ON (source.id = target.id) +WHEN NOT MATCHED THEN +INSERT (id, salary) VALUES (source.id, source.salary); + +SELECT * FROM TARGET; +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS target; + + +-- Test 2 +CREATE TABLE source ( + id int, + age int, + salary int +); + +CREATE TABLE target ( + id int, + age int, + salary int +); + +SELECT create_distributed_table('source', 'id', colocate_with=>'none'); +SELECT create_distributed_table('target', 'id', colocate_with=>'none'); + +INSERT INTO source (id, age, salary) VALUES (1,30, 100000); + +MERGE INTO ONLY target USING source ON (source.id = target.id) +WHEN NOT MATCHED THEN +INSERT (salary, id) VALUES (source.salary, source.id); + +SELECT * FROM TARGET; +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS target; + + +-- Test 3 +CREATE TABLE source ( + id int, + age int, + salary int +); + +CREATE TABLE target ( + id int, + age int, + salary int +); + +SELECT create_distributed_table('source', 'id', colocate_with=>'none'); +SELECT create_distributed_table('target', 'id', colocate_with=>'none'); + +INSERT INTO source (id, age, salary) VALUES (1,30, 100000); + +MERGE INTO ONLY target USING source ON (source.id = target.id) +WHEN NOT MATCHED THEN +INSERT (salary, id, age) VALUES (source.age, source.id, source.salary); + +SELECT * FROM TARGET; +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS target; + + + DROP SCHEMA IF EXISTS merge_vcore_schema CASCADE;