diff --git a/src/backend/distributed/planner/insert_select_planner.c b/src/backend/distributed/planner/insert_select_planner.c index ca0c74f8f..ce61bd0ae 100644 --- a/src/backend/distributed/planner/insert_select_planner.c +++ b/src/backend/distributed/planner/insert_select_planner.c @@ -96,6 +96,17 @@ static List * AddInsertSelectCasts(List *insertTargetList, List *selectTargetLis Oid targetRelationId); static Expr * CastExpr(Expr *expr, Oid sourceType, Oid targetType, Oid targetCollation, int targetTypeMod); +static Oid GetNextvalReturnTypeCatalog(void); +static void AppendCastedEntry(TargetEntry *insertEntry, TargetEntry *selectEntry, + Oid castFromType, Oid targetType, Oid collation, int32 + typmod, + int targetEntryIndex, + List **projectedEntries, List **nonProjectedEntries); +static void SetTargetEntryName(TargetEntry *tle, const char *format, int index); +static void ResetTargetEntryResno(List *targetList); +static void ProcessEntryPair(TargetEntry *insertEntry, TargetEntry *selectEntry, + Form_pg_attribute attr, int targetEntryIndex, + List **projectedEntries, List **nonProjectedEntries); /* depth of current insert/select planner. */ @@ -1617,11 +1628,11 @@ RelabelTargetEntryList(List *selectTargetList, List *insertTargetList) /* - * AddInsertSelectCasts makes sure that the types in columns in the given - * target lists have the same type as the columns of the given relation. - * It might add casts to ensure that. + * AddInsertSelectCasts ensures that the columns in the given target lists + * have the same type as the corresponding columns of the target relation. + * It adds casts when necessary. * - * It returns the updated selectTargetList. + * Returns the updated selectTargetList. */ static List * AddInsertSelectCasts(List *insertTargetList, List *selectTargetList, @@ -1631,9 +1642,9 @@ AddInsertSelectCasts(List *insertTargetList, List *selectTargetList, List *nonProjectedEntries = NIL; /* - * ReorderInsertSelectTargetLists() makes sure that first few columns of - * the SELECT query match the insert targets. It might contain additional - * items for GROUP BY, etc. + * ReorderInsertSelectTargetLists() ensures that the first few columns of the + * SELECT query match the insert targets. It might also include additional + * items (for GROUP BY, etc.), so the insertTargetList is shorter. */ Assert(list_length(insertTargetList) <= list_length(selectTargetList)); @@ -1646,71 +1657,20 @@ AddInsertSelectCasts(List *insertTargetList, List *selectTargetList, forboth_ptr(insertEntry, insertTargetList, selectEntry, selectTargetList) { + /* + * Retrieve the target attribute corresponding to the insert entry. + * The attribute is located at (resno - 1) in the tuple descriptor. + */ Form_pg_attribute attr = TupleDescAttr(destTupleDescriptor, insertEntry->resno - 1); - Oid sourceType = exprType((Node *) selectEntry->expr); - Oid targetType = attr->atttypid; - if (sourceType != targetType) - { - /* ReorderInsertSelectTargetLists ensures we only have Vars */ - Assert(IsA(insertEntry->expr, Var)); - - /* we will cast the SELECT expression, so the type changes */ - Var *insertVar = (Var *) insertEntry->expr; - insertVar->vartype = targetType; - insertVar->vartypmod = attr->atttypmod; - insertVar->varcollid = attr->attcollation; - - /* - * We cannot modify the selectEntry in-place, because ORDER BY or - * GROUP BY clauses might be pointing to it with comparison types - * of the source type. So instead we keep the original one as a - * non-projected entry, so GROUP BY and ORDER BY are happy, and - * create a duplicated projected entry with the coerced expression. - */ - TargetEntry *coercedEntry = copyObject(selectEntry); - coercedEntry->expr = CastExpr((Expr *) selectEntry->expr, sourceType, - targetType, attr->attcollation, - attr->atttypmod); - coercedEntry->ressortgroupref = 0; - - /* - * The only requirement is that users don't use this name in ORDER BY - * or GROUP BY, and it should be unique across the same query. - */ - StringInfo resnameString = makeStringInfo(); - appendStringInfo(resnameString, "auto_coerced_by_citus_%d", targetEntryIndex); - coercedEntry->resname = resnameString->data; - - projectedEntries = lappend(projectedEntries, coercedEntry); - - if (selectEntry->ressortgroupref != 0) - { - selectEntry->resjunk = true; - - /* - * This entry might still end up in the SELECT output list, so - * rename it to avoid ambiguity. - * - * See https://github.com/citusdata/citus/pull/3470. - */ - resnameString = makeStringInfo(); - appendStringInfo(resnameString, "discarded_target_item_%d", - targetEntryIndex); - selectEntry->resname = resnameString->data; - - nonProjectedEntries = lappend(nonProjectedEntries, selectEntry); - } - } - else - { - projectedEntries = lappend(projectedEntries, selectEntry); - } + ProcessEntryPair(insertEntry, selectEntry, attr, targetEntryIndex, + &projectedEntries, &nonProjectedEntries); targetEntryIndex++; } + /* Append any additional non-projected entries from selectTargetList */ for (int entryIndex = list_length(insertTargetList); entryIndex < list_length(selectTargetList); entryIndex++) @@ -1719,14 +1679,9 @@ AddInsertSelectCasts(List *insertTargetList, List *selectTargetList, entryIndex)); } - /* selectEntry->resno must be the ordinal number of the entry */ + /* Concatenate projected and non-projected entries and reset resno numbering */ selectTargetList = list_concat(projectedEntries, nonProjectedEntries); - int entryResNo = 1; - TargetEntry *selectTargetEntry = NULL; - foreach_declared_ptr(selectTargetEntry, selectTargetList) - { - selectTargetEntry->resno = entryResNo++; - } + ResetTargetEntryResno(selectTargetList); table_close(distributedRelation, NoLock); @@ -1734,6 +1689,147 @@ AddInsertSelectCasts(List *insertTargetList, List *selectTargetList, } +/* + * Processes a single pair of insert and select target entries. + * It compares the source and target types and appends either the + * original select entry or a casted version to the appropriate list. + */ +static void +ProcessEntryPair(TargetEntry *insertEntry, TargetEntry *selectEntry, + Form_pg_attribute attr, int targetEntryIndex, + List **projectedEntries, List **nonProjectedEntries) +{ + Oid effectiveSourceType = exprType((Node *) selectEntry->expr); + Oid targetType = attr->atttypid; + + /* + * If the select expression is a NextValueExpr, use its actual return type. + * + * NextValueExpr represents a call to the nextval() function, which is used to + * obtain the next value from a sequence—commonly for populating auto-increment + * columns. In many cases, nextval() returns an INT8 (bigint), but the actual + * return type may differ depending on database configuration or custom implementations. + * + * Since the target column might have a different type (e.g., INT4), we need to + * obtain the real return type of nextval() to ensure that any type coercion is applied + * correctly. This is done by calling GetNextvalReturnTypeCatalog(), which looks up the + * function in the catalog and returns its return type. The effectiveSourceType is then + * set to this value, ensuring that subsequent comparisons and casts use the correct type. + */ + if (IsA(selectEntry->expr, NextValueExpr)) + { + effectiveSourceType = GetNextvalReturnTypeCatalog(); + } + + if (effectiveSourceType != targetType) + { + AppendCastedEntry(insertEntry, selectEntry, + effectiveSourceType, targetType, + attr->attcollation, attr->atttypmod, + targetEntryIndex, + projectedEntries, nonProjectedEntries); + } + else + { + /* Types match, no cast needed */ + *projectedEntries = lappend(*projectedEntries, selectEntry); + } +} + + +/* + * Resets the resno field for each target entry in the list so that + * they are numbered sequentially. + */ +static void +ResetTargetEntryResno(List *targetList) +{ + int entryResNo = 1; + ListCell *lc = NULL; + foreach(lc, targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + tle->resno = entryResNo++; + } +} + + +/* + * Looks up the nextval(regclass) function in pg_proc, returning its actual + * rettype. In a standard build, that will be INT8OID, but this is more robust. + */ +static Oid +GetNextvalReturnTypeCatalog(void) +{ + Oid argTypes[1] = { REGCLASSOID }; + List *nameList = list_make1(makeString("nextval")); + + /* Look up the nextval(regclass) function */ + Oid nextvalFuncOid = LookupFuncName(nameList, 1, argTypes, false); + if (!OidIsValid(nextvalFuncOid)) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not find function nextval(regclass)"))); + } + + /* Retrieve and validate the return type of the nextval function */ + Oid nextvalReturnType = get_func_rettype(nextvalFuncOid); + if (!OidIsValid(nextvalReturnType)) + { + elog(ERROR, "could not determine return type of nextval(regclass)"); + } + + return nextvalReturnType; +} + + +/** + * Modifies the given insert entry to match the target column's type and typmod, + * then creates and appends a new target entry containing a casted expression + * to the projected list. If the original select entry is used by ORDER BY or GROUP BY, + * it is marked as junk to avoid ambiguity. + */ +static void +AppendCastedEntry(TargetEntry *insertEntry, TargetEntry *selectEntry, + Oid castFromType, Oid targetType, Oid collation, int32 typmod, + int targetEntryIndex, + List **projectedEntries, List **nonProjectedEntries) +{ + /* Update the insert entry's Var to match the target column's type, typmod, and collation */ + Assert(IsA(insertEntry->expr, Var)); + { + Var *insertVar = (Var *) insertEntry->expr; + insertVar->vartype = targetType; + insertVar->vartypmod = typmod; + insertVar->varcollid = collation; + } + + /* Create a new TargetEntry with the casted expression */ + TargetEntry *coercedEntry = copyObject(selectEntry); + coercedEntry->expr = CastExpr((Expr *) selectEntry->expr, + castFromType, + targetType, + collation, + typmod); + coercedEntry->ressortgroupref = 0; + + /* Assign a unique name to the coerced entry */ + SetTargetEntryName(coercedEntry, "auto_coerced_by_citus_%d", targetEntryIndex); + *projectedEntries = lappend(*projectedEntries, coercedEntry); + + /* If the original select entry is referenced in ORDER BY or GROUP BY, + * mark it as junk and rename it to avoid ambiguity. + */ + if (selectEntry->ressortgroupref != 0) + { + selectEntry->resjunk = true; + SetTargetEntryName(selectEntry, "discarded_target_item_%d", targetEntryIndex); + *nonProjectedEntries = lappend(*nonProjectedEntries, selectEntry); + } +} + + /* * CastExpr returns an expression which casts the given expr from sourceType to * the given targetType. @@ -1815,6 +1911,16 @@ CastExpr(Expr *expr, Oid sourceType, Oid targetType, Oid targetCollation, } +/* Helper function to set the target entry name using a formatted string */ +static void +SetTargetEntryName(TargetEntry *tle, const char *format, int index) +{ + StringInfo resnameString = makeStringInfo(); + appendStringInfo(resnameString, format, index); + tle->resname = resnameString->data; +} + + /* PlanningInsertSelect returns true if we are planning an INSERT ...SELECT query */ bool PlanningInsertSelect(void) diff --git a/src/test/regress/expected/generated_identity.out b/src/test/regress/expected/generated_identity.out index 8fe7a0dc6..b1102b781 100644 --- a/src/test/regress/expected/generated_identity.out +++ b/src/test/regress/expected/generated_identity.out @@ -560,5 +560,116 @@ SELECT * FROM test; 1 | 2 | 2 (2 rows) +-- Test for issue #7887 Fix insert select planner to exclude identity columns from target list on partial inserts +-- https://github.com/citusdata/citus/pull/7911 +CREATE TABLE local1 ( + id text not null primary key +); +CREATE TABLE reference1 ( + id int not null primary key, + reference_col1 text not null +); +SELECT create_reference_table('reference1'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE local2 ( + id int not null generated always as identity, + local1fk text not null, + reference1fk int not null, + constraint loc1fk foreign key (local1fk) references local1(id), + constraint reference1fk foreign key (reference1fk) references reference1(id), + constraint testlocpk primary key (id) +); +INSERT INTO local1(id) VALUES ('aaaaa'), ('bbbbb'), ('ccccc'); +INSERT INTO reference1(id, reference_col1) VALUES (1, 'test'), (2, 'test2'), (3, 'test3'); +-- +-- Partial insert: omit the identity column +-- This triggers the known bug in older code paths if not fixed. +-- +INSERT INTO local2(local1fk, reference1fk) + SELECT id, 1 + FROM local1; +-- Check inserted rows in local2 +SELECT * FROM local2; + id | local1fk | reference1fk +--------------------------------------------------------------------- + 1 | aaaaa | 1 + 2 | bbbbb | 1 + 3 | ccccc | 1 +(3 rows) + +-- We do a "INSERT INTO local2(id, local1fk, reference1fk) SELECT 9999, id, 2" which +-- should fail under normal PG rules if no OVERRIDING clause is used. +INSERT INTO local2(id, local1fk, reference1fk) + SELECT 9999, id, 2 FROM local1 LIMIT 1; +ERROR: cannot insert a non-DEFAULT value into column "id" +DETAIL: Column "id" is an identity column defined as GENERATED ALWAYS. +HINT: Use OVERRIDING SYSTEM VALUE to override. +-- Using OVERRIDING SYSTEM VALUE to override ALWAYS identity +INSERT INTO local2(id, local1fk, reference1fk) + OVERRIDING SYSTEM VALUE + SELECT 9999, id, 2 FROM local1 LIMIT 1; +-- Create a second table with BY DEFAULT identity to test different identity mode +CREATE TABLE local2_bydefault ( + id int NOT NULL GENERATED BY DEFAULT AS IDENTITY, + local1fk text NOT NULL, + reference1fk int NOT NULL, + CONSTRAINT loc1fk_bd FOREIGN KEY (local1fk) REFERENCES local1(id), + CONSTRAINT reference1fk_bd FOREIGN KEY (reference1fk) REFERENCES reference1(id), + CONSTRAINT testlocpk_bd PRIMARY KEY (id) +); +INSERT INTO local1(id) VALUES ('xxxxx'), ('yyyyy'), ('ddddd'), ('zzzzz'); +INSERT INTO local2_bydefault(local1fk, reference1fk) + SELECT 'xxxxx', 1; +-- Show inserted row in local2_bydefault +SELECT * FROM local2_bydefault; + id | local1fk | reference1fk +--------------------------------------------------------------------- + 1 | xxxxx | 1 +(1 row) + +-- +-- Overriding a BY DEFAULT identity with user value +-- (which is allowed even without OVERRIDING clause). +-- +-- Provide explicit id for BY DEFAULT identity => no special OVERRIDING needed +INSERT INTO local2_bydefault(id, local1fk, reference1fk) + VALUES (5000, 'yyyyy', 2); +-- Show rows (we expect id=5000 and one with auto-generated ID) +SELECT * FROM local2_bydefault ORDER BY id; + id | local1fk | reference1fk +--------------------------------------------------------------------- + 1 | xxxxx | 1 + 5000 | yyyyy | 2 +(2 rows) + +-- Insert referencing reference1fk=3 => partial insert on both tables +INSERT INTO local2(local1fk, reference1fk) + VALUES ('ddddd', 3); +INSERT INTO local2_bydefault(local1fk, reference1fk) + SELECT 'zzzzz', 3; +-- Show final state of local2 and local2_bydefault +SELECT 'local2' as table_name, * FROM local2 +UNION ALL +SELECT 'local2_bydefault', * FROM local2_bydefault +ORDER BY table_name, id; + table_name | id | local1fk | reference1fk +--------------------------------------------------------------------- + local2 | 1 | aaaaa | 1 + local2 | 2 | bbbbb | 1 + local2 | 3 | ccccc | 1 + local2 | 4 | ddddd | 3 + local2 | 9999 | aaaaa | 2 + local2_bydefault | 1 | xxxxx | 1 + local2_bydefault | 2 | zzzzz | 3 + local2_bydefault | 5000 | yyyyy | 2 +(8 rows) + +-- End of test for issue #7887 +-- Cleanup +SET client_min_messages TO WARNING; DROP SCHEMA generated_identities CASCADE; DROP USER identity_test_user; diff --git a/src/test/regress/sql/generated_identity.sql b/src/test/regress/sql/generated_identity.sql index df967ddd0..5de9ea692 100644 --- a/src/test/regress/sql/generated_identity.sql +++ b/src/test/regress/sql/generated_identity.sql @@ -279,5 +279,99 @@ INSERT INTO test VALUES (1,2); INSERT INTO test SELECT x, y FROM test WHERE x = 1; SELECT * FROM test; + +-- Test for issue #7887 Fix insert select planner to exclude identity columns from target list on partial inserts +-- https://github.com/citusdata/citus/pull/7911 +CREATE TABLE local1 ( + id text not null primary key +); + +CREATE TABLE reference1 ( + id int not null primary key, + reference_col1 text not null +); +SELECT create_reference_table('reference1'); + +CREATE TABLE local2 ( + id int not null generated always as identity, + local1fk text not null, + reference1fk int not null, + constraint loc1fk foreign key (local1fk) references local1(id), + constraint reference1fk foreign key (reference1fk) references reference1(id), + constraint testlocpk primary key (id) +); + +INSERT INTO local1(id) VALUES ('aaaaa'), ('bbbbb'), ('ccccc'); +INSERT INTO reference1(id, reference_col1) VALUES (1, 'test'), (2, 'test2'), (3, 'test3'); + +-- +-- Partial insert: omit the identity column +-- This triggers the known bug in older code paths if not fixed. +-- +INSERT INTO local2(local1fk, reference1fk) + SELECT id, 1 + FROM local1; + +-- Check inserted rows in local2 +SELECT * FROM local2; + + +-- We do a "INSERT INTO local2(id, local1fk, reference1fk) SELECT 9999, id, 2" which +-- should fail under normal PG rules if no OVERRIDING clause is used. + +INSERT INTO local2(id, local1fk, reference1fk) + SELECT 9999, id, 2 FROM local1 LIMIT 1; + +-- Using OVERRIDING SYSTEM VALUE to override ALWAYS identity +INSERT INTO local2(id, local1fk, reference1fk) + OVERRIDING SYSTEM VALUE + SELECT 9999, id, 2 FROM local1 LIMIT 1; + +-- Create a second table with BY DEFAULT identity to test different identity mode +CREATE TABLE local2_bydefault ( + id int NOT NULL GENERATED BY DEFAULT AS IDENTITY, + local1fk text NOT NULL, + reference1fk int NOT NULL, + CONSTRAINT loc1fk_bd FOREIGN KEY (local1fk) REFERENCES local1(id), + CONSTRAINT reference1fk_bd FOREIGN KEY (reference1fk) REFERENCES reference1(id), + CONSTRAINT testlocpk_bd PRIMARY KEY (id) +); + +INSERT INTO local1(id) VALUES ('xxxxx'), ('yyyyy'), ('ddddd'), ('zzzzz'); + +INSERT INTO local2_bydefault(local1fk, reference1fk) + SELECT 'xxxxx', 1; + +-- Show inserted row in local2_bydefault +SELECT * FROM local2_bydefault; + +-- +-- Overriding a BY DEFAULT identity with user value +-- (which is allowed even without OVERRIDING clause). +-- +-- Provide explicit id for BY DEFAULT identity => no special OVERRIDING needed +INSERT INTO local2_bydefault(id, local1fk, reference1fk) + VALUES (5000, 'yyyyy', 2); + +-- Show rows (we expect id=5000 and one with auto-generated ID) +SELECT * FROM local2_bydefault ORDER BY id; + +-- Insert referencing reference1fk=3 => partial insert on both tables +INSERT INTO local2(local1fk, reference1fk) + VALUES ('ddddd', 3); + +INSERT INTO local2_bydefault(local1fk, reference1fk) + SELECT 'zzzzz', 3; + +-- Show final state of local2 and local2_bydefault +SELECT 'local2' as table_name, * FROM local2 +UNION ALL +SELECT 'local2_bydefault', * FROM local2_bydefault +ORDER BY table_name, id; + +-- End of test for issue #7887 + +-- Cleanup +SET client_min_messages TO WARNING; DROP SCHEMA generated_identities CASCADE; DROP USER identity_test_user;