From 29ba5a6251fb5d819c25ad8be42563b73c3da398 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Fri, 11 Mar 2022 10:59:28 +0100 Subject: [PATCH] Only change the sequence types if the target column type is a supported sequence type Before this commit, we erroneously converted the sequence type to the column's type it is used. However, it is possible that the sequence is used in an expression which then converted to a type that cannot be a sequence, such as text. With this commit, we only try this conversion if the column type is a supported sequence type (e.g., smallint, int and bigint). Note that we do this conversion because if the column type is a bigint and the sequence is NOT a bigint, users would be in trouble because sequences would generate values that are out of the range of the column. (The other ways are already not supported such as the column is int and the sequence is bigint would fail on the worker.) In other words, with this commit, we scope this optimization only when the target column type is a supported sequence type. Otherwise, we let users to more freely use the sequences. (cherry picked from commit db529facabea38cefeb8ffa0a578609da7d79bf6) Conflicts: - src/backend/distributed/commands/create_distributed_table.c - src/include/distributed/metadata_utility.h EnsureSequenceTypeSupported did not have the third parameter prior to Citus 11. I removed the references to ownerRelationId. - src/test/regress/multi_schedule Trivial conflict for the newly introduced test files. This conflict was easy to resolve, but I modified the tests to prevent failures due to the following: a) MX is not on by default on 10.2 contrary to Citus 11 b) We set replication factor = 2 here contrary to 1 in Citus 11 --- .../commands/create_distributed_table.c | 29 +++-- src/include/distributed/metadata_utility.h | 2 +- .../sequences_with_different_types.out | 119 ++++++++++++++++++ src/test/regress/multi_schedule | 2 +- .../sql/sequences_with_different_types.sql | 81 ++++++++++++ 5 files changed, 223 insertions(+), 10 deletions(-) create mode 100644 src/test/regress/expected/sequences_with_different_types.out create mode 100644 src/test/regress/sql/sequences_with_different_types.sql diff --git a/src/backend/distributed/commands/create_distributed_table.c b/src/backend/distributed/commands/create_distributed_table.c index 27a7f75e2..9d36b2af1 100644 --- a/src/backend/distributed/commands/create_distributed_table.c +++ b/src/backend/distributed/commands/create_distributed_table.c @@ -31,6 +31,7 @@ #include "catalog/pg_opclass.h" #include "catalog/pg_proc.h" #include "catalog/pg_trigger.h" +#include "catalog/pg_type.h" #include "commands/defrem.h" #include "commands/extension.h" #include "commands/sequence.h" @@ -597,7 +598,7 @@ CreateDistributedTable(Oid relationId, Var *distributionColumn, char distributio * Otherwise, the condition is ensured. */ void -EnsureSequenceTypeSupported(Oid seqOid, Oid seqTypId) +EnsureSequenceTypeSupported(Oid seqOid, Oid attributeTypeId) { List *citusTableIdList = CitusTableTypeIdList(ANY_CITUS_TABLE_TYPE); Oid citusTableId = InvalidOid; @@ -622,9 +623,9 @@ EnsureSequenceTypeSupported(Oid seqOid, Oid seqTypId) */ if (currentSeqOid == seqOid) { - Oid currentSeqTypId = GetAttributeTypeOid(citusTableId, - currentAttnum); - if (seqTypId != currentSeqTypId) + Oid currentAttributeTypId = GetAttributeTypeOid(citusTableId, + currentAttnum); + if (attributeTypeId != currentAttributeTypId) { char *sequenceName = generate_qualified_relation_name( seqOid); @@ -716,17 +717,29 @@ EnsureDistributedSequencesHaveOneType(Oid relationId, List *dependentSequenceLis * We should make sure that the type of the column that uses * that sequence is supported */ - Oid seqTypId = GetAttributeTypeOid(relationId, attnum); - EnsureSequenceTypeSupported(sequenceOid, seqTypId); + Oid attributeTypeId = GetAttributeTypeOid(relationId, attnum); + EnsureSequenceTypeSupported(sequenceOid, attributeTypeId); /* * Alter the sequence's data type in the coordinator if needed. + * + * First, we should only change the sequence type if the column + * is a supported sequence type. For example, if a sequence is used + * in an expression which then becomes a text, we should not try to + * alter the sequence type to text. Postgres only supports int2, int4 + * and int8 as the sequence type. + * * A sequence's type is bigint by default and it doesn't change even if * it's used in an int column. We should change the type if needed, * and not allow future ALTER SEQUENCE ... TYPE ... commands for - * sequences used as defaults in distributed tables + * sequences used as defaults in distributed tables. */ - AlterSequenceType(sequenceOid, seqTypId); + if (attributeTypeId == INT2OID || + attributeTypeId == INT4OID || + attributeTypeId == INT8OID) + { + AlterSequenceType(sequenceOid, attributeTypeId); + } } } diff --git a/src/include/distributed/metadata_utility.h b/src/include/distributed/metadata_utility.h index 6065e210d..57d8a20ea 100644 --- a/src/include/distributed/metadata_utility.h +++ b/src/include/distributed/metadata_utility.h @@ -291,7 +291,7 @@ extern bool GetNodeDiskSpaceStatsForConnection(MultiConnection *connection, uint64 *availableBytes, uint64 *totalBytes); extern void ExecuteQueryViaSPI(char *query, int SPIOK); -extern void EnsureSequenceTypeSupported(Oid seqOid, Oid seqTypId); +extern void EnsureSequenceTypeSupported(Oid seqOid, Oid attributeTypeId); extern void AlterSequenceType(Oid seqOid, Oid typeOid); extern void MarkSequenceListDistributedAndPropagateDependencies(List *sequenceList); extern void MarkSequenceDistributedAndPropagateDependencies(Oid sequenceOid); diff --git a/src/test/regress/expected/sequences_with_different_types.out b/src/test/regress/expected/sequences_with_different_types.out new file mode 100644 index 000000000..a6d89b48e --- /dev/null +++ b/src/test/regress/expected/sequences_with_different_types.out @@ -0,0 +1,119 @@ +CREATE SCHEMA sequences_with_different_types; +SET search_path TO sequences_with_different_types; +SELECT start_metadata_sync_to_node(nodename, nodeport) FROM pg_dist_node WHERE noderole = 'primary'; + start_metadata_sync_to_node +--------------------------------------------------------------------- + + +(2 rows) + +CREATE TYPE two_big_ints AS (a bigint, b bigint); +-- by default, sequences get bigint type +CREATE SEQUENCE bigint_sequence_1; +CREATE SEQUENCE bigint_sequence_2 START 10000; +CREATE SEQUENCE bigint_sequence_3 INCREMENT 10; +CREATE SEQUENCE bigint_sequence_4 MINVALUE 1000000; +CREATE SEQUENCE bigint_sequence_5; +CREATE SEQUENCE bigint_sequence_8; +CREATE TABLE table_1 +( + user_id bigint, + user_code_1 text DEFAULT (('CD'::text || lpad(nextval('bigint_sequence_1'::regclass)::text, 10, '0'::text))), + user_code_2 text DEFAULT nextval('bigint_sequence_2'::regclass)::text, + user_code_3 text DEFAULT (nextval('bigint_sequence_3'::regclass) + 1000)::text, + user_code_4 float DEFAULT nextval('bigint_sequence_4'::regclass), + user_code_5 two_big_ints DEFAULT (nextval('bigint_sequence_5'::regclass), nextval('bigint_sequence_5'::regclass)), + user_code_8 jsonb DEFAULT to_jsonb('test'::text) || to_jsonb(nextval('bigint_sequence_8'::regclass)) +); +SET citus.shard_replication_factor TO 1; +SELECT create_distributed_table('table_1', 'user_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO table_1 VALUES (1, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT), (2, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT) RETURNING *; + user_id | user_code_1 | user_code_2 | user_code_3 | user_code_4 | user_code_5 | user_code_8 +--------------------------------------------------------------------- + 1 | CD0000000001 | 10000 | 1001 | 1000000 | (1,2) | ["test", 1] + 2 | CD0000000002 | 10001 | 1011 | 1000001 | (3,4) | ["test", 2] +(2 rows) + +\c - - - :worker_1_port +SET search_path TO sequences_with_different_types; +INSERT INTO table_1 VALUES (3, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT), (4, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT) RETURNING *; + user_id | user_code_1 | user_code_2 | user_code_3 | user_code_4 | user_code_5 | user_code_8 +--------------------------------------------------------------------- + 3 | CD3940649673 | 3940649673949185 | 3940649673950185 | 3.94064967394918e+15 | (3940649673949185,3940649673949186) | ["test", 3940649673949185] + 4 | CD3940649673 | 3940649673949186 | 3940649673950195 | 3.94064967394919e+15 | (3940649673949187,3940649673949188) | ["test", 3940649673949186] +(2 rows) + +\c - - - :master_port +SET search_path TO sequences_with_different_types; +CREATE SEQUENCE bigint_sequence_6; +CREATE TABLE table_2 +( + user_id bigint, + user_code OID DEFAULT nextval('bigint_sequence_6'::regclass) +); +SET citus.shard_replication_factor TO 1; +SELECT create_distributed_table('table_2', 'user_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- on the coordinator, the sequence starts from 0 +INSERT INTO table_2 VALUES (1, DEFAULT) RETURNING *; + user_id | user_code +--------------------------------------------------------------------- + 1 | 1 +(1 row) + +\c - - - :worker_1_port +SET search_path TO sequences_with_different_types; +-- this fails because on the workers the start value of the sequence +-- is greater than the largest value of an oid +INSERT INTO table_2 VALUES (1, DEFAULT) RETURNING *; +ERROR: OID out of range +\c - - - :master_port +SET search_path TO sequences_with_different_types; +CREATE SEQUENCE bigint_sequence_7; +CREATE TABLE table_3 +( + user_id bigint, + user_code boolean DEFAULT ((nextval('bigint_sequence_7'::regclass)%2)::int)::boolean +); +SET citus.shard_replication_factor TO 1; +SELECT create_distributed_table('table_3', 'user_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO table_3 VALUES (1, DEFAULT), (2, DEFAULT) RETURNING *; + user_id | user_code +--------------------------------------------------------------------- + 1 | t + 2 | f +(2 rows) + +\c - - - :worker_1_port +SET search_path TO sequences_with_different_types; +INSERT INTO table_3 VALUES (3, DEFAULT), (4, DEFAULT) RETURNING *; + user_id | user_code +--------------------------------------------------------------------- + 3 | t + 4 | f +(2 rows) + +\c - - - :master_port +SET client_min_messages TO WARNING; +DROP SCHEMA sequences_with_different_types CASCADE; +SELECT stop_metadata_sync_to_node(nodename, nodeport) FROM pg_dist_node WHERE noderole = 'primary'; + stop_metadata_sync_to_node +--------------------------------------------------------------------- + + +(2 rows) + diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 0d77b0556..f14f110e8 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -51,7 +51,7 @@ test: subqueries_deep subquery_view subquery_partitioning subqueries_not_support test: subquery_in_targetlist subquery_in_where subquery_complex_target_list test: subquery_prepared_statements test: non_colocated_leaf_subquery_joins non_colocated_subquery_joins non_colocated_join_order -test: cte_inline recursive_view_local_table values +test: cte_inline recursive_view_local_table values sequences_with_different_types test: pg13 pg12 # run pg14 sequentially as it syncs metadata test: pg14 diff --git a/src/test/regress/sql/sequences_with_different_types.sql b/src/test/regress/sql/sequences_with_different_types.sql new file mode 100644 index 000000000..b145c265f --- /dev/null +++ b/src/test/regress/sql/sequences_with_different_types.sql @@ -0,0 +1,81 @@ +CREATE SCHEMA sequences_with_different_types; +SET search_path TO sequences_with_different_types; + +SELECT start_metadata_sync_to_node(nodename, nodeport) FROM pg_dist_node WHERE noderole = 'primary'; + +CREATE TYPE two_big_ints AS (a bigint, b bigint); +-- by default, sequences get bigint type +CREATE SEQUENCE bigint_sequence_1; +CREATE SEQUENCE bigint_sequence_2 START 10000; +CREATE SEQUENCE bigint_sequence_3 INCREMENT 10; +CREATE SEQUENCE bigint_sequence_4 MINVALUE 1000000; +CREATE SEQUENCE bigint_sequence_5; +CREATE SEQUENCE bigint_sequence_8; + +CREATE TABLE table_1 +( + user_id bigint, + user_code_1 text DEFAULT (('CD'::text || lpad(nextval('bigint_sequence_1'::regclass)::text, 10, '0'::text))), + user_code_2 text DEFAULT nextval('bigint_sequence_2'::regclass)::text, + user_code_3 text DEFAULT (nextval('bigint_sequence_3'::regclass) + 1000)::text, + user_code_4 float DEFAULT nextval('bigint_sequence_4'::regclass), + user_code_5 two_big_ints DEFAULT (nextval('bigint_sequence_5'::regclass), nextval('bigint_sequence_5'::regclass)), + user_code_8 jsonb DEFAULT to_jsonb('test'::text) || to_jsonb(nextval('bigint_sequence_8'::regclass)) + +); +SET citus.shard_replication_factor TO 1; +SELECT create_distributed_table('table_1', 'user_id'); + +INSERT INTO table_1 VALUES (1, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT), (2, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT) RETURNING *; + +\c - - - :worker_1_port +SET search_path TO sequences_with_different_types; + +INSERT INTO table_1 VALUES (3, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT), (4, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT) RETURNING *; + + +\c - - - :master_port +SET search_path TO sequences_with_different_types; +CREATE SEQUENCE bigint_sequence_6; + +CREATE TABLE table_2 +( + user_id bigint, + user_code OID DEFAULT nextval('bigint_sequence_6'::regclass) +); +SET citus.shard_replication_factor TO 1; +SELECT create_distributed_table('table_2', 'user_id'); + +-- on the coordinator, the sequence starts from 0 +INSERT INTO table_2 VALUES (1, DEFAULT) RETURNING *; + +\c - - - :worker_1_port +SET search_path TO sequences_with_different_types; + +-- this fails because on the workers the start value of the sequence +-- is greater than the largest value of an oid +INSERT INTO table_2 VALUES (1, DEFAULT) RETURNING *; + +\c - - - :master_port +SET search_path TO sequences_with_different_types; +CREATE SEQUENCE bigint_sequence_7; + +CREATE TABLE table_3 +( + user_id bigint, + user_code boolean DEFAULT ((nextval('bigint_sequence_7'::regclass)%2)::int)::boolean +); +SET citus.shard_replication_factor TO 1; +SELECT create_distributed_table('table_3', 'user_id'); + +INSERT INTO table_3 VALUES (1, DEFAULT), (2, DEFAULT) RETURNING *; + +\c - - - :worker_1_port +SET search_path TO sequences_with_different_types; +INSERT INTO table_3 VALUES (3, DEFAULT), (4, DEFAULT) RETURNING *; + +\c - - - :master_port +SET client_min_messages TO WARNING; +DROP SCHEMA sequences_with_different_types CASCADE; + +SELECT stop_metadata_sync_to_node(nodename, nodeport) FROM pg_dist_node WHERE noderole = 'primary';