From d61fd6e4786c1caecc61d14dd87a9b1983cfa902 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Mon, 15 Feb 2021 19:20:26 +0300 Subject: [PATCH] Decide changing sequence dependencies on MX nodes according to resulting relation (#4713) When executing alter_table / undistribute_table udf's, we should not try to change sequence dependencies on MX workers if new table wouldn't require syncing metadata. Previously, we were checking that for input table. But in some cases, the fact that input table requires syncing metadata doesn't imply the same for resulting table (e.g when undistributing a Citus table). Even more, doing that was giving an unexpected error when undistributing a Citus table so this commit actually fixes that. --- .../distributed/commands/alter_table.c | 9 +- .../undistribute_table_cascade_mx.out | 117 ++++++++++++++++++ .../sql/undistribute_table_cascade_mx.sql | 84 +++++++++++++ 3 files changed, 209 insertions(+), 1 deletion(-) diff --git a/src/backend/distributed/commands/alter_table.c b/src/backend/distributed/commands/alter_table.c index c69332b8e..eef98cae1 100644 --- a/src/backend/distributed/commands/alter_table.c +++ b/src/backend/distributed/commands/alter_table.c @@ -1178,7 +1178,14 @@ ReplaceTable(Oid sourceId, Oid targetId, List *justBeforeDropCommands, { changeDependencyFor(RelationRelationId, sequenceOid, RelationRelationId, sourceId, targetId); - if (ShouldSyncTableMetadata(sourceId)) + + /* + * Skip if we cannot sync metadata for target table. + * Checking only for the target table is sufficient since we will + * anyway drop the source table even if it was a Citus table that + * has metadata on MX workers. + */ + if (ShouldSyncTableMetadata(targetId)) { Oid sequenceSchemaOid = get_rel_namespace(sequenceOid); char *sequenceSchemaName = get_namespace_name(sequenceSchemaOid); diff --git a/src/test/regress/expected/undistribute_table_cascade_mx.out b/src/test/regress/expected/undistribute_table_cascade_mx.out index 150939c13..f52d3b0aa 100644 --- a/src/test/regress/expected/undistribute_table_cascade_mx.out +++ b/src/test/regress/expected/undistribute_table_cascade_mx.out @@ -105,5 +105,122 @@ $$); (localhost,57638,t,4) (2 rows) +-- create a reference table with an implicit sequence +CREATE TABLE reference_table_2 (id bigserial); +SELECT create_reference_table('reference_table_2'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +-- this should work fine since we won't try to change sequence dependencies on mx workers +SELECT undistribute_table('reference_table_2'); + undistribute_table +--------------------------------------------------------------------- + +(1 row) + +create table countries( + id serial primary key + , name text + , code varchar(2) collate "C" unique +); +insert into countries(name, code) select 'country-'||i, i::text from generate_series(10,99) i; +select create_reference_table('countries'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE users ( + id bigserial + , org_id bigint + , name text + , created_at timestamptz default now() + , country_id int references countries(id) + , primary key (org_id, id) +); +SET citus.replication_model to 'streaming'; +-- "users" table was implicitly added to citus metadata when defining foreign key, +-- so create_distributed_table would first undistribute it. +-- Show that it works well when changing sequence dependencies on mx workers. +select create_distributed_table('users', 'org_id'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- count sequences that depend on "users" table's "id" column +SELECT COUNT(*) +FROM pg_class s + JOIN pg_depend d ON d.objid=s.oid AND d.classid='pg_class'::regclass AND d.refclassid='pg_class'::regclass + JOIN pg_class t ON t.oid=d.refobjid + JOIN pg_attribute a ON a.attrelid=t.oid AND a.attnum=d.refobjsubid +WHERE s.relkind='S' AND t.relname = 'users' AND a.attname = 'id'; + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT run_command_on_workers( +$$ +SELECT COUNT(*) +FROM pg_class s + JOIN pg_depend d ON d.objid=s.oid AND d.classid='pg_class'::regclass AND d.refclassid='pg_class'::regclass + JOIN pg_class t ON t.oid=d.refobjid + JOIN pg_attribute a ON a.attrelid=t.oid AND a.attnum=d.refobjsubid +WHERE s.relkind='S' AND t.relname = 'users' AND a.attname = 'id'; +$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,1) + (localhost,57638,t,1) +(2 rows) + +SELECT alter_distributed_table ('users', shard_count=>10); + alter_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- first drop the column that has a foreign key since +-- alter_table_set_access_method doesn't support foreign keys +ALTER TABLE users DROP country_id; +-- set access method to columnar if pg version > 11 +DO $proc$ +BEGIN +IF substring(current_Setting('server_version'), '\d+')::int >= 12 THEN + EXECUTE + $$ + SELECT alter_table_set_access_method('users', 'columnar'); + $$; +END IF; +END$proc$; +SELECT COUNT(*) +FROM pg_class s + JOIN pg_depend d ON d.objid=s.oid AND d.classid='pg_class'::regclass AND d.refclassid='pg_class'::regclass + JOIN pg_class t ON t.oid=d.refobjid + JOIN pg_attribute a ON a.attrelid=t.oid AND a.attnum=d.refobjsubid +WHERE s.relkind='S' AND t.relname = 'users' AND a.attname = 'id'; + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT run_command_on_workers( +$$ +SELECT COUNT(*) +FROM pg_class s + JOIN pg_depend d ON d.objid=s.oid AND d.classid='pg_class'::regclass AND d.refclassid='pg_class'::regclass + JOIN pg_class t ON t.oid=d.refobjid + JOIN pg_attribute a ON a.attrelid=t.oid AND a.attnum=d.refobjsubid +WHERE s.relkind='S' AND t.relname = 'users' AND a.attname = 'id'; +$$); + run_command_on_workers +--------------------------------------------------------------------- + (localhost,57637,t,1) + (localhost,57638,t,1) +(2 rows) + -- cleanup at exit DROP SCHEMA undistribute_table_cascade_mx CASCADE; diff --git a/src/test/regress/sql/undistribute_table_cascade_mx.sql b/src/test/regress/sql/undistribute_table_cascade_mx.sql index dc32f106c..c8f2f82ca 100644 --- a/src/test/regress/sql/undistribute_table_cascade_mx.sql +++ b/src/test/regress/sql/undistribute_table_cascade_mx.sql @@ -59,5 +59,89 @@ $$ SELECT count(*) FROM pg_catalog.pg_tables WHERE schemaname='undistribute_table_cascade_mx' $$); +-- create a reference table with an implicit sequence +CREATE TABLE reference_table_2 (id bigserial); +SELECT create_reference_table('reference_table_2'); + +-- this should work fine since we won't try to change sequence dependencies on mx workers +SELECT undistribute_table('reference_table_2'); + +create table countries( + id serial primary key + , name text + , code varchar(2) collate "C" unique +); +insert into countries(name, code) select 'country-'||i, i::text from generate_series(10,99) i; +select create_reference_table('countries'); + +CREATE TABLE users ( + id bigserial + , org_id bigint + , name text + , created_at timestamptz default now() + , country_id int references countries(id) + , primary key (org_id, id) +); + +SET citus.replication_model to 'streaming'; + +-- "users" table was implicitly added to citus metadata when defining foreign key, +-- so create_distributed_table would first undistribute it. +-- Show that it works well when changing sequence dependencies on mx workers. +select create_distributed_table('users', 'org_id'); + +-- count sequences that depend on "users" table's "id" column +SELECT COUNT(*) +FROM pg_class s + JOIN pg_depend d ON d.objid=s.oid AND d.classid='pg_class'::regclass AND d.refclassid='pg_class'::regclass + JOIN pg_class t ON t.oid=d.refobjid + JOIN pg_attribute a ON a.attrelid=t.oid AND a.attnum=d.refobjsubid +WHERE s.relkind='S' AND t.relname = 'users' AND a.attname = 'id'; + +SELECT run_command_on_workers( +$$ +SELECT COUNT(*) +FROM pg_class s + JOIN pg_depend d ON d.objid=s.oid AND d.classid='pg_class'::regclass AND d.refclassid='pg_class'::regclass + JOIN pg_class t ON t.oid=d.refobjid + JOIN pg_attribute a ON a.attrelid=t.oid AND a.attnum=d.refobjsubid +WHERE s.relkind='S' AND t.relname = 'users' AND a.attname = 'id'; +$$); + +SELECT alter_distributed_table ('users', shard_count=>10); + +-- first drop the column that has a foreign key since +-- alter_table_set_access_method doesn't support foreign keys +ALTER TABLE users DROP country_id; + +-- set access method to columnar if pg version > 11 +DO $proc$ +BEGIN +IF substring(current_Setting('server_version'), '\d+')::int >= 12 THEN + EXECUTE + $$ + SELECT alter_table_set_access_method('users', 'columnar'); + $$; +END IF; +END$proc$; + +SELECT COUNT(*) +FROM pg_class s + JOIN pg_depend d ON d.objid=s.oid AND d.classid='pg_class'::regclass AND d.refclassid='pg_class'::regclass + JOIN pg_class t ON t.oid=d.refobjid + JOIN pg_attribute a ON a.attrelid=t.oid AND a.attnum=d.refobjsubid +WHERE s.relkind='S' AND t.relname = 'users' AND a.attname = 'id'; + +SELECT run_command_on_workers( +$$ +SELECT COUNT(*) +FROM pg_class s + JOIN pg_depend d ON d.objid=s.oid AND d.classid='pg_class'::regclass AND d.refclassid='pg_class'::regclass + JOIN pg_class t ON t.oid=d.refobjid + JOIN pg_attribute a ON a.attrelid=t.oid AND a.attnum=d.refobjsubid +WHERE s.relkind='S' AND t.relname = 'users' AND a.attname = 'id'; +$$); + + -- cleanup at exit DROP SCHEMA undistribute_table_cascade_mx CASCADE;