Use original relation to retrieve column name because of syscache (#6387)

During alter_distributed_table, we create a new table like the
original table but with the altered options.

To retrieve the name of the distribution column, we were using
the attribute syscache of the new table, since we already created
the new table as identical to the original table.

However, the attribute syscaches of these two tables are not
the same if the original table has dropped columns. The reason
is that dropped columns are all still present in the cache.
Hence, for example, the attnos would be different in the syscaches.

So, let's use the attribute syscache of the original table.
pull/6400/head
Naisila Puka 2022-10-06 12:08:00 +03:00 committed by GitHub
parent f21cbe68f8
commit b5cba3a3fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 252 additions and 1 deletions

View File

@ -1223,8 +1223,15 @@ CreateDistributedTableLike(TableConversionState *con)
newShardCount = con->shardCount;
}
/*
* To get the correct column name, we use the original relation id, not the
* new relation id. The reason is that the cached attributes of the original
* and newly created tables are not the same if the original table has
* dropped columns (dropped columns are still present in the attribute cache)
* Detailed example in https://github.com/citusdata/citus/pull/6387
*/
char *distributionColumnName =
ColumnToColumnName(con->newRelationId, (Node *) newDistributionKey);
ColumnToColumnName(con->relationId, (Node *) newDistributionKey);
Oid originalRelationId = con->relationId;
if (con->originalDistributionKey != NULL && PartitionTable(originalRelationId))

View File

@ -100,6 +100,161 @@ SELECT STRING_AGG(table_name::text, ', ' ORDER BY 1) AS "Colocation Groups" FROM
dist_table
(3 rows)
-- right now dist_table has columns a, b, dist_column is b, it has 6 shards
-- column cache is: a pos 1, b pos 2
-- let's add another column
ALTER TABLE dist_table ADD COLUMN c int DEFAULT 1;
-- right now column cache is: a pos 1, b pos 2, c pos 3
-- test using alter_distributed_table to change shard count after dropping one column
ALTER TABLE dist_table DROP COLUMN a;
-- right now column cache is: a pos 1 attisdropped=true, b pos 2, c pos 3
-- let's try changing the shard count
SELECT alter_distributed_table('dist_table', shard_count := 7, cascade_to_colocated := false);
NOTICE: creating a new table for alter_distributed_table.dist_table
NOTICE: moving the data of alter_distributed_table.dist_table
NOTICE: dropping the old alter_distributed_table.dist_table
NOTICE: renaming the new table to alter_distributed_table.dist_table
alter_distributed_table
---------------------------------------------------------------------
(1 row)
-- right now column cache is: b pos 1, c pos 2 because a new table has been created
-- check that b is still distribution column
SELECT table_name, citus_table_type, distribution_column, shard_count FROM public.citus_tables
WHERE table_name = 'dist_table'::regclass;
table_name | citus_table_type | distribution_column | shard_count
---------------------------------------------------------------------
dist_table | distributed | b | 7
(1 row)
-- let's add another column
ALTER TABLE dist_table ADD COLUMN d int DEFAULT 2;
-- right now column cache is: b pos 1, c pos 2, d pos 3, dist_column is b
-- test using alter_distributed_table to change dist. column after dropping one column
ALTER TABLE dist_table DROP COLUMN c;
-- right now column cache is: b pos 1, c pos 2 attisdropped=true, d pos 3
-- let's try changing the distribution column
SELECT alter_distributed_table('dist_table', distribution_column := 'd');
NOTICE: creating a new table for alter_distributed_table.dist_table
NOTICE: moving the data of alter_distributed_table.dist_table
NOTICE: dropping the old alter_distributed_table.dist_table
NOTICE: renaming the new table to alter_distributed_table.dist_table
alter_distributed_table
---------------------------------------------------------------------
(1 row)
-- right now column cache is: b pos 1, d pos 2 because a new table has been created
-- check that d is the distribution column
SELECT table_name, citus_table_type, distribution_column, shard_count FROM public.citus_tables
WHERE table_name = 'dist_table'::regclass;
table_name | citus_table_type | distribution_column | shard_count
---------------------------------------------------------------------
dist_table | distributed | d | 7
(1 row)
-- add another column and undistribute
ALTER TABLE dist_table ADD COLUMN e int DEFAULT 3;
SELECT undistribute_table('dist_table');
NOTICE: creating a new table for alter_distributed_table.dist_table
NOTICE: moving the data of alter_distributed_table.dist_table
NOTICE: dropping the old alter_distributed_table.dist_table
NOTICE: renaming the new table to alter_distributed_table.dist_table
undistribute_table
---------------------------------------------------------------------
(1 row)
-- right now column cache is: b pos 1, d pos 2, e pos 3, table is not Citus table
-- try dropping column and then distributing
ALTER TABLE dist_table DROP COLUMN b;
-- right now column cache is: b pos 1 attisdropped=true, d pos 2, e pos 3
-- distribute with d
SELECT create_distributed_table ('dist_table', 'd', colocate_with := 'none');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$alter_distributed_table.dist_table$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
-- check that d is the distribution column
SELECT table_name, citus_table_type, distribution_column, shard_count FROM public.citus_tables
WHERE table_name = 'dist_table'::regclass;
table_name | citus_table_type | distribution_column | shard_count
---------------------------------------------------------------------
dist_table | distributed | d | 4
(1 row)
-- alter distribution column to e
SELECT alter_distributed_table('dist_table', distribution_column := 'e');
NOTICE: creating a new table for alter_distributed_table.dist_table
NOTICE: moving the data of alter_distributed_table.dist_table
NOTICE: dropping the old alter_distributed_table.dist_table
NOTICE: renaming the new table to alter_distributed_table.dist_table
alter_distributed_table
---------------------------------------------------------------------
(1 row)
-- right now column cache is: d pos 1, e pos 2
-- check that e is the distribution column
SELECT table_name, citus_table_type, distribution_column, shard_count FROM public.citus_tables
WHERE table_name = 'dist_table'::regclass;
table_name | citus_table_type | distribution_column | shard_count
---------------------------------------------------------------------
dist_table | distributed | e | 4
(1 row)
ALTER TABLE dist_table ADD COLUMN a int DEFAULT 4;
ALTER TABLE dist_table ADD COLUMN b int DEFAULT 5;
-- right now column cache is: d pos 1, e pos 2, a pos 3, b pos 4
-- alter distribution column to a
SELECT alter_distributed_table('dist_table', distribution_column := 'a');
NOTICE: creating a new table for alter_distributed_table.dist_table
NOTICE: moving the data of alter_distributed_table.dist_table
NOTICE: dropping the old alter_distributed_table.dist_table
NOTICE: renaming the new table to alter_distributed_table.dist_table
alter_distributed_table
---------------------------------------------------------------------
(1 row)
-- right now column cache hasn't changed
-- check that a is the distribution column
SELECT table_name, citus_table_type, distribution_column, shard_count FROM public.citus_tables
WHERE table_name = 'dist_table'::regclass;
table_name | citus_table_type | distribution_column | shard_count
---------------------------------------------------------------------
dist_table | distributed | a | 4
(1 row)
ALTER TABLE dist_table DROP COLUMN d;
ALTER TABLE dist_table DROP COLUMN e;
-- right now column cache is: d pos 1 attisdropped=true, e pos 2 attisdropped=true, a pos 3, b pos 4
-- alter distribution column to b
SELECT alter_distributed_table('dist_table', distribution_column := 'b');
NOTICE: creating a new table for alter_distributed_table.dist_table
NOTICE: moving the data of alter_distributed_table.dist_table
NOTICE: dropping the old alter_distributed_table.dist_table
NOTICE: renaming the new table to alter_distributed_table.dist_table
alter_distributed_table
---------------------------------------------------------------------
(1 row)
-- column cache is: a pos 1, b pos 2 -> configuration with which we started these drop column tests
-- check that b is the distribution column
SELECT table_name, citus_table_type, distribution_column, shard_count FROM public.citus_tables
WHERE table_name = 'dist_table'::regclass;
table_name | citus_table_type | distribution_column | shard_count
---------------------------------------------------------------------
dist_table | distributed | b | 4
(1 row)
-- test altering colocation, note that shard count will also change
SELECT alter_distributed_table('dist_table', colocate_with := 'alter_distributed_table.colocation_table');
NOTICE: creating a new table for alter_distributed_table.dist_table

View File

@ -33,6 +33,95 @@ SELECT table_name, citus_table_type, distribution_column, shard_count FROM publi
SELECT STRING_AGG(table_name::text, ', ' ORDER BY 1) AS "Colocation Groups" FROM public.citus_tables
WHERE table_name IN ('dist_table', 'colocation_table', 'colocation_table_2') GROUP BY colocation_id ORDER BY 1;
-- right now dist_table has columns a, b, dist_column is b, it has 6 shards
-- column cache is: a pos 1, b pos 2
-- let's add another column
ALTER TABLE dist_table ADD COLUMN c int DEFAULT 1;
-- right now column cache is: a pos 1, b pos 2, c pos 3
-- test using alter_distributed_table to change shard count after dropping one column
ALTER TABLE dist_table DROP COLUMN a;
-- right now column cache is: a pos 1 attisdropped=true, b pos 2, c pos 3
-- let's try changing the shard count
SELECT alter_distributed_table('dist_table', shard_count := 7, cascade_to_colocated := false);
-- right now column cache is: b pos 1, c pos 2 because a new table has been created
-- check that b is still distribution column
SELECT table_name, citus_table_type, distribution_column, shard_count FROM public.citus_tables
WHERE table_name = 'dist_table'::regclass;
-- let's add another column
ALTER TABLE dist_table ADD COLUMN d int DEFAULT 2;
-- right now column cache is: b pos 1, c pos 2, d pos 3, dist_column is b
-- test using alter_distributed_table to change dist. column after dropping one column
ALTER TABLE dist_table DROP COLUMN c;
-- right now column cache is: b pos 1, c pos 2 attisdropped=true, d pos 3
-- let's try changing the distribution column
SELECT alter_distributed_table('dist_table', distribution_column := 'd');
-- right now column cache is: b pos 1, d pos 2 because a new table has been created
-- check that d is the distribution column
SELECT table_name, citus_table_type, distribution_column, shard_count FROM public.citus_tables
WHERE table_name = 'dist_table'::regclass;
-- add another column and undistribute
ALTER TABLE dist_table ADD COLUMN e int DEFAULT 3;
SELECT undistribute_table('dist_table');
-- right now column cache is: b pos 1, d pos 2, e pos 3, table is not Citus table
-- try dropping column and then distributing
ALTER TABLE dist_table DROP COLUMN b;
-- right now column cache is: b pos 1 attisdropped=true, d pos 2, e pos 3
-- distribute with d
SELECT create_distributed_table ('dist_table', 'd', colocate_with := 'none');
-- check that d is the distribution column
SELECT table_name, citus_table_type, distribution_column, shard_count FROM public.citus_tables
WHERE table_name = 'dist_table'::regclass;
-- alter distribution column to e
SELECT alter_distributed_table('dist_table', distribution_column := 'e');
-- right now column cache is: d pos 1, e pos 2
-- check that e is the distribution column
SELECT table_name, citus_table_type, distribution_column, shard_count FROM public.citus_tables
WHERE table_name = 'dist_table'::regclass;
ALTER TABLE dist_table ADD COLUMN a int DEFAULT 4;
ALTER TABLE dist_table ADD COLUMN b int DEFAULT 5;
-- right now column cache is: d pos 1, e pos 2, a pos 3, b pos 4
-- alter distribution column to a
SELECT alter_distributed_table('dist_table', distribution_column := 'a');
-- right now column cache hasn't changed
-- check that a is the distribution column
SELECT table_name, citus_table_type, distribution_column, shard_count FROM public.citus_tables
WHERE table_name = 'dist_table'::regclass;
ALTER TABLE dist_table DROP COLUMN d;
ALTER TABLE dist_table DROP COLUMN e;
-- right now column cache is: d pos 1 attisdropped=true, e pos 2 attisdropped=true, a pos 3, b pos 4
-- alter distribution column to b
SELECT alter_distributed_table('dist_table', distribution_column := 'b');
-- column cache is: a pos 1, b pos 2 -> configuration with which we started these drop column tests
-- check that b is the distribution column
SELECT table_name, citus_table_type, distribution_column, shard_count FROM public.citus_tables
WHERE table_name = 'dist_table'::regclass;
-- test altering colocation, note that shard count will also change
SELECT alter_distributed_table('dist_table', colocate_with := 'alter_distributed_table.colocation_table');
SELECT table_name, citus_table_type, distribution_column, shard_count FROM public.citus_tables