Fix inconsistent shard metadata issue

pull/3334/head
Marco Slot 2019-12-23 03:49:06 +01:00
parent 10605f8a26
commit a2ddfecd86
3 changed files with 80 additions and 27 deletions

View File

@ -1158,6 +1158,25 @@ BuildCachedShardList(DistTableCacheEntry *cacheEntry)
}
heap_close(distShardRelation, AccessShareLock);
ShardInterval *firstShardInterval = shardIntervalArray[0];
bool foundInCache = false;
ShardCacheEntry *shardEntry = hash_search(DistShardCacheHash,
&firstShardInterval->shardId, HASH_FIND,
&foundInCache);
if (foundInCache && shardEntry->tableEntry != cacheEntry)
{
/*
* Normally, all shard cache entries for a given DistTableEntry are removed
* before we get here. There is one exception: When a shard changes from
* one relation ID to another. That typically happens during metadata
* syncing when the distributed table is dropped and re-created without
* changing the shard IDs. That means that the old relation no longer
* exists and we can safely wipe its entry, which will remove all
* corresponding shard cache entries.
*/
ResetDistTableCacheEntry(shardEntry->tableEntry);
}
}
/* look up value comparison function */

View File

@ -1,24 +1,48 @@
-- test that we are tolerant to the relation ID of a shard being changed
-- and do not cache invalid metadata
CREATE SCHEMA mci_1;
CREATE SCHEMA mci_2;
SET citus.next_shard_id TO 1601000;
SET citus.shard_count TO 1;
SET citus.shard_replication_factor TO 1;
CREATE TABLE tab9 (test_id integer NOT NULL, data int);
CREATE TABLE tab10 (test_id integer NOT NULL, data int);
SELECT create_distributed_table('tab9', 'test_id', 'hash');
CREATE TABLE mci_1.test (test_id integer NOT NULL, data int);
CREATE TABLE mci_2.test (test_id integer NOT NULL, data int);
SELECT create_distributed_table('mci_1.test', 'test_id');
create_distributed_table
--------------------------
(1 row)
SELECT master_create_distributed_table('tab10', 'test_id', 'hash');
master_create_distributed_table
---------------------------------
SELECT create_distributed_table('mci_2.test', 'test_id', 'append');
create_distributed_table
--------------------------
(1 row)
TRUNCATE tab9;
UPDATE pg_dist_shard SET logicalrelid = 'tab10'::regclass WHERE logicalrelid = 'tab9'::regclass;
TRUNCATE tab10;
ERROR: cached metadata for shard 1601000 is inconsistent
HINT: Reconnect and try again.
DROP TABLE tab9;
DROP TABLE tab10;
INSERT INTO mci_1.test VALUES (1,2), (3,4);
-- move shards into other append-distributed table
SELECT run_command_on_placements('mci_1.test', 'ALTER TABLE %s SET SCHEMA mci_2');
run_command_on_placements
-------------------------------------------
(localhost,57637,1601000,t,"ALTER TABLE")
(localhost,57638,1601000,t,"ALTER TABLE")
(localhost,57637,1601001,t,"ALTER TABLE")
(localhost,57638,1601001,t,"ALTER TABLE")
(localhost,57637,1601002,t,"ALTER TABLE")
(localhost,57638,1601002,t,"ALTER TABLE")
(localhost,57637,1601003,t,"ALTER TABLE")
(localhost,57638,1601003,t,"ALTER TABLE")
(8 rows)
UPDATE pg_dist_shard
SET logicalrelid = 'mci_2.test'::regclass, shardminvalue = NULL, shardmaxvalue = NULL
WHERE logicalrelid = 'mci_1.test'::regclass;
SELECT * FROM mci_2.test ORDER BY test_id;
test_id | data
---------+------
1 | 2
3 | 4
(2 rows)
DROP SCHEMA mci_1 CASCADE;
NOTICE: drop cascades to table mci_1.test
DROP SCHEMA mci_2 CASCADE;
NOTICE: drop cascades to table mci_2.test

View File

@ -1,13 +1,23 @@
SET citus.next_shard_id TO 1601000;
SET citus.shard_count TO 1;
SET citus.shard_replication_factor TO 1;
CREATE TABLE tab9 (test_id integer NOT NULL, data int);
CREATE TABLE tab10 (test_id integer NOT NULL, data int);
SELECT create_distributed_table('tab9', 'test_id', 'hash');
SELECT master_create_distributed_table('tab10', 'test_id', 'hash');
TRUNCATE tab9;
UPDATE pg_dist_shard SET logicalrelid = 'tab10'::regclass WHERE logicalrelid = 'tab9'::regclass;
TRUNCATE tab10;
-- test that we are tolerant to the relation ID of a shard being changed
-- and do not cache invalid metadata
CREATE SCHEMA mci_1;
CREATE SCHEMA mci_2;
DROP TABLE tab9;
DROP TABLE tab10;
SET citus.next_shard_id TO 1601000;
CREATE TABLE mci_1.test (test_id integer NOT NULL, data int);
CREATE TABLE mci_2.test (test_id integer NOT NULL, data int);
SELECT create_distributed_table('mci_1.test', 'test_id');
SELECT create_distributed_table('mci_2.test', 'test_id', 'append');
INSERT INTO mci_1.test VALUES (1,2), (3,4);
-- move shards into other append-distributed table
SELECT run_command_on_placements('mci_1.test', 'ALTER TABLE %s SET SCHEMA mci_2');
UPDATE pg_dist_shard
SET logicalrelid = 'mci_2.test'::regclass, shardminvalue = NULL, shardmaxvalue = NULL
WHERE logicalrelid = 'mci_1.test'::regclass;
SELECT * FROM mci_2.test ORDER BY test_id;
DROP SCHEMA mci_1 CASCADE;
DROP SCHEMA mci_2 CASCADE;