Fix crash in isolation tests

- There was a crash when the table a shardid belonged to changed during
  a session. Instead of crashing (a failed assert), we now throw an error.
- Update the isolation test that was crashing so that it no longer
  exercises that code path.
- Add a regression test to check that the error is thrown.
pull/1265/head
Brian Cloutier 2017-04-29 01:25:34 +03:00 committed by Brian Cloutier
parent ad5cd326a4
commit 22e7aa9a4f
8 changed files with 82 additions and 30 deletions

View File

@ -793,7 +793,14 @@ BuildCachedShardList(DistTableCacheEntry *cacheEntry)
shardEntry = hash_search(DistShardCacheHash, &shardInterval->shardId, HASH_ENTER, shardEntry = hash_search(DistShardCacheHash, &shardInterval->shardId, HASH_ENTER,
&foundInCache); &foundInCache);
Assert(!foundInCache); if (foundInCache)
{
ereport(ERROR, (errmsg("cached metadata for shard " UINT64_FORMAT
" is inconsistent",
shardInterval->shardId),
errhint("Reconnect and try again.")));
}
shardEntry->shardIndex = shardIndex; shardEntry->shardIndex = shardIndex;
shardEntry->tableEntry = cacheEntry; shardEntry->tableEntry = cacheEntry;
@ -2121,7 +2128,7 @@ WorkerNodeHashCode(const void *key, Size keySize)
* ResetDistTableCacheEntry frees any out-of-band memory used by a cache entry, * ResetDistTableCacheEntry frees any out-of-band memory used by a cache entry,
* but does not free the entry itself. * but does not free the entry itself.
*/ */
void static void
ResetDistTableCacheEntry(DistTableCacheEntry *cacheEntry) ResetDistTableCacheEntry(DistTableCacheEntry *cacheEntry)
{ {
int shardIndex = 0; int shardIndex = 0;

View File

@ -7,7 +7,7 @@ step s1a:
master_add_node master_add_node
(1,1,localhost,57637,default,f) (1,1,localhost,57637,default,f,t)
master_add_node master_add_node
(2,2,localhost,57638,default,f) (2,2,localhost,57638,default,f,t)

View File

@ -5,7 +5,7 @@ master_create_worker_shards
step s2-invalidate-57637: step s2-invalidate-57637:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57637; UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637;
step s1-begin: step s1-begin:
BEGIN; BEGIN;
@ -14,7 +14,7 @@ step s1-insertone:
INSERT INTO test_table VALUES(1, 1); INSERT INTO test_table VALUES(1, 1);
step s2-repair: step s2-repair:
SELECT master_copy_shard_placement(102008, 'localhost', 57638, 'localhost', 57637); SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass), 'localhost', 57638, 'localhost', 57637);
<waiting ...> <waiting ...>
step s1-commit: step s1-commit:
COMMIT; COMMIT;
@ -32,7 +32,7 @@ step s1-insertone:
INSERT INTO test_table VALUES(1, 1); INSERT INTO test_table VALUES(1, 1);
step s2-invalidate-57637: step s2-invalidate-57637:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57637; UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637;
step s1-begin: step s1-begin:
BEGIN; BEGIN;
@ -41,7 +41,7 @@ step s1-insertall:
INSERT INTO test_table SELECT test_id, data+1 FROM test_table; INSERT INTO test_table SELECT test_id, data+1 FROM test_table;
step s2-repair: step s2-repair:
SELECT master_copy_shard_placement(102008, 'localhost', 57638, 'localhost', 57637); SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass), 'localhost', 57638, 'localhost', 57637);
<waiting ...> <waiting ...>
step s1-commit: step s1-commit:
COMMIT; COMMIT;
@ -56,13 +56,13 @@ master_create_worker_shards
step s2-invalidate-57637: step s2-invalidate-57637:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57637; UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637;
step s2-begin: step s2-begin:
BEGIN; BEGIN;
step s2-repair: step s2-repair:
SELECT master_copy_shard_placement(102008, 'localhost', 57638, 'localhost', 57637); SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass), 'localhost', 57638, 'localhost', 57637);
master_copy_shard_placement master_copy_shard_placement
@ -75,7 +75,7 @@ step s2-commit:
step s1-insertone: <... completed> step s1-insertone: <... completed>
step s2-invalidate-57638: step s2-invalidate-57638:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57638; UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57638;
step s1-display: step s1-display:
SELECT * FROM test_table WHERE test_id = 1; SELECT * FROM test_table WHERE test_id = 1;
@ -84,10 +84,10 @@ test_id data
1 1 1 1
step s2-invalidate-57637: step s2-invalidate-57637:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57637; UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637;
step s2-revalidate-57638: step s2-revalidate-57638:
UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = 102008 AND nodeport = 57638; UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57638;
step s1-display: step s1-display:
SELECT * FROM test_table WHERE test_id = 1; SELECT * FROM test_table WHERE test_id = 1;
@ -101,7 +101,7 @@ master_create_worker_shards
step s2-invalidate-57637: step s2-invalidate-57637:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57637; UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637;
step s1-prepared-insertone: step s1-prepared-insertone:
EXECUTE insertone; EXECUTE insertone;
@ -110,7 +110,7 @@ step s2-begin:
BEGIN; BEGIN;
step s2-repair: step s2-repair:
SELECT master_copy_shard_placement(102008, 'localhost', 57638, 'localhost', 57637); SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass), 'localhost', 57638, 'localhost', 57637);
master_copy_shard_placement master_copy_shard_placement
@ -124,7 +124,7 @@ step s2-commit:
step s1-prepared-insertone: <... completed> step s1-prepared-insertone: <... completed>
error in steps s2-commit s1-prepared-insertone: ERROR: prepared modifications cannot be executed on a shard while it is being copied error in steps s2-commit s1-prepared-insertone: ERROR: prepared modifications cannot be executed on a shard while it is being copied
step s2-invalidate-57638: step s2-invalidate-57638:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57638; UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57638;
step s1-display: step s1-display:
SELECT * FROM test_table WHERE test_id = 1; SELECT * FROM test_table WHERE test_id = 1;
@ -133,10 +133,10 @@ test_id data
1 1 1 1
step s2-invalidate-57637: step s2-invalidate-57637:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57637; UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637;
step s2-revalidate-57638: step s2-revalidate-57638:
UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = 102008 AND nodeport = 57638; UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57638;
step s1-display: step s1-display:
SELECT * FROM test_table WHERE test_id = 1; SELECT * FROM test_table WHERE test_id = 1;
@ -150,7 +150,7 @@ master_create_worker_shards
step s2-invalidate-57637: step s2-invalidate-57637:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57637; UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637;
step s1-insertone: step s1-insertone:
INSERT INTO test_table VALUES(1, 1); INSERT INTO test_table VALUES(1, 1);
@ -162,7 +162,7 @@ step s2-begin:
BEGIN; BEGIN;
step s2-repair: step s2-repair:
SELECT master_copy_shard_placement(102008, 'localhost', 57638, 'localhost', 57637); SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass), 'localhost', 57638, 'localhost', 57637);
master_copy_shard_placement master_copy_shard_placement
@ -176,7 +176,7 @@ step s2-commit:
step s1-prepared-insertall: <... completed> step s1-prepared-insertall: <... completed>
error in steps s2-commit s1-prepared-insertall: ERROR: prepared modifications cannot be executed on a shard while it is being copied error in steps s2-commit s1-prepared-insertall: ERROR: prepared modifications cannot be executed on a shard while it is being copied
step s2-invalidate-57638: step s2-invalidate-57638:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57638; UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57638;
step s1-display: step s1-display:
SELECT * FROM test_table WHERE test_id = 1; SELECT * FROM test_table WHERE test_id = 1;
@ -186,10 +186,10 @@ test_id data
1 1 1 1
1 2 1 2
step s2-invalidate-57637: step s2-invalidate-57637:
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57637; UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637;
step s2-revalidate-57638: step s2-revalidate-57638:
UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = 102008 AND nodeport = 57638; UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57638;
step s1-display: step s1-display:
SELECT * FROM test_table WHERE test_id = 1; SELECT * FROM test_table WHERE test_id = 1;

View File

@ -0,0 +1,28 @@
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1601000;
CREATE TABLE tab9 (test_id integer NOT NULL, data int);
CREATE TABLE tab10 (test_id integer NOT NULL, data int);
SELECT master_create_distributed_table('tab9', 'test_id', 'hash');
master_create_distributed_table
---------------------------------
(1 row)
SELECT master_create_distributed_table('tab10', 'test_id', 'hash');
master_create_distributed_table
---------------------------------
(1 row)
SELECT master_create_worker_shards('tab9', 1, 1);
master_create_worker_shards
-----------------------------
(1 row)
TRUNCATE tab9;
UPDATE pg_dist_shard SET logicalrelid = 'tab10'::regclass WHERE logicalrelid = 'tab9'::regclass;
TRUNCATE tab10;
ERROR: cached metadata for shard 1601000 is inconsistent
HINT: Reconnect and try again.
DROP TABLE tab9;
DROP TABLE tab10;

View File

@ -1,3 +1,3 @@
test: isolation_cluster_management test: isolation_cluster_management
test: isolation_concurrent_dml
test: isolation_dml_vs_repair test: isolation_dml_vs_repair
test: isolation_concurrent_dml

View File

@ -234,3 +234,9 @@ test: multi_transactional_drop_shards
# multi_multiuser tests simple combinations of permission access and queries # multi_multiuser tests simple combinations of permission access and queries
# ---------- # ----------
test: multi_multiuser test: multi_multiuser
# ----------
# multi_cache_invalidation tests for an obscure crash Citus used to exhibit when shardids
# changed the table they belonged to during a session
# ----------
test: multi_cache_invalidation

View File

@ -1,6 +1,5 @@
setup setup
{ {
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 102008;
CREATE TABLE test_table (test_id integer NOT NULL, data int); CREATE TABLE test_table (test_id integer NOT NULL, data int);
SELECT master_create_distributed_table('test_table', 'test_id', 'hash'); SELECT master_create_distributed_table('test_table', 'test_id', 'hash');
SELECT master_create_worker_shards('test_table', 1, 2); SELECT master_create_worker_shards('test_table', 1, 2);
@ -66,27 +65,27 @@ step "s2-begin"
step "s2-invalidate-57637" step "s2-invalidate-57637"
{ {
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57637; UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637;
} }
step "s2-revalidate-57637" step "s2-revalidate-57637"
{ {
UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = 102008 AND nodeport = 57637; UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637;
} }
step "s2-invalidate-57638" step "s2-invalidate-57638"
{ {
UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57638; UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57638;
} }
step "s2-revalidate-57638" step "s2-revalidate-57638"
{ {
UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = 102008 AND nodeport = 57638; UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57638;
} }
step "s2-repair" step "s2-repair"
{ {
SELECT master_copy_shard_placement(102008, 'localhost', 57638, 'localhost', 57637); SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass), 'localhost', 57638, 'localhost', 57637);
} }
step "s2-commit" step "s2-commit"

View File

@ -0,0 +1,12 @@
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1601000;
CREATE TABLE tab9 (test_id integer NOT NULL, data int);
CREATE TABLE tab10 (test_id integer NOT NULL, data int);
SELECT master_create_distributed_table('tab9', 'test_id', 'hash');
SELECT master_create_distributed_table('tab10', 'test_id', 'hash');
SELECT master_create_worker_shards('tab9', 1, 1);
TRUNCATE tab9;
UPDATE pg_dist_shard SET logicalrelid = 'tab10'::regclass WHERE logicalrelid = 'tab9'::regclass;
TRUNCATE tab10;
DROP TABLE tab9;
DROP TABLE tab10;