diff --git a/src/backend/distributed/utils/metadata_cache.c b/src/backend/distributed/utils/metadata_cache.c index 15062f78e..27e5ad22c 100644 --- a/src/backend/distributed/utils/metadata_cache.c +++ b/src/backend/distributed/utils/metadata_cache.c @@ -793,7 +793,14 @@ BuildCachedShardList(DistTableCacheEntry *cacheEntry) shardEntry = hash_search(DistShardCacheHash, &shardInterval->shardId, HASH_ENTER, &foundInCache); - Assert(!foundInCache); + if (foundInCache) + { + ereport(ERROR, (errmsg("cached metadata for shard " UINT64_FORMAT + " is inconsistent", + shardInterval->shardId), + errhint("Reconnect and try again."))); + } + shardEntry->shardIndex = shardIndex; shardEntry->tableEntry = cacheEntry; @@ -2121,7 +2128,7 @@ WorkerNodeHashCode(const void *key, Size keySize) * ResetDistTableCacheEntry frees any out-of-band memory used by a cache entry, * but does not free the entry itself. */ -void +static void ResetDistTableCacheEntry(DistTableCacheEntry *cacheEntry) { int shardIndex = 0; diff --git a/src/test/regress/expected/isolation_cluster_management.out b/src/test/regress/expected/isolation_cluster_management.out index 8ee09c894..5932b5ec9 100644 --- a/src/test/regress/expected/isolation_cluster_management.out +++ b/src/test/regress/expected/isolation_cluster_management.out @@ -7,7 +7,7 @@ step s1a: master_add_node -(1,1,localhost,57637,default,f) +(1,1,localhost,57637,default,f,t) master_add_node -(2,2,localhost,57638,default,f) +(2,2,localhost,57638,default,f,t) diff --git a/src/test/regress/expected/isolation_dml_vs_repair.out b/src/test/regress/expected/isolation_dml_vs_repair.out index 0730dd370..543929531 100644 --- a/src/test/regress/expected/isolation_dml_vs_repair.out +++ b/src/test/regress/expected/isolation_dml_vs_repair.out @@ -5,7 +5,7 @@ master_create_worker_shards step s2-invalidate-57637: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57637; + UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE 
shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637; step s1-begin: BEGIN; @@ -14,7 +14,7 @@ step s1-insertone: INSERT INTO test_table VALUES(1, 1); step s2-repair: - SELECT master_copy_shard_placement(102008, 'localhost', 57638, 'localhost', 57637); + SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass), 'localhost', 57638, 'localhost', 57637); step s1-commit: COMMIT; @@ -32,7 +32,7 @@ step s1-insertone: INSERT INTO test_table VALUES(1, 1); step s2-invalidate-57637: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57637; + UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637; step s1-begin: BEGIN; @@ -41,7 +41,7 @@ step s1-insertall: INSERT INTO test_table SELECT test_id, data+1 FROM test_table; step s2-repair: - SELECT master_copy_shard_placement(102008, 'localhost', 57638, 'localhost', 57637); + SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass), 'localhost', 57638, 'localhost', 57637); step s1-commit: COMMIT; @@ -56,13 +56,13 @@ master_create_worker_shards step s2-invalidate-57637: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57637; + UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637; step s2-begin: BEGIN; step s2-repair: - SELECT master_copy_shard_placement(102008, 'localhost', 57638, 'localhost', 57637); + SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass), 'localhost', 57638, 'localhost', 57637); master_copy_shard_placement @@ -75,7 +75,7 @@ step s2-commit: step s1-insertone: <... 
completed> step s2-invalidate-57638: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57638; + UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57638; step s1-display: SELECT * FROM test_table WHERE test_id = 1; @@ -84,10 +84,10 @@ test_id data 1 1 step s2-invalidate-57637: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57637; + UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637; step s2-revalidate-57638: - UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = 102008 AND nodeport = 57638; + UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57638; step s1-display: SELECT * FROM test_table WHERE test_id = 1; @@ -101,7 +101,7 @@ master_create_worker_shards step s2-invalidate-57637: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57637; + UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637; step s1-prepared-insertone: EXECUTE insertone; @@ -110,7 +110,7 @@ step s2-begin: BEGIN; step s2-repair: - SELECT master_copy_shard_placement(102008, 'localhost', 57638, 'localhost', 57637); + SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass), 'localhost', 57638, 'localhost', 57637); master_copy_shard_placement @@ -124,7 +124,7 @@ step s2-commit: step s1-prepared-insertone: <... 
completed> error in steps s2-commit s1-prepared-insertone: ERROR: prepared modifications cannot be executed on a shard while it is being copied step s2-invalidate-57638: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57638; + UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57638; step s1-display: SELECT * FROM test_table WHERE test_id = 1; @@ -133,10 +133,10 @@ test_id data 1 1 step s2-invalidate-57637: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57637; + UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637; step s2-revalidate-57638: - UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = 102008 AND nodeport = 57638; + UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57638; step s1-display: SELECT * FROM test_table WHERE test_id = 1; @@ -150,7 +150,7 @@ master_create_worker_shards step s2-invalidate-57637: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57637; + UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637; step s1-insertone: INSERT INTO test_table VALUES(1, 1); @@ -162,7 +162,7 @@ step s2-begin: BEGIN; step s2-repair: - SELECT master_copy_shard_placement(102008, 'localhost', 57638, 'localhost', 57637); + SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass), 'localhost', 57638, 'localhost', 57637); master_copy_shard_placement @@ -176,7 +176,7 @@ step s2-commit: step s1-prepared-insertall: <... 
completed> error in steps s2-commit s1-prepared-insertall: ERROR: prepared modifications cannot be executed on a shard while it is being copied step s2-invalidate-57638: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57638; + UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57638; step s1-display: SELECT * FROM test_table WHERE test_id = 1; @@ -186,10 +186,10 @@ test_id data 1 1 1 2 step s2-invalidate-57637: - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57637; + UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637; step s2-revalidate-57638: - UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = 102008 AND nodeport = 57638; + UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57638; step s1-display: SELECT * FROM test_table WHERE test_id = 1; diff --git a/src/test/regress/expected/multi_cache_invalidation.out b/src/test/regress/expected/multi_cache_invalidation.out new file mode 100644 index 000000000..e5bdd6698 --- /dev/null +++ b/src/test/regress/expected/multi_cache_invalidation.out @@ -0,0 +1,28 @@ +ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1601000; +CREATE TABLE tab9 (test_id integer NOT NULL, data int); +CREATE TABLE tab10 (test_id integer NOT NULL, data int); +SELECT master_create_distributed_table('tab9', 'test_id', 'hash'); + master_create_distributed_table +--------------------------------- + +(1 row) + +SELECT master_create_distributed_table('tab10', 'test_id', 'hash'); + master_create_distributed_table +--------------------------------- + +(1 row) + +SELECT master_create_worker_shards('tab9', 1, 1); + 
master_create_worker_shards +----------------------------- + +(1 row) + +TRUNCATE tab9; +UPDATE pg_dist_shard SET logicalrelid = 'tab10'::regclass WHERE logicalrelid = 'tab9'::regclass; +TRUNCATE tab10; +ERROR: cached metadata for shard 1601000 is inconsistent +HINT: Reconnect and try again. +DROP TABLE tab9; +DROP TABLE tab10; diff --git a/src/test/regress/isolation_schedule b/src/test/regress/isolation_schedule index 93a2f5ab8..51c9d4f97 100644 --- a/src/test/regress/isolation_schedule +++ b/src/test/regress/isolation_schedule @@ -1,3 +1,3 @@ test: isolation_cluster_management -test: isolation_concurrent_dml test: isolation_dml_vs_repair +test: isolation_concurrent_dml diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 1ca71eec6..5381ccf5a 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -234,3 +234,9 @@ test: multi_transactional_drop_shards # multi_multiuser tests simple combinations of permission access and queries # ---------- test: multi_multiuser + +# ---------- +# multi_cache_invalidation tests for an obscure crash Citus used to exhibit when shardids +# changed the table they belonged to during a session +# ---------- +test: multi_cache_invalidation diff --git a/src/test/regress/specs/isolation_dml_vs_repair.spec b/src/test/regress/specs/isolation_dml_vs_repair.spec index e4e9219ee..6da20e7d9 100644 --- a/src/test/regress/specs/isolation_dml_vs_repair.spec +++ b/src/test/regress/specs/isolation_dml_vs_repair.spec @@ -1,6 +1,5 @@ setup { - ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 102008; CREATE TABLE test_table (test_id integer NOT NULL, data int); SELECT master_create_distributed_table('test_table', 'test_id', 'hash'); SELECT master_create_worker_shards('test_table', 1, 2); @@ -66,27 +65,27 @@ step "s2-begin" step "s2-invalidate-57637" { - UPDATE pg_dist_shard_placement SET
shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637; } step "s2-revalidate-57637" { - UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = 102008 AND nodeport = 57637; + UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57637; } step "s2-invalidate-57638" { - UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = 102008 AND nodeport = 57638; + UPDATE pg_dist_shard_placement SET shardstate = '3' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57638; } step "s2-revalidate-57638" { - UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = 102008 AND nodeport = 57638; + UPDATE pg_dist_shard_placement SET shardstate = '1' WHERE shardid = (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass) AND nodeport = 57638; } step "s2-repair" { - SELECT master_copy_shard_placement(102008, 'localhost', 57638, 'localhost', 57637); + SELECT master_copy_shard_placement((SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_table'::regclass), 'localhost', 57638, 'localhost', 57637); } step "s2-commit" diff --git a/src/test/regress/sql/multi_cache_invalidation.sql b/src/test/regress/sql/multi_cache_invalidation.sql new file mode 100644 index 000000000..8594ad36d --- /dev/null +++ b/src/test/regress/sql/multi_cache_invalidation.sql @@ -0,0 +1,12 @@ +ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1601000; +CREATE TABLE tab9 (test_id integer NOT NULL, data int); +CREATE TABLE tab10 (test_id integer NOT NULL, data int); +SELECT master_create_distributed_table('tab9', 'test_id', 'hash'); +SELECT master_create_distributed_table('tab10', 'test_id', 'hash'); +SELECT master_create_worker_shards('tab9', 1, 1); +TRUNCATE tab9; +UPDATE pg_dist_shard SET logicalrelid = 
'tab10'::regclass WHERE logicalrelid = 'tab9'::regclass; +TRUNCATE tab10; + +DROP TABLE tab9; +DROP TABLE tab10;