From c9df9f37d66a5873cc5d25f02bbd59a8c608802f Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Mon, 8 May 2017 17:07:26 +0300 Subject: [PATCH 1/2] Accept invalidations during COPY command to prevent inconsistent data among placements COPY command is blocked on the shard metadata lock and once it acquires the lock it doesn't check the cache invalidations. In some cases this could lead to data inconsistency among the shard replicas if there exists concurrent shard placement addition. --- src/backend/distributed/commands/multi_copy.c | 9 +++ .../expected/isolation_add_node_vs_copy.out | 73 +++++++++++++++++++ src/test/regress/isolation_schedule | 1 + .../specs/isolation_add_node_vs_copy.spec | 71 ++++++++++++++++++ 4 files changed, 154 insertions(+) create mode 100644 src/test/regress/expected/isolation_add_node_vs_copy.out create mode 100644 src/test/regress/specs/isolation_add_node_vs_copy.spec diff --git a/src/backend/distributed/commands/multi_copy.c b/src/backend/distributed/commands/multi_copy.c index a0e1f0b65..67370d5b4 100644 --- a/src/backend/distributed/commands/multi_copy.c +++ b/src/backend/distributed/commands/multi_copy.c @@ -72,6 +72,7 @@ #include "nodes/makefuncs.h" #include "tsearch/ts_locale.h" #include "utils/builtins.h" +#include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/memutils.h" @@ -1751,6 +1752,14 @@ CitusCopyDestReceiverStartup(DestReceiver *dest, int operation, LockShardListMetadata(shardIntervalList, ShareLock); LockShardListResources(shardIntervalList, ShareLock); + /* + * We might have some concurrent metadata changes. In order to get the changes, + * we first need to accept the cache invalidation messages and then access the + * cache entry to trigger the cache entry rebuild. + */ + AcceptInvalidationMessages(); + cacheEntry = DistributedTableCacheEntry(tableId); + /* keep the table metadata to avoid looking it up for every tuple */ copyDest->tableMetadata = cacheEntry; diff --git a/src/test/regress/expected/isolation_add_node_vs_copy.out b/src/test/regress/expected/isolation_add_node_vs_copy.out new file mode 100644 index 000000000..b2cbd5631 --- /dev/null +++ b/src/test/regress/expected/isolation_add_node_vs_copy.out @@ -0,0 +1,73 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-begin s1-add-second-worker s2-begin s2-copy-to-reference-table s1-commit s2-commit s2-print-content s1-remove-second-worker s1-begin s1-add-second-worker s2-begin s2-copy-to-reference-table s1-commit s2-commit s2-print-content +create_reference_table + + +step s1-begin: + BEGIN; + +step s1-add-second-worker: + SELECT master_add_node('localhost', 57638); + +master_add_node + +(4,4,localhost,57638,default,f,t) +step s2-begin: + BEGIN; + +step s2-copy-to-reference-table: + COPY test_reference_table FROM PROGRAM 'echo "1\n2\n3\n4\n5"'; + +step s1-commit: + COMMIT; + +step s2-copy-to-reference-table: <... completed> +step s2-commit: + COMMIT; + +step s2-print-content: + SELECT run_command_on_placements('test_reference_table', 'select count(*) from %s'); + +run_command_on_placements + +(localhost,57637,102197,t,5) +(localhost,57638,102197,t,5) +step s1-remove-second-worker: + SELECT master_remove_node('localhost', 57638); + +master_remove_node + + +step s1-begin: + BEGIN; + +step s1-add-second-worker: + SELECT master_add_node('localhost', 57638); + +master_add_node + +(5,5,localhost,57638,default,f,t) +step s2-begin: + BEGIN; + +step s2-copy-to-reference-table: + COPY test_reference_table FROM PROGRAM 'echo "1\n2\n3\n4\n5"'; + +step s1-commit: + COMMIT; + +step s2-copy-to-reference-table: <... completed> +step s2-commit: + COMMIT; + +step s2-print-content: + SELECT run_command_on_placements('test_reference_table', 'select count(*) from %s'); + +run_command_on_placements + +(localhost,57637,102197,t,10) +(localhost,57638,102197,t,10) +master_add_node + +(5,5,localhost,57638,default,f,t) diff --git a/src/test/regress/isolation_schedule b/src/test/regress/isolation_schedule index 0dcf6e45c..2b6f96b54 100644 --- a/src/test/regress/isolation_schedule +++ b/src/test/regress/isolation_schedule @@ -2,3 +2,4 @@ test: isolation_cluster_management test: isolation_dml_vs_repair test: isolation_concurrent_dml test: isolation_drop_shards +test: isolation_add_node_vs_copy diff --git a/src/test/regress/specs/isolation_add_node_vs_copy.spec b/src/test/regress/specs/isolation_add_node_vs_copy.spec new file mode 100644 index 000000000..b4f0a6cfe --- /dev/null +++ b/src/test/regress/specs/isolation_add_node_vs_copy.spec @@ -0,0 +1,71 @@ +setup +{ + truncate pg_dist_shard_placement; + truncate pg_dist_shard; + truncate pg_dist_partition; + truncate pg_dist_colocation; + truncate pg_dist_node; + + SELECT master_add_node('localhost', 57637); + + CREATE TABLE test_reference_table (test_id integer); + SELECT create_reference_table('test_reference_table'); +} + +teardown +{ + DROP TABLE IF EXISTS test_reference_table CASCADE; + + SELECT master_add_node('localhost', 57637); + SELECT master_add_node('localhost', 57638); +} + +session "s1" + +step "s1-begin" +{ + BEGIN; +} + +step "s1-add-second-worker" +{ + SELECT master_add_node('localhost', 57638); +} + +step "s1-remove-second-worker" +{ + SELECT master_remove_node('localhost', 57638); +} + +step "s1-commit" +{ + COMMIT; +} + +session "s2" + +step "s2-begin" +{ + BEGIN; +} + +step "s2-copy-to-reference-table" +{ + COPY test_reference_table FROM PROGRAM 'echo "1\n2\n3\n4\n5"'; +} + +step "s2-commit" +{ + COMMIT; +} + +step "s2-print-content" +{ + SELECT run_command_on_placements('test_reference_table', 'select count(*) from %s'); +} + +# verify that copy gets the invalidation and re-builts its metadata cache +# note that we need to run the same test twice to ensure that metadata is cached +# otherwise the test would be useless since the cache would be empty and the +# metadata data is gathered from the tables directly +permutation "s1-begin" "s1-add-second-worker" "s2-begin" "s2-copy-to-reference-table" "s1-commit" "s2-commit" "s2-print-content" "s1-remove-second-worker" "s1-begin" "s1-add-second-worker" "s2-begin" "s2-copy-to-reference-table" "s1-commit" "s2-commit" "s2-print-content" From 0884783de1aaceef48a315da5fd20cec943e87f3 Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Wed, 10 May 2017 12:09:12 +0300 Subject: [PATCH 2/2] Improve tests --- .../expected/isolation_add_node_vs_copy.out | 42 ++++++++++++------- .../specs/isolation_add_node_vs_copy.spec | 29 ++++++------- 2 files changed, 41 insertions(+), 30 deletions(-) diff --git a/src/test/regress/expected/isolation_add_node_vs_copy.out b/src/test/regress/expected/isolation_add_node_vs_copy.out index b2cbd5631..52c73b8a2 100644 --- a/src/test/regress/expected/isolation_add_node_vs_copy.out +++ b/src/test/regress/expected/isolation_add_node_vs_copy.out @@ -8,11 +8,11 @@ step s1-begin: BEGIN; step s1-add-second-worker: - SELECT master_add_node('localhost', 57638); + SELECT nodename, nodeport, isactive FROM master_add_node('localhost', 57638); -master_add_node +nodename nodeport isactive -(4,4,localhost,57638,default,f,t) +localhost 57638 t step s2-begin: BEGIN; @@ -27,12 +27,17 @@ step s2-commit: COMMIT; step s2-print-content: - SELECT run_command_on_placements('test_reference_table', 'select count(*) from %s'); + SELECT + nodeport, success, result + FROM + run_command_on_placements('test_reference_table', 'select count(*) from %s') + ORDER BY + nodeport; -run_command_on_placements +nodeport success result -(localhost,57637,102197,t,5) -(localhost,57638,102197,t,5) +57637 t 5 +57638 t 5 step s1-remove-second-worker: SELECT master_remove_node('localhost', 57638); @@ -43,11 +48,11 @@ step s1-begin: BEGIN; step s1-add-second-worker: - SELECT master_add_node('localhost', 57638); + SELECT nodename, nodeport, isactive FROM master_add_node('localhost', 57638); -master_add_node +nodename nodeport isactive -(5,5,localhost,57638,default,f,t) +localhost 57638 t step s2-begin: BEGIN; @@ -62,12 +67,17 @@ step s2-commit: COMMIT; step s2-print-content: - SELECT run_command_on_placements('test_reference_table', 'select count(*) from %s'); + SELECT + nodeport, success, result + FROM + run_command_on_placements('test_reference_table', 'select count(*) from %s') + ORDER BY + nodeport; -run_command_on_placements +nodeport success result -(localhost,57637,102197,t,10) -(localhost,57638,102197,t,10) -master_add_node +57637 t 10 +57638 t 10 +nodename nodeport isactive -(5,5,localhost,57638,default,f,t) +localhost 57638 t diff --git a/src/test/regress/specs/isolation_add_node_vs_copy.spec b/src/test/regress/specs/isolation_add_node_vs_copy.spec index b4f0a6cfe..491d6da23 100644 --- a/src/test/regress/specs/isolation_add_node_vs_copy.spec +++ b/src/test/regress/specs/isolation_add_node_vs_copy.spec @@ -1,23 +1,18 @@ +# remove one of the nodes for the purpose of the test setup -{ - truncate pg_dist_shard_placement; - truncate pg_dist_shard; - truncate pg_dist_partition; - truncate pg_dist_colocation; - truncate pg_dist_node; - - SELECT master_add_node('localhost', 57637); +{ + SELECT master_remove_node('localhost', 57638); CREATE TABLE test_reference_table (test_id integer); SELECT create_reference_table('test_reference_table'); } +# ensure that both nodes exists for the remaining of the isolation tests teardown { - DROP TABLE IF EXISTS test_reference_table CASCADE; - - SELECT master_add_node('localhost', 57637); - SELECT master_add_node('localhost', 57638); + DROP TABLE test_reference_table; + SELECT nodename, nodeport, isactive FROM master_add_node('localhost', 57637); + SELECT nodename, nodeport, isactive FROM master_add_node('localhost', 57638); } session "s1" @@ -29,7 +24,7 @@ step "s1-begin" step "s1-add-second-worker" { - SELECT master_add_node('localhost', 57638); + SELECT nodename, nodeport, isactive FROM master_add_node('localhost', 57638); } step "s1-remove-second-worker" @@ -61,9 +56,15 @@ step "s2-commit" step "s2-print-content" { - SELECT run_command_on_placements('test_reference_table', 'select count(*) from %s'); + SELECT + nodeport, success, result + FROM + run_command_on_placements('test_reference_table', 'select count(*) from %s') + ORDER BY + nodeport; } + # verify that copy gets the invalidation and re-builts its metadata cache # note that we need to run the same test twice to ensure that metadata is cached # otherwise the test would be useless since the cache would be empty and the