From 18015ca5016217bca72829514e23adf1572838dd Mon Sep 17 00:00:00 2001
From: Jelte Fennema
Date: Fri, 26 Aug 2022 11:48:55 +0200
Subject: [PATCH] Fix flakiness in multi_transaction_recovery (#6249)

Sometimes in CI, multi_transaction_recovery would fail with the following
error:

```diff
 SET LOCAL citus.defer_drop_after_shard_move TO OFF;
 SELECT citus_move_shard_placement((SELECT * FROM selected_shard), 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode := 'block_writes');
- citus_move_shard_placement
----------------------------------------------------------------------
-
-(1 row)
-
+ERROR: could not find placement matching "localhost:57637"
+HINT: Confirm the placement still exists and try again.
 COMMIT;
```

Source: https://app.circleci.com/pipelines/github/citusdata/citus/26510/workflows/8269ea93-d9b4-4376-ae0e-8332a5c15fc6/jobs/755548

The reason for this was that, when choosing `selected_shard`, we didn't
ensure that the shard was actually located on the node we were moving it
from. Instead, we simply picked the first shard of the table that the query
returned, which may or may not have a placement on that node.

To fix this issue, this PR adds a filter so that only shards located on the
source node (worker 1) are chosen.
--- src/test/regress/expected/multi_transaction_recovery.out | 5 ++++- src/test/regress/sql/multi_transaction_recovery.sql | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/test/regress/expected/multi_transaction_recovery.out b/src/test/regress/expected/multi_transaction_recovery.out index ad5f5e699..1e4cea224 100644 --- a/src/test/regress/expected/multi_transaction_recovery.out +++ b/src/test/regress/expected/multi_transaction_recovery.out @@ -352,7 +352,10 @@ SELECT recover_prepared_transactions(); 0 (1 row) -SELECT shardid INTO selected_shard FROM pg_dist_shard WHERE logicalrelid='test_2pcskip'::regclass LIMIT 1; +SELECT shardid INTO selected_shard +FROM citus_shards +WHERE table_name='test_2pcskip'::regclass AND nodeport = :worker_1_port +LIMIT 1; SELECT COUNT(*) FROM pg_dist_transaction; count --------------------------------------------------------------------- diff --git a/src/test/regress/sql/multi_transaction_recovery.sql b/src/test/regress/sql/multi_transaction_recovery.sql index 5156c83e3..5b5afb2e2 100644 --- a/src/test/regress/sql/multi_transaction_recovery.sql +++ b/src/test/regress/sql/multi_transaction_recovery.sql @@ -193,7 +193,11 @@ SELECT create_distributed_table('test_2pcskip', 'a'); INSERT INTO test_2pcskip SELECT i FROM generate_series(0, 5)i; SELECT recover_prepared_transactions(); -SELECT shardid INTO selected_shard FROM pg_dist_shard WHERE logicalrelid='test_2pcskip'::regclass LIMIT 1; +SELECT shardid INTO selected_shard +FROM citus_shards +WHERE table_name='test_2pcskip'::regclass AND nodeport = :worker_1_port +LIMIT 1; + SELECT COUNT(*) FROM pg_dist_transaction; BEGIN; SET LOCAL citus.defer_drop_after_shard_move TO OFF;