Debug flaky logical_replication test (#7309)

Sometimes in CI our logical_replication test fails like this:

```diff
+++ /__w/citus/citus/src/test/regress/results/logical_replication.out.modified	2023-11-01 14:15:08.562758546 +0000
@@ -40,21 +40,21 @@

 SELECT count(*) from pg_publication;
  count
 -------
      0
 (1 row)

 SELECT count(*) from pg_replication_slots;
  count
 -------
-     0
+     1
 (1 row)

 SELECT count(*) FROM dist;
  count
 -------
```

It's hard to understand what is going on here based only on the wrong
count. So this PR changes the test to show the names of the
subscription, publication, and replication slot instead of their counts,
which makes finding the cause easier.

In passing, this also fixes another flaky test in the same file that our
flaky test detection picked up. That fix makes the test wait for resource
cleanup (`public.wait_for_resource_cleanup()`) after the shard move.
pull/7321/head^2
Jelte Fennema-Nio 2023-11-02 13:15:02 +01:00 committed by GitHub
parent 6fed82609c
commit 5a48a1602e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 72 additions and 73 deletions

View File

@@ -32,23 +32,21 @@ CREATE SUBSCRIPTION citus_shard_move_subscription_:postgres_oid
PUBLICATION citus_shard_move_publication_:postgres_oid
WITH (enabled=false, slot_name=citus_shard_move_slot_:postgres_oid);
NOTICE: created replication slot "citus_shard_move_slot_10" on publisher
SELECT count(*) from pg_subscription;
count
SELECT subname from pg_subscription;
subname
---------------------------------------------------------------------
1
citus_shard_move_subscription_10
(1 row)
SELECT count(*) from pg_publication;
count
SELECT pubname from pg_publication;
pubname
---------------------------------------------------------------------
0
(1 row)
(0 rows)
SELECT count(*) from pg_replication_slots;
count
SELECT slot_name from pg_replication_slots;
slot_name
---------------------------------------------------------------------
0
(1 row)
(0 rows)
SELECT count(*) FROM dist;
count
@@ -58,22 +56,21 @@ SELECT count(*) FROM dist;
\c - - - :worker_1_port
SET search_path TO logical_replication;
SELECT count(*) from pg_subscription;
count
SELECT subname from pg_subscription;
subname
---------------------------------------------------------------------
0
(0 rows)
SELECT pubname from pg_publication;
pubname
---------------------------------------------------------------------
citus_shard_move_publication_10
(1 row)
SELECT count(*) from pg_publication;
count
SELECT slot_name from pg_replication_slots;
slot_name
---------------------------------------------------------------------
1
(1 row)
SELECT count(*) from pg_replication_slots;
count
---------------------------------------------------------------------
1
citus_shard_move_slot_10
(1 row)
SELECT count(*) FROM dist;
@@ -90,25 +87,29 @@ select citus_move_shard_placement(6830002, 'localhost', :worker_1_port, 'localho
(1 row)
SELECT public.wait_for_resource_cleanup();
wait_for_resource_cleanup
---------------------------------------------------------------------
(1 row)
-- the subscription is still there, as there is no cleanup record for it
-- we have created it manually
SELECT count(*) from pg_subscription;
count
SELECT subname from pg_subscription;
subname
---------------------------------------------------------------------
1
citus_shard_move_subscription_10
(1 row)
SELECT count(*) from pg_publication;
count
SELECT pubname from pg_publication;
pubname
---------------------------------------------------------------------
0
(1 row)
(0 rows)
SELECT count(*) from pg_replication_slots;
count
SELECT slot_name from pg_replication_slots;
slot_name
---------------------------------------------------------------------
0
(1 row)
(0 rows)
SELECT count(*) from dist;
count
@@ -120,22 +121,21 @@ SELECT count(*) from dist;
SET search_path TO logical_replication;
-- the publication and repslot are still there, as there are no cleanup records for them
-- we have created them manually
SELECT count(*) from pg_subscription;
count
SELECT subname from pg_subscription;
subname
---------------------------------------------------------------------
0
(0 rows)
SELECT pubname from pg_publication;
pubname
---------------------------------------------------------------------
citus_shard_move_publication_10
(1 row)
SELECT count(*) from pg_publication;
count
SELECT slot_name from pg_replication_slots;
slot_name
---------------------------------------------------------------------
1
(1 row)
SELECT count(*) from pg_replication_slots;
count
---------------------------------------------------------------------
1
citus_shard_move_slot_10
(1 row)
SELECT count(*) from dist;
@@ -153,23 +153,20 @@ SELECT pg_drop_replication_slot('citus_shard_move_slot_' || :postgres_oid);
\c - - - :worker_2_port
SET search_path TO logical_replication;
SELECT count(*) from pg_subscription;
count
SELECT subname from pg_subscription;
subname
---------------------------------------------------------------------
0
(1 row)
(0 rows)
SELECT count(*) from pg_publication;
count
SELECT pubname from pg_publication;
pubname
---------------------------------------------------------------------
0
(1 row)
(0 rows)
SELECT count(*) from pg_replication_slots;
count
SELECT slot_name from pg_replication_slots;
slot_name
---------------------------------------------------------------------
0
(1 row)
(0 rows)
SELECT count(*) from dist;
count

View File

@@ -35,17 +35,17 @@ CREATE SUBSCRIPTION citus_shard_move_subscription_:postgres_oid
WITH (enabled=false, slot_name=citus_shard_move_slot_:postgres_oid);
SELECT count(*) from pg_subscription;
SELECT count(*) from pg_publication;
SELECT count(*) from pg_replication_slots;
SELECT subname from pg_subscription;
SELECT pubname from pg_publication;
SELECT slot_name from pg_replication_slots;
SELECT count(*) FROM dist;
\c - - - :worker_1_port
SET search_path TO logical_replication;
SELECT count(*) from pg_subscription;
SELECT count(*) from pg_publication;
SELECT count(*) from pg_replication_slots;
SELECT subname from pg_subscription;
SELECT pubname from pg_publication;
SELECT slot_name from pg_replication_slots;
SELECT count(*) FROM dist;
\c - - - :master_port
@@ -53,11 +53,13 @@ SET search_path TO logical_replication;
select citus_move_shard_placement(6830002, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical');
SELECT public.wait_for_resource_cleanup();
-- the subscription is still there, as there is no cleanup record for it
-- we have created it manually
SELECT count(*) from pg_subscription;
SELECT count(*) from pg_publication;
SELECT count(*) from pg_replication_slots;
SELECT subname from pg_subscription;
SELECT pubname from pg_publication;
SELECT slot_name from pg_replication_slots;
SELECT count(*) from dist;
\c - - - :worker_1_port
@@ -65,9 +67,9 @@ SET search_path TO logical_replication;
-- the publication and repslot are still there, as there are no cleanup records for them
-- we have created them manually
SELECT count(*) from pg_subscription;
SELECT count(*) from pg_publication;
SELECT count(*) from pg_replication_slots;
SELECT subname from pg_subscription;
SELECT pubname from pg_publication;
SELECT slot_name from pg_replication_slots;
SELECT count(*) from dist;
DROP PUBLICATION citus_shard_move_publication_:postgres_oid;
@@ -76,9 +78,9 @@ SELECT pg_drop_replication_slot('citus_shard_move_slot_' || :postgres_oid);
\c - - - :worker_2_port
SET search_path TO logical_replication;
SELECT count(*) from pg_subscription;
SELECT count(*) from pg_publication;
SELECT count(*) from pg_replication_slots;
SELECT subname from pg_subscription;
SELECT pubname from pg_publication;
SELECT slot_name from pg_replication_slots;
SELECT count(*) from dist;
\c - - - :master_port