mirror of https://github.com/citusdata/citus.git
Fix flakiness in citus_split_shard_by_split_points_deferred_drop (#6819)
In CI we would sometimes get this failure:

```diff
 -- The original shard is marked for deferred drop with policy_type = 2.
 -- The previous shard should be dropped at the beginning of the second split call
 SELECT * from pg_dist_cleanup;
  record_id | operation_id | object_type | object_name | node_group_id | policy_type
 -----------+--------------+-------------+--------------------------------------------------------------------------+---------------+-------------
+ 60 | 778 | 3 | citus_shard_split_slot_18_21216_778 | 16 | 0
  512 | 778 | 1 | citus_split_shard_by_split_points_deferred_schema.table_to_split_8981001 | 16 | 2
-(1 row)
+(2 rows)
```

Replication slots sometimes cannot be deleted right away, which is hard to resolve, but luckily we can filter these cleanup records out easily by filtering on policy_type.

While debugging this issue I learnt that we did not use `GetNextCleanupRecordId` in all places where we created cleanup records. This caused test failures when running tests multiple times, when they set `citus.next_cleanup_record_id`. I tried fixing that by calling `GetNextCleanupRecordId` in all places, but that caused many other tests to fail due to deadlocks.

So, instead, this addresses that issue by using `ALTER SEQUENCE ... RESTART` instead of `citus.next_cleanup_record_id`. In a follow-up PR we should probably get rid of `citus.next_cleanup_record_id`, since it's only used in one other file.

naisila/remove_misleading_constant
parent
7c0589abb8
commit
dcee370270
|
@ -28,7 +28,7 @@ SET citus.next_placement_id TO 8610000;
|
||||||
SET citus.shard_count TO 2;
|
SET citus.shard_count TO 2;
|
||||||
SET citus.shard_replication_factor TO 1;
|
SET citus.shard_replication_factor TO 1;
|
||||||
SET citus.next_operation_id TO 777;
|
SET citus.next_operation_id TO 777;
|
||||||
SET citus.next_cleanup_record_id TO 511;
|
ALTER SEQUENCE pg_catalog.pg_dist_cleanup_recordid_seq RESTART 511;
|
||||||
SET ROLE test_split_role;
|
SET ROLE test_split_role;
|
||||||
SET search_path TO "citus_split_shard_by_split_points_deferred_schema";
|
SET search_path TO "citus_split_shard_by_split_points_deferred_schema";
|
||||||
CREATE TABLE table_to_split(id int PRIMARY KEY, int_data int, data text);
|
CREATE TABLE table_to_split(id int PRIMARY KEY, int_data int, data text);
|
||||||
|
@ -64,10 +64,10 @@ SELECT pg_catalog.citus_split_shard_by_split_points(
|
||||||
|
|
||||||
-- The original shard is marked for deferred drop with policy_type = 2.
|
-- The original shard is marked for deferred drop with policy_type = 2.
|
||||||
-- The previous shard should be dropped at the beginning of the second split call
|
-- The previous shard should be dropped at the beginning of the second split call
|
||||||
SELECT * from pg_dist_cleanup;
|
SELECT * FROM pg_dist_cleanup WHERE policy_type = 2;
|
||||||
record_id | operation_id | object_type | object_name | node_group_id | policy_type
|
record_id | operation_id | object_type | object_name | node_group_id | policy_type
|
||||||
---------------------------------------------------------------------
|
---------------------------------------------------------------------
|
||||||
512 | 778 | 1 | citus_split_shard_by_split_points_deferred_schema.table_to_split_8981001 | 16 | 2
|
526 | 778 | 1 | citus_split_shard_by_split_points_deferred_schema.table_to_split_8981001 | 16 | 2
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
-- One of the physical shards should not be deleted, the other one should.
|
-- One of the physical shards should not be deleted, the other one should.
|
||||||
|
@ -90,8 +90,12 @@ SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind
|
||||||
|
|
||||||
-- Perform deferred drop cleanup.
|
-- Perform deferred drop cleanup.
|
||||||
\c - postgres - :master_port
|
\c - postgres - :master_port
|
||||||
CALL citus_cleanup_orphaned_resources();
|
SELECT public.wait_for_resource_cleanup();
|
||||||
NOTICE: cleaned up 1 orphaned resources
|
wait_for_resource_cleanup
|
||||||
|
---------------------------------------------------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
-- Cleanup has been done.
|
-- Cleanup has been done.
|
||||||
SELECT * from pg_dist_cleanup;
|
SELECT * from pg_dist_cleanup;
|
||||||
record_id | operation_id | object_type | object_name | node_group_id | policy_type
|
record_id | operation_id | object_type | object_name | node_group_id | policy_type
|
||||||
|
|
|
@ -24,7 +24,7 @@ SET citus.next_placement_id TO 8610000;
|
||||||
SET citus.shard_count TO 2;
|
SET citus.shard_count TO 2;
|
||||||
SET citus.shard_replication_factor TO 1;
|
SET citus.shard_replication_factor TO 1;
|
||||||
SET citus.next_operation_id TO 777;
|
SET citus.next_operation_id TO 777;
|
||||||
SET citus.next_cleanup_record_id TO 511;
|
ALTER SEQUENCE pg_catalog.pg_dist_cleanup_recordid_seq RESTART 511;
|
||||||
SET ROLE test_split_role;
|
SET ROLE test_split_role;
|
||||||
SET search_path TO "citus_split_shard_by_split_points_deferred_schema";
|
SET search_path TO "citus_split_shard_by_split_points_deferred_schema";
|
||||||
|
|
||||||
|
@ -51,7 +51,7 @@ SELECT pg_catalog.citus_split_shard_by_split_points(
|
||||||
|
|
||||||
-- The original shard is marked for deferred drop with policy_type = 2.
|
-- The original shard is marked for deferred drop with policy_type = 2.
|
||||||
-- The previous shard should be dropped at the beginning of the second split call
|
-- The previous shard should be dropped at the beginning of the second split call
|
||||||
SELECT * from pg_dist_cleanup;
|
SELECT * FROM pg_dist_cleanup WHERE policy_type = 2;
|
||||||
|
|
||||||
-- One of the physical shards should not be deleted, the other one should.
|
-- One of the physical shards should not be deleted, the other one should.
|
||||||
\c - - - :worker_1_port
|
\c - - - :worker_1_port
|
||||||
|
@ -62,7 +62,7 @@ SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind
|
||||||
|
|
||||||
-- Perform deferred drop cleanup.
|
-- Perform deferred drop cleanup.
|
||||||
\c - postgres - :master_port
|
\c - postgres - :master_port
|
||||||
CALL citus_cleanup_orphaned_resources();
|
SELECT public.wait_for_resource_cleanup();
|
||||||
|
|
||||||
-- Cleanup has been done.
|
-- Cleanup has been done.
|
||||||
SELECT * from pg_dist_cleanup;
|
SELECT * from pg_dist_cleanup;
|
||||||
|
|
Loading…
Reference in New Issue