mirror of https://github.com/citusdata/citus.git
Fixes flakiness in background_rebalance_parallel test (#6910)
Fixes the following flaky outputs by decreasing the citus_task_wait loop interval and changing the order of the wait commands.

https://app.circleci.com/pipelines/github/citusdata/citus/32102/workflows/19958297-6c7e-49ef-9bc2-8efe8aacb96f/jobs/1089589

```diff
 SELECT job_id, task_id, status, nodes_involved
 FROM pg_dist_background_task WHERE job_id in (:job_id) ORDER BY task_id;
  job_id | task_id |  status  | nodes_involved
 --------+---------+----------+----------------
   17779 |    1013 | done     | {50,56}
   17779 |    1014 | running  | {50,57}
-  17779 |    1015 | running  | {50,56}
-  17779 |    1016 | blocked  | {50,57}
+  17779 |    1015 | done     | {50,56}
+  17779 |    1016 | running  | {50,57}
   17779 |    1017 | runnable | {50,56}
   17779 |    1018 | blocked  | {50,57}
   17779 |    1019 | runnable | {50,56}
   17779 |    1020 | blocked  | {50,57}
 (8 rows)
```

https://github.com/citusdata/citus/pull/6893#issuecomment-1525661408

```diff
 SELECT job_id, task_id, status, nodes_involved
 FROM pg_dist_background_task WHERE job_id in (:job_id) ORDER BY task_id;
  job_id | task_id |  status  | nodes_involved
 --------+---------+----------+----------------
   17779 |    1013 | done     | {50,56}
-  17779 |    1014 | running  | {50,57}
+  17779 |    1014 | runnable | {50,57}
   17779 |    1015 | running  | {50,56}
   17779 |    1016 | blocked  | {50,57}
   17779 |    1017 | runnable | {50,56}
   17779 |    1018 | blocked  | {50,57}
   17779 |    1019 | runnable | {50,56}
   17779 |    1020 | blocked  | {50,57}
 (8 rows)
```

pull/6903/head^2
parent
3217e3f181
commit
905fd46410
|
@ -395,7 +395,7 @@ citus_task_wait_internal(int64 taskid, BackgroundTaskStatus *desiredStatus)
|
|||
|
||||
/* sleep for a while, before rechecking the task status */
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
const long delay_ms = 1000;
|
||||
const long delay_ms = 100;
|
||||
(void) WaitLatch(MyLatch,
|
||||
WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
|
||||
delay_ms,
|
||||
|
|
|
@ -513,6 +513,12 @@ FROM pg_dist_background_task WHERE job_id in (:job_id) ORDER BY task_id;
|
|||
(8 rows)
|
||||
|
||||
-- increase citus.max_background_task_executors_per_node
|
||||
SELECT citus_task_wait(1013, desired_status => 'done');
|
||||
citus_task_wait
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
ALTER SYSTEM SET citus.max_background_task_executors_per_node = 2;
|
||||
SELECT pg_reload_conf();
|
||||
pg_reload_conf
|
||||
|
@ -520,13 +526,13 @@ SELECT pg_reload_conf();
|
|||
t
|
||||
(1 row)
|
||||
|
||||
SELECT citus_task_wait(1015, desired_status => 'running');
|
||||
SELECT citus_task_wait(1014, desired_status => 'running');
|
||||
citus_task_wait
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT citus_task_wait(1013, desired_status => 'done');
|
||||
SELECT citus_task_wait(1015, desired_status => 'running');
|
||||
citus_task_wait
|
||||
---------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -221,10 +221,12 @@ SELECT job_id, task_id, status, nodes_involved
|
|||
FROM pg_dist_background_task WHERE job_id in (:job_id) ORDER BY task_id;
|
||||
|
||||
-- increase citus.max_background_task_executors_per_node
|
||||
SELECT citus_task_wait(1013, desired_status => 'done');
|
||||
ALTER SYSTEM SET citus.max_background_task_executors_per_node = 2;
|
||||
SELECT pg_reload_conf();
|
||||
|
||||
SELECT citus_task_wait(1014, desired_status => 'running');
|
||||
SELECT citus_task_wait(1015, desired_status => 'running');
|
||||
SELECT citus_task_wait(1013, desired_status => 'done');
|
||||
|
||||
-- show that at most 2 tasks per node are running
|
||||
-- among the tasks that are not blocked
|
||||
|
|
Loading…
Reference in New Issue