From 34c5c9a97d28e15a1117d062ca423d05c6355a9b Mon Sep 17 00:00:00 2001 From: Naisila Puka <37271756+naisila@users.noreply.github.com> Date: Fri, 5 May 2023 16:47:01 +0300 Subject: [PATCH] Fixes flakiness in background_rebalance_parallel test (#6910) Fixes the following flaky outputs by decreasing citus_task_wait loop interval, and changing the order of wait commands. https://app.circleci.com/pipelines/github/citusdata/citus/32102/workflows/19958297-6c7e-49ef-9bc2-8efe8aacb96f/jobs/1089589 ``` diff SELECT job_id, task_id, status, nodes_involved FROM pg_dist_background_task WHERE job_id in (:job_id) ORDER BY task_id; job_id | task_id | status | nodes_involved --------+---------+----------+---------------- 17779 | 1013 | done | {50,56} 17779 | 1014 | running | {50,57} - 17779 | 1015 | running | {50,56} - 17779 | 1016 | blocked | {50,57} + 17779 | 1015 | done | {50,56} + 17779 | 1016 | running | {50,57} 17779 | 1017 | runnable | {50,56} 17779 | 1018 | blocked | {50,57} 17779 | 1019 | runnable | {50,56} 17779 | 1020 | blocked | {50,57} (8 rows) ``` https://github.com/citusdata/citus/pull/6893#issuecomment-1525661408 ```diff SELECT job_id, task_id, status, nodes_involved FROM pg_dist_background_task WHERE job_id in (:job_id) ORDER BY task_id; job_id | task_id | status | nodes_involved --------+---------+----------+---------------- 17779 | 1013 | done | {50,56} - 17779 | 1014 | running | {50,57} + 17779 | 1014 | runnable | {50,57} 17779 | 1015 | running | {50,56} 17779 | 1016 | blocked | {50,57} 17779 | 1017 | runnable | {50,56} 17779 | 1018 | blocked | {50,57} 17779 | 1019 | runnable | {50,56} 17779 | 1020 | blocked | {50,57} (8 rows) ``` (cherry picked from commit 905fd46410544c98aac2be92d188c52cc0bb2802) --- src/backend/distributed/utils/background_jobs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/distributed/utils/background_jobs.c b/src/backend/distributed/utils/background_jobs.c index d7a5a31bd..28638f30e 100644 --- a/src/backend/distributed/utils/background_jobs.c +++ b/src/backend/distributed/utils/background_jobs.c @@ -389,7 +389,7 @@ citus_task_wait_internal(int64 taskid, BackgroundTaskStatus *desiredStatus) /* sleep for a while, before rechecking the task status */ CHECK_FOR_INTERRUPTS(); - const long delay_ms = 1000; + const long delay_ms = 100; (void) WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, delay_ms,