Fixes flakiness in background_rebalance_parallel test (#6910)

Fixes the following flaky outputs by decreasing citus_task_wait loop
interval, and changing the order of wait commands.

https://app.circleci.com/pipelines/github/citusdata/citus/32102/workflows/19958297-6c7e-49ef-9bc2-8efe8aacb96f/jobs/1089589

``` diff
SELECT job_id, task_id, status, nodes_involved
 FROM pg_dist_background_task WHERE job_id in (:job_id) ORDER BY task_id;
  job_id | task_id |  status  | nodes_involved 
 --------+---------+----------+----------------
   17779 |    1013 | done     | {50,56}
   17779 |    1014 | running  | {50,57}
-  17779 |    1015 | running  | {50,56}
-  17779 |    1016 | blocked  | {50,57}
+  17779 |    1015 | done     | {50,56}
+  17779 |    1016 | running  | {50,57}
   17779 |    1017 | runnable | {50,56}
   17779 |    1018 | blocked  | {50,57}
   17779 |    1019 | runnable | {50,56}
   17779 |    1020 | blocked  | {50,57}
 (8 rows)
```

https://github.com/citusdata/citus/pull/6893#issuecomment-1525661408
```diff
SELECT job_id, task_id, status, nodes_involved
 FROM pg_dist_background_task WHERE job_id in (:job_id) ORDER BY task_id;
  job_id | task_id |  status  | nodes_involved 
 --------+---------+----------+----------------
   17779 |    1013 | done     | {50,56}
-  17779 |    1014 | running  | {50,57}
+  17779 |    1014 | runnable | {50,57}
   17779 |    1015 | running  | {50,56}
   17779 |    1016 | blocked  | {50,57}
   17779 |    1017 | runnable | {50,56}
   17779 |    1018 | blocked  | {50,57}
   17779 |    1019 | runnable | {50,56}
   17779 |    1020 | blocked  | {50,57}
 (8 rows)
```
pull/6903/head^2
Naisila Puka 2023-05-05 16:47:01 +03:00 committed by GitHub
parent 3217e3f181
commit 905fd46410
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 12 additions and 4 deletions

View File

@ -395,7 +395,7 @@ citus_task_wait_internal(int64 taskid, BackgroundTaskStatus *desiredStatus)
/* sleep for a while, before rechecking the task status */
CHECK_FOR_INTERRUPTS();
const long delay_ms = 1000;
const long delay_ms = 100;
(void) WaitLatch(MyLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
delay_ms,

View File

@ -513,6 +513,12 @@ FROM pg_dist_background_task WHERE job_id in (:job_id) ORDER BY task_id;
(8 rows)
-- increase citus.max_background_task_executors_per_node
SELECT citus_task_wait(1013, desired_status => 'done');
citus_task_wait
---------------------------------------------------------------------
(1 row)
ALTER SYSTEM SET citus.max_background_task_executors_per_node = 2;
SELECT pg_reload_conf();
pg_reload_conf
@ -520,13 +526,13 @@ SELECT pg_reload_conf();
t
(1 row)
SELECT citus_task_wait(1015, desired_status => 'running');
SELECT citus_task_wait(1014, desired_status => 'running');
citus_task_wait
---------------------------------------------------------------------
(1 row)
SELECT citus_task_wait(1013, desired_status => 'done');
SELECT citus_task_wait(1015, desired_status => 'running');
citus_task_wait
---------------------------------------------------------------------

View File

@ -221,10 +221,12 @@ SELECT job_id, task_id, status, nodes_involved
FROM pg_dist_background_task WHERE job_id in (:job_id) ORDER BY task_id;
-- increase citus.max_background_task_executors_per_node
SELECT citus_task_wait(1013, desired_status => 'done');
ALTER SYSTEM SET citus.max_background_task_executors_per_node = 2;
SELECT pg_reload_conf();
SELECT citus_task_wait(1014, desired_status => 'running');
SELECT citus_task_wait(1015, desired_status => 'running');
SELECT citus_task_wait(1013, desired_status => 'done');
-- show that at most 2 tasks per node are running
-- among the tasks that are not blocked