Improve rebalance reporting for retried tasks (#6683)

Previously, when an ongoing rebalance ran into a problem, we did not show
details for background tasks that were stuck in the runnable state. Similar
to how we show details for errored tasks, we now also show details for tasks
that are being retried (runnable tasks with a non-zero retry count).

Earlier we showed the following output when a task was stuck:

```
┌────────────────────────────┐
│ {                         ↵│
│     "tasks": [            ↵│
│     ],                    ↵│
│     "task_state_counts": {↵│
│         "done": 13,       ↵│
│         "blocked": 2,     ↵│
│         "runnable": 1     ↵│
│     }                     ↵│
│ }                          │
└────────────────────────────┘
```

Now we show details like the following:

```
+-----------------------------------------------------------------------
| {
|     "tasks": [
|         {
|             "state": "runnable",
|             "command": "SELECT pg_catalog.citus_move_shard_placement(1
|             "message": "ERROR: Moving shards to a node that shouldn't
|             "retried": 2,
|             "task_id": 3
|         }
|     ],
|     "task_state_counts": {
|         "blocked": 1,
|         "runnable": 1
|     }
| }
+-----------------------------------------------------------------------
```
pull/6684/head
Hanefi Onaldi 2023-01-31 15:26:52 +03:00 committed by GitHub
parent 14c31fbb07
commit 47ff03123b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 6 additions and 6 deletions

View File

@@ -74,7 +74,7 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_job_status (
 WHERE j.job_id = $1
 AND t.status = 'running'
 ),
-errored_task_details AS (
+errored_or_retried_task_details AS (
 SELECT jsonb_agg(jsonb_build_object(
 'state', t.status,
 'retried', coalesce(t.retry_count,0),
@@ -85,7 +85,7 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_job_status (
 pg_dist_background_task t JOIN pg_dist_background_job j ON t.job_id = j.job_id
 WHERE j.job_id = $1
 AND NOT EXISTS (SELECT 1 FROM rp WHERE rp.sessionid = t.pid)
-AND t.status = 'error'
+AND (t.status = 'error' OR (t.status = 'runnable' AND t.retry_count > 0))
 )
 SELECT
 job_id,
@@ -97,7 +97,7 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_job_status (
 jsonb_build_object(
 'task_state_counts', (SELECT jsonb_object_agg(status, count) FROM task_state_occurence_counts),
 'tasks', (COALESCE((SELECT tasks FROM running_task_details),'[]'::jsonb) ||
-COALESCE((SELECT tasks FROM errored_task_details),'[]'::jsonb))) AS details
+COALESCE((SELECT tasks FROM errored_or_retried_task_details),'[]'::jsonb))) AS details
 FROM pg_dist_background_job j
 WHERE j.job_id = $1
 $fn$;

View File

@@ -74,7 +74,7 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_job_status (
 WHERE j.job_id = $1
 AND t.status = 'running'
 ),
-errored_task_details AS (
+errored_or_retried_task_details AS (
 SELECT jsonb_agg(jsonb_build_object(
 'state', t.status,
 'retried', coalesce(t.retry_count,0),
@@ -85,7 +85,7 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_job_status (
 pg_dist_background_task t JOIN pg_dist_background_job j ON t.job_id = j.job_id
 WHERE j.job_id = $1
 AND NOT EXISTS (SELECT 1 FROM rp WHERE rp.sessionid = t.pid)
-AND t.status = 'error'
+AND (t.status = 'error' OR (t.status = 'runnable' AND t.retry_count > 0))
 )
 SELECT
 job_id,
@@ -97,7 +97,7 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_job_status (
 jsonb_build_object(
 'task_state_counts', (SELECT jsonb_object_agg(status, count) FROM task_state_occurence_counts),
 'tasks', (COALESCE((SELECT tasks FROM running_task_details),'[]'::jsonb) ||
-COALESCE((SELECT tasks FROM errored_task_details),'[]'::jsonb))) AS details
+COALESCE((SELECT tasks FROM errored_or_retried_task_details),'[]'::jsonb))) AS details
 FROM pg_dist_background_job j
 WHERE j.job_id = $1
 $fn$;