mirror of https://github.com/citusdata/citus.git
Improve rebalance reporting for retried tasks (#6683)
If there was a problem with an ongoing rebalance, we did not show details for background tasks that were stuck in the runnable state. Similar to how we show details for errored tasks, we now also show details for tasks that are being retried.

Earlier we showed the following output when a task was stuck:

```
┌────────────────────────────┐
│ {                         ↵│
│     "tasks": [            ↵│
│     ],                    ↵│
│     "task_state_counts": {↵│
│         "done": 13,       ↵│
│         "blocked": 2,     ↵│
│         "runnable": 1     ↵│
│     }                     ↵│
│ }                          │
└────────────────────────────┘
```

Now we show details like the following:

```
+-----------------------------------------------------------------------
| {
|     "tasks": [
|         {
|             "state": "runnable",
|             "command": "SELECT pg_catalog.citus_move_shard_placement(1
|             "message": "ERROR: Moving shards to a node that shouldn't
|             "retried": 2,
|             "task_id": 3
|         }
|     ],
|     "task_state_counts": {
|         "blocked": 1,
|         "runnable": 1
|     }
| }
+-----------------------------------------------------------------------
```

pull/6684/head
parent
14c31fbb07
commit
47ff03123b
|
@ -74,7 +74,7 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_job_status (
|
|||
WHERE j.job_id = $1
|
||||
AND t.status = 'running'
|
||||
),
|
||||
errored_task_details AS (
|
||||
errored_or_retried_task_details AS (
|
||||
SELECT jsonb_agg(jsonb_build_object(
|
||||
'state', t.status,
|
||||
'retried', coalesce(t.retry_count,0),
|
||||
|
@ -85,7 +85,7 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_job_status (
|
|||
pg_dist_background_task t JOIN pg_dist_background_job j ON t.job_id = j.job_id
|
||||
WHERE j.job_id = $1
|
||||
AND NOT EXISTS (SELECT 1 FROM rp WHERE rp.sessionid = t.pid)
|
||||
AND t.status = 'error'
|
||||
AND (t.status = 'error' OR (t.status = 'runnable' AND t.retry_count > 0))
|
||||
)
|
||||
SELECT
|
||||
job_id,
|
||||
|
@ -97,7 +97,7 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_job_status (
|
|||
jsonb_build_object(
|
||||
'task_state_counts', (SELECT jsonb_object_agg(status, count) FROM task_state_occurence_counts),
|
||||
'tasks', (COALESCE((SELECT tasks FROM running_task_details),'[]'::jsonb) ||
|
||||
COALESCE((SELECT tasks FROM errored_task_details),'[]'::jsonb))) AS details
|
||||
COALESCE((SELECT tasks FROM errored_or_retried_task_details),'[]'::jsonb))) AS details
|
||||
FROM pg_dist_background_job j
|
||||
WHERE j.job_id = $1
|
||||
$fn$;
|
||||
|
|
|
@ -74,7 +74,7 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_job_status (
|
|||
WHERE j.job_id = $1
|
||||
AND t.status = 'running'
|
||||
),
|
||||
errored_task_details AS (
|
||||
errored_or_retried_task_details AS (
|
||||
SELECT jsonb_agg(jsonb_build_object(
|
||||
'state', t.status,
|
||||
'retried', coalesce(t.retry_count,0),
|
||||
|
@ -85,7 +85,7 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_job_status (
|
|||
pg_dist_background_task t JOIN pg_dist_background_job j ON t.job_id = j.job_id
|
||||
WHERE j.job_id = $1
|
||||
AND NOT EXISTS (SELECT 1 FROM rp WHERE rp.sessionid = t.pid)
|
||||
AND t.status = 'error'
|
||||
AND (t.status = 'error' OR (t.status = 'runnable' AND t.retry_count > 0))
|
||||
)
|
||||
SELECT
|
||||
job_id,
|
||||
|
@ -97,7 +97,7 @@ CREATE OR REPLACE FUNCTION pg_catalog.citus_job_status (
|
|||
jsonb_build_object(
|
||||
'task_state_counts', (SELECT jsonb_object_agg(status, count) FROM task_state_occurence_counts),
|
||||
'tasks', (COALESCE((SELECT tasks FROM running_task_details),'[]'::jsonb) ||
|
||||
COALESCE((SELECT tasks FROM errored_task_details),'[]'::jsonb))) AS details
|
||||
COALESCE((SELECT tasks FROM errored_or_retried_task_details),'[]'::jsonb))) AS details
|
||||
FROM pg_dist_background_job j
|
||||
WHERE j.job_id = $1
|
||||
$fn$;
|
||||
|
|
Loading…
Reference in New Issue