Fix flaky background rebalance parallel test (#6893)

A test in background_rebalance_parallel.sql was failing intermittently
where the order of tasks in the output was not deterministic. This
commit fixes the test by removing id columns for the background tasks in
the output.

A sample failing diff before this patch is below:

```diff
 SELECT D.task_id,
        (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.task_id),
        D.depends_on,
        (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.depends_on)
 FROM pg_dist_background_task_depend D  WHERE job_id in (:job_id) ORDER BY D.task_id, D.depends_on ASC;
  task_id |                               command                               | depends_on |                               command
 ---------+---------------------------------------------------------------------+------------+---------------------------------------------------------------------
-    1014 | SELECT pg_catalog.citus_move_shard_placement(85674026,50,57,'auto') |       1013 | SELECT pg_catalog.citus_move_shard_placement(85674025,50,56,'auto')
-    1016 | SELECT pg_catalog.citus_move_shard_placement(85674032,50,57,'auto') |       1015 | SELECT pg_catalog.citus_move_shard_placement(85674031,50,56,'auto')
-    1018 | SELECT pg_catalog.citus_move_shard_placement(85674038,50,57,'auto') |       1017 | SELECT pg_catalog.citus_move_shard_placement(85674037,50,56,'auto')
-    1020 | SELECT pg_catalog.citus_move_shard_placement(85674044,50,57,'auto') |       1019 | SELECT pg_catalog.citus_move_shard_placement(85674043,50,56,'auto')
+    1014 | SELECT pg_catalog.citus_move_shard_placement(85674038,50,57,'auto') |       1013 | SELECT pg_catalog.citus_move_shard_placement(85674037,50,56,'auto')
+    1016 | SELECT pg_catalog.citus_move_shard_placement(85674044,50,57,'auto') |       1015 | SELECT pg_catalog.citus_move_shard_placement(85674043,50,56,'auto')
+    1018 | SELECT pg_catalog.citus_move_shard_placement(85674026,50,57,'auto') |       1017 | SELECT pg_catalog.citus_move_shard_placement(85674025,50,56,'auto')
+    1020 | SELECT pg_catalog.citus_move_shard_placement(85674032,50,57,'auto') |       1019 | SELECT pg_catalog.citus_move_shard_placement(85674031,50,56,'auto')
 (4 rows)
```

Notice that the dependent and dependee tasks have some commands, but
they have different task ids.
pull/6910/head
Hanefi Onaldi 2023-05-05 12:07:46 +03:00 committed by GitHub
parent b58665773b
commit 3217e3f181
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 28 additions and 15 deletions

View File

@ -466,17 +466,27 @@ SELECT citus_rebalance_start AS job_id from citus_rebalance_start() \gset
-- see dependent tasks to understand which tasks remain runnable because of
-- citus.max_background_task_executors_per_node
-- and which tasks are actually blocked from colocation group dependencies
SELECT D.task_id,
(SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.task_id),
D.depends_on,
(SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.depends_on)
FROM pg_dist_background_task_depend D WHERE job_id in (:job_id) ORDER BY D.task_id, D.depends_on ASC;
task_id | command | depends_on | command
SELECT (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.task_id),
(SELECT T.command depends_on_command FROM pg_dist_background_task T WHERE T.task_id = D.depends_on)
FROM pg_dist_background_task_depend D WHERE job_id in (:job_id) ORDER BY 1, 2 ASC;
command | depends_on_command
---------------------------------------------------------------------
1014 | SELECT pg_catalog.citus_move_shard_placement(85674026,50,57,'auto') | 1013 | SELECT pg_catalog.citus_move_shard_placement(85674025,50,56,'auto')
1016 | SELECT pg_catalog.citus_move_shard_placement(85674032,50,57,'auto') | 1015 | SELECT pg_catalog.citus_move_shard_placement(85674031,50,56,'auto')
1018 | SELECT pg_catalog.citus_move_shard_placement(85674038,50,57,'auto') | 1017 | SELECT pg_catalog.citus_move_shard_placement(85674037,50,56,'auto')
1020 | SELECT pg_catalog.citus_move_shard_placement(85674044,50,57,'auto') | 1019 | SELECT pg_catalog.citus_move_shard_placement(85674043,50,56,'auto')
SELECT pg_catalog.citus_move_shard_placement(85674026,50,57,'auto') | SELECT pg_catalog.citus_move_shard_placement(85674025,50,56,'auto')
SELECT pg_catalog.citus_move_shard_placement(85674032,50,57,'auto') | SELECT pg_catalog.citus_move_shard_placement(85674031,50,56,'auto')
SELECT pg_catalog.citus_move_shard_placement(85674038,50,57,'auto') | SELECT pg_catalog.citus_move_shard_placement(85674037,50,56,'auto')
SELECT pg_catalog.citus_move_shard_placement(85674044,50,57,'auto') | SELECT pg_catalog.citus_move_shard_placement(85674043,50,56,'auto')
(4 rows)
SELECT task_id, depends_on
FROM pg_dist_background_task_depend
WHERE job_id in (:job_id)
ORDER BY 1, 2 ASC;
task_id | depends_on
---------------------------------------------------------------------
1014 | 1013
1016 | 1015
1018 | 1017
1020 | 1019
(4 rows)
-- default citus.max_background_task_executors_per_node is 1

View File

@ -204,11 +204,14 @@ SELECT citus_rebalance_start AS job_id from citus_rebalance_start() \gset
-- see dependent tasks to understand which tasks remain runnable because of
-- citus.max_background_task_executors_per_node
-- and which tasks are actually blocked from colocation group dependencies
SELECT D.task_id,
(SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.task_id),
D.depends_on,
(SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.depends_on)
FROM pg_dist_background_task_depend D WHERE job_id in (:job_id) ORDER BY D.task_id, D.depends_on ASC;
SELECT (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.task_id),
(SELECT T.command depends_on_command FROM pg_dist_background_task T WHERE T.task_id = D.depends_on)
FROM pg_dist_background_task_depend D WHERE job_id in (:job_id) ORDER BY 1, 2 ASC;
SELECT task_id, depends_on
FROM pg_dist_background_task_depend
WHERE job_id in (:job_id)
ORDER BY 1, 2 ASC;
-- default citus.max_background_task_executors_per_node is 1
-- show that first exactly one task per node is running