From 15f5796eee783e7871d3c211e952d03a654fc2a5 Mon Sep 17 00:00:00 2001 From: Hanefi Onaldi Date: Fri, 5 May 2023 12:07:46 +0300 Subject: [PATCH] Fix flaky background rebalance parallel test (#6893) A test in background_rebalance_parallel.sql was failing intermittently where the order of tasks in the output was not deterministic. This commit fixes the test by removing id columns for the background tasks in the output. A sample failing diff before this patch is below: ```diff SELECT D.task_id, (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.task_id), D.depends_on, (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.depends_on) FROM pg_dist_background_task_depend D WHERE job_id in (:job_id) ORDER BY D.task_id, D.depends_on ASC; task_id | command | depends_on | command ---------+---------------------------------------------------------------------+------------+--------------------------------------------------------------------- - 1014 | SELECT pg_catalog.citus_move_shard_placement(85674026,50,57,'auto') | 1013 | SELECT pg_catalog.citus_move_shard_placement(85674025,50,56,'auto') - 1016 | SELECT pg_catalog.citus_move_shard_placement(85674032,50,57,'auto') | 1015 | SELECT pg_catalog.citus_move_shard_placement(85674031,50,56,'auto') - 1018 | SELECT pg_catalog.citus_move_shard_placement(85674038,50,57,'auto') | 1017 | SELECT pg_catalog.citus_move_shard_placement(85674037,50,56,'auto') - 1020 | SELECT pg_catalog.citus_move_shard_placement(85674044,50,57,'auto') | 1019 | SELECT pg_catalog.citus_move_shard_placement(85674043,50,56,'auto') + 1014 | SELECT pg_catalog.citus_move_shard_placement(85674038,50,57,'auto') | 1013 | SELECT pg_catalog.citus_move_shard_placement(85674037,50,56,'auto') + 1016 | SELECT pg_catalog.citus_move_shard_placement(85674044,50,57,'auto') | 1015 | SELECT pg_catalog.citus_move_shard_placement(85674043,50,56,'auto') + 1018 | SELECT pg_catalog.citus_move_shard_placement(85674026,50,57,'auto') | 1017 | SELECT 
pg_catalog.citus_move_shard_placement(85674025,50,56,'auto') + 1020 | SELECT pg_catalog.citus_move_shard_placement(85674032,50,57,'auto') | 1019 | SELECT pg_catalog.citus_move_shard_placement(85674031,50,56,'auto') (4 rows) ``` Notice that the dependent and dependee tasks have the same commands, but they have different task ids. (cherry picked from commit 3217e3f1817b63df68ae79f0bf80448b4074e9fc) --- .../background_rebalance_parallel.out | 30 ++++++++++++------- .../sql/background_rebalance_parallel.sql | 13 ++++---- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/src/test/regress/expected/background_rebalance_parallel.out b/src/test/regress/expected/background_rebalance_parallel.out index 9c43fab9b..187f709e4 100644 --- a/src/test/regress/expected/background_rebalance_parallel.out +++ b/src/test/regress/expected/background_rebalance_parallel.out @@ -466,17 +466,27 @@ SELECT citus_rebalance_start AS job_id from citus_rebalance_start() \gset -- see dependent tasks to understand which tasks remain runnable because of -- citus.max_background_task_executors_per_node -- and which tasks are actually blocked from colocation group dependencies -SELECT D.task_id, - (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.task_id), - D.depends_on, - (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.depends_on) -FROM pg_dist_background_task_depend D WHERE job_id in (:job_id) ORDER BY D.task_id, D.depends_on ASC; - task_id | command | depends_on | command +SELECT (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.task_id), + (SELECT T.command depends_on_command FROM pg_dist_background_task T WHERE T.task_id = D.depends_on) +FROM pg_dist_background_task_depend D WHERE job_id in (:job_id) ORDER BY 1, 2 ASC; + command | depends_on_command --------------------------------------------------------------------- - 1014 | SELECT pg_catalog.citus_move_shard_placement(85674026,50,57,'auto') | 1013 | SELECT 
pg_catalog.citus_move_shard_placement(85674025,50,56,'auto') - 1016 | SELECT pg_catalog.citus_move_shard_placement(85674032,50,57,'auto') | 1015 | SELECT pg_catalog.citus_move_shard_placement(85674031,50,56,'auto') - 1018 | SELECT pg_catalog.citus_move_shard_placement(85674038,50,57,'auto') | 1017 | SELECT pg_catalog.citus_move_shard_placement(85674037,50,56,'auto') - 1020 | SELECT pg_catalog.citus_move_shard_placement(85674044,50,57,'auto') | 1019 | SELECT pg_catalog.citus_move_shard_placement(85674043,50,56,'auto') + SELECT pg_catalog.citus_move_shard_placement(85674026,50,57,'auto') | SELECT pg_catalog.citus_move_shard_placement(85674025,50,56,'auto') + SELECT pg_catalog.citus_move_shard_placement(85674032,50,57,'auto') | SELECT pg_catalog.citus_move_shard_placement(85674031,50,56,'auto') + SELECT pg_catalog.citus_move_shard_placement(85674038,50,57,'auto') | SELECT pg_catalog.citus_move_shard_placement(85674037,50,56,'auto') + SELECT pg_catalog.citus_move_shard_placement(85674044,50,57,'auto') | SELECT pg_catalog.citus_move_shard_placement(85674043,50,56,'auto') +(4 rows) + +SELECT task_id, depends_on +FROM pg_dist_background_task_depend +WHERE job_id in (:job_id) +ORDER BY 1, 2 ASC; + task_id | depends_on +--------------------------------------------------------------------- + 1014 | 1013 + 1016 | 1015 + 1018 | 1017 + 1020 | 1019 (4 rows) -- default citus.max_background_task_executors_per_node is 1 diff --git a/src/test/regress/sql/background_rebalance_parallel.sql b/src/test/regress/sql/background_rebalance_parallel.sql index 5229e7f88..e55fd93bb 100644 --- a/src/test/regress/sql/background_rebalance_parallel.sql +++ b/src/test/regress/sql/background_rebalance_parallel.sql @@ -204,11 +204,14 @@ SELECT citus_rebalance_start AS job_id from citus_rebalance_start() \gset -- see dependent tasks to understand which tasks remain runnable because of -- citus.max_background_task_executors_per_node -- and which tasks are actually blocked from colocation group 
dependencies -SELECT D.task_id, - (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.task_id), - D.depends_on, - (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.depends_on) -FROM pg_dist_background_task_depend D WHERE job_id in (:job_id) ORDER BY D.task_id, D.depends_on ASC; +SELECT (SELECT T.command FROM pg_dist_background_task T WHERE T.task_id = D.task_id), + (SELECT T.command depends_on_command FROM pg_dist_background_task T WHERE T.task_id = D.depends_on) +FROM pg_dist_background_task_depend D WHERE job_id in (:job_id) ORDER BY 1, 2 ASC; + +SELECT task_id, depends_on +FROM pg_dist_background_task_depend +WHERE job_id in (:job_id) +ORDER BY 1, 2 ASC; -- default citus.max_background_task_executors_per_node is 1 -- show that first exactly one task per node is running