From d708f13b5c89be2b934eb5292cfa0da6188d5dfb Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 1 Nov 2023 15:08:51 +0100 Subject: [PATCH] Fix flaky failure_split_cleanup (#7299) Sometimes failure_split_cleanup failed in CI like this: ```diff ERROR: server closed the connection unexpectedly CONTEXT: while executing command on localhost:9060 SELECT operation_id, object_type, object_name, node_group_id, policy_type FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; operation_id | object_type | object_name | node_group_id | policy_type --------------+-------------+-----------------------------------------------------------+---------------+------------- 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 - 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1 777 | 4 | citus_shard_split_publication_1_10_777 | 2 | 0 (5 rows) -- we need to allow connection so that we can connect to proxy ``` Source: https://github.com/citusdata/citus/actions/runs/6717642291/attempts/1#summary-18256014949 It's the common problem where we're missing a column in the ORDER BY clause. This fixes that by adding an node_group_id to the query in question. --- .../regress/expected/failure_split_cleanup.out | 18 +++++++++--------- src/test/regress/sql/failure_split_cleanup.sql | 16 ++++++++-------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/test/regress/expected/failure_split_cleanup.out b/src/test/regress/expected/failure_split_cleanup.out index fe646587c..d81335325 100644 --- a/src/test/regress/expected/failure_split_cleanup.out +++ b/src/test/regress/expected/failure_split_cleanup.out @@ -277,12 +277,12 @@ CONTEXT: while executing command on localhost:xxxxx ERROR: connection not open CONTEXT: while executing command on localhost:xxxxx SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 - 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1 777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0 777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0 @@ -336,7 +336,7 @@ CONTEXT: while executing command on localhost:xxxxx (1 row) SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- (0 rows) @@ -388,7 +388,7 @@ CONTEXT: while executing command on localhost:xxxxx ERROR: connection not open CONTEXT: while executing command on localhost:xxxxx SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 @@ -455,7 +455,7 @@ CONTEXT: while executing command on localhost:xxxxx (1 row) SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- (0 rows) @@ -507,7 +507,7 @@ CONTEXT: while executing command on localhost:xxxxx ERROR: connection not open CONTEXT: while executing command on localhost:xxxxx SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 @@ -574,7 +574,7 @@ CONTEXT: while executing command on localhost:xxxxx (1 row) SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- (0 rows) @@ -634,7 +634,7 @@ WARNING: connection to the remote node localhost:xxxxx failed with the followin ERROR: connection not open CONTEXT: while executing command on localhost:xxxxx SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 @@ -701,7 +701,7 @@ CONTEXT: while executing command on localhost:xxxxx (1 row) SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- (0 rows) diff --git a/src/test/regress/sql/failure_split_cleanup.sql b/src/test/regress/sql/failure_split_cleanup.sql index 1b85d3d17..9dfbb245e 100644 --- a/src/test/regress/sql/failure_split_cleanup.sql +++ b/src/test/regress/sql/failure_split_cleanup.sql @@ -136,7 +136,7 @@ SELECT create_distributed_table('table_to_split', 'id'); ARRAY[:worker_1_node, :worker_2_node], 'force_logical'); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; -- we need to allow connection so that we can connect to proxy SELECT citus.mitmproxy('conn.allow()'); @@ -155,7 +155,7 @@ SELECT create_distributed_table('table_to_split', 'id'); \c - postgres - :master_port SELECT public.wait_for_resource_cleanup(); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; \c - - - :worker_2_proxy_port SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; @@ -182,7 +182,7 @@ SELECT create_distributed_table('table_to_split', 'id'); ARRAY[:worker_1_node, :worker_2_node], 'force_logical'); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; -- we need to allow connection so that we can connect to proxy SELECT citus.mitmproxy('conn.allow()'); @@ -201,7 +201,7 @@ SELECT create_distributed_table('table_to_split', 'id'); \c - postgres - :master_port SELECT public.wait_for_resource_cleanup(); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; \c - - - :worker_2_proxy_port SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; @@ -228,7 +228,7 @@ SELECT create_distributed_table('table_to_split', 'id'); ARRAY[:worker_1_node, :worker_2_node], 'force_logical'); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; -- we need to allow connection so that we can connect to proxy SELECT citus.mitmproxy('conn.allow()'); @@ -247,7 +247,7 @@ SELECT create_distributed_table('table_to_split', 'id'); \c - postgres - :master_port SELECT public.wait_for_resource_cleanup(); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; \c - - - :worker_2_proxy_port SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; @@ -275,7 +275,7 @@ SELECT create_distributed_table('table_to_split', 'id'); 'force_logical'); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; -- we need to allow connection so that we can connect to proxy SELECT citus.mitmproxy('conn.allow()'); @@ -295,7 +295,7 @@ SELECT create_distributed_table('table_to_split', 'id'); \c - postgres - :master_port SELECT public.wait_for_resource_cleanup(); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; \c - - - :worker_2_proxy_port SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;