Fix flaky failure_split_cleanup (#7299)

Sometimes failure_split_cleanup failed in CI like this:

```diff
 ERROR:  server closed the connection unexpectedly
 CONTEXT:  while executing command on localhost:9060
     SELECT operation_id, object_type, object_name, node_group_id, policy_type
     FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
  operation_id | object_type |                        object_name                        | node_group_id | policy_type
 --------------+-------------+-----------------------------------------------------------+---------------+-------------
           777 |           1 | citus_failure_split_cleanup_schema.table_to_split_8981000 |             1 |           0
-          777 |           1 | citus_failure_split_cleanup_schema.table_to_split_8981002 |             1 |           1
           777 |           1 | citus_failure_split_cleanup_schema.table_to_split_8981002 |             2 |           0
+          777 |           1 | citus_failure_split_cleanup_schema.table_to_split_8981002 |             1 |           1
           777 |           1 | citus_failure_split_cleanup_schema.table_to_split_8981003 |             2 |           1
           777 |           4 | citus_shard_split_publication_1_10_777                    |             2 |           0
 (5 rows)

     -- we need to allow connection so that we can connect to proxy
```

Source:
https://github.com/citusdata/citus/actions/runs/6717642291/attempts/1#summary-18256014949

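It's the common problem where we're missing a column in the ORDER BY
clause: rows that share the same object_name tie, so their relative order
is unspecified and can differ between runs. This fixes that by adding
node_group_id as a tie-breaker to the ORDER BY clause of the queries in
question.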
pull/7301/head^2
Jelte Fennema-Nio 2023-11-01 15:08:51 +01:00 committed by GitHub
parent c83c556702
commit c9f2fc892d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 17 additions and 17 deletions

View File

@ -277,12 +277,12 @@ CONTEXT: while executing command on localhost:xxxxx
ERROR: connection not open
CONTEXT: while executing command on localhost:xxxxx
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
operation_id | object_type | object_name | node_group_id | policy_type
---------------------------------------------------------------------
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1
777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0
@ -336,7 +336,7 @@ CONTEXT: while executing command on localhost:xxxxx
(1 row)
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
operation_id | object_type | object_name | node_group_id | policy_type
---------------------------------------------------------------------
(0 rows)
@ -388,7 +388,7 @@ CONTEXT: while executing command on localhost:xxxxx
ERROR: connection not open
CONTEXT: while executing command on localhost:xxxxx
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
operation_id | object_type | object_name | node_group_id | policy_type
---------------------------------------------------------------------
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0
@ -455,7 +455,7 @@ CONTEXT: while executing command on localhost:xxxxx
(1 row)
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
operation_id | object_type | object_name | node_group_id | policy_type
---------------------------------------------------------------------
(0 rows)
@ -507,7 +507,7 @@ CONTEXT: while executing command on localhost:xxxxx
ERROR: connection not open
CONTEXT: while executing command on localhost:xxxxx
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
operation_id | object_type | object_name | node_group_id | policy_type
---------------------------------------------------------------------
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0
@ -574,7 +574,7 @@ CONTEXT: while executing command on localhost:xxxxx
(1 row)
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
operation_id | object_type | object_name | node_group_id | policy_type
---------------------------------------------------------------------
(0 rows)
@ -634,7 +634,7 @@ WARNING: connection to the remote node localhost:xxxxx failed with the followin
ERROR: connection not open
CONTEXT: while executing command on localhost:xxxxx
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
operation_id | object_type | object_name | node_group_id | policy_type
---------------------------------------------------------------------
777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1
@ -701,7 +701,7 @@ CONTEXT: while executing command on localhost:xxxxx
(1 row)
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
operation_id | object_type | object_name | node_group_id | policy_type
---------------------------------------------------------------------
(0 rows)

View File

@ -136,7 +136,7 @@ SELECT create_distributed_table('table_to_split', 'id');
ARRAY[:worker_1_node, :worker_2_node],
'force_logical');
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
-- we need to allow connection so that we can connect to proxy
SELECT citus.mitmproxy('conn.allow()');
@ -155,7 +155,7 @@ SELECT create_distributed_table('table_to_split', 'id');
\c - postgres - :master_port
SELECT public.wait_for_resource_cleanup();
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
\c - - - :worker_2_proxy_port
SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;
@ -182,7 +182,7 @@ SELECT create_distributed_table('table_to_split', 'id');
ARRAY[:worker_1_node, :worker_2_node],
'force_logical');
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
-- we need to allow connection so that we can connect to proxy
SELECT citus.mitmproxy('conn.allow()');
@ -201,7 +201,7 @@ SELECT create_distributed_table('table_to_split', 'id');
\c - postgres - :master_port
SELECT public.wait_for_resource_cleanup();
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
\c - - - :worker_2_proxy_port
SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;
@ -228,7 +228,7 @@ SELECT create_distributed_table('table_to_split', 'id');
ARRAY[:worker_1_node, :worker_2_node],
'force_logical');
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
-- we need to allow connection so that we can connect to proxy
SELECT citus.mitmproxy('conn.allow()');
@ -247,7 +247,7 @@ SELECT create_distributed_table('table_to_split', 'id');
\c - postgres - :master_port
SELECT public.wait_for_resource_cleanup();
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
\c - - - :worker_2_proxy_port
SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;
@ -275,7 +275,7 @@ SELECT create_distributed_table('table_to_split', 'id');
'force_logical');
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname;
-- we need to allow connection so that we can connect to proxy
SELECT citus.mitmproxy('conn.allow()');
@ -295,7 +295,7 @@ SELECT create_distributed_table('table_to_split', 'id');
\c - postgres - :master_port
SELECT public.wait_for_resource_cleanup();
SELECT operation_id, object_type, object_name, node_group_id, policy_type
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name;
FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id;
\c - - - :worker_2_proxy_port
SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;