From 2bccb5815770b67cf646a7dc5bb9539d5a29c010 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 1 Nov 2023 13:12:20 +0100 Subject: [PATCH 1/3] Run github actions on main (#7292) We want the nice looking green checkmark on our main branch too. This PR includes running on pushes to release branches too, but that won't come into effect until we have release branches with this workflow file. --- .github/workflows/build_and_test.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index d285e4f50..d900fe867 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -10,6 +10,10 @@ on: required: false default: false type: boolean + push: + branches: + - "main" + - "release-*" pull_request: types: [opened, reopened,synchronize] jobs: From c83c5567028d2035651c39f737ac5a944a70db16 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 1 Nov 2023 14:44:45 +0100 Subject: [PATCH 2/3] Fix flaky isolation_master_update_node (#7303) Sometimes in CI isolation_master_update_node fails like this: ```diff ------------------ (1 row) step s2-abort: ABORT; step s1-abort: ABORT; FATAL: terminating connection due to administrator command FATAL: terminating connection due to administrator command SSL connection has been closed unexpectedly +server closed the connection unexpectedly master_remove_node ------------------ ``` This just seems like a random error line. The only way to reasonably fix this is by adding an extra output file. So that's what this PR does. 
--- .../isolation_master_update_node_1.out | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 src/test/regress/expected/isolation_master_update_node_1.out diff --git a/src/test/regress/expected/isolation_master_update_node_1.out b/src/test/regress/expected/isolation_master_update_node_1.out new file mode 100644 index 000000000..474956629 --- /dev/null +++ b/src/test/regress/expected/isolation_master_update_node_1.out @@ -0,0 +1,68 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-begin s1-insert s2-begin s2-update-node-1 s1-abort s2-abort +create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +step s1-begin: BEGIN; +step s1-insert: INSERT INTO t1 SELECT generate_series(1, 100); +step s2-begin: BEGIN; +step s2-update-node-1: + -- update a specific node by address + SELECT master_update_node(nodeid, 'localhost', nodeport + 10) + FROM pg_dist_node + WHERE nodename = 'localhost' + AND nodeport = 57637; + +step s1-abort: ABORT; +step s2-update-node-1: <... completed> +master_update_node +--------------------------------------------------------------------- + +(1 row) + +step s2-abort: ABORT; +master_remove_node +--------------------------------------------------------------------- + + +(2 rows) + + +starting permutation: s1-begin s1-insert s2-begin s2-update-node-1-force s2-abort s1-abort +create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +step s1-begin: BEGIN; +step s1-insert: INSERT INTO t1 SELECT generate_series(1, 100); +step s2-begin: BEGIN; +step s2-update-node-1-force: + -- update a specific node by address (force) + SELECT master_update_node(nodeid, 'localhost', nodeport + 10, force => true, lock_cooldown => 100) + FROM pg_dist_node + WHERE nodename = 'localhost' + AND nodeport = 57637; + +step s2-update-node-1-force: <... 
completed> +master_update_node +--------------------------------------------------------------------- + +(1 row) + +step s2-abort: ABORT; +step s1-abort: ABORT; +FATAL: terminating connection due to administrator command +FATAL: terminating connection due to administrator command +SSL connection has been closed unexpectedly +server closed the connection unexpectedly + +master_remove_node +--------------------------------------------------------------------- + + +(2 rows) + From c9f2fc892d4ce01a4bc23beb508e2ff03f08a774 Mon Sep 17 00:00:00 2001 From: Jelte Fennema-Nio Date: Wed, 1 Nov 2023 15:08:51 +0100 Subject: [PATCH 3/3] Fix flaky failure_split_cleanup (#7299) Sometimes failure_split_cleanup failed in CI like this: ```diff ERROR: server closed the connection unexpectedly CONTEXT: while executing command on localhost:9060 SELECT operation_id, object_type, object_name, node_group_id, policy_type FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; operation_id | object_type | object_name | node_group_id | policy_type --------------+-------------+-----------------------------------------------------------+---------------+------------- 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 - 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1 777 | 4 | citus_shard_split_publication_1_10_777 | 2 | 0 (5 rows) -- we need to allow connection so that we can connect to proxy ``` Source: https://github.com/citusdata/citus/actions/runs/6717642291/attempts/1#summary-18256014949 It's the common problem where we're missing a column in the ORDER BY clause. This fixes that by adding node_group_id to the ORDER BY clause of the affected queries. 
--- .../regress/expected/failure_split_cleanup.out | 18 +++++++++--------- src/test/regress/sql/failure_split_cleanup.sql | 16 ++++++++-------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/test/regress/expected/failure_split_cleanup.out b/src/test/regress/expected/failure_split_cleanup.out index fe646587c..d81335325 100644 --- a/src/test/regress/expected/failure_split_cleanup.out +++ b/src/test/regress/expected/failure_split_cleanup.out @@ -277,12 +277,12 @@ CONTEXT: while executing command on localhost:xxxxx ERROR: connection not open CONTEXT: while executing command on localhost:xxxxx SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 - 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 + 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 2 | 0 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981003 | 2 | 1 777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0 777 | 4 | citus_shard_split_publication_xxxxxxx_xxxxxxx_xxxxxxx | 2 | 0 @@ -336,7 +336,7 @@ CONTEXT: while executing command on localhost:xxxxx (1 row) SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- (0 rows) @@ -388,7 +388,7 @@ 
CONTEXT: while executing command on localhost:xxxxx ERROR: connection not open CONTEXT: while executing command on localhost:xxxxx SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 @@ -455,7 +455,7 @@ CONTEXT: while executing command on localhost:xxxxx (1 row) SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- (0 rows) @@ -507,7 +507,7 @@ CONTEXT: while executing command on localhost:xxxxx ERROR: connection not open CONTEXT: while executing command on localhost:xxxxx SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981000 | 1 | 0 @@ -574,7 +574,7 @@ CONTEXT: while executing command on localhost:xxxxx (1 row) SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id 
| policy_type --------------------------------------------------------------------- (0 rows) @@ -634,7 +634,7 @@ WARNING: connection to the remote node localhost:xxxxx failed with the followin ERROR: connection not open CONTEXT: while executing command on localhost:xxxxx SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- 777 | 1 | citus_failure_split_cleanup_schema.table_to_split_8981002 | 1 | 1 @@ -701,7 +701,7 @@ CONTEXT: while executing command on localhost:xxxxx (1 row) SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; operation_id | object_type | object_name | node_group_id | policy_type --------------------------------------------------------------------- (0 rows) diff --git a/src/test/regress/sql/failure_split_cleanup.sql b/src/test/regress/sql/failure_split_cleanup.sql index 1b85d3d17..9dfbb245e 100644 --- a/src/test/regress/sql/failure_split_cleanup.sql +++ b/src/test/regress/sql/failure_split_cleanup.sql @@ -136,7 +136,7 @@ SELECT create_distributed_table('table_to_split', 'id'); ARRAY[:worker_1_node, :worker_2_node], 'force_logical'); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; -- we need to allow connection so that we can connect to proxy SELECT citus.mitmproxy('conn.allow()'); @@ -155,7 +155,7 @@ SELECT create_distributed_table('table_to_split', 'id'); \c - postgres - :master_port 
SELECT public.wait_for_resource_cleanup(); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; \c - - - :worker_2_proxy_port SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; @@ -182,7 +182,7 @@ SELECT create_distributed_table('table_to_split', 'id'); ARRAY[:worker_1_node, :worker_2_node], 'force_logical'); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; -- we need to allow connection so that we can connect to proxy SELECT citus.mitmproxy('conn.allow()'); @@ -201,7 +201,7 @@ SELECT create_distributed_table('table_to_split', 'id'); \c - postgres - :master_port SELECT public.wait_for_resource_cleanup(); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; \c - - - :worker_2_proxy_port SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; @@ -228,7 +228,7 @@ SELECT create_distributed_table('table_to_split', 'id'); ARRAY[:worker_1_node, :worker_2_node], 'force_logical'); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; -- we need to allow connection so that we can connect to proxy SELECT citus.mitmproxy('conn.allow()'); @@ -247,7 +247,7 @@ SELECT create_distributed_table('table_to_split', 'id'); \c - postgres - :master_port SELECT public.wait_for_resource_cleanup(); SELECT operation_id, object_type, object_name, 
node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; \c - - - :worker_2_proxy_port SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog; @@ -275,7 +275,7 @@ SELECT create_distributed_table('table_to_split', 'id'); 'force_logical'); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; SELECT relname FROM pg_class where relname LIKE '%table_to_split_%' AND relkind = 'r' order by relname; -- we need to allow connection so that we can connect to proxy SELECT citus.mitmproxy('conn.allow()'); @@ -295,7 +295,7 @@ SELECT create_distributed_table('table_to_split', 'id'); \c - postgres - :master_port SELECT public.wait_for_resource_cleanup(); SELECT operation_id, object_type, object_name, node_group_id, policy_type - FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name; + FROM pg_dist_cleanup where operation_id = 777 ORDER BY object_name, node_group_id; \c - - - :worker_2_proxy_port SET search_path TO "citus_failure_split_cleanup_schema", public, pg_catalog;