From d16b458e2a4c6533f791cdeec2e3b9421d43551f Mon Sep 17 00:00:00 2001 From: Jelte Fennema Date: Thu, 18 Aug 2022 14:14:16 +0200 Subject: [PATCH] Remove the flaky rollback_to_savepoint test (#6190) This removes a flaky test that I introduced in #3868 after I fixed the issue described in #3622. This test is sometimes fails randomly in CI. The way it fails indicates that there might be some bug: A connection breaks after rolling back to a savepoint. I tried reproducing this issue locally, but I wasn't able to. I don't understand what causes the failure. Things that I tried were: 1. Running the test with: ```sql SET citus.force_max_query_parallelization = true; ``` 2. Running the test with: ```sql SET citus.max_adaptive_executor_pool_size = 1; ``` 3. Running the test in parallel with the same tests that it is run in parallel with in multi_schedule. None of these allowed me to reproduce the issue locally. So I think it's time to give on fixing this test and simply remove the test. The regression that this test protects against seems very unlikely to reappear, since in #3868 I also added a big comment about the need for the newly added `UnclaimConnection` call. So, I think the need for the test is quite small, and removing it will make our CI less flaky. In case the cause of the bug ever gets found, I tracked the bug in #6189 Example of a failing CI run: https://app.circleci.com/pipelines/github/citusdata/citus/26098/workflows/f84741d9-13b1-4ae7-9155-c21ed3466951/jobs/736424 For reference the unexpected diff is this (so both warnings and an error): ```diff INSERT INTO t SELECT i FROM generate_series(1, 100) i; +WARNING: connection to the remote node localhost:57638 failed with the following error: +WARNING: +CONTEXT: while executing command on localhost:57638 +ERROR: connection to the remote node localhost:57638 failed with the following error: ROLLBACK; ``` This test is also mentioned as the most failing regression test in #5975 --- .../expected/rollback_to_savepoint.out | 27 ------------------- src/test/regress/multi_schedule | 2 +- .../regress/sql/rollback_to_savepoint.sql | 23 ---------------- 3 files changed, 1 insertion(+), 51 deletions(-) delete mode 100644 src/test/regress/expected/rollback_to_savepoint.out delete mode 100644 src/test/regress/sql/rollback_to_savepoint.sql diff --git a/src/test/regress/expected/rollback_to_savepoint.out b/src/test/regress/expected/rollback_to_savepoint.out deleted file mode 100644 index b5b631463..000000000 --- a/src/test/regress/expected/rollback_to_savepoint.out +++ /dev/null @@ -1,27 +0,0 @@ --- Regression test for this issue: --- https://github.com/citusdata/citus/issues/3622 -SET citus.shard_count = 4; -SET citus.next_shard_id TO 1954000; -CREATE SCHEMA rollback_to_savepoint; -SET search_path TO rollback_to_savepoint; -CREATE TABLE t(a int); -SELECT create_distributed_table('t', 'a'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- This timeout is chosen such that the INSERT with --- generate_series(1, 100000000) is cancelled at the right time to trigger the --- bug -SET statement_timeout = '2s'; -BEGIN; -INSERT INTO t VALUES (4); -SAVEPOINT s1; -INSERT INTO t SELECT i FROM generate_series(1, 10000000) i; -ERROR: canceling statement due to statement timeout -ROLLBACK TO SAVEPOINT s1; -INSERT INTO t SELECT i FROM generate_series(1, 100) i; -ROLLBACK; -DROP SCHEMA rollback_to_savepoint CASCADE; -NOTICE: drop cascades to table t diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 58cfc87c8..ab8d092ab 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -64,7 +64,7 @@ test: tableam # ---------- # Miscellaneous tests to check our query planning behavior # ---------- -test: multi_deparse_shard_query multi_distributed_transaction_id intermediate_results limit_intermediate_size rollback_to_savepoint +test: multi_deparse_shard_query multi_distributed_transaction_id intermediate_results limit_intermediate_size test: multi_explain hyperscale_tutorial partitioned_intermediate_results distributed_intermediate_results multi_real_time_transaction test: multi_basic_queries cross_join multi_complex_expressions multi_subquery multi_subquery_complex_queries multi_subquery_behavioral_analytics test: multi_subquery_complex_reference_clause multi_subquery_window_functions multi_view multi_sql_function multi_prepare_sql diff --git a/src/test/regress/sql/rollback_to_savepoint.sql b/src/test/regress/sql/rollback_to_savepoint.sql deleted file mode 100644 index 48ef29a5c..000000000 --- a/src/test/regress/sql/rollback_to_savepoint.sql +++ /dev/null @@ -1,23 +0,0 @@ --- Regression test for this issue: --- https://github.com/citusdata/citus/issues/3622 -SET citus.shard_count = 4; -SET citus.next_shard_id TO 1954000; -CREATE SCHEMA rollback_to_savepoint; -SET search_path TO rollback_to_savepoint; - -CREATE TABLE t(a int); -SELECT create_distributed_table('t', 'a'); - --- This timeout is chosen such that the INSERT with --- generate_series(1, 100000000) is cancelled at the right time to trigger the --- bug -SET statement_timeout = '2s'; -BEGIN; -INSERT INTO t VALUES (4); -SAVEPOINT s1; -INSERT INTO t SELECT i FROM generate_series(1, 10000000) i; -ROLLBACK TO SAVEPOINT s1; -INSERT INTO t SELECT i FROM generate_series(1, 100) i; -ROLLBACK; - -DROP SCHEMA rollback_to_savepoint CASCADE;