Flaky Test Detection CI Workflow (#6495)

This PR adds a new CI workflow named ```flaky-test``` to run flaky test detection on newly introduced regression tests. Co-authored-by: Jelte Fennema <github-tech@jeltef.nl>
2022-12-12 14:36:23 +03:00 · 2022-12-12 14:36:23 +03:00 · e2a73ad8a8
parent 190307e8d8
commit e2a73ad8a8
6 changed files with 225 additions and 19 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -25,10 +25,12 @@ parameters:
  flaky_test:
    type: string
    default: ''
-  flaky_test_make:
-    type: string
-    default: check-minimal
-
+  flaky_test_runs:
+    type: integer
+    default: 1600
+  skip_flaky_tests:
+    type: boolean
+    default: false

 jobs:
  build:
@ -616,17 +618,18 @@ jobs:
      image_tag:
        description: 'docker image tag to use'
        type: string
-      make:
-        description: 'make target'
-        type: string
-        default: check-minimal
      test:
-        description: 'the test that should be run multiple times'
+        description: 'the test file path that should be run multiple times'
        type: string
+        default: ''
      runs:
        description: 'number of times that the test should be run in total'
        type: integer
-        default: 1600
+        default: 256
+      skip:
+        description: 'A flag to bypass flaky test detection.'
+        type: boolean
+        default: false
    docker:
      - image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>'
    working_directory: /home/circleci/project
@ -635,6 +638,33 @@ jobs:
      - checkout
      - attach_workspace:
          at: .
+      - run:
+          name: 'Detect regression tests need to be ran'
+          command: |
+            # `circleci-agent step halt` ends the job only after the current
+            # step has finished; it does not stop this script. Exit explicitly
+            # right after calling it so nothing below runs by accident.
+            skip=<< parameters.skip >>
+            if [ "$skip" = true ]; then
+                echo "Skipping flaky test detection."
+                circleci-agent step halt
+                exit 0
+            fi
+
+            # An explicitly requested test takes precedence over detection.
+            testForDebugging="<< parameters.test >>"
+
+            if [ -z "$testForDebugging" ]; then
+              # Regression test files added or modified relative to origin/HEAD.
+              # The dots are escaped and the patterns anchored so that only
+              # real .sql/.spec files under the two test directories match.
+              detected_changes=$(git diff origin/HEAD --name-only --diff-filter=AM | (grep -e '^src/test/regress/sql/.*\.sql$' -e '^src/test/regress/spec/.*\.spec$' || true))
+              tests=${detected_changes}
+            else
+              tests=$testForDebugging;
+            fi
+
+            if [ -z "$tests" ]; then
+                echo "No test found."
+                circleci-agent step halt
+                exit 0
+            fi
+            echo "Detected tests " $tests
+
+            # Export the list through BASH_ENV so later steps can read $tests.
+            echo export tests=\""$tests"\" >> "$BASH_ENV"
+            source "$BASH_ENV"
      - run:
          name: 'Install Extension'
          command: |
@ -651,7 +681,12 @@ jobs:
      - run:
          name: 'Run minimal tests'
          command: |
-            gosu circleci make -C src/test/regress << parameters.make >> EXTRA_TESTS="$(for i in $(seq << parameters.runs >> | circleci tests split); do echo -n '<< parameters.test >> ' ; done)"
+            # $tests is a whitespace-separated list of test file paths; the
+            # unquoted expansion below splits it into one array entry per path.
+            tests_array=($tests)
+            for test in "${tests_array[@]}"
+            do
+                # Quote each path so it reaches run_test.py as a single,
+                # unglobbed argument.
+                echo "$test"
+                gosu circleci src/test/regress/citus_tests/run_test.py --path "$test" --repeat << parameters.runs >> --use-base-schedule --use-whole-schedule-line
+            done
          no_output_timeout: 2m
      - run:
          name: 'Regressions'
@ -691,6 +726,7 @@ jobs:
      - store_artifacts:
          name: 'Save worker2 log'
          path: src/test/regress/tmp_check/worker.57638/log
+
  upload-coverage:
    docker:
      - image: 'citus/exttester:<< pipeline.parameters.pg15_version >><< pipeline.parameters.image_suffix >>'
@ -719,8 +755,8 @@ workflows:
          pg_major: 15
          image_tag: '<< pipeline.parameters.pg15_version >>'
          requires: [build-flaky-15]
-          make: '<< pipeline.parameters.flaky_test_make >>'
          test: '<< pipeline.parameters.flaky_test >>'
+          runs: << pipeline.parameters.flaky_test_runs >>

  build_and_test:
    when:
@ -1209,3 +1245,9 @@ workflows:
            branches:
              only:
                - /tpcc_benchmark\/.*/ # match with tpcc_benchmark/ prefix
+      # Flaky test detection for this workflow. `test` is not passed, so the
+      # job falls back to the regression tests added or modified relative to
+      # origin/HEAD, and `runs` keeps the job's default. Setting the
+      # `skip_flaky_tests` pipeline parameter to true bypasses the detection.
+      - test-flakyness:
+          name: 'test-15_flaky'
+          pg_major: 15
+          image_tag: '<< pipeline.parameters.pg15_version >>'
+          requires: [build-15]
+          skip: << pipeline.parameters.skip_flaky_tests >>
--- a/src/test/regress/Makefile
+++ b/src/test/regress/Makefile
@ -124,6 +124,17 @@ check-minimal-mx: all
 	$(pg_regress_multi_check) --load-extension=citus \
 	-- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/mx_minimal_schedule $(EXTRA_TESTS)

+# Run an arbitrary schedule file. SCHEDULE is a file name resolved relative to
+# $(citus_abs_srcdir); EXTRA_TESTS, if set, is passed after the schedule.
+# citus_tests/run_test.py invokes these targets with a temporary schedule.
+check-custom-schedule: all
+	$(pg_regress_multi_check) --load-extension=citus \
+	-- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS)
+
+# Same as check-custom-schedule, but additionally passes --mitmproxy.
+check-failure-custom-schedule: all
+	$(pg_regress_multi_check) --load-extension=citus --mitmproxy \
+	-- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS)
+
+# Variant for isolation tests: depends on $(isolation_test_files), passes
+# --isolationtester and uses $(citus_abs_srcdir)/build as the input directory.
+check-isolation-custom-schedule: all  $(isolation_test_files)
+	$(pg_regress_multi_check) --load-extension=citus --isolationtester \
+	-- $(MULTI_REGRESS_OPTS) --inputdir=$(citus_abs_srcdir)/build --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS)

 check-empty: all
 	$(pg_regress_multi_check) --load-extension=citus \
--- a/src/test/regress/README.md
+++ b/src/test/regress/README.md
@ -29,7 +29,27 @@ Take a look at the makefile for a list of all the testing targets.
 ### Running a specific test

 Often you want to run a specific test and don't want to run everything. You can
-use one of the following commands to do so:
+simply use the `run_test.py [test_name]` script in that case, as shown below. It detects the
+schedule and make target needed to run the given test.
+
+```bash
+src/test/regress/citus_tests/run_test.py multi_utility_warnings
+```
+You can pass the `--repeat` (or `-r`) parameter to run the given test multiple times.
+
+```bash
+src/test/regress/citus_tests/run_test.py multi_utility_warnings -r 1000
+```
+
+To force the script to use base schedules rather than minimal ones, you can
+pass `-b` or `--use-base-schedule`.
+
+```bash
+src/test/regress/citus_tests/run_test.py coordinator_shouldhaveshards -r 1000 --use-base-schedule
+```
+
+If you would like to run a specific test on a certain target you can use one
+of the following commands to do so:

 ```bash
 # If your tests needs almost no setup you can use check-minimal
@ -42,6 +62,7 @@ make install -j9 && make -C src/test/regress/ check-base EXTRA_TESTS='with_prepa
 make install -j9 && make -C src/test/regress/ check-base EXTRA_TESTS='add_coordinator coordinator_shouldhaveshards'
 ```

+
 ## Normalization

 The output of tests is sadly not completely predictable. Still we want to
--- a/src/test/regress/citus_tests/common.py
+++ b/src/test/regress/citus_tests/common.py
@ -295,3 +295,15 @@ def initialize_citus_cluster(bindir, datadir, settings, config):
    if config.add_coordinator_to_metadata:
        add_coordinator_to_metadata(bindir, config.coordinator_port())
    config.setup_steps()
+
+def eprint(*args, **kwargs):
+    """Like print(), but the output goes to sys.stderr."""
+    print(*args, **kwargs, file=sys.stderr)
+
+
+def run(command, *args, check=True, shell=True, **kwargs):
+    """Echo the command to stderr, then execute it with subprocess.run.
+
+    ``check`` and ``shell`` both default to True, so by default a non-zero
+    exit status raises subprocess.CalledProcessError. Callers that want to
+    inspect the exit status themselves can pass ``check=False``. All other
+    positional and keyword arguments are forwarded to subprocess.run
+    unchanged, and its return value is returned.
+    """
+
+    eprint(f"+ {command} ")
+    return subprocess.run(command, *args, check=check, shell=shell, **kwargs)
--- a/src/test/regress/citus_tests/run_test.py
+++ b/src/test/regress/citus_tests/run_test.py
@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import pathlib
+from glob import glob
+import argparse
+import shutil
+import random
+import re
+import common
+import config
+
+# Command line interface. A test is identified either by its name (positional)
+# or by the path of its .sql/.spec file (--path); at least one must be given.
+args = argparse.ArgumentParser()
+args.add_argument(
+    "test_name",
+    help="Test name (must be included in a schedule.)",
+    nargs='?',
+)
+args.add_argument(
+    "-p",
+    "--path",
+    required=False,
+    help="Relative path for test file (must have a .sql or .spec extension)",
+    type=pathlib.Path,
+)
+# --repeat is a repetition count for the selected test, not a number of tests.
+args.add_argument(
+    "-r",
+    "--repeat",
+    help="Number of times to run the test",
+    type=int,
+    default=1,
+)
+args.add_argument(
+    "-b",
+    "--use-base-schedule",
+    required=False,
+    help="Choose base-schedules rather than minimal-schedules",
+    action='store_true',
+)
+args.add_argument(
+    "-w",
+    "--use-whole-schedule-line",
+    required=False,
+    help="Use the whole line found in related schedule",
+    action='store_true',
+)
+
+# From here on `args` is a plain dict of the parsed options.
+args = vars(args.parse_args())
+
+# This script lives in <regress_dir>/citus_tests/, so the regress directory is
+# two levels above the resolved script path.
+regress_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+test_file_path = args['path']
+test_file_name = args['test_name']
+use_base_schedule = args['use_base_schedule']
+use_whole_schedule_line = args['use_whole_schedule_line']
+
+if not (test_file_name or test_file_path):
+    print(f"FATAL: No test given.")
+    sys.exit(2)
+
+
+# When a path is given it wins over the positional test name: the test name is
+# derived from the file name without its extension.
+if test_file_path:
+    test_file_path = os.path.join(os.getcwd(), args['path'])
+
+    if not os.path.isfile(test_file_path):
+        print(f"ERROR: test file '{test_file_path}' does not exist")
+        sys.exit(2)
+
+    test_file_extension = pathlib.Path(test_file_path).suffix
+    test_file_name = pathlib.Path(test_file_path).stem
+
+    # Compare against the exact extensions. A substring test against
+    # '.spec.sql' would also accept '', '.s', '.spec.sql' and the like.
+    if test_file_extension not in ('.spec', '.sql'):
+        print(
+            "ERROR: Unrecognized test extension. Valid extensions are: .sql and .spec"
+        )
+        sys.exit(1)
+
+test_schedule = ''
+
+# Find the first schedule file (in sorted order) with a line that mentions the
+# test as a whole word. The name is escaped so that any regex metacharacters
+# in it are matched literally.
+test_name_pattern = re.compile(r'\b' + re.escape(test_file_name) + r'\b')
+for schedule_file_path in sorted(glob(os.path.join(regress_dir, "*_schedule"))):
+    # Use a context manager so every schedule file is closed after scanning.
+    with open(schedule_file_path, 'r') as schedule_file:
+        matching_line = next(
+            (line for line in schedule_file if test_name_pattern.search(line)),
+            None,
+        )
+    if matching_line is None:
+        continue
+
+    test_schedule = pathlib.Path(schedule_file_path).stem
+    if use_whole_schedule_line:
+        # keep the complete line, e.g. a whole parallel group
+        test_schedule_line = matching_line
+    else:
+        test_schedule_line = f"test: {test_file_name}\n"
+    break
+
+# Map the schedule in which the test was found to the schedule that is used as
+# the starting point for the run. The branches are substring checks on the
+# schedule name and are order-dependent: "isolation" is tested before
+# "failure", which is tested before "mx".
+if not test_schedule:
+    # Not listed in any schedule: warn, but exit successfully.
+    print(
+        f"WARNING: Could not find any schedule for '{test_file_name}'"
+    )
+    sys.exit(0)
+# Isolation and failure tests always use their base schedule, regardless of
+# --use-base-schedule.
+elif "isolation" in test_schedule:
+    test_schedule = 'base_isolation_schedule'
+elif "failure" in test_schedule:
+    test_schedule = 'failure_base_schedule'
+elif "mx" in test_schedule:
+    if use_base_schedule:
+        test_schedule = 'mx_base_schedule'
+    else:
+        test_schedule = 'mx_minimal_schedule'
+# Schedules listed in config.ARBITRARY_SCHEDULE_NAMES are not handled by this
+# script: warn and exit successfully.
+elif test_schedule in config.ARBITRARY_SCHEDULE_NAMES:
+    print(f"WARNING: Arbitrary config schedule ({test_schedule}) is not supported.")
+    sys.exit(0)
+# Everything else runs on top of the regular base or minimal schedule.
+else:
+    if use_base_schedule:
+        test_schedule = 'base_schedule'
+    else:
+        test_schedule = 'minimal_schedule'
+
+# copy base schedule to a temp file and append test_schedule_line
+# to be able to run tests in parallel (if test_schedule_line is a parallel group.)
+tmp_schedule_path = os.path.join(regress_dir, f"tmp_schedule_{random.randint(1, 10000)}")
+shutil.copy2(os.path.join(regress_dir, test_schedule), tmp_schedule_path)
+
+# A line taken verbatim from the very end of a schedule file may lack its
+# trailing newline; add it so the repeated entries do not run together.
+if not test_schedule_line.endswith("\n"):
+    test_schedule_line += "\n"
+
+# append the line once per requested repetition
+with open(tmp_schedule_path, "a") as myfile:
+    myfile.write(test_schedule_line * args['repeat'])
+
+# Find the make recipe matching the selected schedule. This has to be a single
+# if/elif/else chain: with two independent `if` statements the trailing `else`
+# would overwrite the isolation recipe with 'check-custom-schedule'.
+if "isolation" in test_schedule:
+    make_recipe = 'check-isolation-custom-schedule'
+elif "failure" in test_schedule:
+    make_recipe = 'check-failure-custom-schedule'
+else:
+    make_recipe = 'check-custom-schedule'
+
+# Prepare the command to run the tests. SCHEDULE is passed as a bare file name
+# because the make recipes resolve it relative to the regress source directory.
+test_command = f"make -C {regress_dir} {make_recipe} SCHEDULE='{pathlib.Path(tmp_schedule_path).stem}'"
+
+# The make target is invoked once; the repetition comes from the repeated
+# lines in the temporary schedule. common.run raises if the command fails.
+try:
+    print(f"Executing.. {test_command}")
+    common.run(test_command)
+finally:
+    # remove temp schedule file, whether or not the run succeeded
+    os.remove(tmp_schedule_path)
--- a/src/test/regress/flaky_tests.md
+++ b/src/test/regress/flaky_tests.md
@ -17,21 +17,17 @@ tests.

 ## Reproducing a flaky test

+### 1. Reproduce a flaky test in CI
 Before trying to fix the flakyness, it's important that you can reproduce the
 flaky test. Often it only reproduces in CI, so we have a CI job that can help
 you reproduce flakyness consistently by running the same test a lot of times.
-You can configure CI to run this job by setting the `flaky_test` and if
-necessary the possibly the `flaky_test_make` parameters.
+You can configure CI to run this job by setting the `flaky_test` parameter:

 ```diff
   flaky_test:
     type: string
 -    default: ''
 +    default: 'isolation_shard_rebalancer_progress'
-   flaky_test_make:
-     type: string
-    default: check-minimal
-+    default: check-isolation-base
 ```

 Once you get this job to consistently fail in CI, you can continue with the next
@ -40,6 +36,15 @@ with this CI job, it's almost certainly caused by running it concurrently with
 other tests. See the "Don't run test in parallel with others" section below on
 how to fix that.

+### 2. Reproduce a flaky test in local environment
+To reproduce a flaky test in your local environment, you can use the `run_test.py [test_name]`
+script as shown below.
+
+```bash
+src/test/regress/citus_tests/run_test.py isolation_shard_rebalancer_progress -r 1000 --use-base-schedule --use-whole-schedule-line
+```
+
+The script above will try to run the whole line in the schedule file containing the test name by using the related base_schedule (rather than a minimal_schedule), 1000 times.

 ## Easy fixes