diff --git a/.circleci/config.yml b/.circleci/config.yml index ccb357ef9..772e47340 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -25,10 +25,12 @@ parameters: flaky_test: type: string default: '' - flaky_test_make: - type: string - default: check-minimal - + flaky_test_runs: + type: integer + default: 1600 + skip_flaky_tests: + type: boolean + default: false jobs: build: @@ -616,17 +618,18 @@ jobs: image_tag: description: 'docker image tag to use' type: string - make: - description: 'make target' - type: string - default: check-minimal test: - description: 'the test that should be run multiple times' + description: 'the test file path that should be run multiple times' type: string + default: '' runs: description: 'number of times that the test should be run in total' type: integer - default: 1600 + default: 256 + skip: + description: 'A flag to bypass flaky test detection.' + type: boolean + default: false docker: - image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>' working_directory: /home/circleci/project @@ -635,6 +638,33 @@ jobs: - checkout - attach_workspace: at: . + - run: + name: 'Detect regression tests need to be ran' + command: | + skip=<< parameters.skip >> + if [ "$skip" = true ]; then + echo "Skipping flaky test detection." + circleci-agent step halt + fi + + testForDebugging="<< parameters.test >>" + + if [ -z "$testForDebugging" ]; then + detected_changes=$(git diff origin/HEAD --name-only --diff-filter=AM | (grep 'src/test/regress/sql/.*.sql\|src/test/regress/spec/.*.spec' || true)) + tests=${detected_changes} + else + tests=$testForDebugging; + fi + + if [ -z "$tests" ]; then + echo "No test found." 
+ circleci-agent step halt + else + echo "Detected tests " $tests + fi + + echo export tests=\""$tests"\" >> "$BASH_ENV" + source "$BASH_ENV" - run: name: 'Install Extension' command: | @@ -651,7 +681,12 @@ jobs: - run: name: 'Run minimal tests' command: | - gosu circleci make -C src/test/regress << parameters.make >> EXTRA_TESTS="$(for i in $(seq << parameters.runs >> | circleci tests split); do echo -n '<< parameters.test >> ' ; done)" + tests_array=($tests) + for test in "${tests_array[@]}" + do + echo $test + gosu circleci src/test/regress/citus_tests/run_test.py --path $test --repeat << parameters.runs >> --use-base-schedule --use-whole-schedule-line + done no_output_timeout: 2m - run: name: 'Regressions' @@ -691,6 +726,7 @@ jobs: - store_artifacts: name: 'Save worker2 log' path: src/test/regress/tmp_check/worker.57638/log + upload-coverage: docker: - image: 'citus/exttester:<< pipeline.parameters.pg15_version >><< pipeline.parameters.image_suffix >>' @@ -719,8 +755,8 @@ workflows: pg_major: 15 image_tag: '<< pipeline.parameters.pg15_version >>' requires: [build-flaky-15] - make: '<< pipeline.parameters.flaky_test_make >>' test: '<< pipeline.parameters.flaky_test >>' + runs: << pipeline.parameters.flaky_test_runs >> build_and_test: when: @@ -1209,3 +1245,9 @@ workflows: branches: only: - /tpcc_benchmark\/.*/ # match with tpcc_benchmark/ prefix + - test-flakyness: + name: 'test-15_flaky' + pg_major: 15 + image_tag: '<< pipeline.parameters.pg15_version >>' + requires: [build-15] + skip: << pipeline.parameters.skip_flaky_tests >> diff --git a/src/test/regress/Makefile b/src/test/regress/Makefile index 33c2a600c..808e31338 100644 --- a/src/test/regress/Makefile +++ b/src/test/regress/Makefile @@ -124,6 +124,17 @@ check-minimal-mx: all $(pg_regress_multi_check) --load-extension=citus \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/mx_minimal_schedule $(EXTRA_TESTS) +check-custom-schedule: all + $(pg_regress_multi_check) --load-extension=citus \ + -- 
$(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) + +check-failure-custom-schedule: all + $(pg_regress_multi_check) --load-extension=citus --mitmproxy \ + -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) + +check-isolation-custom-schedule: all $(isolation_test_files) + $(pg_regress_multi_check) --load-extension=citus --isolationtester \ + -- $(MULTI_REGRESS_OPTS) --inputdir=$(citus_abs_srcdir)/build --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS) check-empty: all $(pg_regress_multi_check) --load-extension=citus \ diff --git a/src/test/regress/README.md b/src/test/regress/README.md index b2a6c77d3..4e8dc05db 100644 --- a/src/test/regress/README.md +++ b/src/test/regress/README.md @@ -29,7 +29,27 @@ Take a look at the makefile for a list of all the testing targets. ### Running a specific test Often you want to run a specific test and don't want to run everything. You can -use one of the following commands to do so: +simply use `run_test.py [test_name]` script like below in that case. It detects the test schedule +and make target to run the given test. + +```bash +src/test/regress/citus_tests/run_test.py multi_utility_warnings +``` +You can pass the `--repeat` or `-r` parameter to run the given test multiple times. + +```bash +src/test/regress/citus_tests/run_test.py multi_utility_warnings -r 1000 +``` + +To force the script to use base schedules rather than minimal ones, you can +pass `-b` or `--use-base-schedule`. 
+ +```bash +src/test/regress/citus_tests/run_test.py coordinator_shouldhaveshards -r 1000 --use-base-schedule +``` + +If you would like to run a specific test on a certain target you can use one +of the following commands to do so: ```bash # If your tests needs almost no setup you can use check-minimal @@ -42,6 +62,7 @@ make install -j9 && make -C src/test/regress/ check-base EXTRA_TESTS='with_prepa make install -j9 && make -C src/test/regress/ check-base EXTRA_TESTS='add_coordinator coordinator_shouldhaveshards' ``` + ## Normalization The output of tests is sadly not completely predictable. Still we want to diff --git a/src/test/regress/citus_tests/common.py b/src/test/regress/citus_tests/common.py index a6169d8c5..cf625c4f7 100644 --- a/src/test/regress/citus_tests/common.py +++ b/src/test/regress/citus_tests/common.py @@ -295,3 +295,15 @@ def initialize_citus_cluster(bindir, datadir, settings, config): if config.add_coordinator_to_metadata: add_coordinator_to_metadata(bindir, config.coordinator_port()) config.setup_steps() + +def eprint(*args, **kwargs): + """eprint prints to stderr""" + + print(*args, file=sys.stderr, **kwargs) + + +def run(command, *args, shell=True, **kwargs): + """run runs the given command and prints it to stderr""" + + eprint(f"+ {command} ") + return subprocess.run(command, *args, check=True, shell=shell, **kwargs) diff --git a/src/test/regress/citus_tests/run_test.py b/src/test/regress/citus_tests/run_test.py new file mode 100755 index 000000000..04e32f313 --- /dev/null +++ b/src/test/regress/citus_tests/run_test.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python3 + +import sys +import os +import pathlib +from glob import glob +import argparse +import shutil +import random +import re +import common +import config + +args = argparse.ArgumentParser() +args.add_argument("test_name", help="Test name (must be included in a schedule.)", nargs='?') +args.add_argument("-p", "--path", required=False, help="Relative path for test file (must have a .sql 
or .spec extension)", type=pathlib.Path) +args.add_argument("-r", "--repeat", help="Number of test to run", type=int, default=1) +args.add_argument("-b", "--use-base-schedule", required=False, help="Choose base-schedules rather than minimal-schedules", action='store_true') +args.add_argument("-w", "--use-whole-schedule-line", required=False, help="Use the whole line found in related schedule", action='store_true') + +args = vars(args.parse_args()) + +regress_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) +test_file_path = args['path'] +test_file_name = args['test_name'] +use_base_schedule = args['use_base_schedule'] +use_whole_schedule_line = args['use_whole_schedule_line'] + +if not (test_file_name or test_file_path): + print("FATAL: No test given.") + sys.exit(2) + + +if test_file_path: + test_file_path = os.path.join(os.getcwd(), args['path']) + + if not os.path.isfile(test_file_path): + print(f"ERROR: test file '{test_file_path}' does not exist") + sys.exit(2) + + test_file_extension = pathlib.Path(test_file_path).suffix + test_file_name = pathlib.Path(test_file_path).stem + + if test_file_extension not in ('.sql', '.spec'): + print( + "ERROR: Unrecognized test extension. 
Valid extensions are: .sql and .spec" + ) + sys.exit(1) + +test_schedule = '' + +# find related schedule +for schedule_file_path in sorted(glob(os.path.join(regress_dir, "*_schedule"))): + for schedule_line in open(schedule_file_path, 'r'): + if re.search(r'\b' + test_file_name + r'\b', schedule_line): + test_schedule = pathlib.Path(schedule_file_path).stem + if use_whole_schedule_line: + test_schedule_line = schedule_line + else: + test_schedule_line = f"test: {test_file_name}\n" + break + else: + continue + break + +# map suitable schedule +if not test_schedule: + print( + f"WARNING: Could not find any schedule for '{test_file_name}'" + ) + sys.exit(0) +elif "isolation" in test_schedule: + test_schedule = 'base_isolation_schedule' +elif "failure" in test_schedule: + test_schedule = 'failure_base_schedule' +elif "mx" in test_schedule: + if use_base_schedule: + test_schedule = 'mx_base_schedule' + else: + test_schedule = 'mx_minimal_schedule' +elif test_schedule in config.ARBITRARY_SCHEDULE_NAMES: + print(f"WARNING: Arbitrary config schedule ({test_schedule}) is not supported.") + sys.exit(0) +else: + if use_base_schedule: + test_schedule = 'base_schedule' + else: + test_schedule = 'minimal_schedule' + +# copy base schedule to a temp file and append test_schedule_line +# to be able to run tests in parallel (if test_schedule_line is a parallel group.) 
+tmp_schedule_path = os.path.join(regress_dir, f"tmp_schedule_{ random.randint(1, 10000)}") +shutil.copy2(os.path.join(regress_dir, test_schedule), tmp_schedule_path) +with open(tmp_schedule_path, "a") as myfile: + for i in range(args['repeat']): + myfile.write(test_schedule_line) + +# find suitable make recipe +if "isolation" in test_schedule: + make_recipe = 'check-isolation-custom-schedule' +elif "failure" in test_schedule: + make_recipe = 'check-failure-custom-schedule' +else: + make_recipe = 'check-custom-schedule' + +# prepare command to run tests +test_command = f"make -C {regress_dir} {make_recipe} SCHEDULE='{pathlib.Path(tmp_schedule_path).stem}'" + +# run test command n times +try: + print(f"Executing.. {test_command}") + result = common.run(test_command) +finally: + # remove temp schedule file + os.remove(tmp_schedule_path) diff --git a/src/test/regress/flaky_tests.md b/src/test/regress/flaky_tests.md index f940d4e4f..4d2b7ad7c 100644 --- a/src/test/regress/flaky_tests.md +++ b/src/test/regress/flaky_tests.md @@ -17,21 +17,17 @@ tests. ## Reproducing a flaky test +### 1. Reproduce a flaky test in CI Before trying to fix the flakyness, it's important that you can reproduce the flaky test. Often it only reproduces in CI, so we have a CI job that can help you reproduce flakyness consistently by running the same test a lot of times. -You can configure CI to run this job by setting the `flaky_test` and if -necessary the possibly the `flaky_test_make` parameters. +You can configure CI to run this job by setting the `flaky_test` ```diff flaky_test: type: string - default: '' + default: 'isolation_shard_rebalancer_progress' - flaky_test_make: - type: string -- default: check-minimal -+ default: check-isolation-base ``` Once you get this job to consistently fail in CI, you can continue with the next @@ -40,6 +36,15 @@ with this CI job, it's almost certainly caused by running it concurrently with other tests. 
See the "Don't run test in parallel with others" section below on how to fix that. +### 2. Reproduce a flaky test in local environment +To reproduce the flaky tests in your local environment, you can use the `run_test.py [test_name]` +script like below. + +```bash +src/test/regress/citus_tests/run_test.py isolation_shard_rebalancer_progress -r 1000 --use-base-schedule --use-whole-schedule-line +``` + +The script above will try to run the whole line in the schedule file containing the test name by using the related base_schedule (rather than a minimal_schedule), 1000 times. ## Easy fixes