mirror of https://github.com/citusdata/citus.git
Flaky Test Detection CI Workflow (#6495)
This PR adds a new CI workflow named ```flaky-test``` to run flaky test detection on newly introduced regression tests. Co-authored-by: Jelte Fennema <github-tech@jeltef.nl>pull/6554/head
parent
190307e8d8
commit
e2a73ad8a8
|
@ -25,10 +25,12 @@ parameters:
|
|||
flaky_test:
|
||||
type: string
|
||||
default: ''
|
||||
flaky_test_make:
|
||||
type: string
|
||||
default: check-minimal
|
||||
|
||||
flaky_test_runs:
|
||||
type: integer
|
||||
default: 1600
|
||||
skip_flaky_tests:
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
@ -616,17 +618,18 @@ jobs:
|
|||
image_tag:
|
||||
description: 'docker image tag to use'
|
||||
type: string
|
||||
make:
|
||||
description: 'make target'
|
||||
type: string
|
||||
default: check-minimal
|
||||
test:
|
||||
description: 'the test that should be run multiple times'
|
||||
description: 'the test file path that should be run multiple times'
|
||||
type: string
|
||||
default: ''
|
||||
runs:
|
||||
description: 'number of times that the test should be run in total'
|
||||
type: integer
|
||||
default: 1600
|
||||
default: 256
|
||||
skip:
|
||||
description: 'A flag to bypass flaky test detection.'
|
||||
type: boolean
|
||||
default: false
|
||||
docker:
|
||||
- image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>'
|
||||
working_directory: /home/circleci/project
|
||||
|
@ -635,6 +638,33 @@ jobs:
|
|||
- checkout
|
||||
- attach_workspace:
|
||||
at: .
|
||||
- run:
|
||||
name: 'Detect regression tests need to be ran'
|
||||
command: |
|
||||
skip=<< parameters.skip >>
|
||||
if [ "$skip" = true ]; then
|
||||
echo "Skipping flaky test detection."
|
||||
circleci-agent step halt
|
||||
fi
|
||||
|
||||
testForDebugging="<< parameters.test >>"
|
||||
|
||||
if [ -z "$testForDebugging" ]; then
|
||||
detected_changes=$(git diff origin/HEAD --name-only --diff-filter=AM | (grep 'src/test/regress/sql/.*.sql\|src/test/regress/spec/.*.spec' || true))
|
||||
tests=${detected_changes}
|
||||
else
|
||||
tests=$testForDebugging;
|
||||
fi
|
||||
|
||||
if [ -z "$tests" ]; then
|
||||
echo "No test found."
|
||||
circleci-agent step halt
|
||||
else
|
||||
echo "Detected tests " $tests
|
||||
fi
|
||||
|
||||
echo export tests=\""$tests"\" >> "$BASH_ENV"
|
||||
source "$BASH_ENV"
|
||||
- run:
|
||||
name: 'Install Extension'
|
||||
command: |
|
||||
|
@ -651,7 +681,12 @@ jobs:
|
|||
- run:
|
||||
name: 'Run minimal tests'
|
||||
command: |
|
||||
gosu circleci make -C src/test/regress << parameters.make >> EXTRA_TESTS="$(for i in $(seq << parameters.runs >> | circleci tests split); do echo -n '<< parameters.test >> ' ; done)"
|
||||
tests_array=($tests)
|
||||
for test in "${tests_array[@]}"
|
||||
do
|
||||
echo $test
|
||||
gosu circleci src/test/regress/citus_tests/run_test.py --path $test --repeat << parameters.runs >> --use-base-schedule --use-whole-schedule-line
|
||||
done
|
||||
no_output_timeout: 2m
|
||||
- run:
|
||||
name: 'Regressions'
|
||||
|
@ -691,6 +726,7 @@ jobs:
|
|||
- store_artifacts:
|
||||
name: 'Save worker2 log'
|
||||
path: src/test/regress/tmp_check/worker.57638/log
|
||||
|
||||
upload-coverage:
|
||||
docker:
|
||||
- image: 'citus/exttester:<< pipeline.parameters.pg15_version >><< pipeline.parameters.image_suffix >>'
|
||||
|
@ -719,8 +755,8 @@ workflows:
|
|||
pg_major: 15
|
||||
image_tag: '<< pipeline.parameters.pg15_version >>'
|
||||
requires: [build-flaky-15]
|
||||
make: '<< pipeline.parameters.flaky_test_make >>'
|
||||
test: '<< pipeline.parameters.flaky_test >>'
|
||||
runs: << pipeline.parameters.flaky_test_runs >>
|
||||
|
||||
build_and_test:
|
||||
when:
|
||||
|
@ -1209,3 +1245,9 @@ workflows:
|
|||
branches:
|
||||
only:
|
||||
- /tpcc_benchmark\/.*/ # match with tpcc_benchmark/ prefix
|
||||
- test-flakyness:
|
||||
name: 'test-15_flaky'
|
||||
pg_major: 15
|
||||
image_tag: '<< pipeline.parameters.pg15_version >>'
|
||||
requires: [build-15]
|
||||
skip: << pipeline.parameters.skip_flaky_tests >>
|
||||
|
|
|
@ -124,6 +124,17 @@ check-minimal-mx: all
|
|||
$(pg_regress_multi_check) --load-extension=citus \
|
||||
-- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/mx_minimal_schedule $(EXTRA_TESTS)
|
||||
|
||||
# Run the regression driver with a caller-chosen schedule file.
# SCHEDULE is resolved relative to $(citus_abs_srcdir); anything given in
# EXTRA_TESTS is appended after the schedule option.
check-custom-schedule: all
	$(pg_regress_multi_check) \
		--load-extension=citus \
		-- $(MULTI_REGRESS_OPTS) \
		--schedule=$(citus_abs_srcdir)/$(SCHEDULE) \
		$(EXTRA_TESTS)
|
||||
|
||||
# Same as a custom-schedule run, but the regression driver is additionally
# started with --mitmproxy. SCHEDULE is resolved relative to
# $(citus_abs_srcdir); EXTRA_TESTS is appended after the schedule option.
check-failure-custom-schedule: all
	$(pg_regress_multi_check) \
		--load-extension=citus \
		--mitmproxy \
		-- $(MULTI_REGRESS_OPTS) \
		--schedule=$(citus_abs_srcdir)/$(SCHEDULE) \
		$(EXTRA_TESTS)
|
||||
|
||||
# Custom-schedule run through the isolation tester: requires the files in
# $(isolation_test_files), passes --isolationtester to the driver and points
# --inputdir at $(citus_abs_srcdir)/build. SCHEDULE is resolved relative to
# $(citus_abs_srcdir); EXTRA_TESTS is appended after the schedule option.
check-isolation-custom-schedule: all $(isolation_test_files)
	$(pg_regress_multi_check) \
		--load-extension=citus \
		--isolationtester \
		-- $(MULTI_REGRESS_OPTS) \
		--inputdir=$(citus_abs_srcdir)/build \
		--schedule=$(citus_abs_srcdir)/$(SCHEDULE) \
		$(EXTRA_TESTS)
|
||||
|
||||
check-empty: all
|
||||
$(pg_regress_multi_check) --load-extension=citus \
|
||||
|
|
|
@ -29,7 +29,27 @@ Take a look at the makefile for a list of all the testing targets.
|
|||
### Running a specific test
|
||||
|
||||
Often you want to run a specific test and don't want to run everything. You can
|
||||
use one of the following commands to do so:
|
||||
simply use `run_test.py [test_name]` script like below in that case. It detects the test schedule
|
||||
and make target to run the given test.
|
||||
|
||||
```bash
|
||||
src/test/regress/citus_tests/run_test.py multi_utility_warnings
|
||||
```
|
||||
You can pass the `--repeat` (or `-r`) parameter to run the given test multiple times.
|
||||
|
||||
```bash
|
||||
src/test/regress/citus_tests/run_test.py multi_utility_warnings -r 1000
|
||||
```
|
||||
|
||||
To force the script to use base schedules rather than minimal ones, you can
|
||||
pass `-b` or `--use-base-schedule`.
|
||||
|
||||
```bash
|
||||
src/test/regress/citus_tests/run_test.py coordinator_shouldhaveshards -r 1000 --use-base-schedule
|
||||
```
|
||||
|
||||
If you would like to run a specific test on a certain target you can use one
|
||||
of the following commands to do so:
|
||||
|
||||
```bash
|
||||
# If your tests needs almost no setup you can use check-minimal
|
||||
|
@ -42,6 +62,7 @@ make install -j9 && make -C src/test/regress/ check-base EXTRA_TESTS='with_prepa
|
|||
make install -j9 && make -C src/test/regress/ check-base EXTRA_TESTS='add_coordinator coordinator_shouldhaveshards'
|
||||
```
|
||||
|
||||
|
||||
## Normalization
|
||||
|
||||
The output of tests is sadly not completely predictable. Still we want to
|
||||
|
|
|
@ -295,3 +295,15 @@ def initialize_citus_cluster(bindir, datadir, settings, config):
|
|||
if config.add_coordinator_to_metadata:
|
||||
add_coordinator_to_metadata(bindir, config.coordinator_port())
|
||||
config.setup_steps()
|
||||
|
||||
def eprint(*args, **kwargs):
    """Print the given values to standard error.

    Accepts the same positional and keyword arguments as the built-in
    print(); the output stream is always sys.stderr.
    """
    error_stream = sys.stderr
    print(*args, file=error_stream, **kwargs)
|
||||
|
||||
|
||||
def run(command, *args, shell=True, **kwargs):
    """Echo ``command`` to stderr, then execute it via subprocess.run.

    Extra positional and keyword arguments are forwarded to subprocess.run.
    The command is always started with check=True, so a non-zero exit status
    raises subprocess.CalledProcessError. ``shell`` defaults to True.

    Returns the subprocess.CompletedProcess of the finished command.
    """
    eprint(f"+ {command} ")
    completed = subprocess.run(command, *args, check=True, shell=shell, **kwargs)
    return completed
|
||||
|
|
|
@ -0,0 +1,115 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import os
|
||||
import pathlib
|
||||
from glob import glob
|
||||
import argparse
|
||||
import shutil
|
||||
import random
|
||||
import re
|
||||
import common
|
||||
import config
|
||||
|
||||
# Command-line interface: the test is identified either by its name
# (positional argument) or by the path of its test file (--path).
parser = argparse.ArgumentParser()
parser.add_argument(
    "test_name",
    help="Test name (must be included in a schedule.)",
    nargs='?',
)
parser.add_argument(
    "-p",
    "--path",
    required=False,
    help="Relative path for test file (must have a .sql or .spec extension)",
    type=pathlib.Path,
)
parser.add_argument(
    "-r",
    "--repeat",
    # the value is how many times the test line is written to the schedule
    help="Number of times to run the test",
    type=int,
    default=1,
)
parser.add_argument(
    "-b",
    "--use-base-schedule",
    required=False,
    help="Choose base-schedules rather than minimal-schedules",
    action='store_true',
)
parser.add_argument(
    "-w",
    "--use-whole-schedule-line",
    required=False,
    help="Use the whole line found in related schedule",
    action='store_true',
)

# Parsed options as a plain dict; later parts of the script index into it
# (args['path'], args['repeat']).
args = vars(parser.parse_args())

# Parent of the directory that contains this script.
regress_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
test_file_path = args['path']
test_file_name = args['test_name']
use_base_schedule = args['use_base_schedule']
use_whole_schedule_line = args['use_whole_schedule_line']

# At least one way of naming the test is required.
if not (test_file_name or test_file_path):
    print("FATAL: No test given.")
    sys.exit(2)
|
||||
|
||||
|
||||
# When the test was given as a file path, resolve it against the current
# working directory, make sure it exists, and derive the test name from the
# file name.
if test_file_path:
    test_file_path = os.path.join(os.getcwd(), args['path'])

    if not os.path.isfile(test_file_path):
        print(f"ERROR: test file '{test_file_path}' does not exist")
        sys.exit(2)

    test_file_extension = pathlib.Path(test_file_path).suffix
    test_file_name = pathlib.Path(test_file_path).stem

    # Compare against the exact set of supported extensions. A substring test
    # against '.spec.sql' would also accept '', '.s', '.spec.sql', etc.
    if test_file_extension not in ('.spec', '.sql'):
        print(
            "ERROR: Unrecognized test extension. Valid extensions are: .sql and .spec"
        )
        sys.exit(1)
|
||||
|
||||
test_schedule = ''

# find related schedule: the first "*_schedule" file (in sorted order) that
# has a line mentioning the test name as a whole word
test_name_pattern = re.compile(r'\b' + re.escape(test_file_name) + r'\b')
for schedule_file_path in sorted(glob(os.path.join(regress_dir, "*_schedule"))):
    # the context manager closes the schedule file as soon as the scan ends
    with open(schedule_file_path, 'r') as schedule_file:
        matching_line = next(
            (line for line in schedule_file if test_name_pattern.search(line)),
            None,
        )

    if matching_line is None:
        continue

    test_schedule = pathlib.Path(schedule_file_path).stem
    if use_whole_schedule_line:
        # The last line of a file may lack a newline; add one so repeated
        # copies of the line do not run together.
        if not matching_line.endswith('\n'):
            matching_line += '\n'
        test_schedule_line = matching_line
    else:
        test_schedule_line = f"test: {test_file_name}\n"
    break
|
||||
|
||||
# map suitable schedule
# Nothing to run when no schedule mentions the test; this is reported as a
# warning and the script exits with status 0.
if not test_schedule:
    print(
        f"WARNING: Could not find any schedule for '{test_file_name}'"
    )
    sys.exit(0)

# Replace the name of the schedule the test was found in with the schedule
# that will serve as the starting point of the temporary schedule.
if "isolation" in test_schedule:
    test_schedule = 'base_isolation_schedule'
elif "failure" in test_schedule:
    test_schedule = 'failure_base_schedule'
elif "mx" in test_schedule:
    test_schedule = 'mx_base_schedule' if use_base_schedule else 'mx_minimal_schedule'
elif test_schedule not in config.ARBITRARY_SCHEDULE_NAMES:
    test_schedule = 'base_schedule' if use_base_schedule else 'minimal_schedule'
else:
    print(f"WARNING: Arbitrary config schedule ({test_schedule}) is not supported.")
    sys.exit(0)
|
||||
|
||||
# Build a throw-away schedule next to the real ones: start from a copy of the
# selected schedule and append the test line once per requested repetition
# (the appended line may be a parallel group, so tests can run in parallel).
tmp_schedule_file_name = "tmp_schedule_" + str(random.randint(1, 10000))
tmp_schedule_path = os.path.join(regress_dir, tmp_schedule_file_name)
source_schedule_path = os.path.join(regress_dir, test_schedule)
shutil.copy2(source_schedule_path, tmp_schedule_path)
with open(tmp_schedule_path, "a") as tmp_schedule:
    tmp_schedule.write(test_schedule_line * args['repeat'])
|
||||
|
||||
# find suitable make recipe
# The branches must form one if/elif/else chain: with two separate `if`
# statements the trailing `else` would replace the isolation recipe with
# 'check-custom-schedule' whenever the schedule is not a failure schedule.
if "isolation" in test_schedule:
    make_recipe = 'check-isolation-custom-schedule'
elif "failure" in test_schedule:
    make_recipe = 'check-failure-custom-schedule'
else:
    make_recipe = 'check-custom-schedule'
|
||||
|
||||
# prepare command to run tests: the make recipe receives the temporary
# schedule by file name through the SCHEDULE variable
schedule_argument = pathlib.Path(tmp_schedule_path).stem
test_command = f"make -C {regress_dir} {make_recipe} SCHEDULE='{schedule_argument}'"

# run the command; the temporary schedule is deleted whether or not it succeeds
try:
    print(f"Executing.. {test_command}")
    result = common.run(test_command)
finally:
    # remove temp schedule file
    os.remove(tmp_schedule_path)
|
|
@ -17,21 +17,17 @@ tests.
|
|||
|
||||
## Reproducing a flaky test
|
||||
|
||||
### 1. Reproduce a flaky test in CI
|
||||
Before trying to fix the flakyness, it's important that you can reproduce the
|
||||
flaky test. Often it only reproduces in CI, so we have a CI job that can help
|
||||
you reproduce flakyness consistently by running the same test a lot of times.
|
||||
You can configure CI to run this job by setting the `flaky_test` and if
|
||||
necessary the possibly the `flaky_test_make` parameters.
|
||||
You can configure CI to run this job by setting the `flaky_test` parameter:
|
||||
|
||||
```diff
|
||||
flaky_test:
|
||||
type: string
|
||||
- default: ''
|
||||
+ default: 'isolation_shard_rebalancer_progress'
|
||||
flaky_test_make:
|
||||
type: string
|
||||
- default: check-minimal
|
||||
+ default: check-isolation-base
|
||||
```
|
||||
|
||||
Once you get this job to consistently fail in CI, you can continue with the next
|
||||
|
@ -40,6 +36,15 @@ with this CI job, it's almost certainly caused by running it concurrently with
|
|||
other tests. See the "Don't run test in parallel with others" section below on
|
||||
how to fix that.
|
||||
|
||||
### 2. Reproduce a flaky test in local environment
|
||||
To reproduce the flaky tests in your local environment, you can use the `run_test.py [test_name]`
|
||||
script like below.
|
||||
|
||||
```bash
|
||||
src/test/regress/citus_tests/run_test.py isolation_shard_rebalancer_progress -r 1000 --use-base-schedule --use-whole-schedule-line
|
||||
```
|
||||
|
||||
The command above runs the whole schedule line that contains the test name 1000 times, using the related base schedule (rather than a minimal schedule).
|
||||
|
||||
## Easy fixes
|
||||
|
||||
|
|
Loading…
Reference in New Issue