mirror of https://github.com/citusdata/citus.git
Flaky Test Detection CI Workflow (#6495)
This PR adds a new CI workflow named ```flaky-test``` to run flaky test detection on newly introduced regression tests. Co-authored-by: Jelte Fennema <github-tech@jeltef.nl>pull/6554/head
parent
190307e8d8
commit
e2a73ad8a8
|
@ -25,10 +25,12 @@ parameters:
|
||||||
flaky_test:
|
flaky_test:
|
||||||
type: string
|
type: string
|
||||||
default: ''
|
default: ''
|
||||||
flaky_test_make:
|
flaky_test_runs:
|
||||||
type: string
|
type: integer
|
||||||
default: check-minimal
|
default: 1600
|
||||||
|
skip_flaky_tests:
|
||||||
|
type: boolean
|
||||||
|
default: false
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
|
@ -616,17 +618,18 @@ jobs:
|
||||||
image_tag:
|
image_tag:
|
||||||
description: 'docker image tag to use'
|
description: 'docker image tag to use'
|
||||||
type: string
|
type: string
|
||||||
make:
|
|
||||||
description: 'make target'
|
|
||||||
type: string
|
|
||||||
default: check-minimal
|
|
||||||
test:
|
test:
|
||||||
description: 'the test that should be run multiple times'
|
description: 'the test file path that should be run multiple times'
|
||||||
type: string
|
type: string
|
||||||
|
default: ''
|
||||||
runs:
|
runs:
|
||||||
description: 'number of times that the test should be run in total'
|
description: 'number of times that the test should be run in total'
|
||||||
type: integer
|
type: integer
|
||||||
default: 1600
|
default: 256
|
||||||
|
skip:
|
||||||
|
description: 'A flag to bypass flaky test detection.'
|
||||||
|
type: boolean
|
||||||
|
default: false
|
||||||
docker:
|
docker:
|
||||||
- image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>'
|
- image: '<< parameters.image >>:<< parameters.image_tag >><< pipeline.parameters.image_suffix >>'
|
||||||
working_directory: /home/circleci/project
|
working_directory: /home/circleci/project
|
||||||
|
@ -635,6 +638,33 @@ jobs:
|
||||||
- checkout
|
- checkout
|
||||||
- attach_workspace:
|
- attach_workspace:
|
||||||
at: .
|
at: .
|
||||||
|
- run:
|
||||||
|
name: 'Detect regression tests need to be ran'
|
||||||
|
command: |
|
||||||
|
# Decide which regression tests the flaky test detection should run and
# export them as $tests (via $BASH_ENV) for the following steps.
skip=<< parameters.skip >>
if [ "$skip" = true ]; then
  echo "Skipping flaky test detection."
  circleci-agent step halt
  # halt only prevents the remaining steps from running; leave this script
  # too, so nothing below is executed or printed.
  exit 0
fi

testForDebugging="<< parameters.test >>"

if [ -z "$testForDebugging" ]; then
  # No test was requested explicitly: use the .sql/.spec regression test files
  # that were added or modified (--diff-filter=AM) compared to origin/HEAD.
  # The dots are escaped and the pattern is anchored so that only real test
  # files match; `|| true` keeps the step alive when grep finds nothing.
  detected_changes=$(git diff origin/HEAD --name-only --diff-filter=AM | (grep -E 'src/test/regress/(sql/.*\.sql|spec/.*\.spec)$' || true))
  tests=${detected_changes}
else
  tests=$testForDebugging;
fi

if [ -z "$tests" ]; then
  echo "No test found."
  circleci-agent step halt
  exit 0
else
  echo "Detected tests " $tests
fi

# Persist the list so that later steps of the job can read $tests.
echo export tests=\""$tests"\" >> "$BASH_ENV"
source "$BASH_ENV"
|
||||||
- run:
|
- run:
|
||||||
name: 'Install Extension'
|
name: 'Install Extension'
|
||||||
command: |
|
command: |
|
||||||
|
@ -651,7 +681,12 @@ jobs:
|
||||||
- run:
|
- run:
|
||||||
name: 'Run minimal tests'
|
name: 'Run minimal tests'
|
||||||
command: |
|
command: |
|
||||||
gosu circleci make -C src/test/regress << parameters.make >> EXTRA_TESTS="$(for i in $(seq << parameters.runs >> | circleci tests split); do echo -n '<< parameters.test >> ' ; done)"
|
tests_array=($tests)
|
||||||
|
for test in "${tests_array[@]}"
|
||||||
|
do
|
||||||
|
echo $test
|
||||||
|
gosu circleci src/test/regress/citus_tests/run_test.py --path $test --repeat << parameters.runs >> --use-base-schedule --use-whole-schedule-line
|
||||||
|
done
|
||||||
no_output_timeout: 2m
|
no_output_timeout: 2m
|
||||||
- run:
|
- run:
|
||||||
name: 'Regressions'
|
name: 'Regressions'
|
||||||
|
@ -691,6 +726,7 @@ jobs:
|
||||||
- store_artifacts:
|
- store_artifacts:
|
||||||
name: 'Save worker2 log'
|
name: 'Save worker2 log'
|
||||||
path: src/test/regress/tmp_check/worker.57638/log
|
path: src/test/regress/tmp_check/worker.57638/log
|
||||||
|
|
||||||
upload-coverage:
|
upload-coverage:
|
||||||
docker:
|
docker:
|
||||||
- image: 'citus/exttester:<< pipeline.parameters.pg15_version >><< pipeline.parameters.image_suffix >>'
|
- image: 'citus/exttester:<< pipeline.parameters.pg15_version >><< pipeline.parameters.image_suffix >>'
|
||||||
|
@ -719,8 +755,8 @@ workflows:
|
||||||
pg_major: 15
|
pg_major: 15
|
||||||
image_tag: '<< pipeline.parameters.pg15_version >>'
|
image_tag: '<< pipeline.parameters.pg15_version >>'
|
||||||
requires: [build-flaky-15]
|
requires: [build-flaky-15]
|
||||||
make: '<< pipeline.parameters.flaky_test_make >>'
|
|
||||||
test: '<< pipeline.parameters.flaky_test >>'
|
test: '<< pipeline.parameters.flaky_test >>'
|
||||||
|
runs: << pipeline.parameters.flaky_test_runs >>
|
||||||
|
|
||||||
build_and_test:
|
build_and_test:
|
||||||
when:
|
when:
|
||||||
|
@ -1209,3 +1245,9 @@ workflows:
|
||||||
branches:
|
branches:
|
||||||
only:
|
only:
|
||||||
- /tpcc_benchmark\/.*/ # match with tpcc_benchmark/ prefix
|
- /tpcc_benchmark\/.*/ # match with tpcc_benchmark/ prefix
|
||||||
|
- test-flakyness:
|
||||||
|
name: 'test-15_flaky'
|
||||||
|
pg_major: 15
|
||||||
|
image_tag: '<< pipeline.parameters.pg15_version >>'
|
||||||
|
requires: [build-15]
|
||||||
|
skip: << pipeline.parameters.skip_flaky_tests >>
|
||||||
|
|
|
@ -124,6 +124,17 @@ check-minimal-mx: all
|
||||||
$(pg_regress_multi_check) --load-extension=citus \
|
$(pg_regress_multi_check) --load-extension=citus \
|
||||||
-- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/mx_minimal_schedule $(EXTRA_TESTS)
|
-- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/mx_minimal_schedule $(EXTRA_TESTS)
|
||||||
|
|
||||||
|
# Run the schedule file $(citus_abs_srcdir)/$(SCHEDULE), followed by any EXTRA_TESTS.
# Usage: make check-custom-schedule SCHEDULE=<schedule file name>
check-custom-schedule: all
	$(if $(strip $(SCHEDULE)),,$(error SCHEDULE is not set. Usage: make $@ SCHEDULE=<schedule file name>))
	$(pg_regress_multi_check) --load-extension=citus \
	-- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS)
|
||||||
|
|
||||||
|
# Same as a plain custom schedule run, but passes --mitmproxy to the test driver.
# Usage: make check-failure-custom-schedule SCHEDULE=<schedule file name>
check-failure-custom-schedule: all
	$(if $(strip $(SCHEDULE)),,$(error SCHEDULE is not set. Usage: make $@ SCHEDULE=<schedule file name>))
	$(pg_regress_multi_check) --load-extension=citus --mitmproxy \
	-- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS)
|
||||||
|
|
||||||
|
# Run the schedule file $(citus_abs_srcdir)/$(SCHEDULE) with --isolationtester,
# reading test inputs from $(citus_abs_srcdir)/build.
# Usage: make check-isolation-custom-schedule SCHEDULE=<schedule file name>
check-isolation-custom-schedule: all $(isolation_test_files)
	$(if $(strip $(SCHEDULE)),,$(error SCHEDULE is not set. Usage: make $@ SCHEDULE=<schedule file name>))
	$(pg_regress_multi_check) --load-extension=citus --isolationtester \
	-- $(MULTI_REGRESS_OPTS) --inputdir=$(citus_abs_srcdir)/build --schedule=$(citus_abs_srcdir)/$(SCHEDULE) $(EXTRA_TESTS)
|
||||||
|
|
||||||
check-empty: all
|
check-empty: all
|
||||||
$(pg_regress_multi_check) --load-extension=citus \
|
$(pg_regress_multi_check) --load-extension=citus \
|
||||||
|
|
|
@ -29,7 +29,27 @@ Take a look at the makefile for a list of all the testing targets.
|
||||||
### Running a specific test
|
### Running a specific test
|
||||||
|
|
||||||
Often you want to run a specific test and don't want to run everything. You can
|
Often you want to run a specific test and don't want to run everything. You can
|
||||||
use one of the following commands to do so:
|
simply use the `run_test.py [test_name]` script as shown below in that case. It detects the test schedule
|
||||||
|
and make target to run the given test.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
src/test/regress/citus_tests/run_test.py multi_utility_warnings
|
||||||
|
```
|
||||||
|
You can pass the `--repeat` or `-r` parameter to run the given test multiple times.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
src/test/regress/citus_tests/run_test.py multi_utility_warnings -r 1000
|
||||||
|
```
|
||||||
|
|
||||||
|
To force the script to use base schedules rather than minimal ones, you can
|
||||||
|
pass `-b` or `--use-base-schedule`.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
src/test/regress/citus_tests/run_test.py coordinator_shouldhaveshards -r 1000 --use-base-schedule
|
||||||
|
```
|
||||||
|
|
||||||
|
If you would like to run a specific test on a certain target you can use one
|
||||||
|
of the following commands to do so:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# If your tests needs almost no setup you can use check-minimal
|
# If your tests needs almost no setup you can use check-minimal
|
||||||
|
@ -42,6 +62,7 @@ make install -j9 && make -C src/test/regress/ check-base EXTRA_TESTS='with_prepa
|
||||||
make install -j9 && make -C src/test/regress/ check-base EXTRA_TESTS='add_coordinator coordinator_shouldhaveshards'
|
make install -j9 && make -C src/test/regress/ check-base EXTRA_TESTS='add_coordinator coordinator_shouldhaveshards'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
## Normalization
|
## Normalization
|
||||||
|
|
||||||
The output of tests is sadly not completely predictable. Still we want to
|
The output of tests is sadly not completely predictable. Still we want to
|
||||||
|
|
|
@ -295,3 +295,15 @@ def initialize_citus_cluster(bindir, datadir, settings, config):
|
||||||
if config.add_coordinator_to_metadata:
|
if config.add_coordinator_to_metadata:
|
||||||
add_coordinator_to_metadata(bindir, config.coordinator_port())
|
add_coordinator_to_metadata(bindir, config.coordinator_port())
|
||||||
config.setup_steps()
|
config.setup_steps()
|
||||||
|
|
||||||
|
def eprint(*args, **kwargs):
    """Print to stderr.

    Takes the same positional and keyword arguments as print(), except
    ``file``, which is always sys.stderr.
    """
    print(*args, file=sys.stderr, **kwargs)


def run(command, *args, check=True, shell=True, **kwargs):
    """Echo ``command`` to stderr and then execute it with subprocess.run.

    command: the command to execute; a string by default, since shell=True.
    check:   when true (the default) a non-zero exit status raises
             subprocess.CalledProcessError; pass check=False to inspect the
             returncode yourself.
    shell:   forwarded to subprocess.run, defaults to True.
    Any other positional or keyword arguments are forwarded to subprocess.run
    unchanged.

    Returns the subprocess.CompletedProcess of the finished command.
    """
    eprint(f"+ {command} ")
    # check is a named parameter rather than a hard-coded argument so that
    # callers can override it without a "multiple values for keyword" error.
    return subprocess.run(command, *args, check=check, shell=shell, **kwargs)
|
||||||
|
|
|
@ -0,0 +1,115 @@
|
||||||
|
#!/usr/bin/env python3
"""Run a single regression test through the schedule and make target it belongs to.

The test is given either by name (positional argument) or by file path
(--path). The script looks the name up in the *_schedule files found in the
parent of this script's directory, maps the schedule it was found in to a
base/minimal schedule, appends the test (or, with --use-whole-schedule-line,
the whole schedule line containing it) --repeat times to a temporary copy of
that schedule and runs the matching check-*-custom-schedule make target.

Exit status: 2 when no test is given or the test file does not exist, 1 for an
unrecognized file extension, 0 (with a WARNING) when the test is in no
schedule or only in an arbitrary config schedule.
"""

import sys
import os
import pathlib
from glob import glob
import argparse
import shutil
import random
import re
import common
import config

args = argparse.ArgumentParser()
args.add_argument("test_name", help="Test name (must be included in a schedule.)", nargs='?')
args.add_argument("-p", "--path", required=False, help="Relative path for test file (must have a .sql or .spec extension)", type=pathlib.Path)
args.add_argument("-r", "--repeat", help="Number of test to run", type=int, default=1)
args.add_argument("-b", "--use-base-schedule", required=False, help="Choose base-schedules rather than minimal-schedules", action='store_true')
args.add_argument("-w", "--use-whole-schedule-line", required=False, help="Use the whole line found in related schedule", action='store_true')

args = vars(args.parse_args())

regress_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
test_file_path = args['path']
test_file_name = args['test_name']
use_base_schedule = args['use_base_schedule']
use_whole_schedule_line = args['use_whole_schedule_line']

if not (test_file_name or test_file_path):
    print("FATAL: No test given.")
    sys.exit(2)


if test_file_path:
    # --path is interpreted relative to the current working directory and
    # takes precedence over a positional test name.
    test_file_path = os.path.join(os.getcwd(), args['path'])

    if not os.path.isfile(test_file_path):
        print(f"ERROR: test file '{test_file_path}' does not exist")
        sys.exit(2)

    test_file_extension = pathlib.Path(test_file_path).suffix
    test_file_name = pathlib.Path(test_file_path).stem

    # Compare against the exact extensions; a substring test on '.spec.sql'
    # would also accept '', '.s' or '.spec.sql'.
    if test_file_extension not in ('.spec', '.sql'):
        print(
            "ERROR: Unrecognized test extension. Valid extensions are: .sql and .spec"
        )
        sys.exit(1)

test_schedule = ''

# find related schedule
# The name is escaped so that regex metacharacters in it are matched literally.
test_name_pattern = re.compile(r'\b' + re.escape(test_file_name) + r'\b')
for schedule_file_path in sorted(glob(os.path.join(regress_dir, "*_schedule"))):
    with open(schedule_file_path, 'r') as schedule_file:
        for schedule_line in schedule_file:
            # A comment that merely mentions the test is not a schedule entry.
            if schedule_line.lstrip().startswith('#'):
                continue
            if test_name_pattern.search(schedule_line):
                test_schedule = pathlib.Path(schedule_file_path).stem
                if use_whole_schedule_line:
                    test_schedule_line = schedule_line
                    # The last line of a file may lack a newline; add one so
                    # that repeated copies do not end up on a single line.
                    if not test_schedule_line.endswith('\n'):
                        test_schedule_line += '\n'
                else:
                    test_schedule_line = f"test: {test_file_name}\n"
                break
    if test_schedule:
        break

# map suitable schedule
if not test_schedule:
    print(
        f"WARNING: Could not find any schedule for '{test_file_name}'"
    )
    sys.exit(0)
elif "isolation" in test_schedule:
    test_schedule = 'base_isolation_schedule'
elif "failure" in test_schedule:
    test_schedule = 'failure_base_schedule'
elif "mx" in test_schedule:
    if use_base_schedule:
        test_schedule = 'mx_base_schedule'
    else:
        test_schedule = 'mx_minimal_schedule'
elif test_schedule in config.ARBITRARY_SCHEDULE_NAMES:
    print(f"WARNING: Arbitrary config schedule ({test_schedule}) is not supported.")
    sys.exit(0)
else:
    if use_base_schedule:
        test_schedule = 'base_schedule'
    else:
        test_schedule = 'minimal_schedule'

# copy base schedule to a temp file and append test_schedule_line
# to be able to run tests in parallel (if test_schedule_line is a parallel group.)
tmp_schedule_path = os.path.join(regress_dir, f"tmp_schedule_{ random.randint(1, 10000)}")
shutil.copy2(os.path.join(regress_dir, test_schedule), tmp_schedule_path)
with open(tmp_schedule_path, "a") as myfile:
    # Start on a fresh line in case the copied schedule does not end with a newline.
    myfile.write("\n")
    for i in range(args['repeat']):
        myfile.write(test_schedule_line)

# find suitable make recipe
# This has to be a single if/elif/else chain: with two independent ifs the
# trailing else would overwrite the isolation recipe with the default one.
if "isolation" in test_schedule:
    make_recipe = 'check-isolation-custom-schedule'
elif "failure" in test_schedule:
    make_recipe = 'check-failure-custom-schedule'
else:
    make_recipe = 'check-custom-schedule'

# prepare command to run tests
test_command = f"make -C {regress_dir} {make_recipe} SCHEDULE='{pathlib.Path(tmp_schedule_path).stem}'"

# run the prepared schedule; the repetition comes from the appended schedule lines
try:
    print(f"Executing.. {test_command}")
    common.run(test_command)
finally:
    # remove temp schedule file
    os.remove(tmp_schedule_path)
|
|
@ -17,21 +17,17 @@ tests.
|
||||||
|
|
||||||
## Reproducing a flaky test
|
## Reproducing a flaky test
|
||||||
|
|
||||||
|
### 1. Reproduce a flaky test in CI
|
||||||
Before trying to fix the flakyness, it's important that you can reproduce the
|
Before trying to fix the flakyness, it's important that you can reproduce the
|
||||||
flaky test. Often it only reproduces in CI, so we have a CI job that can help
|
flaky test. Often it only reproduces in CI, so we have a CI job that can help
|
||||||
you reproduce flakyness consistently by running the same test a lot of times.
|
you reproduce flakyness consistently by running the same test a lot of times.
|
||||||
You can configure CI to run this job by setting the `flaky_test` and if
|
You can configure CI to run this job by setting the `flaky_test` parameter.
|
||||||
necessary the possibly the `flaky_test_make` parameters.
|
|
||||||
|
|
||||||
```diff
|
```diff
|
||||||
flaky_test:
|
flaky_test:
|
||||||
type: string
|
type: string
|
||||||
- default: ''
|
- default: ''
|
||||||
+ default: 'isolation_shard_rebalancer_progress'
|
+ default: 'isolation_shard_rebalancer_progress'
|
||||||
flaky_test_make:
|
|
||||||
type: string
|
|
||||||
- default: check-minimal
|
|
||||||
+ default: check-isolation-base
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Once you get this job to consistently fail in CI, you can continue with the next
|
Once you get this job to consistently fail in CI, you can continue with the next
|
||||||
|
@ -40,6 +36,15 @@ with this CI job, it's almost certainly caused by running it concurrently with
|
||||||
other tests. See the "Don't run test in parallel with others" section below on
|
other tests. See the "Don't run test in parallel with others" section below on
|
||||||
how to fix that.
|
how to fix that.
|
||||||
|
|
||||||
|
### 2. Reproduce a flaky test in local environment
|
||||||
|
To reproduce the flaky tests on your local environment, you can use the `run_test.py [test_name]`
|
||||||
|
script like below.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
src/test/regress/citus_tests/run_test.py isolation_shard_rebalancer_progress -r 1000 --use-base-schedule --use-whole-schedule-line
|
||||||
|
```
|
||||||
|
|
||||||
|
The script above will try to run the whole line in the schedule file containing the test name by using the related base_schedule (rather than a minimal_schedule), 1000 times.
|
||||||
|
|
||||||
## Easy fixes
|
## Easy fixes
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue