mirror of
https://github.com/micropython/micropython.git
synced 2026-04-06 00:50:15 +02:00
tests/run-tests.py: Ignore known-flaky test failures.
Reclassify failures of tests listed in flaky_tests_to_ignore as "ignored" instead of retrying them. Ignored tests still run and their output is reported, but they don't affect the exit code. The ci.sh --exclude lists for these tests are removed so they run normally.

Signed-off-by: Andrew Leech <andrew.leech@planet-innovation.com>
This commit is contained in:
committed by
Damien George
parent
ad054fc520
commit
43199278eb
@@ -31,6 +31,7 @@ from test_utils import (
|
||||
get_test_instance,
|
||||
prepare_script_for_target,
|
||||
create_test_report,
|
||||
FLAKY_REASON_PREFIX,
|
||||
)
|
||||
|
||||
RV32_ARCH_FLAGS = {
|
||||
@@ -193,6 +194,23 @@ platform_tests_to_skip = {
|
||||
),
|
||||
}
|
||||
|
||||
# Tests with known intermittent failures. These tests still run, but failures
|
||||
# are reclassified as "ignored" instead of "fail" so they don't affect the CI
|
||||
# exit code. Paths are relative to the tests/ directory (must match test_file
|
||||
# format used by run_one_test, which normalises backslashes to forward slashes).
|
||||
#
|
||||
# Values are (reason, platforms) tuples where platforms is None (all platforms)
|
||||
# or a tuple of sys.platform strings to restrict ignoring to those platforms.
|
||||
flaky_tests_to_ignore = {
|
||||
"thread/thread_gc1.py": ("GC race condition", None),
|
||||
"thread/stress_schedule.py": ("intermittent crash under QEMU", None),
|
||||
"thread/stress_recurse.py": ("stack overflow under emulation", None),
|
||||
"thread/stress_heap.py": ("flaky on macOS", ("darwin",)),
|
||||
"cmdline/repl_lock.py": ("REPL timing under QEMU", None),
|
||||
"cmdline/repl_cont.py": ("REPL escaping on macOS", ("darwin",)),
|
||||
"extmod/time_time_ns.py": ("CI runner clock precision", None),
|
||||
}
|
||||
|
||||
# These tests don't test float explicitly but rather use it to perform the test.
|
||||
tests_requiring_float = (
|
||||
"extmod/asyncio_basic.py",
|
||||
@@ -1062,6 +1080,16 @@ def run_tests(pyb, tests, args, result_dir, num_threads=1):
|
||||
print(line)
|
||||
sys.exit(2)
|
||||
|
||||
# Reclassify known-flaky test failures as ignored.
|
||||
# Safe to mutate: thread pool has joined.
|
||||
results = test_results.value
|
||||
for i, r in enumerate(results):
|
||||
if r[1] == "fail":
|
||||
reason, platforms = flaky_tests_to_ignore.get(r[0], (None, None))
|
||||
if reason is not None:
|
||||
if platforms is None or sys.platform in platforms:
|
||||
results[i] = (r[0], "ignored", "{}: {}".format(FLAKY_REASON_PREFIX, reason))
|
||||
|
||||
# Return test results.
|
||||
return test_results.value, testcase_count.value
|
||||
|
||||
|
||||
@@ -22,6 +22,9 @@ def base_path(*p):
|
||||
sys.path.append(base_path("../tools"))
|
||||
import pyboard
|
||||
|
||||
# Prefix used by run-tests.py to tag known-flaky test results.
|
||||
FLAKY_REASON_PREFIX = "flaky"
|
||||
|
||||
# File with the test results.
|
||||
_RESULTS_FILE = "_results.json"
|
||||
|
||||
@@ -313,11 +316,12 @@ def create_test_report(args, test_results, testcase_count=None):
|
||||
r for r in test_results if r[1] == "skip" and r[2] == "too large"
|
||||
)
|
||||
failed_tests = list(r for r in test_results if r[1] == "fail")
|
||||
ignored_tests = list(r for r in test_results if r[1] == "ignored")
|
||||
dry_run = getattr(args, "dry_run", False)
|
||||
if dry_run:
|
||||
found_tests = list(r for r in test_results if r[1] == "found")
|
||||
|
||||
num_tests_performed = len(passed_tests) + len(failed_tests)
|
||||
num_tests_performed = len(passed_tests) + len(failed_tests) + len(ignored_tests)
|
||||
|
||||
if dry_run:
|
||||
print("{} tests found".format(len(found_tests)))
|
||||
@@ -329,6 +333,14 @@ def create_test_report(args, test_results, testcase_count=None):
|
||||
|
||||
print("{} tests passed".format(len(passed_tests)))
|
||||
|
||||
if len(ignored_tests) > 0:
|
||||
print(
|
||||
"{} tests had known-flaky failures (ignored): {}".format(
|
||||
len(ignored_tests),
|
||||
" ".join("{} [{}]".format(t[0], t[2]) for t in ignored_tests),
|
||||
)
|
||||
)
|
||||
|
||||
if len(skipped_tests) > 0:
|
||||
print(
|
||||
"{} tests skipped: {}".format(
|
||||
@@ -365,6 +377,8 @@ def create_test_report(args, test_results, testcase_count=None):
|
||||
"results": list(test for test in test_results),
|
||||
# A list of failed tests. This is deprecated, use the "results" above instead.
|
||||
"failed_tests": [test[0] for test in failed_tests],
|
||||
# A list of known-flaky tests whose failures were ignored.
|
||||
"ignored_tests": [test[0] for test in ignored_tests],
|
||||
},
|
||||
f,
|
||||
default=to_json,
|
||||
|
||||
18
tools/ci.sh
18
tools/ci.sh
@@ -905,9 +905,7 @@ function ci_unix_macos_run_tests {
|
||||
# Issues with macOS tests:
|
||||
# - float_parse and float_parse_doubleprec parse/print floats out by a few mantissa bits
|
||||
# - ffi_callback crashes for an unknown reason
|
||||
# - thread/stress_heap.py is flaky
|
||||
# - thread/thread_gc1.py is flaky
|
||||
(cd tests && MICROPY_MICROPYTHON=../ports/unix/build-standard/micropython ./run-tests.py --exclude '(float_parse|float_parse_doubleprec|ffi_callback|thread/stress_heap|thread/thread_gc1).py')
|
||||
(cd tests && MICROPY_MICROPYTHON=../ports/unix/build-standard/micropython ./run-tests.py --exclude '(float_parse|float_parse_doubleprec|ffi_callback).py')
|
||||
}
|
||||
|
||||
function ci_unix_qemu_mips_setup {
|
||||
@@ -927,10 +925,8 @@ function ci_unix_qemu_mips_build {
|
||||
function ci_unix_qemu_mips_run_tests {
|
||||
# Issues with MIPS tests:
|
||||
# - thread/stress_aes.py takes around 90 seconds
|
||||
# - thread/stress_recurse.py is flaky
|
||||
# - thread/thread_gc1.py is flaky
|
||||
file ./ports/unix/build-coverage/micropython
|
||||
(cd tests && MICROPY_MICROPYTHON=../ports/unix/build-coverage/micropython MICROPY_TEST_TIMEOUT=180 ./run-tests.py --exclude 'thread/stress_recurse.py|thread/thread_gc1.py')
|
||||
(cd tests && MICROPY_MICROPYTHON=../ports/unix/build-coverage/micropython MICROPY_TEST_TIMEOUT=180 ./run-tests.py)
|
||||
}
|
||||
|
||||
function ci_unix_qemu_arm_setup {
|
||||
@@ -950,10 +946,8 @@ function ci_unix_qemu_arm_build {
|
||||
function ci_unix_qemu_arm_run_tests {
|
||||
# Issues with ARM tests:
|
||||
# - thread/stress_aes.py takes around 70 seconds
|
||||
# - thread/stress_recurse.py is flaky
|
||||
# - thread/thread_gc1.py is flaky
|
||||
file ./ports/unix/build-coverage/micropython
|
||||
(cd tests && MICROPY_MICROPYTHON=../ports/unix/build-coverage/micropython MICROPY_TEST_TIMEOUT=90 ./run-tests.py --exclude 'thread/stress_recurse.py|thread/thread_gc1.py')
|
||||
(cd tests && MICROPY_MICROPYTHON=../ports/unix/build-coverage/micropython MICROPY_TEST_TIMEOUT=90 ./run-tests.py)
|
||||
}
|
||||
|
||||
function ci_unix_qemu_riscv64_setup {
|
||||
@@ -976,12 +970,10 @@ function ci_unix_qemu_riscv64_build {
|
||||
|
||||
function ci_unix_qemu_riscv64_run_tests {
|
||||
# Issues with RISCV-64 tests:
|
||||
# - thread/stress_aes.py takes around 180 seconds
|
||||
# - thread/stress_recurse.py is flaky
|
||||
# - thread/thread_gc1.py is flaky
|
||||
# - thread/stress_aes.py takes around 180 seconds, so exclude it to keep execution time down
|
||||
file ./ports/unix/build-coverage/micropython
|
||||
pushd tests
|
||||
MICROPY_MICROPYTHON=../ports/unix/build-coverage/micropython MICROPY_TEST_TIMEOUT=200 ./run-tests.py --exclude 'thread/stress_recurse.py|thread/thread_gc1.py'
|
||||
MICROPY_MICROPYTHON=../ports/unix/build-coverage/micropython ./run-tests.py --exclude 'thread/stress_aes.py'
|
||||
MICROPY_MICROPYTHON=../ports/unix/build-coverage/micropython ./run-natmodtests.py extmod/btree*.py extmod/deflate*.py extmod/framebuf*.py extmod/heapq*.py extmod/random_basic*.py extmod/re*.py
|
||||
popd
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user