Skip to content

Commit 0b127ff

Browse files
authored
Add --max-failures <n> option in test harness (#25569)
Add --max-failures <n> option in test harness to generalize on the --failfast option (which is effectively a '--max-failures 0' mode). This allows running the harness while allowing a couple of failures. I find that when using `--failfast`, aborting on the first error is sometimes a bit too eager, i.e. there might be a flaky test that triggers the abort, or there might be an error that causes more than one test to fail. Examining test harness results, that interpreting a small handful of failures (1,2,3...5) is still doable one-by-one, but when the number of failures grows to, say, > 5, the typically I want to deal with the first couple of errors only, and re-run the harness after that, assuming the rest of the errors would be from the same root cause. So this enables running with, say, `--max-failures 5`, and get info on the first handful of errors, and ignore if any more errors occur. This helps retain the failfast-like behavior while producing a bit more info, and being a bit more tolerant to instabilities.
1 parent 3b36064 commit 0b127ff

File tree

2 files changed

+36
-24
lines changed

2 files changed

+36
-24
lines changed

test/parallel_testsuite.py

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,17 @@ def cap_max_workers_in_pool(max_workers, is_browser):
4444
return max_workers
4545

4646

47-
def run_test(test, failfast_event, lock, progress_counter, num_tests):
48-
# If failfast mode is in effect and any of the tests have failed,
49-
# and then we should abort executing further tests immediately.
50-
if failfast_event and failfast_event.is_set():
47+
def run_test(test, allowed_failures_counter, lock, progress_counter, num_tests):
48+
# If we have exceeded the number of allowed failures during the test run,
49+
# abort executing further tests immediately.
50+
if allowed_failures_counter and allowed_failures_counter.value < 0:
5151
return None
5252

53+
def test_failed():
54+
if allowed_failures_counter is not None:
55+
with lock:
56+
allowed_failures_counter.value -= 1
57+
5358
olddir = os.getcwd()
5459
result = BufferedParallelTestResult(lock, progress_counter, num_tests)
5560
temp_dir = tempfile.mkdtemp(prefix='emtest_')
@@ -61,14 +66,13 @@ def run_test(test, failfast_event, lock, progress_counter, num_tests):
6166
test(result)
6267

6368
# Alert all other multiprocess pool runners that they need to stop executing further tests.
64-
if failfast_event is not None and result.test_result not in ['success', 'skipped']:
65-
failfast_event.set()
69+
if result.test_result not in ['success', 'skipped']:
70+
test_failed()
6671
except unittest.SkipTest as e:
6772
result.addSkip(test, e)
6873
except Exception as e:
6974
result.addError(test, e)
70-
if failfast_event is not None:
71-
failfast_event.set()
75+
test_failed()
7276
# Before attempting to delete the tmp dir make sure the current
7377
# working directory is not within it.
7478
os.chdir(olddir)
@@ -97,7 +101,7 @@ class ParallelTestSuite(unittest.BaseTestSuite):
97101
def __init__(self, max_cores, options):
98102
super().__init__()
99103
self.max_cores = max_cores
100-
self.failfast = options.failfast
104+
self.max_failures = options.max_failures
101105
self.failing_and_slow_first = options.failing_and_slow_first
102106

103107
def addTest(self, test):
@@ -126,17 +130,17 @@ def run(self, result):
126130
initargs=(worker_id_counter, worker_id_lock),
127131
) as pool:
128132
if python_multiprocessing_structures_are_buggy():
129-
# When multuprocessing shared structures are buggy we don't support failfast
133+
# When multiprocessing shared structures are buggy we don't support failfast
130134
# or the progress bar.
131-
failfast_event = progress_counter = lock = None
132-
if self.failfast:
133-
errlog('The version of python being used is not compatible with --failfast')
135+
allowed_failures_counter = progress_counter = lock = None
136+
if self.max_failures < 2**31 - 1:
137+
errlog('The version of python being used is not compatible with --failfast and --max-failures options. See https://github.com/python/cpython/issues/71936')
134138
sys.exit(1)
135139
else:
136-
failfast_event = manager.Event() if self.failfast else None
140+
allowed_failures_counter = manager.Value('i', self.max_failures)
137141
progress_counter = manager.Value('i', 0)
138142
lock = manager.Lock()
139-
results = pool.starmap(run_test, ((t, failfast_event, lock, progress_counter, len(tests)) for t in tests), chunksize=1)
143+
results = pool.starmap(run_test, ((t, allowed_failures_counter, lock, progress_counter, len(tests)) for t in tests), chunksize=1)
140144
# Send a task to each worker to tear down the browser and server. This
141145
# relies on the implementation detail in the worker pool that all workers
142146
# are cycled through once.
@@ -145,9 +149,8 @@ def run(self, result):
145149
if num_tear_downs != use_cores:
146150
errlog(f'Expected {use_cores} teardowns, got {num_tear_downs}')
147151

148-
# Filter out the None results which can occur in failfast mode.
149-
if self.failfast:
150-
results = [r for r in results if r is not None]
152+
# Filter out the None results which can occur if # of allowed errors was exceeded and the harness aborted.
153+
results = [r for r in results if r is not None]
151154

152155
if self.failing_and_slow_first:
153156
previous_test_run_results = common.load_previous_test_run_results()

test/runner.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ def error_on_legacy_suite_names(args):
275275
# order to run the tests in. Generally this is slowest-first to maximize
276276
# parallelization, but if running with fail-fast, then the tests with recent
277277
# known failure frequency are run first, followed by slowest first.
278-
def create_test_run_sorter(failfast):
278+
def create_test_run_sorter(sort_failing_tests_at_front):
279279
previous_test_run_results = common.load_previous_test_run_results()
280280

281281
def read_approx_fail_freq(test_name):
@@ -297,8 +297,8 @@ def sort_tests_failing_and_slowest_first_comparator(x, y):
297297
y = str(y)
298298

299299
# Look at the number of times this test has failed, and order by failures count first
300-
# Only do this in --failfast, if we are looking to fail early. (otherwise sorting by last test run duration is more productive)
301-
if failfast:
300+
# Only do this if we are looking to fail early. (otherwise sorting by last test run duration is more productive)
301+
if sort_failing_tests_at_front:
302302
x_fail_freq = read_approx_fail_freq(x)
303303
y_fail_freq = read_approx_fail_freq(y)
304304
if x_fail_freq != y_fail_freq:
@@ -370,7 +370,7 @@ def load_test_suites(args, modules, options):
370370
tests = flattened_tests(loaded_tests)
371371
suite = suite_for_module(m, tests, options)
372372
if options.failing_and_slow_first:
373-
tests = sorted(tests, key=cmp_to_key(create_test_run_sorter(options.failfast)))
373+
tests = sorted(tests, key=cmp_to_key(create_test_run_sorter(options.max_failures < len(tests) / 2)))
374374
for test in tests:
375375
if not found_start:
376376
# Skip over tests until we find the start
@@ -480,7 +480,8 @@ def parse_args():
480480
parser.add_argument('--browser-auto-config', type=bool, default=True,
481481
help='Use the default CI browser configuration.')
482482
parser.add_argument('tests', nargs='*')
483-
parser.add_argument('--failfast', action='store_true')
483+
parser.add_argument('--failfast', action='store_true', help='If true, test run will abort on first failed test.')
484+
parser.add_argument('--max-failures', type=int, default=2**31 - 1, help='If specified, test run will abort after N failed tests.')
484485
parser.add_argument('--failing-and-slow-first', action='store_true', help='Run failing tests first, then sorted by slowest first. Combine with --failfast for fast fail-early CI runs.')
485486
parser.add_argument('--start-at', metavar='NAME', help='Skip all tests up until <NAME>')
486487
parser.add_argument('--continue', dest='_continue', action='store_true',
@@ -492,7 +493,15 @@ def parse_args():
492493
parser.add_argument('--repeat', type=int, default=1,
493494
help='Repeat each test N times (default: 1).')
494495
parser.add_argument('--bell', action='store_true', help='Play a sound after the test suite finishes.')
495-
return parser.parse_args()
496+
497+
options = parser.parse_args()
498+
499+
if options.failfast:
500+
if options.max_failures != 0:
501+
utils.exit_with_error('--failfast and --max-failures are mutually exclusive!')
502+
options.max_failures = 0
503+
504+
return options
496505

497506

498507
def configure():

0 commit comments

Comments
 (0)