Skip to content

Commit a500942

Browse files
authored
Improve the SingleLineTestResult and integrate with parallel_testsuite (#25761)
The parallel test suite no longer needs its own reporting logic; it can just use the existing Runner/Result, which now supports the color and single-line output formats. By default we now buffer test output and use the single-line test runner. Adding `-v` will disable this and give multi-line non-buffered output. Adding another `-v` will enable logging in the test framework (same as setting EMTEST_VERBOSE)
1 parent 9bc36df commit a500942

File tree

4 files changed

+101
-125
lines changed

4 files changed

+101
-125
lines changed

test/color_runner.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,37 @@ def _restoreStdout(self):
3535
class ProgressMixin:
3636
test_count = 0
3737
progress_counter = 0
38+
fail_count = 0
39+
40+
def addError(self, test, err):
41+
self.fail_count += 1
42+
super().addError(test, err)
43+
44+
def addFailure(self, test, err):
45+
self.fail_count += 1
46+
super().addFailure(test, err)
47+
48+
def addUnexpectedSuccess(self, test):
49+
self.fail_count += 1
50+
super().addUnexpectedSuccess(test)
51+
52+
def writeProgressPrefix(self):
53+
if self.showAll:
54+
base_progress = f'[{self.progress_counter}/{self.test_count}'
55+
rtn = len(base_progress)
56+
progress = with_color(CYAN, base_progress)
57+
if self.fail_count:
58+
fail_str = f'-{self.fail_count}'
59+
progress += with_color(RED, fail_str)
60+
rtn += len(fail_str)
61+
progress += with_color(CYAN, '] ')
62+
self.stream.write(progress)
63+
return rtn + 2
64+
return 0
3865

3966
def startTest(self, test):
40-
assert self.test_count > 0
4167
self.progress_counter += 1
42-
if self.showAll:
43-
progress = f'[{self.progress_counter}/{self.test_count}] '
44-
self.stream.write(with_color(CYAN, progress))
68+
self.writeProgressPrefix()
4569
super().startTest(test)
4670

4771

@@ -62,6 +86,9 @@ class ColorTextRunner(unittest.TextTestRunner):
6286
"""Subclass of TextTestRunner that uses ColorTextResult"""
6387
resultclass = ColorTextResult # type: ignore
6488

89+
def __init__(self, *args, **kwargs):
90+
super().__init__(*args, verbosity=2, **kwargs)
91+
6592
def _makeResult(self):
6693
result = super()._makeResult()
6794
result.test_count = self.test_count

test/parallel_testsuite.py

Lines changed: 29 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313

1414
import browser_common
1515
import common
16+
from color_runner import BufferingMixin
1617
from common import errlog
1718

1819
from tools import emprofile, utils
19-
from tools.colored_logger import CYAN, GREEN, RED, with_color
2020
from tools.utils import WINDOWS
2121

2222
EMTEST_VISUALIZE = os.getenv('EMTEST_VISUALIZE')
@@ -66,6 +66,10 @@ def run_test(args):
6666
test.set_temp_dir(temp_dir)
6767
try:
6868
test(result)
69+
except KeyboardInterrupt:
70+
# In case of KeyboardInterrupt do not emit buffered stderr/stdout
71+
# as we unwind.
72+
result._mirrorOutput = False
6973
finally:
7074
result.elapsed = time.perf_counter() - start_time
7175

@@ -106,43 +110,11 @@ def __init__(self, max_cores, options):
106110
self.max_cores = max_cores
107111
self.max_failures = options.max_failures
108112
self.failing_and_slow_first = options.failing_and_slow_first
109-
self.progress_counter = 0
110113

111114
def addTest(self, test):
112115
super().addTest(test)
113116
test.is_parallel = True
114117

115-
def printOneResult(self, res):
116-
self.progress_counter += 1
117-
progress = f'[{self.progress_counter}/{self.num_tests}] '
118-
119-
if res.test_result == 'success':
120-
msg = 'ok'
121-
color = GREEN
122-
elif res.test_result == 'errored':
123-
msg = 'ERROR'
124-
color = RED
125-
elif res.test_result == 'failed':
126-
msg = 'FAIL'
127-
color = RED
128-
elif res.test_result == 'skipped':
129-
reason = res.skipped[0][1]
130-
msg = f"skipped '{reason}'"
131-
color = CYAN
132-
elif res.test_result == 'unexpected success':
133-
msg = 'unexpected success'
134-
color = RED
135-
elif res.test_result == 'expected failure':
136-
color = RED
137-
msg = 'expected failure'
138-
else:
139-
assert False, f'unhandled test result {res.test_result}'
140-
141-
if res.test_result != 'skipped':
142-
msg += f' ({res.elapsed:.2f}s)'
143-
144-
errlog(f'{with_color(CYAN, progress)}{res.test} ... {with_color(color, msg)}')
145-
146118
def run(self, result):
147119
# The 'spawn' method is used on windows and it can be useful to set this on
148120
# all platforms when debugging multiprocessing issues. Without this we
@@ -151,6 +123,12 @@ def run(self, result):
151123
# issues.
152124
# multiprocessing.set_start_method('spawn')
153125

126+
# No need to worry about stdout/stderr buffering since we are not
127+
# actually running the test here, only setting the results.
128+
buffer = result.buffer
129+
result.buffer = False
130+
131+
result.core_time = 0
154132
tests = self.get_sorted_tests()
155133
self.num_tests = self.countTestCases()
156134
contains_browser_test = any(test.is_browser_test() for test in tests)
@@ -176,23 +154,23 @@ def run(self, result):
176154
allowed_failures_counter = manager.Value('i', self.max_failures)
177155

178156
results = []
179-
args = ((t, allowed_failures_counter, result.buffer) for t in tests)
157+
args = ((t, allowed_failures_counter, buffer) for t in tests)
180158
for res in pool.imap_unordered(run_test, args, chunksize=1):
181159
# results may be None if # of allowed errors was exceeded
182160
# and the harness aborted.
183161
if res:
184162
if res.test_result not in ['success', 'skipped'] and allowed_failures_counter is not None:
185163
# Signal existing multiprocess pool runners so that they can exit early if needed.
186164
allowed_failures_counter.value -= 1
187-
self.printOneResult(res)
165+
res.integrate_result(result)
188166
results.append(res)
189167

190168
# Send a task to each worker to tear down the browser and server. This
191169
# relies on the implementation detail in the worker pool that all workers
192170
# are cycled through once.
193171
num_tear_downs = sum([pool.apply(tear_down, ()) for i in range(use_cores)])
194172
# Assert the assumed behavior above hasn't changed.
195-
if num_tear_downs != use_cores:
173+
if num_tear_downs != use_cores and not buffer:
196174
errlog(f'Expected {use_cores} teardowns, got {num_tear_downs}')
197175

198176
if self.failing_and_slow_first:
@@ -218,7 +196,9 @@ def update_test_results_to(test_name):
218196

219197
json.dump(previous_test_run_results, open(common.PREVIOUS_TEST_RUN_RESULTS_FILE, 'w'), indent=2)
220198

221-
return self.combine_results(result, results)
199+
if EMTEST_VISUALIZE:
200+
self.visualize_results(results)
201+
return result
222202

223203
def get_sorted_tests(self):
224204
"""A list of this suite's tests, sorted with the @is_slow_test tests first.
@@ -237,45 +217,29 @@ def test_key(test):
237217

238218
return sorted(self, key=test_key, reverse=True)
239219

240-
def combine_results(self, result, buffered_results):
241-
errlog('')
242-
errlog('DONE: combining results on main thread')
243-
errlog('')
220+
def visualize_results(self, results):
221+
assert EMTEST_VISUALIZE
244222
# Sort the results back into alphabetical order. Running the tests in
245223
# parallel causes mis-orderings, this makes the results more readable.
246-
results = sorted(buffered_results, key=lambda res: str(res.test))
247-
result.core_time = 0
224+
results = sorted(results, key=lambda res: str(res.test))
248225

249226
# shared data structures are hard in the python multi-processing world, so
250227
# use a file to share the flaky test information across test processes.
251228
flaky_tests = open(common.flaky_tests_log_filename).read().split() if os.path.isfile(common.flaky_tests_log_filename) else []
252229
# Extract only the test short names
253230
flaky_tests = [x.split('.')[-1] for x in flaky_tests]
254231

255-
# The next integrateResult loop will print a *lot* of lines really fast. This
256-
# will cause a Python exception being thrown when attempting to print to
257-
# stderr, if stderr is in nonblocking mode, like it is on Buildbot CI:
258-
# See https://github.com/buildbot/buildbot/issues/8659
259-
# To work around that problem, set stderr to blocking mode before printing.
260-
if not WINDOWS:
261-
os.set_blocking(sys.stderr.fileno(), True)
262-
263232
for r in results:
264-
# Integrate the test result to the global test result object
265-
r.integrateResult(result)
266233
r.log_test_run_for_visualization(flaky_tests)
267234

268235
# Generate the parallel test run visualization
269-
if EMTEST_VISUALIZE:
270-
emprofile.create_profiling_graph(utils.path_from_root('out/graph'))
271-
# Cleanup temp files that were used for the visualization
272-
emprofile.delete_profiler_logs()
273-
utils.delete_file(common.flaky_tests_log_filename)
274-
275-
return result
236+
emprofile.create_profiling_graph(utils.path_from_root('out/graph'))
237+
# Cleanup temp files that were used for the visualization
238+
emprofile.delete_profiler_logs()
239+
utils.delete_file(common.flaky_tests_log_filename)
276240

277241

278-
class BufferedParallelTestResult(unittest.TestResult):
242+
class BufferedParallelTestResult(BufferingMixin, unittest.TestResult):
279243
"""A picklable struct used to communicate test results across processes
280244
"""
281245
def __init__(self):
@@ -293,15 +257,12 @@ def test_short_name(self):
293257
def addDuration(self, test, elapsed):
294258
self.test_duration = elapsed
295259

296-
def integrateResult(self, overall_results):
260+
def integrate_result(self, overall_results):
297261
"""This method gets called on the main thread once the buffered result
298-
is received. It add the buffered result to the overall result."""
262+
is received. It adds the buffered result to the overall result."""
299263
# The exception info objects that we are adding here have already
300264
# been turned into strings so make _exc_info_to_string into a no-op.
301265
overall_results._exc_info_to_string = lambda x, _y: x
302-
# No need to worry about stdout/stderr buffering since are a not
303-
# actually running the test here, only setting the results.
304-
overall_results.buffer = False
305266
overall_results.startTest(self.test)
306267
if self.test_result == 'success':
307268
overall_results.addSuccess(self.test)
@@ -321,7 +282,8 @@ def integrateResult(self, overall_results):
321282
overall_results.core_time += self.test_duration
322283

323284
def log_test_run_for_visualization(self, flaky_tests):
324-
if EMTEST_VISUALIZE and (self.test_result != 'skipped' or self.test_duration > 0.2):
285+
assert EMTEST_VISUALIZE
286+
if self.test_result != 'skipped' or self.test_duration > 0.2:
325287
test_result = self.test_result
326288
if test_result == 'success' and self.test_short_name() in flaky_tests:
327289
test_result = 'warnings'

test/runner.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -429,12 +429,16 @@ def run_tests(options, suites):
429429
testRunner = xmlrunner.XMLTestRunner(output=output, verbosity=2,
430430
failfast=options.failfast)
431431
print('Writing XML test output to ' + os.path.abspath(output.name))
432-
elif options.buffer and options.ansi and not options.verbose:
433-
# When buffering is enabled and ansi color output is available use our nice single-line
432+
elif options.ansi and not options.verbose:
433+
# When not in verbose mode and ansi color output is available use our nice single-line
434434
# result display.
435-
testRunner = SingleLineTestRunner(verbosity=2, failfast=options.failfast)
435+
testRunner = SingleLineTestRunner(failfast=options.failfast)
436436
else:
437-
testRunner = ColorTextRunner(verbosity=2, failfast=options.failfast)
437+
if not options.ansi:
438+
print('using verbose test runner (ANSI not available)')
439+
else:
440+
print('using verbose test runner (verbose output requested)')
441+
testRunner = ColorTextRunner(failfast=options.failfast)
438442

439443
total_core_time = 0
440444
run_start_time = time.perf_counter()
@@ -472,7 +476,9 @@ def parse_args():
472476
'test. Implies --cores=1. Defaults to true when running a single test')
473477
parser.add_argument('--no-clean', action='store_true',
474478
help='Do not clean the temporary directory before each test run')
475-
parser.add_argument('--verbose', '-v', action='store_true')
479+
parser.add_argument('--verbose', '-v', action='count', default=0,
480+
help="Show test stdout and stderr, and don't use the single-line test reporting. "
481+
'Specifying `-v` twice will enable test framework logging (i.e. EMTEST_VERBOSE)')
476482
# TODO: Replace with BooleanOptionalAction once we can depend on python3.9
477483
parser.add_argument('--ansi', action='store_true', default=None)
478484
parser.add_argument('--no-ansi', action='store_false', dest='ansi', default=None)
@@ -492,7 +498,6 @@ def parse_args():
492498
help='Use the default CI browser configuration.')
493499
parser.add_argument('tests', nargs='*')
494500
parser.add_argument('--failfast', action='store_true', help='If true, test run will abort on first failed test.')
495-
parser.add_argument('-b', '--buffer', action='store_true', help='Buffer stdout and stderr during tests')
496501
parser.add_argument('--max-failures', type=int, default=2**31 - 1, help='If specified, test run will abort after N failed tests.')
497502
parser.add_argument('--failing-and-slow-first', action='store_true', help='Run failing tests first, then sorted by slowest first. Combine with --failfast for fast fail-early CI runs.')
498503
parser.add_argument('--start-at', metavar='NAME', help='Skip all tests up until <NAME>')
@@ -587,7 +592,7 @@ def set_env(name, option_value):
587592
set_env('EMTEST_SKIP_SLOW', options.skip_slow)
588593
set_env('EMTEST_ALL_ENGINES', options.all_engines)
589594
set_env('EMTEST_REBASELINE', options.rebaseline)
590-
set_env('EMTEST_VERBOSE', options.verbose)
595+
set_env('EMTEST_VERBOSE', options.verbose > 1)
591596
set_env('EMTEST_CORES', options.cores)
592597
set_env('EMTEST_FORCE64', options.force64)
593598

0 commit comments

Comments
 (0)