@@ -192,6 +192,11 @@ will be run.
192192 `paths` passed to it cannot contain test files, either because the path doesn't exist or
193193 the path points to a file which is not a test file. Default is `false`.
194194 Can also be set using the `RETESTITEMS_VALIDATE_PATHS` environment variable.
195+ - `timeout_profile_wait::Real=0`: When non-zero, a worker that times out will trigger a CPU profile
196+ for which we will wait `timeout_profile_wait` seconds before terminating the worker.
197+ Zero means no profile will be taken. Can also be set using the `RETESTITEMS_TIMEOUT_PROFILE_WAIT`
198+ environment variable. See the [Profile documentation](https://docs.julialang.org/en/v1/stdlib/Profile/#Triggered-During-Execution)
199+ for more information on triggered profiles. Note you can use `worker_init_expr` to tweak the profile settings on workers.
195200"""
196201function runtests end
197202
@@ -237,14 +242,16 @@ function runtests(
237242 verbose_results:: Bool = (logs != = :issues && isinteractive ()),
238243 test_end_expr:: Expr = Expr (:block ),
239244 validate_paths:: Bool = parse (Bool, get (ENV , " RETESTITEMS_VALIDATE_PATHS" , " false" )),
245+ timeout_profile_wait:: Real = parse (Int, get (ENV , " RETESTITEMS_TIMEOUT_PROFILE_WAIT" , " 0" )),
240246)
241247 nworker_threads = _validated_nworker_threads (nworker_threads)
242248 paths′ = _validated_paths (paths, validate_paths)
243249
244250 logs in LOG_DISPLAY_MODES || throw (ArgumentError (" `logs` must be one of $LOG_DISPLAY_MODES , got $(repr (logs)) " ))
245251 report && logs == :eager && throw (ArgumentError (" `report=true` is not compatible with `logs=:eager`" ))
246252 (0 ≤ memory_threshold ≤ 1 ) || throw (ArgumentError (" `memory_threshold` must be between 0 and 1, got $(repr (memory_threshold)) " ))
247- testitem_timeout > 0 || throw (ArgumentError (" `testitem_timeout` must be a postive number, got $(repr (testitem_timeout)) " ))
253+ testitem_timeout > 0 || throw (ArgumentError (" `testitem_timeout` must be a positive number, got $(repr (testitem_timeout)) " ))
254+ timeout_profile_wait >= 0 || throw (ArgumentError (" `timeout_profile_wait` must be a non-negative number, got $(repr (timeout_profile_wait)) " ))
248255 # If we were given paths but none were valid, then nothing to run.
249256 ! isempty (paths) && isempty (paths′) && return nothing
250257 shouldrun_combined (ti) = shouldrun (ti) && _shouldrun (name, ti. name) && _shouldrun (tags, ti. tags)
@@ -253,13 +260,15 @@ function runtests(
253260 nworkers = max (0 , nworkers)
254261 retries = max (0 , retries)
255262 timeout = ceil (Int, testitem_timeout)
263+ timeout_profile_wait = ceil (Int, timeout_profile_wait)
264+ (timeout_profile_wait > 0 && Sys. iswindows ()) && @warn " CPU profiles on timeout is not supported on Windows, ignoring `timeout_profile_wait`"
256265 debuglvl = Int (debug)
257266 if debuglvl > 0
258267 LoggingExtras. withlevel (LoggingExtras. Debug; verbosity= debuglvl) do
259- _runtests (shouldrun_combined, paths′, nworkers, nworker_threads, worker_init_expr, test_end_expr, timeout, retries, memory_threshold, verbose_results, debuglvl, report, logs)
268+ _runtests (shouldrun_combined, paths′, nworkers, nworker_threads, worker_init_expr, test_end_expr, timeout, retries, memory_threshold, verbose_results, debuglvl, report, logs, timeout_profile_wait )
260269 end
261270 else
262- return _runtests (shouldrun_combined, paths′, nworkers, nworker_threads, worker_init_expr, test_end_expr, timeout, retries, memory_threshold, verbose_results, debuglvl, report, logs)
271+ return _runtests (shouldrun_combined, paths′, nworkers, nworker_threads, worker_init_expr, test_end_expr, timeout, retries, memory_threshold, verbose_results, debuglvl, report, logs, timeout_profile_wait )
263272 end
264273end
265274
273282# By tracking and reusing test environments, we can avoid this issue.
274283const TEST_ENVS = Dict {String, String} ()
275284
276- function _runtests (shouldrun, paths, nworkers:: Int , nworker_threads:: String , worker_init_expr:: Expr , test_end_expr:: Expr , testitem_timeout:: Int , retries:: Int , memory_threshold:: Real , verbose_results:: Bool , debug:: Int , report:: Bool , logs:: Symbol )
285+ function _runtests (shouldrun, paths, nworkers:: Int , nworker_threads:: String , worker_init_expr:: Expr , test_end_expr:: Expr , testitem_timeout:: Int , retries:: Int , memory_threshold:: Real , verbose_results:: Bool , debug:: Int , report:: Bool , logs:: Symbol , timeout_profile_wait :: Int )
277286 # Don't recursively call `runtests` e.g. if we `include` a file which calls it.
278287 # So we ignore the `runtests(...)` call in `test/runtests.jl` when `runtests(...)`
279288 # was called from the command line.
@@ -293,7 +302,7 @@ function _runtests(shouldrun, paths, nworkers::Int, nworker_threads::String, wor
293302 if is_running_test_runtests_jl (proj_file)
294303 # Assume this is `Pkg.test`, so test env already active.
295304 @debugv 2 " Running in current environment `$(Base. active_project ()) `"
296- return _runtests_in_current_env (shouldrun, paths, proj_file, nworkers, nworker_threads, worker_init_expr, test_end_expr, testitem_timeout, retries, memory_threshold, verbose_results, debug, report, logs)
305+ return _runtests_in_current_env (shouldrun, paths, proj_file, nworkers, nworker_threads, worker_init_expr, test_end_expr, testitem_timeout, retries, memory_threshold, verbose_results, debug, report, logs, timeout_profile_wait )
297306 else
298307 @debugv 1 " Activating test environment for `$proj_file `"
299308 orig_proj = Base. active_project ()
@@ -306,7 +315,7 @@ function _runtests(shouldrun, paths, nworkers::Int, nworker_threads::String, wor
306315 testenv = TestEnv. activate ()
307316 TEST_ENVS[proj_file] = testenv
308317 end
309- _runtests_in_current_env (shouldrun, paths, proj_file, nworkers, nworker_threads, worker_init_expr, test_end_expr, testitem_timeout, retries, memory_threshold, verbose_results, debug, report, logs)
318+ _runtests_in_current_env (shouldrun, paths, proj_file, nworkers, nworker_threads, worker_init_expr, test_end_expr, testitem_timeout, retries, memory_threshold, verbose_results, debug, report, logs, timeout_profile_wait )
310319 finally
311320 Base. set_active_project (orig_proj)
312321 end
317326function _runtests_in_current_env (
318327 shouldrun, paths, projectfile:: String , nworkers:: Int , nworker_threads, worker_init_expr:: Expr , test_end_expr:: Expr ,
319328 testitem_timeout:: Int , retries:: Int , memory_threshold:: Real , verbose_results:: Bool , debug:: Int , report:: Bool , logs:: Symbol ,
329+ timeout_profile_wait:: Int ,
320330)
321331 start_time = time ()
322332 proj_name = something (Pkg. Types. read_project (projectfile). name, " " )
@@ -381,7 +391,7 @@ function _runtests_in_current_env(
381391 ti = starting[i]
382392 @spawn begin
383393 with_logger (original_logger) do
384- manage_worker ($ w, $ proj_name, $ testitems, $ ti, $ nworker_threads, $ worker_init_expr, $ test_end_expr, $ testitem_timeout, $ retries, $ memory_threshold, $ verbose_results, $ debug, $ report, $ logs)
394+ manage_worker ($ w, $ proj_name, $ testitems, $ ti, $ nworker_threads, $ worker_init_expr, $ test_end_expr, $ testitem_timeout, $ retries, $ memory_threshold, $ verbose_results, $ debug, $ report, $ logs, $ timeout_profile_wait )
385395 end
386396 end
387397 end
492502
493503function manage_worker (
494504 worker:: Worker , proj_name:: AbstractString , testitems:: TestItems , testitem:: Union{TestItem,Nothing} , nworker_threads, worker_init_expr:: Expr , test_end_expr:: Expr ,
495- default_timeout:: Int , retries:: Int , memory_threshold:: Real , verbose_results:: Bool , debug:: Int , report:: Bool , logs:: Symbol
505+ default_timeout:: Int , retries:: Int , memory_threshold:: Real , verbose_results:: Bool , debug:: Int , report:: Bool , logs:: Symbol , timeout_profile_wait :: Int
496506)
497507 ntestitems = length (testitems. testitems)
498508 run_number = 1
@@ -551,23 +561,35 @@ function manage_worker(
551561 end
552562 catch e
553563 @debugv 2 " Error" exception= e
554- println (DEFAULT_STDOUT[])
555- _print_captured_logs (DEFAULT_STDOUT[], testitem, run_number)
556564 # Handle the exception
557565 if e isa TimeoutException
558- @debugv 1 " Test item $(repr (testitem. name)) timed out. Terminating worker $worker "
559- terminate! (worker)
566+ if timeout_profile_wait > 0
567+ @warn " $worker timed out running test item $(repr (testitem. name)) after $timeout seconds. \
568+ A CPU profile will be triggered on the worker and then it will be terminated."
569+ trigger_profile (worker, timeout_profile_wait, :timeout )
570+ end
571+ terminate! (worker, :timeout )
560572 wait (worker)
573+ # TODO : We print the captured logs after the worker is terminated,
574+ # which means that we include an annoying stacktrace from the worker termination,
575+ # but the profiles don't seem to get flushed properly if we don't do this.
576+ # This is not an issue with eager logs, but when going through a file, this seems to help.
577+ println (DEFAULT_STDOUT[])
578+ _print_captured_logs (DEFAULT_STDOUT[], testitem, run_number)
561579 @error " $worker timed out running test item $(repr (testitem. name)) after $timeout seconds. \
562580 Recording test error."
563581 record_timeout! (testitem, run_number, timeout)
564582 elseif e isa WorkerTerminatedException
583+ println (DEFAULT_STDOUT[])
584+ _print_captured_logs (DEFAULT_STDOUT[], testitem, run_number)
565585 @error " $worker died running test item $(repr (testitem. name)) . \
566586 Recording test error."
567587 record_worker_terminated! (testitem, worker, run_number)
568588 else
569589 # We don't expect any other kind of error, so rethrow, which will propagate
570590 # back up to the main coordinator task and throw to the user
591+ println (DEFAULT_STDOUT[])
592+ _print_captured_logs (DEFAULT_STDOUT[], testitem, run_number)
571593 rethrow ()
572594 end
573595 # Handle retries
0 commit comments