Skip to content

Commit e65dc9f

Browse files
authored
Merge pull request #634 from JuliaGPU:vc/ptr
Use ParallelTestRunner.jl
2 parents e66460b + 2957e33 commit e65dc9f

File tree

3 files changed

+22
-439
lines changed

3 files changed

+22
-439
lines changed

test/Project.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
[deps]
22
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
3-
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
4-
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
53
JLArrays = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb"
64
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
75
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
86
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
7+
ParallelTestRunner = "d3525ed8-44d0-4b2c-a655-542cee43accc"
98
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
10-
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
11-
REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
129
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
1310
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
1411
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
12+
13+
[compat]
14+
ParallelTestRunner = "1"

test/runtests.jl

Lines changed: 18 additions & 350 deletions
Original file line numberDiff line numberDiff line change
@@ -1,362 +1,30 @@
1-
using Distributed
2-
using Dates
3-
import REPL
4-
using Printf: @sprintf
1+
using ParallelTestRunner: runtests
2+
import GPUArrays
53

6-
# parse some command-line arguments
7-
function extract_flag!(args, flag, default=nothing)
8-
for f in args
9-
if startswith(f, flag)
10-
# Check if it's just `--flag` or if it's `--flag=foo`
11-
if f != flag
12-
val = split(f, '=')[2]
13-
if default !== nothing && !(typeof(default) <: AbstractString)
14-
val = parse(typeof(default), val)
15-
end
16-
else
17-
val = default
18-
end
4+
include("testsuite.jl")
195

20-
# Drop this value from our args
21-
filter!(x -> x != f, args)
22-
return (true, val)
23-
end
24-
end
25-
return (false, default)
26-
end
27-
do_help, _ = extract_flag!(ARGS, "--help")
28-
if do_help
29-
println("""
30-
Usage: runtests.jl [--help] [--list] [--jobs=N] [TESTS...]
31-
32-
--help Show this text.
33-
--list List all available tests.
34-
--quickfail Fail the entire run as soon as a single test errored.
35-
--jobs=N Launch `N` processes to perform tests (default: Sys.CPU_THREADS).
36-
37-
Remaining arguments filter the tests that will be executed.""")
38-
exit(0)
39-
end
40-
_, jobs = extract_flag!(ARGS, "--jobs", Sys.CPU_THREADS)
41-
do_quickfail, _ = extract_flag!(ARGS, "--quickfail")
6+
const init_code = quote
7+
using Test, JLArrays
428

43-
include("setup.jl") # make sure everything is precompiled
9+
include("testsuite.jl")
4410

45-
@info "Running $jobs tests in parallel. If this is too many, specify the `--jobs` argument to the tests, or set the JULIA_CPU_THREADS environment variable."
46-
47-
# choose tests
48-
const tests = []
49-
const test_runners = Dict()
50-
for AT in (JLArray, Array), name in keys(TestSuite.tests)
51-
push!(tests, "$(AT)/$name")
52-
test_runners["$(AT)/$name"] = ()->TestSuite.tests[name](AT)
53-
end
54-
unique!(tests)
55-
56-
# parse some more command-line arguments
57-
## --list to list all available tests
58-
do_list, _ = extract_flag!(ARGS, "--list")
59-
if do_list
60-
println("Available tests:")
61-
for test in sort(tests)
62-
println(" - $test")
11+
# Disable Float16-related tests until JuliaGPU/KernelAbstractions#600 is resolved
12+
if isdefined(JLArrays.KernelAbstractions, :POCL)
13+
TestSuite.supported_eltypes(::Type{<:JLArray}) =
14+
setdiff(TestSuite.supported_eltypes(), [Float16, ComplexF16])
6315
end
64-
exit(0)
65-
end
66-
## no options should remain
67-
optlike_args = filter(startswith("-"), ARGS)
68-
if !isempty(optlike_args)
69-
error("Unknown test options `$(join(optlike_args, " "))` (try `--help` for usage instructions)")
70-
end
71-
## the remaining args filter tests
72-
if !isempty(ARGS)
73-
filter!(tests) do test
74-
any(arg->startswith(test, arg), ARGS)
75-
end
7616
end
7717

78-
# add workers
79-
const test_exeflags = Base.julia_cmd()
80-
filter!(test_exeflags.exec) do c
81-
return !(startswith(c, "--depwarn") || startswith(c, "--check-bounds"))
82-
end
83-
push!(test_exeflags.exec, "--check-bounds=yes")
84-
push!(test_exeflags.exec, "--startup-file=no")
85-
push!(test_exeflags.exec, "--depwarn=yes")
86-
push!(test_exeflags.exec, "--project=$(Base.active_project())")
87-
const test_exename = popfirst!(test_exeflags.exec)
88-
function addworker(X; kwargs...)
89-
withenv("JULIA_NUM_THREADS" => 1, "OPENBLAS_NUM_THREADS" => 1) do
90-
procs = addprocs(X; exename=test_exename, exeflags=test_exeflags, kwargs...)
91-
@everywhere procs include($(joinpath(@__DIR__, "setup.jl")))
92-
procs
93-
end
94-
end
95-
addworker(min(jobs, length(tests)))
96-
97-
# pretty print information about gc and mem usage
98-
testgroupheader = "Test"
99-
workerheader = "(Worker)"
100-
name_align = maximum([textwidth(testgroupheader) + textwidth(" ") +
101-
textwidth(workerheader); map(x -> textwidth(x) +
102-
3 + ndigits(nworkers()), tests)])
103-
elapsed_align = textwidth("Time (s)")
104-
gc_align = textwidth("GC (s)")
105-
percent_align = textwidth("GC %")
106-
alloc_align = textwidth("Alloc (MB)")
107-
rss_align = textwidth("RSS (MB)")
108-
printstyled(" "^(name_align + textwidth(testgroupheader) - 3), " | ")
109-
printstyled(" | ---------------- CPU ---------------- |\n", color=:white)
110-
printstyled(testgroupheader, color=:white)
111-
printstyled(lpad(workerheader, name_align - textwidth(testgroupheader) + 1), " | ", color=:white)
112-
printstyled("Time (s) | GC (s) | GC % | Alloc (MB) | RSS (MB) |\n", color=:white)
113-
print_lock = stdout isa Base.LibuvStream ? stdout.lock : ReentrantLock()
114-
if stderr isa Base.LibuvStream
115-
stderr.lock = print_lock
116-
end
117-
function print_testworker_stats(test, wrkr, resp)
118-
@nospecialize resp
119-
lock(print_lock)
120-
try
121-
printstyled(test, color=:white)
122-
printstyled(lpad("($wrkr)", name_align - textwidth(test) + 1, " "), " | ", color=:white)
123-
time_str = @sprintf("%7.2f",resp[2])
124-
printstyled(lpad(time_str, elapsed_align, " "), " | ", color=:white)
125-
126-
cpu_gc_str = @sprintf("%5.2f", resp[4])
127-
printstyled(lpad(cpu_gc_str, gc_align, " "), " | ", color=:white)
128-
# since there may be quite a few digits in the percentage,
129-
# the left-padding here is less to make sure everything fits
130-
cpu_percent_str = @sprintf("%4.1f", 100 * resp[4] / resp[2])
131-
printstyled(lpad(cpu_percent_str, percent_align, " "), " | ", color=:white)
132-
cpu_alloc_str = @sprintf("%5.2f", resp[3] / 2^20)
133-
printstyled(lpad(cpu_alloc_str, alloc_align, " "), " | ", color=:white)
134-
135-
cpu_rss_str = @sprintf("%5.2f", resp[6] / 2^20)
136-
printstyled(lpad(cpu_rss_str, rss_align, " "), " |\n", color=:white)
137-
finally
138-
unlock(print_lock)
139-
end
140-
end
141-
global print_testworker_started = (name, wrkr)->begin
142-
end
143-
function print_testworker_errored(name, wrkr)
144-
lock(print_lock)
145-
try
146-
printstyled(name, color=:red)
147-
printstyled(lpad("($wrkr)", name_align - textwidth(name) + 1, " "), " |",
148-
" "^elapsed_align, " failed at $(now())\n", color=:red)
149-
finally
150-
unlock(print_lock)
151-
end
18+
custom_tests = Dict{String, Expr}()
19+
for AT in (:JLArray, :Array), name in keys(TestSuite.tests)
20+
custom_tests["$(AT)/$name"] = :(TestSuite.tests[$name]($AT))
15221
end
15322

154-
# run tasks
155-
t0 = now()
156-
results = []
157-
all_tasks = Task[]
158-
all_tests = copy(tests)
159-
try
160-
# Monitor stdin and kill this task on ^C
161-
# but don't do this on Windows, because it may deadlock in the kernel
162-
t = current_task()
163-
running_tests = Dict{String, DateTime}()
164-
if !Sys.iswindows() && isa(stdin, Base.TTY)
165-
stdin_monitor = @async begin
166-
term = REPL.Terminals.TTYTerminal("xterm", stdin, stdout, stderr)
167-
try
168-
REPL.Terminals.raw!(term, true)
169-
while true
170-
c = read(term, Char)
171-
if c == '\x3'
172-
Base.throwto(t, InterruptException())
173-
break
174-
elseif c == '?'
175-
println("Currently running: ")
176-
tests = sort(collect(running_tests), by=x->x[2])
177-
foreach(tests) do (test, date)
178-
println(test, " (running for ", round(now()-date, Minute), ")")
179-
end
180-
end
181-
end
182-
catch e
183-
isa(e, InterruptException) || rethrow()
184-
finally
185-
REPL.Terminals.raw!(term, false)
186-
end
187-
end
188-
end
189-
@sync begin
190-
function recycle_worker(p)
191-
rmprocs(p, waitfor=30)
192-
return nothing
193-
end
194-
195-
for p in workers()
196-
@async begin
197-
push!(all_tasks, current_task())
198-
while length(tests) > 0
199-
test = popfirst!(tests)
200-
201-
# sometimes a worker failed, and we need to spawn a new one
202-
if p === nothing
203-
p = addworker(1)[1]
204-
end
205-
wrkr = p
206-
207-
local resp
208-
209-
# run the test
210-
running_tests[test] = now()
211-
try
212-
resp = remotecall_fetch(runtests, wrkr, test_runners[test], test)
213-
catch e
214-
isa(e, InterruptException) && return
215-
resp = Any[e]
216-
end
217-
delete!(running_tests, test)
218-
push!(results, (test, resp))
219-
220-
# act on the results
221-
if resp[1] isa Exception
222-
print_testworker_errored(test, wrkr)
223-
do_quickfail && Base.throwto(t, InterruptException())
224-
225-
# the worker encountered some failure, recycle it
226-
# so future tests get a fresh environment
227-
p = recycle_worker(p)
228-
else
229-
print_testworker_stats(test, wrkr, resp)
230-
231-
cpu_rss = resp[6]
232-
if haskey(ENV, "CI") && cpu_rss > 3*2^30
233-
# XXX: despite collecting garbage
234-
# after each test, we are leaking CPU memory somewhere.
235-
# this is a problem on CI, where we don't have much RAM.
236-
# work around this by periodically recycling the worker.
237-
p = recycle_worker(p)
238-
end
239-
end
240-
end
241-
242-
if p !== nothing
243-
recycle_worker(p)
244-
end
245-
end
246-
end
247-
end
248-
catch e
249-
isa(e, InterruptException) || rethrow()
250-
# If the test suite was merely interrupted, still print the
251-
# summary, which can be useful to diagnose what's going on
252-
foreach(task -> begin
253-
istaskstarted(task) || return
254-
istaskdone(task) && return
255-
try
256-
schedule(task, InterruptException(); error=true)
257-
catch ex
258-
@error "InterruptException" exception=ex,catch_backtrace()
259-
end
260-
end, all_tasks)
261-
for t in all_tasks
262-
# NOTE: we can't just wait, but need to discard the exception,
263-
# because the throwto for --quickfail also kills the worker.
264-
try
265-
wait(t)
266-
catch e
267-
showerror(stderr, e)
268-
end
269-
end
270-
finally
271-
if @isdefined stdin_monitor
272-
schedule(stdin_monitor, InterruptException(); error=true)
23+
function test_filter(test)
24+
if startswith(test, "testsuite")
25+
return false
27326
end
27+
return true
27428
end
275-
t1 = now()
276-
elapsed = canonicalize(Dates.CompoundPeriod(t1-t0))
277-
println("Testing finished in $elapsed")
27829

279-
# construct a testset to render the test results
280-
o_ts = Test.DefaultTestSet("Overall")
281-
function with_testset(f, testset)
282-
@static if VERSION >= v"1.13.0-DEV.1044"
283-
Test.@with_testset testset f()
284-
else
285-
Test.push_testset(testset)
286-
try
287-
f()
288-
finally
289-
Test.pop_testset()
290-
end
291-
end
292-
end
293-
with_testset(o_ts) do
294-
completed_tests = Set{String}()
295-
for (testname, (resp,)) in results
296-
push!(completed_tests, testname)
297-
if isa(resp, Test.DefaultTestSet)
298-
with_testset(resp) do
299-
Test.record(o_ts, resp)
300-
end
301-
elseif isa(resp, Tuple{Int,Int})
302-
fake = Test.DefaultTestSet(testname)
303-
for i in 1:resp[1]
304-
Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, nothing))
305-
end
306-
for i in 1:resp[2]
307-
Test.record(fake, Test.Broken(:test, nothing))
308-
end
309-
with_testset(fake) do
310-
Test.record(o_ts, fake)
311-
end
312-
elseif isa(resp, RemoteException) && isa(resp.captured.ex, Test.TestSetException)
313-
println("Worker $(resp.pid) failed running test $(testname):")
314-
Base.showerror(stdout, resp.captured)
315-
println()
316-
fake = Test.DefaultTestSet(testname)
317-
for i in 1:resp.captured.ex.pass
318-
Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, nothing))
319-
end
320-
for i in 1:resp.captured.ex.broken
321-
Test.record(fake, Test.Broken(:test, nothing))
322-
end
323-
for t in resp.captured.ex.errors_and_fails
324-
Test.record(fake, t)
325-
end
326-
with_testset(fake) do
327-
Test.record(o_ts, fake)
328-
end
329-
else
330-
if !isa(resp, Exception)
331-
resp = ErrorException(string("Unknown result type : ", typeof(resp)))
332-
end
333-
# If this test raised an exception that is not a remote testset exception,
334-
# i.e. not a RemoteException capturing a TestSetException that means
335-
# the test runner itself had some problem, so we may have hit a segfault,
336-
# deserialization errors or something similar. Record this testset as Errored.
337-
fake = Test.DefaultTestSet(testname)
338-
Test.record(fake, Test.Error(:nontest_error, testname, nothing, Base.ExceptionStack([(exception=resp,backtrace=[])]), LineNumberNode(1)))
339-
with_testset(fake) do
340-
Test.record(o_ts, fake)
341-
end
342-
end
343-
end
344-
for test in tests
345-
(test in completed_tests) && continue
346-
fake = Test.DefaultTestSet(test)
347-
Test.record(fake, Test.Error(:test_interrupted, test, nothing, Base.ExceptionStack([(exception="skipped",backtrace=[])]), LineNumberNode(1)))
348-
with_testset(fake) do
349-
Test.record(o_ts, fake)
350-
end
351-
end
352-
end
353-
println()
354-
Test.print_test_results(o_ts, 1)
355-
if (VERSION >= v"1.13.0-DEV.1037" && !Test.anynonpass(o_ts)) ||
356-
(VERSION < v"1.13.0-DEV.1037" && !o_ts.anynonpass)
357-
println(" \033[32;1mSUCCESS\033[0m")
358-
else
359-
println(" \033[31;1mFAILURE\033[0m\n")
360-
Test.print_test_errors(o_ts)
361-
throw(Test.FallbackTestSetException("Test run finished with errors"))
362-
end
30+
runtests(GPUArrays, ARGS; init_code, custom_tests, test_filter)

0 commit comments

Comments
 (0)