@@ -9,6 +9,7 @@ gcscrub() = (GC.gc(); GC.gc(); GC.gc(); GC.gc())
99
# NOTE(review): this file is a commit-diff rendering; the leading "NNNN" digits
# and the "12+" marker below are artifacts of the diff view, not Julia tokens.
# Benchmark bundles the generated sampling closures with their run parameters:
#   samplefunc      — generated sampler; called as samplefunc(quote_vals, params)
#                     and (post-change) returns a tuple whose last element is the
#                     benchmarked expression's return value (see the _run hunk).
#   linux_perf_func — added by this commit; called as
#                     linux_perf_func(quote_vals, params) and its result is
#                     stored in trial.linux_perf_stats (see the _run hunk).
#   quote_vals      — quoted values forwarded as the first argument to both
#                     generated functions.
#   params          — Parameters controlling sample count, time budget, etc.
1010mutable struct Benchmark
1111    samplefunc
12+    linux_perf_func
1213    quote_vals
1314    params:: Parameters
1415end
@@ -106,15 +107,21 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", kwargs...)
106107 start_time = Base. time ()
107108 trial = Trial (params)
108109 params. gcsample && gcscrub ()
109- trial_contents = b. samplefunc (b. quote_vals, params)
110- push! (trial, trial_contents )
111- return_val = trial_contents . return_val
110+ s = b. samplefunc (b. quote_vals, params)
111+ push! (trial, s[ 1 : ( end - 1 )] . .. )
112+ return_val = s[ end ]
112113 iters = 2
113114 while (Base. time () - start_time) < params. seconds && iters ≤ params. samples
114115 params. gcsample && gcscrub ()
115- push! (trial, b. samplefunc (b. quote_vals, params))
116+ push! (trial, b. samplefunc (b. quote_vals, params)[ 1 : ( end - 1 )] . .. )
116117 iters += 1
117118 end
119+
120+ if p. experimental_enable_linux_perf
121+ params. gcsample && gcscrub ()
122+ trial. linux_perf_stats = b. linux_perf_func (b. quote_vals, params)
123+ end
124+
118125 return trial, return_val
119126end
120127
@@ -178,7 +185,7 @@ function _lineartrial(b::Benchmark, p::Parameters=b.params; maxevals=RESOLUTION,
178185 for evals in eachindex (estimates)
179186 params. gcsample && gcscrub ()
180187 params. evals = evals
181- estimates[evals] = b. samplefunc (b. quote_vals, params). time
188+ estimates[evals] = first ( b. samplefunc (b. quote_vals, params))
182189 completed += 1
183190 ((time () - start_time) > params. seconds) && break
184191 end
@@ -506,6 +513,7 @@ function generate_benchmark_definition(
506513 @nospecialize
507514 corefunc = gensym (" core" )
508515 samplefunc = gensym (" sample" )
516+ linux_perf_func = gensym (" perf" )
509517 type_vars = [gensym () for i in 1 : (length (quote_vars) + length (setup_vars))]
510518 signature = Expr (:call , corefunc, quote_vars... , setup_vars... )
511519 signature_def = Expr (
@@ -572,64 +580,57 @@ function generate_benchmark_definition(
572580 __evals,
573581 ),
574582 )
575- if $ (params. experimental_enable_linux_perf)
576- # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
577- __linux_perf_groups = BenchmarkTools. LinuxPerf. set_default_spaces (
578- $ (params. linux_perf_options. events),
579- $ (params. linux_perf_options. spaces),
583+ return __time, __gctime, __memory, __allocs, __return_val
584+ end
585+ @noinline function $ (linux_perf_func)(
586+ $ (Expr (:tuple , quote_vars... )), __params:: $BenchmarkTools.Parameters
587+ )
588+ # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
589+ __linux_perf_groups = $ LinuxPerf. set_default_spaces (
590+ eval (__params. linux_perf_options. events),
591+ eval (__params. linux_perf_options. spaces),
592+ )
593+ __linux_perf_bench = nothing
594+ try
595+ __linux_perf_bench = $ LinuxPerf. make_bench_threaded (
596+ __linux_perf_groups;
597+ threads= eval (__params. linux_perf_options. threads),
580598 )
581- __linux_perf_bench = nothing
582- try
583- __linux_perf_bench = BenchmarkTools. LinuxPerf. make_bench_threaded (
584- __linux_perf_groups;
585- threads= $ (params. linux_perf_options. threads),
586- )
587- catch e
588- if e isa ErrorException &&
589- startswith (e. msg, " perf_event_open error : " )
590- @warn " Perf is disabled"
591- else
592- rethrow ()
593- end
599+ catch e
600+ if e isa ErrorException &&
601+ startswith (e. msg, " perf_event_open error : " )
602+ @warn " Perf is disabled" # Really we only want to do this if we defaulted to running with perf, otherwise we should just throw.
603+ # Given we now more accurately determine if perf is available can we do away with this hack?
604+ else
605+ rethrow ()
594606 end
607+ end
595608
596- if ! isnothing (__linux_perf_bench)
597- try
598- $ (setup)
599- BenchmarkTools. LinuxPerf. enable! (__linux_perf_bench)
600- # We'll just run it one time.
601- __return_val_2 = $ (invocation)
602- BenchmarkTools. LinuxPerf. disable! (__linux_perf_bench)
603- # trick the compiler not to eliminate the code
604- if rand () < 0
605- __linux_perf_stats = __return_val_2
606- else
607- __linux_perf_stats = BenchmarkTools. LinuxPerf. Stats (
608- __linux_perf_bench
609- )
610- end
611- catch
612- rethrow ()
613- finally
614- close (__linux_perf_bench)
615- $ (teardown)
609+ if ! isnothing (__linux_perf_bench)
610+ $ (setup)
611+ try
612+ $ LinuxPerf. enable! (__linux_perf_bench)
613+ # We'll just run it one time.
614+ __return_val_2 = $ (invocation)
615+ $ LinuxPerf. disable! (__linux_perf_bench)
616+ # trick the compiler not to eliminate the code
617+ if rand () < 0
618+ __linux_perf_stats = __return_val_2
619+ else
620+ __linux_perf_stats = $ LinuxPerf. Stats (__linux_perf_bench)
616621 end
622+ return __linux_perf_stats
623+ catch
624+ rethrow ()
625+ finally
626+ close (__linux_perf_bench)
627+ $ (teardown)
617628 end
618- else
619- __return_val_2 = nothing
620- __linux_perf_stats = nothing
621629 end
622- return BenchmarkTools. TrialContents (
623- __time,
624- __gctime,
625- __memory,
626- __allocs,
627- __return_val,
628- __return_val_2,
629- __linux_perf_stats,
630- )
631630 end
632- $ BenchmarkTools. Benchmark ($ (samplefunc), $ (quote_vals), $ (params))
631+ $ BenchmarkTools. Benchmark (
632+ $ (samplefunc), $ (linux_perf_func), $ (quote_vals), $ (params)
633+ )
633634 end ,
634635 )
635636end
0 commit comments