|
16 | 16 |
|
17 | 17 | mutable struct Benchmark | # Bundles the generated callables of one benchmark with its captured values and parameters.
18 | 18 | samplefunc | # callable; _run invokes it as samplefunc(quote_vals, params)
| 19 | + linux_perf_func | # callable; _run invokes it as linux_perf_func(quote_vals, params), and only when params.enable_linux_perf is set
19 | 20 | quote_vals | # passed as the first argument to both callables above
20 | 21 | params::Parameters | # Parameters stored on the benchmark
21 | 22 | end | # positional constructor order is samplefunc, linux_perf_func, quote_vals, params
@@ -125,6 +126,12 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", warmup=true, k |
125 | 126 | push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...) |
126 | 127 | iters += 1 |
127 | 128 | end |
| 129 | + |
| 130 | + if params.enable_linux_perf |
| 131 | + params.linux_perf_gcscrub && gcscrub() |
| 132 | + trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params) |
| 133 | + end |
| 134 | + |
128 | 135 | return trial, return_val |
129 | 136 | end |
130 | 137 |
|
@@ -520,6 +527,7 @@ function generate_benchmark_definition( |
520 | 527 | @nospecialize |
521 | 528 | corefunc = gensym("core") |
522 | 529 | samplefunc = gensym("sample") |
| 530 | + linux_perf_func = gensym("perf") |
523 | 531 | type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))] |
524 | 532 | signature = Expr(:call, corefunc, quote_vars..., setup_vars...) |
525 | 533 | signature_def = Expr( |
@@ -588,7 +596,43 @@ function generate_benchmark_definition( |
588 | 596 | ) |
589 | 597 | return __time, __gctime, __memory, __allocs, __return_val |
590 | 598 | end |
591 | | - $BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params)) |
| 599 | + @noinline function $(linux_perf_func)( | # Generated function: runs the benchmarked invocation and returns LinuxPerf.Stats built from the bench opened below.
| 600 | + $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters | # first argument is destructured into quote_vars; __params supplies the linux_perf_* settings and evals
| 601 | + ) |
| 602 | + # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061 |
| 603 | + __linux_perf_groups = $LinuxPerf.set_default_spaces( | # groups parsed from __params.linux_perf_groups, combined with __params.linux_perf_spaces
| 604 | + $LinuxPerf.parse_groups(__params.linux_perf_groups), |
| 605 | + __params.linux_perf_spaces, |
| 606 | + ) |
| 607 | + __linux_perf_bench = $LinuxPerf.make_bench_threaded( | # bench handle; closed in the finally block below
| 608 | + __linux_perf_groups; threads=__params.linux_perf_threads |
| 609 | + ) |
| 610 | + |
| 611 | + try | # try/finally so that close(...) and teardown run even if setup or an invocation throws
| 612 | + @noinline $(setup) | # NOTE(review): @noinline is applied to the interpolated setup expression - confirm every supported Julia version accepts this form
| 613 | + __evals = __params.evals |
| 614 | + # Isolate the measured code in its own function so that setup does not change the code generated for it (e.g. through different register allocation). |
| 615 | + # This is only partly effective: a variable defined in setup is still passed into the invocation, which adds a few instructions. |
| 616 | + @noinline (function (__evals) | # anonymous function, called immediately with __evals on its closing row
| 617 | + $LinuxPerf.enable_all!() | # enable_all!/disable_all! bracket only the invocations; setup runs outside the bracket
| 618 | + # Run the invocation evals times in total: once here, then the remainder in the loop. |
| 619 | + @noinline __return_val_2 = $(invocation) | # first evaluation; its result is kept so it can be returned
| 620 | + for __iter in 2:__evals | # remaining evaluations; this range is empty when __evals < 2
| 621 | + @noinline $(invocation) |
| 622 | + end |
| 623 | + $LinuxPerf.disable_all!() | # NOTE(review): not reached if an invocation throws; only close(...) in the finally block runs in that case
| 624 | + # Return the value so that the compiler cannot eliminate the invocation. |
| 625 | + return __return_val_2 |
| 626 | + end)(__evals) | # the value returned by this call is not used
| 627 | + return $LinuxPerf.Stats(__linux_perf_bench) | # stats are built before the finally block closes the bench
| 628 | + finally |
| 629 | + close(__linux_perf_bench) |
| 630 | + $(teardown) | # NOTE(review): teardown also runs when setup itself threw - confirm it tolerates a partially executed setup
| 631 | + end |
| 632 | + end |
| 633 | + $BenchmarkTools.Benchmark( |
| 634 | + $(samplefunc), $(linux_perf_func), $(quote_vals), $(params) |
| 635 | + ) |
592 | 636 | end, |
593 | 637 | ) |
594 | 638 | end |
|
0 commit comments