@@ -70,11 +70,17 @@ const EVENT_TYPES =
7070 (:scaled_cycles , 9 ) # PERF_COUNT_HW_REF_CPU_CYCLES
7171 ]),
7272 (:sw , 1 , # PERF_TYPE_SOFTWARE
73- [(:page_faults , 2 ), # PERF_COUNT_SW_PAGE_FAULTS
73+ [(:cpu_clock , 0 ), # PERF_COUNT_SW_CPU_CLOCK
74+ (:task_clock , 1 ), # PERF_COUNT_SW_TASK_CLOCK
75+ (:page_faults , 2 ), # PERF_COUNT_SW_PAGE_FAULTS
7476 (:ctx_switches , 3 ), # PERF_COUNT_SW_CONTEXT_SWITCHES
7577 (:cpu_migrations , 4 ), # PERF_COUNT_SW_CPU_MIGRATIONS
7678 (:minor_page_faults , 5 ), # PERF_COUNT_SW_PAGE_FAULTS_MIN
7779 (:major_page_faults , 6 ), # PERF_COUNT_SW_PAGE_FAULTS_MAJ
80+ (:alignment_faults , 7 ), # PERF_COUNT_SW_ALIGNMENT_FAULTS
81+ (:emulation_faults , 8 ), # PERF_COUNT_SW_EMULATION_FAULTS
82+ (:dummy , 9 ), # PERF_COUNT_SW_DUMMY
83+ (:bpf_output , 10 ), # PERF_COUNT_SW_BPF_OUTPUT
7884 ])
7985 ]
8086
@@ -348,4 +354,262 @@ end
348354
# Zero-argument convenience method: build a bench from `reasonable_defaults`.
function make_bench()
    return make_bench(reasonable_defaults)
end
350356
357+
# Lookup table from event names, spelled as the perf command spells them,
# to the corresponding `EventType`.
const NAME_TO_EVENT = Dict(
    # --- hardware events ---
    "branch-instructions" => EventType(:hw, :branches),
    "branch-misses" => EventType(:hw, :branch_mispredicts),
    "cache-misses" => EventType(:hw, :cache_misses),
    "cache-references" => EventType(:hw, :cache_access),
    "cpu-cycles" => EventType(:hw, :cycles),
    "instructions" => EventType(:hw, :instructions),
    "stalled-cycles-backend" => EventType(:hw, :stalled_cycles_backend),
    "stalled-cycles-frontend" => EventType(:hw, :stalled_cycles_frontend),

    # --- software events ---
    "alignment-faults" => EventType(:sw, :alignment_faults),
    "bpf-output" => EventType(:sw, :bpf_output),
    "context-switches" => EventType(:sw, :ctx_switches),
    "cpu-clock" => EventType(:sw, :cpu_clock),
    "cpu-migrations" => EventType(:sw, :cpu_migrations),
    "dummy" => EventType(:sw, :dummy),
    "emulation-faults" => EventType(:sw, :emulation_faults),
    "major-faults" => EventType(:sw, :major_page_faults),
    "minor-faults" => EventType(:sw, :minor_page_faults),
    "page-faults" => EventType(:sw, :page_faults),
    "task-clock" => EventType(:sw, :task_clock),

    # --- hardware cache events (read accesses and read misses) ---
    "L1-dcache-load-misses" => EventType(:cache, :L1_data, :read, :miss),
    "L1-dcache-loads" => EventType(:cache, :L1_data, :read, :access),
    "L1-icache-load-misses" => EventType(:cache, :L1_insn, :read, :miss),
    "L1-icache-loads" => EventType(:cache, :L1_insn, :read, :access),
    "dTLB-load-misses" => EventType(:cache, :TLB_data, :read, :miss),
    "dTLB-loads" => EventType(:cache, :TLB_data, :read, :access),
    "iTLB-load-misses" => EventType(:cache, :TLB_insn, :read, :miss),
    "iTLB-loads" => EventType(:cache, :TLB_insn, :read, :access),
)
393+
# Reverse lookup table: `EventType` back to its perf-style name.
const EVENT_TO_NAME = Dict(ev => label for (label, ev) in NAME_TO_EVENT)
395+
# Interpret the option arguments of `@pstats` and return them as a named
# tuple `(events = ...,)`.
#
# A string option is parsed as an event-group specification and replaces the
# default groups. No `key = value` option is recognized, so any such option is
# rejected, as is every other kind of option.
function parse_pstats_options(opts)
    # event groups used when the caller does not give a specification
    default_spec = "
    (cpu-cycles, stalled-cycles-frontend, stalled-cycles-backend),
    (instructions, branch-instructions, branch-misses),
    (task-clock, context-switches, cpu-migrations, page-faults)
    "
    selected = parse_groups(default_spec)
    for option in opts
        if option isa AbstractString
            selected = parse_groups(option)
        elseif option isa Expr && option.head == :(=)
            key, _ = option.args
            error("unknown key: $(key)")
        else
            error("unknown option: $(option)")
        end
    end
    return (events = selected,)
end
415+
# syntax: groups = (group ',')* group
#
# Parse a comma-separated list of event groups from `str` and return it as a
# vector of groups (each group is a `Vector{EventType}`). Whitespace around
# groups is ignored; an empty string produces an empty vector.
function parse_groups(str)
    parsed = Vector{EventType}[]
    pos = firstindex(str)
    peeked = iterate(str, pos)
    while peeked !== nothing
        pos = skipws(str, pos)
        grp, pos = parse_group(str, pos)
        push!(parsed, grp)
        pos = skipws(str, pos)
        peeked = iterate(str, pos)
        # input exhausted right after a group: nothing more to parse
        peeked === nothing && break
        ch, pos = peeked
        # only a comma may follow a group
        ch == ',' || error("unknown character: $(repr(ch))")
    end
    return parsed
end
439+
# syntax: group = event | '(' (event ',')* event ')'
#
# Parse one group starting at index `i` of `str`. Returns the events of the
# group together with the index at which parsing should resume.
function parse_group(str, i)
    members = EventType[]
    head = iterate(str, i)
    head === nothing && error("no events")
    if head[1] != '('
        # a bare event name forms a group of one
        i = skipws(str, i)
        ev, i = parse_event(str, i)
        push!(members, ev)
        return members, i
    end
    # parenthesized list: step over the '(' and read events up to ')'
    i = head[2]
    while true
        i = skipws(str, i)
        ev, i = parse_event(str, i)
        push!(members, ev)
        i = skipws(str, i)
        peeked = iterate(str, i)
        peeked === nothing && error("unpaired '('")
        ch, i = peeked
        if ch == ')'
            break
        elseif ch != ','
            error("unknown character: $(repr(ch))")
        end
    end
    return members, i
end
475+
# syntax: event = [A-Za-z0-9-]+
#
# Read an event name starting at index `i` of `str` and look it up in
# `NAME_TO_EVENT`. Returns the event and the index just past the name.
function parse_event(str, i)
    allowed(ch) = ch == '-' || 'a' <= ch <= 'z' || 'A' <= ch <= 'Z' || '0' <= ch <= '9'
    first_pos = i
    # advance `i` past every character that may appear in an event name
    while true
        peeked = iterate(str, i)
        (peeked === nothing || !allowed(peeked[1])) && break
        i = peeked[2]
    end
    last_pos = prevind(str, i)
    first_pos > last_pos && error("empty event name")
    name = str[first_pos:last_pos]
    event = get(NAME_TO_EVENT, name, nothing)
    event === nothing && error("unknown event name: $(name)")
    return event, i
end
495+
# Skip whitespace, if any: return the index of the first non-whitespace
# character at or after index `i` of `str`, or the index past the end of
# `str` when only whitespace remains.
function skipws(str, i)
    while true
        peeked = iterate(str, i)
        (peeked === nothing || !isspace(peeked[1])) && return i
        i = peeked[2]
    end
end
506+
"""
    Stats

Counter readings grouped the same way the events were grouped: `groups` holds
one vector of `Counter`s per event group.
"""
struct Stats
    groups::Vector{Vector{Counter}}
end
510+
# Read the current counter values of every event group of `b` and collect
# them into a `Stats` object.
function Stats(b::PerfBench)
    collected = Vector{Counter}[]
    for grp in b.groups
        n = length(grp)
        # one leading word, then the enabled and running words, then n values
        buf = Vector{UInt64}(undef, n + 1 + 2)
        read!(grp.leader_io, buf)
        # buf[1] is expected to equal n; that check is disabled in the original:
        # @assert n == buf[1]
        t_enabled = buf[2]
        t_running = buf[3]
        counters = [Counter(grp.event_types[k], buf[3 + k], t_enabled, t_running) for k in 1:n]
        push!(collected, counters)
    end
    return Stats(collected)
end
522+
# Return `true` if `stats` contains a counter for the event called `name`.
#
# A name that is not a key of `NAME_TO_EVENT` cannot be present in `stats`,
# so the answer is `false` rather than a `KeyError`.
function Base.haskey(stats::Stats, name::AbstractString)
    event = get(NAME_TO_EVENT, name, nothing)
    event === nothing && return false
    return any(counter.event == event for group in stats.groups for counter in group)
end
527+
# Return the first counter in `stats` that measures the event called `name`.
# Throws `KeyError` when the name is unknown or no counter measures it.
function Base.getindex(stats::Stats, name::AbstractString)
    wanted = NAME_TO_EVENT[name]
    for grp in stats.groups
        idx = findfirst(c -> c.event == wanted, grp)
        idx === nothing || return grp[idx]
    end
    throw(KeyError(name))
end
537+
# Print `stats` to `io` as a table framed by two horizontal rules.
# Each counter gets one row: a grouping character, the event name, the
# counter reading, and for some events a trailing "#" comment with a ratio.
538+ function Base. show (io:: IO , stats:: Stats )
# width of the rules: the sum of the three column widths used below
539+ w = 2 + 23 + 18
540+ println (io, ' ━' ^ w)
541+ for group in stats. groups
542+ for i in 1 : length (group)
543+ # grouping character
544+ if length (group) == 1
545+ c = ' ╶'
546+ elseif i == 1
547+ c = ' ┌'
548+ elseif i == length (group)
549+ c = ' └'
550+ else
551+ c = ' │'
552+ end
553+ counter = group[i]
554+ event = counter. event
555+ name = EVENT_TO_NAME[event]
556+ @printf io " %-2s%-23s" c name
# value column: a placeholder when the counter was not enabled or did not
# run, otherwise the scaled count followed by the fill rate as a percentage
557+ if ! isenabled (counter)
558+ @printf (io, " %18s" , " not enabled" )
559+ elseif ! isrun (counter)
560+ @printf (io, " %10s%7.1f%%" , " NA" , 0.0 )
561+ else
562+ @printf (io, " %10.2e%7.1f%%" , scaledcount (counter), fillrate (counter) * 100 )
563+ end
# the comment is printed only for counters that ran; apart from cpu-cycles,
# it also requires the counter it is compared against to be present in `stats`
564+ if isrun (counter)
565+ # show a comment
566+ if name == " cpu-cycles"
567+ @printf (io, " # %4.1f cycles per ns" , counter. value / counter. running)
568+ elseif (name == " stalled-cycles-frontend" || name == " stalled-cycles-backend" ) && haskey (stats, " cpu-cycles" )
569+ @printf (io, " # %4.1f%% of cycles" , scaledcount (counter) / scaledcount (stats[" cpu-cycles" ]) * 100 )
570+ elseif name == " instructions" && haskey (stats, " cpu-cycles" )
571+ @printf (io, " # %4.1f insns per cycle" , scaledcount (counter) / scaledcount (stats[" cpu-cycles" ]))
572+ elseif name == " branch-instructions" && haskey (stats, " instructions" )
573+ @printf (io, " # %4.1f%% of instructions" , scaledcount (counter) / scaledcount (stats[" instructions" ]) * 100 )
574+ elseif name == " branch-misses" && haskey (stats, " branch-instructions" )
575+ @printf (io, " # %4.1f%% of branch instructions" , scaledcount (counter)/ scaledcount (stats[" branch-instructions" ]) * 100 )
576+ elseif name == " cache-misses" && haskey (stats, " cache-references" )
577+ @printf (io, " # %4.1f%% of cache references" , scaledcount (counter) / scaledcount (stats[" cache-references" ]) * 100 )
578+ elseif name == " L1-dcache-load-misses" && haskey (stats, " L1-dcache-loads" )
579+ @printf (io, " # %4.1f%% of loads" , scaledcount (counter) / scaledcount (stats[" L1-dcache-loads" ]) * 100 )
580+ end
581+ end
582+ println (io)
583+ end
584+ end
# closing rule, printed without a trailing newline
585+ print (io, ' ━' ^ w)
586+ end
587+
# true when the counter's `enabled` field is positive
function isenabled(counter::Counter)
    return counter.enabled > 0
end
# true when the counter's `running` field is positive
function isrun(counter::Counter)
    return counter.running > 0
end
# ratio of the counter's `running` field to its `enabled` field
function fillrate(counter::Counter)
    return counter.running / counter.enabled
end
# the counter's `value` multiplied by `enabled / running`
function scaledcount(counter::Counter)
    scale = counter.enabled / counter.running
    return counter.value * scale
end
592+
"""
    @pstats [options] expr

Evaluate `expr` with performance counters enabled and return the gathered
statistics as a `Stats` object.

All arguments before the last one are options; they are interpreted by
`parse_pstats_options`.
"""
macro pstats(args...)
    isempty(args) && error("@pstats requires at least one argument")
    option_args = args[1:end-1]
    opts = parse_pstats_options(option_args)
    expr = args[end]
    return quote
        (function ()
            bench = make_bench($(opts.events))
            enable!(bench)
            val = $(esc(expr))
            disable!(bench)
            # trick the compiler not to eliminate the code
            (rand() < 0 ? val : Stats(bench))::Stats
        end)()
    end
end
614+
615+ end
0 commit comments