@@ -77,11 +77,17 @@ const EVENT_TYPES =
7777 (:scaled_cycles , 9 ) # PERF_COUNT_HW_REF_CPU_CYCLES
7878 ]),
7979 (:sw , 1 , # PERF_TYPE_SOFTWARE
80- [(:page_faults , 2 ), # PERF_COUNT_SW_PAGE_FAULTS
80+ [(:cpu_clock , 0 ), # PERF_COUNT_SW_CPU_CLOCK
81+ (:task_clock , 1 ), # PERF_COUNT_SW_TASK_CLOCK
82+ (:page_faults , 2 ), # PERF_COUNT_SW_PAGE_FAULTS
8183 (:ctx_switches , 3 ), # PERF_COUNT_SW_CONTEXT_SWITCHES
8284 (:cpu_migrations , 4 ), # PERF_COUNT_SW_CPU_MIGRATIONS
8385 (:minor_page_faults , 5 ), # PERF_COUNT_SW_PAGE_FAULTS_MIN
8486 (:major_page_faults , 6 ), # PERF_COUNT_SW_PAGE_FAULTS_MAJ
87+ (:alignment_faults , 7 ), # PERF_COUNT_SW_ALIGNMENT_FAULTS
88+ (:emulation_faults , 8 ), # PERF_COUNT_SW_EMULATION_FAULTS
89+ (:dummy , 9 ), # PERF_COUNT_SW_DUMMY
90+ (:bpf_output , 10 ), # PERF_COUNT_SW_BPF_OUTPUT
8591 ])
8692 ]
8793
@@ -372,4 +378,262 @@ end
372378
# Zero-argument method: forwards to `make_bench(reasonable_defaults)`.
function make_bench()
    return make_bench(reasonable_defaults)
end
374380
381+
# Lookup table from perf-style event names to `EventType` values.
# Event names are taken from the perf command (see `perf list`).
# Every name must map to a distinct event because `EVENT_TO_NAME` is built by
# inverting this table; do not add aliases for an event already listed here.
const NAME_TO_EVENT = Dict(
    # hardware events
    "branch-instructions" => EventType(:hw, :branches),
    "branch-misses" => EventType(:hw, :branch_mispredicts),
    "cache-misses" => EventType(:hw, :cache_misses),
    "cache-references" => EventType(:hw, :cache_access),
    "cpu-cycles" => EventType(:hw, :cycles),
    "instructions" => EventType(:hw, :instructions),
    "ref-cycles" => EventType(:hw, :scaled_cycles),  # PERF_COUNT_HW_REF_CPU_CYCLES
    "stalled-cycles-backend" => EventType(:hw, :stalled_cycles_backend),
    "stalled-cycles-frontend" => EventType(:hw, :stalled_cycles_frontend),

    # software events
    "alignment-faults" => EventType(:sw, :alignment_faults),
    "bpf-output" => EventType(:sw, :bpf_output),
    "context-switches" => EventType(:sw, :ctx_switches),
    "cpu-clock" => EventType(:sw, :cpu_clock),
    "cpu-migrations" => EventType(:sw, :cpu_migrations),
    "dummy" => EventType(:sw, :dummy),
    "emulation-faults" => EventType(:sw, :emulation_faults),
    "major-faults" => EventType(:sw, :major_page_faults),
    "minor-faults" => EventType(:sw, :minor_page_faults),
    "page-faults" => EventType(:sw, :page_faults),
    "task-clock" => EventType(:sw, :task_clock),

    # hardware cache events
    "L1-dcache-load-misses" => EventType(:cache, :L1_data, :read, :miss),
    "L1-dcache-loads" => EventType(:cache, :L1_data, :read, :access),
    "L1-icache-load-misses" => EventType(:cache, :L1_insn, :read, :miss),
    "L1-icache-loads" => EventType(:cache, :L1_insn, :read, :access),
    "dTLB-load-misses" => EventType(:cache, :TLB_data, :read, :miss),
    "dTLB-loads" => EventType(:cache, :TLB_data, :read, :access),
    "iTLB-load-misses" => EventType(:cache, :TLB_insn, :read, :miss),
    "iTLB-loads" => EventType(:cache, :TLB_insn, :read, :access),
)
417+
# Reverse lookup table: maps each `EventType` in `NAME_TO_EVENT` back to its name.
# `keys` and `values` of the same Dict iterate in matching order, so zipping
# them yields exactly the inverted pairs.
const EVENT_TO_NAME = Dict(zip(values(NAME_TO_EVENT), keys(NAME_TO_EVENT)))
419+
# Interpret the option arguments that precede the expression in `@pstats`.
#
# A string option is parsed with `parse_groups` and replaces the event groups
# chosen so far (the last string wins). A `key = value` expression is rejected
# because no keys are recognised; anything else is rejected as an unknown
# option. Returns a named tuple with a single field `events`.
function parse_pstats_options(opts)
    # default events
    events = parse_groups("
        (cpu-cycles, stalled-cycles-frontend, stalled-cycles-backend),
        (instructions, branch-instructions, branch-misses),
        (task-clock, context-switches, cpu-migrations, page-faults)
    ")
    for opt in opts
        if opt isa AbstractString
            events = parse_groups(opt)
            continue
        end
        if opt isa Expr && opt.head == :(=)
            key, _ = opt.args
            error("unknown key: $(key)")
        end
        error("unknown option: $(opt)")
    end
    return (events = events,)
end
439+
# Parse a comma-separated list of event groups.
# syntax: groups = (group ',')* group
#
# Returns a vector of groups, each a `Vector{EventType}`. An empty string
# yields an empty vector. Whitespace around groups and commas is ignored.
# Raises an error on any character other than ',' between groups.
function parse_groups(str)
    groups = Vector{EventType}[]
    pos = firstindex(str)
    # nothing to parse at all
    iterate(str, pos) === nothing && return groups
    while true
        pos = skipws(str, pos)
        group, pos = parse_group(str, pos)
        push!(groups, group)
        pos = skipws(str, pos)
        step = iterate(str, pos)
        # end of input after a complete group
        step === nothing && break
        ch, pos = step
        # a comma means another group must follow
        ch == ',' || error("unknown character: $(repr(ch))")
    end
    return groups
end
463+
# Parse one event group starting at index `i` of `str`.
# syntax: group = event | '(' (event ',')* event ')'
#
# Returns the parsed `Vector{EventType}` and the index just past the group.
# Raises an error when the input ends at `i`, when a '(' is never closed, or
# when an unexpected character follows an event inside parentheses.
function parse_group(str, i)
    group = EventType[]
    opening = iterate(str, i)
    opening === nothing && error("no events")

    if opening[1] != '('
        # singleton group: a bare event name
        i = skipws(str, i)
        event, i = parse_event(str, i)
        push!(group, event)
        return group, i
    end

    # parenthesised group: consume '(' and read events until ')'
    i = opening[2]
    closed = false
    while !closed
        i = skipws(str, i)
        event, i = parse_event(str, i)
        push!(group, event)
        i = skipws(str, i)
        step = iterate(str, i)
        step === nothing && error("unpaired '('")
        ch, i = step
        if ch == ')'
            closed = true
        elseif ch != ','
            error("unknown character: $(repr(ch))")
        end
    end
    return group, i
end
499+
# Parse a single event name starting at index `i` of `str`.
# syntax: event = [A-Za-z0-9-]+
#
# Returns the `EventType` registered under that name in `NAME_TO_EVENT` and
# the index of the first character after the name. Raises an error when no
# name characters are found at `i` or when the name is not registered.
function parse_event(str, i)
    isnamechar(c) = ('A' <= c <= 'Z') || ('a' <= c <= 'z') || ('0' <= c <= '9') || c == '-'
    start = i
    # advance `i` over the longest run of name characters
    while true
        step = iterate(str, i)
        (step === nothing || !isnamechar(step[1])) && break
        i = step[2]
    end
    stop = prevind(str, i)
    start > stop && error("empty event name")
    name = str[start:stop]
    event = get(NAME_TO_EVENT, name, nothing)
    event === nothing && error("unknown event name: $(name)")
    return event, i
end
519+
# Skip whitespace, if any: return the index of the first character at or after
# `i` that is not whitespace (or the end-of-string index when only whitespace
# remains).
function skipws(str, i)
    while true
        step = iterate(str, i)
        (step === nothing || !isspace(step[1])) && return i
        i = step[2]
    end
end
530+
# Counter readings collected from a benchmark: one inner vector of `Counter`s
# per event group.
struct Stats
    groups::Vector{Vector{Counter}}
end

# Read every event group of `b` through its `leader_io` and wrap the readings
# in a `Stats` object.
function Stats(b::PerfBench)
    groups = Vector{Counter}[]
    for g in b.groups
        n = length(g)
        # The buffer holds 1 + 2 leading words followed by one word per event.
        buf = Vector{UInt64}(undef, n + 1 + 2)
        read!(g.leader_io, buf)
        # Disabled check kept from the original code; presumably the first
        # word is the number of events in the group -- TODO confirm:
        #?Ref@assert(length(g) == values[1])
        enabled = buf[2]
        running = buf[3]
        counters = Counter[Counter(g.event_types[k], buf[3 + k], enabled, running) for k in 1:n]
        push!(groups, counters)
    end
    return Stats(groups)
end
546+
# Report whether `stats` contains a counter for the event called `name`.
#
# Returns `false` (instead of throwing `KeyError`) when `name` is not a known
# event name, so `haskey` can be used as a safe guard before `stats[name]`.
function Base.haskey(stats::Stats, name::AbstractString)
    event = get(NAME_TO_EVENT, name, nothing)
    # an unregistered name cannot match any counter
    event === nothing && return false
    return any(counter.event == event for group in stats.groups for counter in group)
end
551+
# Return the first counter in `stats` that measures the event called `name`.
#
# Throws `KeyError` when `name` is not a key of `NAME_TO_EVENT` or when no
# counter in `stats` measures that event.
function Base.getindex(stats::Stats, name::AbstractString)
    target = NAME_TO_EVENT[name]
    # walk all groups in order, counter by counter
    for counter in Iterators.flatten(stats.groups)
        counter.event == target && return counter
    end
    throw(KeyError(name))
end
561+
# Print `stats` as a text table on `io`.
#
# Output layout: a horizontal rule, then one line per counter, then a closing
# rule that is written without a trailing newline. Each counter line shows a
# grouping character, the event name looked up in EVENT_TO_NAME, and either
# "not enabled", "NA", or the scaled count with the fill rate in percent.
# Counters that ran may get an extra "#" comment with a derived ratio.
562+ function Base. show (io:: IO , stats:: Stats )
# total rule width: the 2- and 23-wide name columns plus 18 for the value columns
563+ w = 2 + 23 + 18
564+ println (io, ' ━' ^ w)
565+ for group in stats. groups
566+ for i in 1 : length (group)
567+ # grouping character
# single-member group, first member, last member, or middle member
568+ if length (group) == 1
569+ c = ' ╶'
570+ elseif i == 1
571+ c = ' ┌'
572+ elseif i == length (group)
573+ c = ' └'
574+ else
575+ c = ' │'
576+ end
577+ counter = group[i]
578+ event = counter. event
579+ name = EVENT_TO_NAME[event]
580+ @printf io " %-2s%-23s" c name
# value columns: depend on whether the counter was enabled and whether it ran
581+ if ! isenabled (counter)
582+ @printf (io, " %18s" , " not enabled" )
583+ elseif ! isrun (counter)
584+ @printf (io, " %10s%7.1f%%" , " NA" , 0.0 )
585+ else
586+ @printf (io, " %10.2e%7.1f%%" , scaledcount (counter), fillrate (counter) * 100 )
587+ end
588+ if isrun (counter)
589+ # show a comment
# Each ratio is printed only when the reference event is present in `stats`.
# NOTE(review): only the presence of the reference counter is checked, not
# whether it ran or counted anything, so a ratio may be non-finite -- confirm.
590+ if name == " cpu-cycles"
591+ @printf (io, " # %4.1f cycles per ns" , counter. value / counter. running)
592+ elseif (name == " stalled-cycles-frontend" || name == " stalled-cycles-backend" ) && haskey (stats, " cpu-cycles" )
593+ @printf (io, " # %4.1f%% of cycles" , scaledcount (counter) / scaledcount (stats[" cpu-cycles" ]) * 100 )
594+ elseif name == " instructions" && haskey (stats, " cpu-cycles" )
595+ @printf (io, " # %4.1f insns per cycle" , scaledcount (counter) / scaledcount (stats[" cpu-cycles" ]))
596+ elseif name == " branch-instructions" && haskey (stats, " instructions" )
597+ @printf (io, " # %4.1f%% of instructions" , scaledcount (counter) / scaledcount (stats[" instructions" ]) * 100 )
598+ elseif name == " branch-misses" && haskey (stats, " branch-instructions" )
599+ @printf (io, " # %4.1f%% of branch instructions" , scaledcount (counter)/ scaledcount (stats[" branch-instructions" ]) * 100 )
600+ elseif name == " cache-misses" && haskey (stats, " cache-references" )
601+ @printf (io, " # %4.1f%% of cache references" , scaledcount (counter) / scaledcount (stats[" cache-references" ]) * 100 )
602+ elseif name == " L1-dcache-load-misses" && haskey (stats, " L1-dcache-loads" )
603+ @printf (io, " # %4.1f%% of loads" , scaledcount (counter) / scaledcount (stats[" L1-dcache-loads" ]) * 100 )
604+ end
605+ end
606+ println (io)
607+ end
608+ end
# closing rule, deliberately without a newline
609+ print (io, ' ━' ^ w)
610+ end
611+
# True when the counter's `enabled` field is positive.
function isenabled(counter::Counter)
    return counter.enabled > 0
end

# True when the counter's `running` field is positive.
function isrun(counter::Counter)
    return counter.running > 0
end

# Ratio of `running` to `enabled` for the counter.
function fillrate(counter::Counter)
    return counter.running / counter.enabled
end

# Counter value scaled up by `enabled / running`.
# Not guarded against `running == 0`; callers check `isrun` first.
function scaledcount(counter::Counter)
    scale = counter.enabled / counter.running
    return counter.value * scale
end
616+
"""
    @pstats [options] expr

Run `expr` and gather its performance statistics.

`options` may include a string that selects the events to measure, written as
a comma-separated list of event names or parenthesised groups of event names,
for example `@pstats "cpu-cycles,(instructions,branch-instructions)" f(x)`.
When no string is given, a default set of event groups is used.

The macro call evaluates to a `Stats` object; the value of `expr` itself is
discarded. Counting is stopped even when `expr` throws, and the exception is
then propagated to the caller.
"""
macro pstats(args...)
    if isempty(args)
        error("@pstats requires at least one argument")
    end
    # everything before the last argument is an option; the last one is the code
    opts, expr = parse_pstats_options(args[1:end-1]), args[end]
    quote
        (function ()
            bench = make_bench($(opts.events))
            enable!(bench)
            val = try
                $(esc(expr))
            finally
                # always stop counting, even when `expr` throws
                disable!(bench)
            end
            # trick the compiler not to eliminate the code
            (rand() < 0 ? val : Stats(bench))::Stats
        end)()
    end
end
638+
639+ end
0 commit comments