Skip to content

Commit c61feda

Browse files
ahunter6 authored and gregkh committed
perf/core: Add aux_pause, aux_resume, aux_start_paused
[ Upstream commit 18d92bb ]

Hardware traces, such as instruction traces, can produce a vast amount of trace data, so being able to reduce tracing to more specific circumstances can be useful.

The ability to pause or resume tracing when another event happens can do that.

Add ability for an event to "pause" or "resume" AUX area tracing.

Add aux_pause bit to perf_event_attr to indicate that, if the event happens, the associated AUX area tracing should be paused. Ditto aux_resume. Do not allow aux_pause and aux_resume to be set together.

Add aux_start_paused bit to perf_event_attr to indicate to an AUX area event that it should start in a "paused" state.

Add aux_paused to struct hw_perf_event for AUX area events to keep track of the "paused" state. aux_paused is initialized to aux_start_paused.

Add PERF_EF_PAUSE and PERF_EF_RESUME modes for ->stop() and ->start() callbacks. Call as needed, during __perf_event_output(). Add aux_in_pause_resume to struct perf_buffer to prevent races with the NMI handler. Pause/resume in NMI context will miss out if it coincides with another pause/resume.

To use aux_pause or aux_resume, an event must be in a group with the AUX area event as the group leader.
Example (requires Intel PT and tools patches also):

$ perf record --kcore -e intel_pt/aux-action=start-paused/k,syscalls:sys_enter_newuname/aux-action=resume/,syscalls:sys_exit_newuname/aux-action=pause/ uname
Linux
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.043 MB perf.data ]
$ perf script --call-trace
uname 30805 [000] 24001.058782799: name: 0x7ffc9c1865b0
uname 30805 [000] 24001.058784424: psb offs: 0
uname 30805 [000] 24001.058784424: cbr: 39 freq: 3904 MHz (139%)
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) debug_smp_processor_id
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) __x64_sys_newuname
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) down_read
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) __cond_resched
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) preempt_count_add
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) in_lock_functions
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) preempt_count_sub
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) up_read
uname 30805 [000] 24001.058784629: ([kernel.kallsyms]) preempt_count_add
uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) in_lock_functions
uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) preempt_count_sub
uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) _copy_to_user
uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) syscall_exit_to_user_mode
uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) syscall_exit_work
uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) perf_syscall_exit
uname 30805 [000] 24001.058784838: ([kernel.kallsyms]) debug_smp_processor_id
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_trace_buf_alloc
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_swevent_get_recursion_context
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) debug_smp_processor_id
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) debug_smp_processor_id
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_tp_event
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_trace_buf_update
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) tracing_gen_ctx_irq_test
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_swevent_event
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) __perf_event_account_interrupt
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) __this_cpu_preempt_check
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_event_output_forward
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) perf_event_aux_pause
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) ring_buffer_get
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) __rcu_read_lock
uname 30805 [000] 24001.058785046: ([kernel.kallsyms]) __rcu_read_unlock
uname 30805 [000] 24001.058785254: ([kernel.kallsyms]) pt_event_stop
uname 30805 [000] 24001.058785254: ([kernel.kallsyms]) debug_smp_processor_id
uname 30805 [000] 24001.058785254: ([kernel.kallsyms]) debug_smp_processor_id
uname 30805 [000] 24001.058785254: ([kernel.kallsyms]) native_write_msr
uname 30805 [000] 24001.058785463: ([kernel.kallsyms]) native_write_msr
uname 30805 [000] 24001.058785639: 0x0

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: James Clark <james.clark@arm.com>
Link: https://lkml.kernel.org/r/20241022155920.17511-3-adrian.hunter@intel.com
Stable-dep-of: 56799bc ("perf: Fix hang while freeing sigtrap event")
Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent a084253 commit c61feda

File tree

4 files changed

+110
-5
lines changed

4 files changed

+110
-5
lines changed

include/linux/perf_event.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,12 @@ struct hw_perf_event {
170170
};
171171
struct { /* aux / Intel-PT */
172172
u64 aux_config;
173+
/*
174+
* For AUX area events, aux_paused cannot be a state
175+
* flag because it can be updated asynchronously to
176+
* state.
177+
*/
178+
unsigned int aux_paused;
173179
};
174180
struct { /* software */
175181
struct hrtimer hrtimer;
@@ -294,6 +300,7 @@ struct perf_event_pmu_context;
294300
#define PERF_PMU_CAP_NO_EXCLUDE 0x0040
295301
#define PERF_PMU_CAP_AUX_OUTPUT 0x0080
296302
#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
303+
#define PERF_PMU_CAP_AUX_PAUSE 0x0200
297304

298305
/**
299306
* pmu::scope
@@ -384,6 +391,8 @@ struct pmu {
384391
#define PERF_EF_START 0x01 /* start the counter when adding */
385392
#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */
386393
#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */
394+
#define PERF_EF_PAUSE 0x08 /* AUX area event, pause tracing */
395+
#define PERF_EF_RESUME 0x10 /* AUX area event, resume tracing */
387396

388397
/*
389398
* Adds/Removes a counter to/from the PMU, can be done inside a
@@ -423,6 +432,18 @@ struct pmu {
423432
*
424433
* ->start() with PERF_EF_RELOAD will reprogram the counter
425434
* value, must be preceded by a ->stop() with PERF_EF_UPDATE.
435+
*
436+
* ->stop() with PERF_EF_PAUSE will stop as simply as possible. Will not
437+
* overlap another ->stop() with PERF_EF_PAUSE nor ->start() with
438+
* PERF_EF_RESUME.
439+
*
440+
* ->start() with PERF_EF_RESUME will start as simply as possible but
441+
* only if the counter is not otherwise stopped. Will not overlap
442+
* another ->start() with PERF_EF_RESUME nor ->stop() with
443+
* PERF_EF_PAUSE.
444+
*
445+
* Notably, PERF_EF_PAUSE/PERF_EF_RESUME *can* be concurrent with other
446+
* ->stop()/->start() invocations, just not itself.
426447
*/
427448
void (*start) (struct perf_event *event, int flags);
428449
void (*stop) (struct perf_event *event, int flags);
@@ -1685,6 +1706,13 @@ static inline bool has_aux(struct perf_event *event)
16851706
return event->pmu->setup_aux;
16861707
}
16871708

1709+
static inline bool has_aux_action(struct perf_event *event)
1710+
{
1711+
return event->attr.aux_sample_size ||
1712+
event->attr.aux_pause ||
1713+
event->attr.aux_resume;
1714+
}
1715+
16881716
static inline bool is_write_backward(struct perf_event *event)
16891717
{
16901718
return !!event->attr.write_backward;

include/uapi/linux/perf_event.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,16 @@ struct perf_event_attr {
511511
__u16 sample_max_stack;
512512
__u16 __reserved_2;
513513
__u32 aux_sample_size;
514-
__u32 __reserved_3;
514+
515+
union {
516+
__u32 aux_action;
517+
struct {
518+
__u32 aux_start_paused : 1, /* start AUX area tracing paused */
519+
aux_pause : 1, /* on overflow, pause AUX area tracing */
520+
aux_resume : 1, /* on overflow, resume AUX area tracing */
521+
__reserved_3 : 29;
522+
};
523+
};
515524

516525
/*
517526
* User provided data if sigtrap=1, passed back to user via

kernel/events/core.c

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2146,7 +2146,7 @@ static void perf_put_aux_event(struct perf_event *event)
21462146

21472147
static bool perf_need_aux_event(struct perf_event *event)
21482148
{
2149-
return !!event->attr.aux_output || !!event->attr.aux_sample_size;
2149+
return event->attr.aux_output || has_aux_action(event);
21502150
}
21512151

21522152
static int perf_get_aux_event(struct perf_event *event,
@@ -2171,6 +2171,10 @@ static int perf_get_aux_event(struct perf_event *event,
21712171
!perf_aux_output_match(event, group_leader))
21722172
return 0;
21732173

2174+
if ((event->attr.aux_pause || event->attr.aux_resume) &&
2175+
!(group_leader->pmu->capabilities & PERF_PMU_CAP_AUX_PAUSE))
2176+
return 0;
2177+
21742178
if (event->attr.aux_sample_size && !group_leader->pmu->snapshot_aux)
21752179
return 0;
21762180

@@ -8029,6 +8033,49 @@ void perf_prepare_header(struct perf_event_header *header,
80298033
WARN_ON_ONCE(header->size & 7);
80308034
}
80318035

8036+
static void __perf_event_aux_pause(struct perf_event *event, bool pause)
8037+
{
8038+
if (pause) {
8039+
if (!event->hw.aux_paused) {
8040+
event->hw.aux_paused = 1;
8041+
event->pmu->stop(event, PERF_EF_PAUSE);
8042+
}
8043+
} else {
8044+
if (event->hw.aux_paused) {
8045+
event->hw.aux_paused = 0;
8046+
event->pmu->start(event, PERF_EF_RESUME);
8047+
}
8048+
}
8049+
}
8050+
8051+
static void perf_event_aux_pause(struct perf_event *event, bool pause)
8052+
{
8053+
struct perf_buffer *rb;
8054+
8055+
if (WARN_ON_ONCE(!event))
8056+
return;
8057+
8058+
rb = ring_buffer_get(event);
8059+
if (!rb)
8060+
return;
8061+
8062+
scoped_guard (irqsave) {
8063+
/*
8064+
* Guard against self-recursion here. Another event could trip
8065+
* this same from NMI context.
8066+
*/
8067+
if (READ_ONCE(rb->aux_in_pause_resume))
8068+
break;
8069+
8070+
WRITE_ONCE(rb->aux_in_pause_resume, 1);
8071+
barrier();
8072+
__perf_event_aux_pause(event, pause);
8073+
barrier();
8074+
WRITE_ONCE(rb->aux_in_pause_resume, 0);
8075+
}
8076+
ring_buffer_put(rb);
8077+
}
8078+
80328079
static __always_inline int
80338080
__perf_event_output(struct perf_event *event,
80348081
struct perf_sample_data *data,
@@ -9832,9 +9879,12 @@ static int __perf_event_overflow(struct perf_event *event,
98329879

98339880
ret = __perf_event_account_interrupt(event, throttle);
98349881

9882+
if (event->attr.aux_pause)
9883+
perf_event_aux_pause(event->aux_event, true);
9884+
98359885
if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT &&
98369886
!bpf_overflow_handler(event, data, regs))
9837-
return ret;
9887+
goto out;
98389888

98399889
/*
98409890
* XXX event_limit might not quite work as expected on inherited
@@ -9896,6 +9946,9 @@ static int __perf_event_overflow(struct perf_event *event,
98969946
event->pending_wakeup = 1;
98979947
irq_work_queue(&event->pending_irq);
98989948
}
9949+
out:
9950+
if (event->attr.aux_resume)
9951+
perf_event_aux_pause(event->aux_event, false);
98999952

99009953
return ret;
99019954
}
@@ -12312,11 +12365,25 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
1231212365
}
1231312366

1231412367
if (event->attr.aux_output &&
12315-
!(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) {
12368+
(!(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT) ||
12369+
event->attr.aux_pause || event->attr.aux_resume)) {
1231612370
err = -EOPNOTSUPP;
1231712371
goto err_pmu;
1231812372
}
1231912373

12374+
if (event->attr.aux_pause && event->attr.aux_resume) {
12375+
err = -EINVAL;
12376+
goto err_pmu;
12377+
}
12378+
12379+
if (event->attr.aux_start_paused) {
12380+
if (!(pmu->capabilities & PERF_PMU_CAP_AUX_PAUSE)) {
12381+
err = -EOPNOTSUPP;
12382+
goto err_pmu;
12383+
}
12384+
event->hw.aux_paused = 1;
12385+
}
12386+
1232012387
if (cgroup_fd != -1) {
1232112388
err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader);
1232212389
if (err)
@@ -13112,7 +13179,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
1311213179
* Grouping is not supported for kernel events, neither is 'AUX',
1311313180
* make sure the caller's intentions are adjusted.
1311413181
*/
13115-
if (attr->aux_output)
13182+
if (attr->aux_output || attr->aux_action)
1311613183
return ERR_PTR(-EINVAL);
1311713184

1311813185
event = perf_event_alloc(attr, cpu, task, NULL, NULL,

kernel/events/internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ struct perf_buffer {
5252
void (*free_aux)(void *);
5353
refcount_t aux_refcount;
5454
int aux_in_sampling;
55+
int aux_in_pause_resume;
5556
void **aux_pages;
5657
void *aux_priv;
5758

0 commit comments

Comments
 (0)