Skip to content

Commit acef308

Browse files
committed
perf/x86/intel/ds: Simplify the PEBS records processing for adaptive PEBS
JIRA: https://issues.redhat.com/browse/RHEL-47444

upstream
========
commit ae55e30
Author: Kan Liang <kan.liang@linux.intel.com>
Date: Tue Nov 19 05:55:04 2024 -0800

description
===========
The current code may iterate all the PEBS records in the DS area several times. The first loop is to find all active events and calculate the available records for each event. Then iterate the whole buffer again and again to process available records until all active events are processed.

The algorithm is inherited from the old generations. The old PEBS hardware does not deal well with the situation when events happen near each other. SW has to drop the error records. Multiple iterations are required.

The hardware limit has been addressed on newer platforms with adaptive PEBS. A simple one-iteration algorithm is introduced.

The samples are output by record order with the patch, rather than the event order. It doesn't impact the post-processing. The perf tool always sorts the records by time before presenting them to the end user.

In an NMI, the last record has to be specially handled. Add a last[] variable to track the last unprocessed record of each event.

Test:

11 PEBS events are used in the perf test. Only the basic information is collected.

    perf record -e instructions:up,...,instructions:up -c 2000003 benchmark

The ftrace is used to record the duration of the intel_pmu_drain_pebs_icl().

The average duration reduced from 62.04us to 57.94us.

A small improvement can be observed with the new algorithm. Also, the implementation becomes simpler and more straightforward.

Suggested-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Link: https://lore.kernel.org/r/20241119135504.1463839-5-kan.liang@linux.intel.com
Signed-off-by: Michael Petlan <mpetlan@redhat.com>
1 parent e2ee150 commit acef308

File tree

1 file changed

+29
-14
lines changed
  • arch/x86/events/intel

1 file changed

+29
-14
lines changed

arch/x86/events/intel/ds.c

Lines changed: 29 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -2418,8 +2418,12 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
2418 2418    static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
2419 2419    {
2420 2420    	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
     2421 +  	void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
2421 2422    	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2422 2423    	struct debug_store *ds = cpuc->ds;
     2424 +  	struct x86_perf_regs perf_regs;
     2425 +  	struct pt_regs *regs = &perf_regs.regs;
     2426 +  	struct pebs_basic *basic;
2423 2427    	struct perf_event *event;
2424 2428    	void *base, *at, *top;
2425 2429    	int bit;
@@ -2441,30 +2445,41 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
2441 2445    		return;
2442 2446    	}
2443 2447
2444      -  	for (at = base; at < top; at += cpuc->pebs_record_size) {
     2448 +  	if (!iregs)
     2449 +  		iregs = &dummy_iregs;
     2450 +
     2451 +  	/* Process all but the last event for each counter. */
     2452 +  	for (at = base; at < top; at += basic->format_size) {
2445 2453    		u64 pebs_status;
2446 2454
2447      -  		pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
2448      -  		pebs_status &= mask;
     2455 +  		basic = at;
     2456 +  		if (basic->format_size != cpuc->pebs_record_size)
     2457 +  			continue;
     2458 +
     2459 +  		pebs_status = basic->applicable_counters & cpuc->pebs_enabled & mask;
     2460 +  		for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) {
     2461 +  			event = cpuc->events[bit];
2449 2462
2450      -  		for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX)
2451      -  			counts[bit]++;
     2463 +  			if (WARN_ON_ONCE(!event) ||
     2464 +  			    WARN_ON_ONCE(!event->attr.precise_ip))
     2465 +  				continue;
     2466 +
     2467 +  			if (counts[bit]++) {
     2468 +  				__intel_pmu_pebs_event(event, iregs, regs, data, last[bit],
     2469 +  						       setup_pebs_adaptive_sample_data);
     2470 +  			}
     2471 +  			last[bit] = at;
     2472 +  		}
2452 2473    	}
2453 2474
2454 2475    	for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) {
2455      -  		if (counts[bit] == 0)
     2476 +  		if (!counts[bit])
2456 2477    			continue;
2457 2478
2458 2479    		event = cpuc->events[bit];
2459      -  		if (WARN_ON_ONCE(!event))
2460      -  			continue;
2461      -
2462      -  		if (WARN_ON_ONCE(!event->attr.precise_ip))
2463      -  			continue;
2464 2480
2465      -  		__intel_pmu_pebs_events(event, iregs, data, base,
2466      -  					top, bit, counts[bit],
2467      -  					setup_pebs_adaptive_sample_data);
     2481 +  		__intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit],
     2482 +  					    counts[bit], setup_pebs_adaptive_sample_data);
2468 2483    	}
2469 2484    }
2470 2485

0 commit comments

Comments
 (0)