Commit 1e69268
perf/x86/intel: Support PEBS counters snapshotting
JIRA: https://issues.redhat.com/browse/RHEL-47444

upstream
========
commit e02e9b0
Author: Kan Liang <kan.liang@linux.intel.com>
Date:   Tue Jan 21 07:23:03 2025 -0800

description
===========
The counters snapshotting is a new adaptive PEBS extension, which can
capture programmable counters, fixed-function counters, and performance
metrics in a PEBS record. The feature is available in PEBS format V6.

The target counters can be configured in the new fields of MSR_PEBS_CFG.
The PEBS hardware then generates the bit mask of counters (Counters Group
Header), followed by the contents of all the requested counters, in a
PEBS record.

The current Linux perf sample-read feature can read all events in a group
when any event in the group overflows. However, the rdpmc in the
NMI/overflow handler has a small gap from the overflow, and each rdpmc
read adds some overhead. The counters snapshotting feature can be used as
an accurate and low-overhead replacement.

Extend intel_update_topdown_event() to accept the value from PEBS records.

Add a new PEBS_CNTR flag to indicate a sample-read group that utilizes the
counters snapshotting feature. When the group is scheduled, the PEBS
configuration can be updated accordingly.

To prevent the case where a PEBS record value might be in the past
relative to what is already in the event, perf always stops the PMU and
drains the PEBS buffer before updating the corresponding event->count.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20250121152303.3128733-4-kan.liang@linux.intel.com
Signed-off-by: Michael Petlan <mpetlan@redhat.com>
1 parent acef308 commit 1e69268
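
Note (not part of the commit): the group shape that the new intel_pmu_hw_config() check below tags with PERF_X86_EVENT_PEBS_CNTR is an ordinary sample-read group whose sampling leader requests PEBS via precise_ip. A minimal user-space sketch follows; the event choices, period, and error handling are illustrative only, and on CPUs without PEBS format V6 and PEBS baseline the same group simply keeps using the existing rdpmc-based read path.

/*
 * Sketch: open a sample-read group (leader samples, members are read in
 * each sample) with a PEBS-capable leader. Illustrative only.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int open_sample_read_group(void)
{
	struct perf_event_attr lead = {0}, member = {0};
	int lead_fd, member_fd;

	lead.size = sizeof(lead);
	lead.type = PERF_TYPE_HARDWARE;
	lead.config = PERF_COUNT_HW_CPU_CYCLES;
	lead.sample_period = 100003;
	lead.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ;	/* sample-read group */
	lead.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
	lead.precise_ip = 2;					/* request PEBS */
	lead.disabled = 1;					/* caller enables via ioctl */

	member.size = sizeof(member);
	member.type = PERF_TYPE_HARDWARE;
	member.config = PERF_COUNT_HW_INSTRUCTIONS;

	lead_fd = perf_event_open(&lead, 0, -1, -1, 0);
	if (lead_fd < 0)
		return -1;
	member_fd = perf_event_open(&member, 0, -1, lead_fd, 0);
	if (member_fd < 0) {
		close(lead_fd);
		return -1;
	}
	return lead_fd;
}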

6 files changed: +284 −25 lines changed

arch/x86/events/core.c

Lines changed: 13 additions & 0 deletions
@@ -94,6 +94,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
 
 DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter);
 
+DEFINE_STATIC_CALL_NULL(x86_pmu_late_setup, *x86_pmu.late_setup);
+
 /*
  * This one is magic, it will get called even when PMU init fails (because
  * there is no PMU), in which case it should simply return NULL.
@@ -1298,6 +1300,15 @@ static void x86_pmu_enable(struct pmu *pmu)
 
 	if (cpuc->n_added) {
 		int n_running = cpuc->n_events - cpuc->n_added;
+
+		/*
+		 * The late setup (after counters are scheduled)
+		 * is required for some cases, e.g., PEBS counters
+		 * snapshotting. Because an accurate counter index
+		 * is needed.
+		 */
+		static_call_cond(x86_pmu_late_setup)();
+
 		/*
 		 * apply assignment obtained either from
 		 * hw_perf_group_sched_in() or x86_pmu_enable()
@@ -2036,6 +2047,8 @@ static void x86_pmu_static_call_update(void)
 
 	static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs);
 	static_call_update(x86_pmu_filter, x86_pmu.filter);
+
+	static_call_update(x86_pmu_late_setup, x86_pmu.late_setup);
 }
 
 static void _x86_pmu_read(struct perf_event *event)
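
Note (illustration, not from the hunks above): core.c only adds the static-call plumbing; a PMU driver opts in by filling the new x86_pmu.late_setup callback, presumably in one of the changed files not shown here. Roughly, the wiring would look like this, with hypothetical names:

/* Hypothetical consumer of the new hook; names are illustrative. */
static void example_pmu_late_setup(void)
{
	/*
	 * Called from x86_pmu_enable() once counter assignment is final,
	 * so index-dependent state (e.g. a PEBS counter-group mask built
	 * from each event's hwc->idx) can be programmed here.
	 */
}

static __initconst const struct x86_pmu example_pmu = {
	/* ... other callbacks ... */
	.late_setup	= example_pmu_late_setup,
};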

arch/x86/events/intel/core.c

Lines changed: 60 additions & 15 deletions
@@ -2709,21 +2709,32 @@ static void update_saved_topdown_regs(struct perf_event *event, u64 slots,
  * modify by a NMI. PMU has to be disabled before calling this function.
  */
 
-static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
+static u64 intel_update_topdown_event(struct perf_event *event, int metric_end, u64 *val)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct perf_event *other;
 	u64 slots, metrics;
 	bool reset = true;
 	int idx;
 
-	/* read Fixed counter 3 */
-	rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots);
-	if (!slots)
-		return 0;
+	if (!val) {
+		/* read Fixed counter 3 */
+		rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots);
+		if (!slots)
+			return 0;
 
-	/* read PERF_METRICS */
-	rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
+		/* read PERF_METRICS */
+		rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
+	} else {
+		slots = val[0];
+		metrics = val[1];
+		/*
+		 * Don't reset the PERF_METRICS and Fixed counter 3
+		 * for each PEBS record read. Utilize the RDPMC metrics
+		 * clear mode.
+		 */
+		reset = false;
+	}
 
 	for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
 		if (!is_topdown_idx(idx))
@@ -2766,17 +2777,19 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
 	return slots;
 }
 
-static u64 icl_update_topdown_event(struct perf_event *event)
+static u64 icl_update_topdown_event(struct perf_event *event, u64 *val)
 {
 	return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE +
-					  x86_pmu.num_topdown_events - 1);
+					  x86_pmu.num_topdown_events - 1,
+					  val);
 }
 
-DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
+DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, intel_pmu_topdown_event_update);
 
 static void intel_pmu_read_event(struct perf_event *event)
 {
-	if (event->hw.flags & (PERF_X86_EVENT_AUTO_RELOAD | PERF_X86_EVENT_TOPDOWN)) {
+	if (event->hw.flags & (PERF_X86_EVENT_AUTO_RELOAD | PERF_X86_EVENT_TOPDOWN) ||
+	    is_pebs_counter_event_group(event)) {
 		struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 		bool pmu_enabled = cpuc->enabled;
 
@@ -2788,8 +2801,12 @@ static void intel_pmu_read_event(struct perf_event *event)
 		if (pmu_enabled)
 			intel_pmu_disable_all();
 
-		if (is_topdown_event(event))
-			static_call(intel_pmu_update_topdown_event)(event);
+		/*
+		 * If the PEBS counters snapshotting is enabled,
+		 * the topdown event is available in PEBS records.
+		 */
+		if (is_topdown_event(event) && !is_pebs_counter_event_group(event))
+			static_call(intel_pmu_update_topdown_event)(event, NULL);
 		else
 			intel_pmu_drain_pebs_buffer();
 
@@ -2929,7 +2946,7 @@ static int intel_pmu_set_period(struct perf_event *event)
 static u64 intel_pmu_update(struct perf_event *event)
 {
 	if (unlikely(is_topdown_count(event)))
-		return static_call(intel_pmu_update_topdown_event)(event);
+		return static_call(intel_pmu_update_topdown_event)(event, NULL);
 
 	return x86_perf_event_update(event);
 }
@@ -3095,7 +3112,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 	 */
 	if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) {
 		handled++;
-		static_call(intel_pmu_update_topdown_event)(NULL);
+		static_call(intel_pmu_update_topdown_event)(NULL, NULL);
 	}
 
 	/*
@@ -3113,6 +3130,27 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 		if (!test_bit(bit, cpuc->active_mask))
 			continue;
 
+		/*
+		 * There may be unprocessed PEBS records in the PEBS buffer,
+		 * which still stores the previous values.
+		 * Process those records first before handling the latest value.
+		 * For example,
+		 * A is a regular counter
+		 * B is a PEBS event which reads A
+		 * C is a PEBS event
+		 *
+		 * The following can happen:
+		 * B-assist			A=1
+		 * C				A=2
+		 * B-assist			A=3
+		 * A-overflow-PMI		A=4
+		 * C-assist-PMI (PEBS buffer)	A=5
+		 *
+		 * The PEBS buffer has to be drained before handling the A-PMI
+		 */
+		if (is_pebs_counter_event_group(event))
+			x86_pmu.drain_pebs(regs, &data);
+
 		if (!intel_pmu_save_and_restart(event))
 			continue;
 
@@ -4060,6 +4098,13 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT;
 	}
 
+	if ((event->attr.sample_type & PERF_SAMPLE_READ) &&
+	    (x86_pmu.intel_cap.pebs_format >= 6) &&
+	    x86_pmu.intel_cap.pebs_baseline &&
+	    is_sampling_event(event) &&
+	    event->attr.precise_ip)
+		event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
+
 	if ((event->attr.type == PERF_TYPE_HARDWARE) ||
 	    (event->attr.type == PERF_TYPE_HW_CACHE))
 		return 0;
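
Note (sketch, not shown in this diff): is_pebs_counter_event_group(), used throughout the intel/core.c hunks above, is introduced elsewhere in the commit. Given the group-leader flag set in intel_pmu_hw_config(), a plausible shape is:

/* Plausible shape only; the real helper lives in a header changed by this commit. */
static inline bool is_pebs_counter_event_group(struct perf_event *event)
{
	return !!(event->group_leader->hw.flags & PERF_X86_EVENT_PEBS_CNTR);
}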
