
Commit bfb5113

Merge: perf: PerfMon counters adaptive PEBS extension - counter snapshotting
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/6878
JIRA: https://issues.redhat.com/browse/RHEL-47444

Use the hardware counter-snapshotting feature to improve the existing software sample-read support in the Linux kernel.

Signed-off-by: Michael Petlan <mpetlan@redhat.com>
Approved-by: David Arcari <darcari@redhat.com>
Approved-by: ashelat <ashelat@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: Augusto Caringi <acaringi@redhat.com>
2 parents ce6b791 + d2f9c78 commit bfb5113
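
With PEBS counter snapshotting, the hardware captures the values of the other counters in a sample-read group directly in the PEBS record, rather than the kernel reading them in software when the sample is taken. As a rough, hypothetical userspace illustration (not part of this merge), the sketch below opens the kind of event group the new intel_pmu_hw_config() check in arch/x86/events/intel/core.c targets: a sampling, precise (PEBS) leader with PERF_SAMPLE_READ and a grouped member. The event selection, period, and error handling are arbitrary.

/*
 * Hypothetical sketch: a leader-sampling group with PERF_SAMPLE_READ and
 * precise_ip, i.e. the shape of group the new PERF_X86_EVENT_PEBS_CNTR
 * check applies to on PMUs with adaptive PEBS format >= 6.
 */
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <unistd.h>

static int open_sample_read_group(void)
{
	struct perf_event_attr leader = { 0 }, member = { 0 };
	int lfd, mfd;

	leader.size = sizeof(leader);
	leader.type = PERF_TYPE_HARDWARE;
	leader.config = PERF_COUNT_HW_CPU_CYCLES;
	leader.sample_period = 100000;
	leader.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ;
	leader.read_format = PERF_FORMAT_GROUP;
	leader.precise_ip = 2;			/* request a precise (PEBS) event */
	leader.disabled = 1;

	member.size = sizeof(member);
	member.type = PERF_TYPE_HARDWARE;
	member.config = PERF_COUNT_HW_INSTRUCTIONS;
	member.read_format = PERF_FORMAT_GROUP;

	lfd = syscall(__NR_perf_event_open, &leader, 0, -1, -1, 0);
	if (lfd < 0)
		return -1;

	mfd = syscall(__NR_perf_event_open, &member, 0, -1, lfd, 0);
	return mfd < 0 ? -1 : lfd;
}

Tooling that mmaps the leader reads the member counts from each sample's PERF_SAMPLE_READ payload; on a kernel with this merge and capable hardware, those values come from the PEBS record itself.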

9 files changed: 481 additions, 145 deletions

arch/x86/events/core.c

Lines changed: 14 additions & 1 deletion
@@ -94,6 +94,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
 
 DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter);
 
+DEFINE_STATIC_CALL_NULL(x86_pmu_late_setup, *x86_pmu.late_setup);
+
 /*
  * This one is magic, it will get called even when PMU init fails (because
  * there is no PMU), in which case it should simply return NULL.
@@ -753,7 +755,7 @@ void x86_pmu_enable_all(int added)
 	}
 }
 
-static inline int is_x86_event(struct perf_event *event)
+int is_x86_event(struct perf_event *event)
 {
 	int i;
 
@@ -1298,6 +1300,15 @@ static void x86_pmu_enable(struct pmu *pmu)
 
 	if (cpuc->n_added) {
 		int n_running = cpuc->n_events - cpuc->n_added;
+
+		/*
+		 * The late setup (after counters are scheduled)
+		 * is required for some cases, e.g., PEBS counters
+		 * snapshotting. Because an accurate counter index
+		 * is needed.
+		 */
+		static_call_cond(x86_pmu_late_setup)();
+
 		/*
 		 * apply assignment obtained either from
 		 * hw_perf_group_sched_in() or x86_pmu_enable()
@@ -2036,6 +2047,8 @@ static void x86_pmu_static_call_update(void)
 
 	static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs);
 	static_call_update(x86_pmu_filter, x86_pmu.filter);
+
+	static_call_update(x86_pmu_late_setup, x86_pmu.late_setup);
 }
 
 static void _x86_pmu_read(struct perf_event *event)
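
The x86_pmu_late_setup hook added above follows the static-call pattern this file already uses for the other x86_pmu callbacks: declared NULL, bound to the vendor implementation in x86_pmu_static_call_update(), and invoked with static_call_cond() (a no-op while unbound) from x86_pmu_enable() once counters have been scheduled and their indexes are final. A stripped-down sketch of that wiring, using hypothetical names rather than the real x86_pmu structure, looks like this:

/* Hypothetical illustration of the static-call pattern used above. */
#include <linux/static_call.h>

struct my_pmu {
	void (*late_setup)(void);
};

static struct my_pmu my_pmu;	/* .late_setup filled in by vendor code */

DEFINE_STATIC_CALL_NULL(my_pmu_late_setup, *my_pmu.late_setup);

static void my_pmu_static_call_update(void)
{
	/* Patch the call site; it stays a no-op if late_setup is NULL. */
	static_call_update(my_pmu_late_setup, my_pmu.late_setup);
}

static void my_pmu_enable(void)
{
	/*
	 * Called only after counters are scheduled, so callbacks such as
	 * PEBS counter snapshotting see the final counter indexes.
	 */
	static_call_cond(my_pmu_late_setup)();
}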

arch/x86/events/intel/core.c

Lines changed: 81 additions & 31 deletions
@@ -2709,21 +2709,32 @@ static void update_saved_topdown_regs(struct perf_event *event, u64 slots,
  * modify by a NMI. PMU has to be disabled before calling this function.
  */
 
-static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
+static u64 intel_update_topdown_event(struct perf_event *event, int metric_end, u64 *val)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct perf_event *other;
 	u64 slots, metrics;
 	bool reset = true;
 	int idx;
 
-	/* read Fixed counter 3 */
-	rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots);
-	if (!slots)
-		return 0;
+	if (!val) {
+		/* read Fixed counter 3 */
+		rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots);
+		if (!slots)
+			return 0;
 
-	/* read PERF_METRICS */
-	rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
+		/* read PERF_METRICS */
+		rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
+	} else {
+		slots = val[0];
+		metrics = val[1];
+		/*
+		 * Don't reset the PERF_METRICS and Fixed counter 3
+		 * for each PEBS record read. Utilize the RDPMC metrics
+		 * clear mode.
+		 */
+		reset = false;
+	}
 
 	for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
 		if (!is_topdown_idx(idx))
@@ -2766,36 +2777,47 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
 	return slots;
 }
 
-static u64 icl_update_topdown_event(struct perf_event *event)
+static u64 icl_update_topdown_event(struct perf_event *event, u64 *val)
 {
 	return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE +
-					  x86_pmu.num_topdown_events - 1);
+					  x86_pmu.num_topdown_events - 1,
+					  val);
 }
 
-DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
+DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, intel_pmu_topdown_event_update);
 
-static void intel_pmu_read_topdown_event(struct perf_event *event)
+static void intel_pmu_read_event(struct perf_event *event)
 {
-	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	if (event->hw.flags & (PERF_X86_EVENT_AUTO_RELOAD | PERF_X86_EVENT_TOPDOWN) ||
+	    is_pebs_counter_event_group(event)) {
+		struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+		bool pmu_enabled = cpuc->enabled;
 
-	/* Only need to call update_topdown_event() once for group read. */
-	if ((cpuc->txn_flags & PERF_PMU_TXN_READ) &&
-	    !is_slots_event(event))
-		return;
+		/* Only need to call update_topdown_event() once for group read. */
+		if (is_metric_event(event) && (cpuc->txn_flags & PERF_PMU_TXN_READ))
+			return;
 
-	perf_pmu_disable(event->pmu);
-	static_call(intel_pmu_update_topdown_event)(event);
-	perf_pmu_enable(event->pmu);
-}
+		cpuc->enabled = 0;
+		if (pmu_enabled)
+			intel_pmu_disable_all();
 
-static void intel_pmu_read_event(struct perf_event *event)
-{
-	if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
-		intel_pmu_auto_reload_read(event);
-	else if (is_topdown_count(event))
-		intel_pmu_read_topdown_event(event);
-	else
-		x86_perf_event_update(event);
+		/*
+		 * If the PEBS counters snapshotting is enabled,
+		 * the topdown event is available in PEBS records.
+		 */
+		if (is_topdown_count(event) && !is_pebs_counter_event_group(event))
+			static_call(intel_pmu_update_topdown_event)(event, NULL);
+		else
+			intel_pmu_drain_pebs_buffer();
+
+		cpuc->enabled = pmu_enabled;
+		if (pmu_enabled)
+			intel_pmu_enable_all(0);
+
+		return;
+	}
+
+	x86_perf_event_update(event);
 }
 
 static void intel_pmu_enable_fixed(struct perf_event *event)
@@ -2924,7 +2946,7 @@ static int intel_pmu_set_period(struct perf_event *event)
 static u64 intel_pmu_update(struct perf_event *event)
 {
 	if (unlikely(is_topdown_count(event)))
-		return static_call(intel_pmu_update_topdown_event)(event);
+		return static_call(intel_pmu_update_topdown_event)(event, NULL);
 
 	return x86_perf_event_update(event);
 }
@@ -3062,7 +3084,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 
 		handled++;
 		x86_pmu_handle_guest_pebs(regs, &data);
-		x86_pmu.drain_pebs(regs, &data);
+		static_call(x86_pmu_drain_pebs)(regs, &data);
 		status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
 
 		/*
@@ -3090,7 +3112,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 	 */
 	if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) {
 		handled++;
-		static_call(intel_pmu_update_topdown_event)(NULL);
+		static_call(intel_pmu_update_topdown_event)(NULL, NULL);
 	}
 
 	/*
@@ -3108,6 +3130,27 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 		if (!test_bit(bit, cpuc->active_mask))
 			continue;
 
+		/*
+		 * There may be unprocessed PEBS records in the PEBS buffer,
+		 * which still stores the previous values.
+		 * Process those records first before handling the latest value.
+		 * For example,
+		 * A is a regular counter
+		 * B is a PEBS event which reads A
+		 * C is a PEBS event
+		 *
+		 * The following can happen:
+		 * B-assist			A=1
+		 * C				A=2
+		 * B-assist			A=3
+		 * A-overflow-PMI		A=4
+		 * C-assist-PMI (PEBS buffer)	A=5
+		 *
+		 * The PEBS buffer has to be drained before handling the A-PMI
+		 */
+		if (is_pebs_counter_event_group(event))
+			x86_pmu.drain_pebs(regs, &data);
+
 		if (!intel_pmu_save_and_restart(event))
 			continue;
 
@@ -4055,6 +4098,13 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT;
 	}
 
+	if ((event->attr.sample_type & PERF_SAMPLE_READ) &&
+	    (x86_pmu.intel_cap.pebs_format >= 6) &&
+	    x86_pmu.intel_cap.pebs_baseline &&
+	    is_sampling_event(event) &&
+	    event->attr.precise_ip)
+		event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
+
 	if ((event->attr.type == PERF_TYPE_HARDWARE) ||
 	    (event->attr.type == PERF_TYPE_HW_CACHE))
 		return 0;
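
The reworked intel_update_topdown_event() above takes an optional val array so that, when counter snapshotting is active, the SLOTS and PERF_METRICS values can come from a PEBS record instead of rdpmc reads, and the counters are not reset per record (the RDPMC metrics clear mode is relied on instead). A hypothetical caller sketch, not taken from this commit, would pass the snapshotted pair like this:

/*
 * Hypothetical sketch: hand the SLOTS / PERF_METRICS values captured in
 * a PEBS record to the topdown update path, instead of letting it read
 * and reset the counters itself (the val == NULL case above).
 */
static void update_topdown_from_pebs_record(struct perf_event *event,
					    u64 pebs_slots, u64 pebs_metrics)
{
	u64 val[2] = { pebs_slots, pebs_metrics };	/* val[0] = SLOTS, val[1] = PERF_METRICS */

	static_call(intel_pmu_update_topdown_event)(event, val);
}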
