Skip to content

Commit 1cbdb84

Browse files
committed
perf/x86/intel/ds: Clarify adaptive PEBS processing
JIRA: https://issues.redhat.com/browse/RHEL-47444

upstream
========
commit 7087bfb
Author: Kan Liang <kan.liang@linux.intel.com>
Date: Tue Nov 19 05:55:02 2024 -0800

description
===========
Modify the pebs_basic and pebs_meminfo structs to make the bitfields more
explicit to ease readability of the code.

Co-developed-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20241119135504.1463839-3-kan.liang@linux.intel.com
Signed-off-by: Michael Petlan <mpetlan@redhat.com>
1 parent fb9ceec commit 1cbdb84

File tree

2 files changed

+34
-25
lines changed

2 files changed

+34
-25
lines changed

arch/x86/events/intel/ds.c

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1916,8 +1916,6 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs,
19161916
}
19171917

19181918
#define PEBS_LATENCY_MASK 0xffff
1919-
#define PEBS_CACHE_LATENCY_OFFSET 32
1920-
#define PEBS_RETIRE_LATENCY_OFFSET 32
19211919

19221920
/*
19231921
* With adaptive PEBS the layout depends on what fields are configured.
@@ -1931,8 +1929,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
19311929
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
19321930
struct pebs_basic *basic = __pebs;
19331931
void *next_record = basic + 1;
1934-
u64 sample_type;
1935-
u64 format_size;
1932+
u64 sample_type, format_group;
19361933
struct pebs_meminfo *meminfo = NULL;
19371934
struct pebs_gprs *gprs = NULL;
19381935
struct x86_perf_regs *perf_regs;
@@ -1944,7 +1941,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
19441941
perf_regs->xmm_regs = NULL;
19451942

19461943
sample_type = event->attr.sample_type;
1947-
format_size = basic->format_size;
1944+
format_group = basic->format_group;
19481945
perf_sample_data_init(data, 0, event->hw.last_period);
19491946
data->period = event->hw.last_period;
19501947

@@ -1966,7 +1963,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
19661963

19671964
if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
19681965
if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)
1969-
data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK;
1966+
data->weight.var3_w = basic->retire_latency;
19701967
else
19711968
data->weight.var3_w = 0;
19721969
}
@@ -1976,12 +1973,12 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
19761973
* But PERF_SAMPLE_TRANSACTION needs gprs->ax.
19771974
* Save the pointer here but process later.
19781975
*/
1979-
if (format_size & PEBS_DATACFG_MEMINFO) {
1976+
if (format_group & PEBS_DATACFG_MEMINFO) {
19801977
meminfo = next_record;
19811978
next_record = meminfo + 1;
19821979
}
19831980

1984-
if (format_size & PEBS_DATACFG_GP) {
1981+
if (format_group & PEBS_DATACFG_GP) {
19851982
gprs = next_record;
19861983
next_record = gprs + 1;
19871984

@@ -1994,27 +1991,27 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
19941991
adaptive_pebs_save_regs(regs, gprs);
19951992
}
19961993

1997-
if (format_size & PEBS_DATACFG_MEMINFO) {
1994+
if (format_group & PEBS_DATACFG_MEMINFO) {
19981995
if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
1999-
u64 weight = meminfo->latency;
1996+
u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
1997+
meminfo->cache_latency : meminfo->mem_latency;
20001998

2001-
if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) {
2002-
data->weight.var2_w = weight & PEBS_LATENCY_MASK;
2003-
weight >>= PEBS_CACHE_LATENCY_OFFSET;
2004-
}
1999+
if (x86_pmu.flags & PMU_FL_INSTR_LATENCY)
2000+
data->weight.var2_w = meminfo->instr_latency;
20052001

20062002
/*
20072003
* Although meminfo::mem_latency is defined as a u64,
20082004
* only the lower 32 bits include the valid data
20092005
* in practice on Ice Lake and earlier platforms.
20102006
*/
20112007
if (sample_type & PERF_SAMPLE_WEIGHT) {
2012-
data->weight.full = weight ?:
2008+
data->weight.full = latency ?:
20132009
intel_get_tsx_weight(meminfo->tsx_tuning);
20142010
} else {
2015-
data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
2011+
data->weight.var1_dw = (u32)latency ?:
20162012
intel_get_tsx_weight(meminfo->tsx_tuning);
20172013
}
2014+
20182015
data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
20192016
}
20202017

@@ -2035,16 +2032,16 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
20352032
}
20362033
}
20372034

2038-
if (format_size & PEBS_DATACFG_XMMS) {
2035+
if (format_group & PEBS_DATACFG_XMMS) {
20392036
struct pebs_xmm *xmm = next_record;
20402037

20412038
next_record = xmm + 1;
20422039
perf_regs->xmm_regs = xmm->xmm;
20432040
}
20442041

2045-
if (format_size & PEBS_DATACFG_LBRS) {
2042+
if (format_group & PEBS_DATACFG_LBRS) {
20462043
struct lbr_entry *lbr = next_record;
2047-
int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
2044+
int num_lbr = ((format_group >> PEBS_DATACFG_LBR_SHIFT)
20482045
& 0xff) + 1;
20492046
next_record = next_record + num_lbr * sizeof(struct lbr_entry);
20502047

@@ -2054,11 +2051,11 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
20542051
}
20552052
}
20562053

2057-
WARN_ONCE(next_record != __pebs + (format_size >> 48),
2058-
"PEBS record size %llu, expected %llu, config %llx\n",
2059-
format_size >> 48,
2054+
WARN_ONCE(next_record != __pebs + basic->format_size,
2055+
"PEBS record size %u, expected %llu, config %llx\n",
2056+
basic->format_size,
20602057
(u64)(next_record - __pebs),
2061-
basic->format_size);
2058+
format_group);
20622059
}
20632060

20642061
static inline void *

arch/x86/include/asm/perf_event.h

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,9 @@ static inline bool is_topdown_idx(int idx)
422422
*/
423423

424424
struct pebs_basic {
425-
u64 format_size;
425+
u64 format_group:32,
426+
retire_latency:16,
427+
format_size:16;
426428
u64 ip;
427429
u64 applicable_counters;
428430
u64 tsc;
@@ -431,7 +433,17 @@ struct pebs_basic {
431433
struct pebs_meminfo {
432434
u64 address;
433435
u64 aux;
434-
u64 latency;
436+
union {
437+
/* pre Alder Lake */
438+
u64 mem_latency;
439+
/* Alder Lake and later */
440+
struct {
441+
u64 instr_latency:16;
442+
u64 pad2:16;
443+
u64 cache_latency:16;
444+
u64 pad3:16;
445+
};
446+
};
435447
u64 tsx_tuning;
436448
};
437449

0 commit comments

Comments
 (0)