Skip to content

Commit ada163f

Browse files
committed
perf/x86/intel: Support new data source for Lunar Lake
JIRA: https://issues.redhat.com/browse/RHEL-20059

upstream
========
commit 608f697
Author: Kan Liang <kan.liang@linux.intel.com>
Date: Wed Jun 26 07:35:37 2024 -0700

description
===========
A new PEBS data source format is introduced for the p-core of Lunar Lake. The data source field is extended to 8 bits with new encodings.

A new layout is introduced into the union intel_x86_pebs_dse. Introduce lnl_latency_data() to parse the new format. Enlarge pebs_data_source[] accordingly to include the new encodings.

Only the mem load and the mem store events can generate the data source. Introduce INTEL_HYBRID_LDLAT_CONSTRAINT and INTEL_HYBRID_STLAT_CONSTRAINT to mark them.

Add two new bits for the new cache-related data src, L2_MHB and MSC. The L2_MHB is short for L2 Miss Handling Buffer, which is similar to LFB (Line Fill Buffer), but tracks the L2 cache misses. The MSC stands for the memory-side cache.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Link: https://lkml.kernel.org/r/20240626143545.480761-6-kan.liang@linux.intel.com
Signed-off-by: Michael Petlan <mpetlan@redhat.com>
1 parent b679221 commit ada163f

File tree

4 files changed

+113
-5
lines changed

4 files changed

+113
-5
lines changed

arch/x86/events/intel/core.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6966,6 +6966,7 @@ __init int intel_pmu_init(void)
69666966
case INTEL_ARROWLAKE:
69676967
intel_pmu_init_hybrid(hybrid_big_small);
69686968

6969+
x86_pmu.pebs_latency_data = lnl_latency_data;
69696970
x86_pmu.get_event_constraints = mtl_get_event_constraints;
69706971
x86_pmu.hw_config = adl_hw_config;
69716972

@@ -6983,6 +6984,7 @@ __init int intel_pmu_init(void)
69836984
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
69846985
intel_pmu_init_skt(&pmu->pmu);
69856986

6987+
intel_pmu_pebs_data_source_lnl();
69866988
pr_cont("Lunarlake Hybrid events, ");
69876989
name = "lunarlake_hybrid";
69886990
break;

arch/x86/events/intel/ds.c

Lines changed: 92 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,15 @@ union intel_x86_pebs_dse {
6262
unsigned int mtl_fwd_blk:1;
6363
unsigned int ld_reserved4:24;
6464
};
65+
/*
 * Layout used by lnc_latency_data(): an 8-bit data source encoding
 * (lnc_dse) followed by per-access status bits. The bit-field widths
 * add up to 32 (8 + 2 + 1 + 1 + 1 + 1 + 18).
 */
struct {
66+
unsigned int lnc_dse:8;
67+
unsigned int ld_reserved5:2;
68+
unsigned int lnc_stlb_miss:1;
69+
unsigned int lnc_locked:1;
70+
unsigned int lnc_data_blk:1;
71+
unsigned int lnc_addr_blk:1;
72+
unsigned int ld_reserved6:18;
73+
};
6574
};
6675

6776

@@ -76,7 +85,7 @@ union intel_x86_pebs_dse {
7685
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
7786

7887
/* Version for Sandy Bridge and later */
79-
static u64 pebs_data_source[] = {
88+
static u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
8089
P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
8190
OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 local */
8291
OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
@@ -172,6 +181,40 @@ void __init intel_pmu_pebs_data_source_cmt(void)
172181
__intel_pmu_pebs_data_source_cmt(pebs_data_source);
173182
}
174183

184+
/*
 * Version for Lion Cove and later.
 *
 * Indexed by the data source encoding. The array is sized for
 * PERF_PEBS_DATA_SOURCE_MAX entries but only 0x00-0x11 are initialized
 * here; "Reserved" slots and every index past 0x11 are zero.
 * intel_pmu_pebs_data_source_lnl() copies this table into the core
 * hybrid PMU's pebs_data_source.
 */
185+
static u64 lnc_pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
186+
P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* 0x00: ukn L3 */
187+
OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 hit */
188+
OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x02: L1 hit */
189+
OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x03: LFB/L1 Miss Handling Buffer hit */
190+
0, /* 0x04: Reserved */
191+
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x05: L2 Hit */
192+
OP_LH | LEVEL(L2_MHB) | P(SNOOP, NONE), /* 0x06: L2 Miss Handling Buffer Hit */
193+
0, /* 0x07: Reserved */
194+
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* 0x08: L3 Hit */
195+
0, /* 0x09: Reserved */
196+
0, /* 0x0a: Reserved */
197+
0, /* 0x0b: Reserved */
198+
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* 0x0c: L3 Hit Snoop Fwd */
199+
OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x0d: L3 Hit Snoop HitM */
200+
0, /* 0x0e: Reserved */
201+
P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x0f: L3 Miss Snoop HitM */
202+
OP_LH | LEVEL(MSC) | P(SNOOP, NONE), /* 0x10: Memory-side Cache Hit */
203+
OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* 0x11: Local Memory Hit */
204+
};
205+
206+
/*
 * Fill the per-PMU PEBS data source tables of both hybrid PMUs:
 * the core PMU gets a copy of lnc_pebs_data_source[], the atom PMU
 * gets a copy of pebs_data_source[] which is then adjusted by
 * __intel_pmu_pebs_data_source_cmt().
 */
void __init intel_pmu_pebs_data_source_lnl(void)
207+
{
208+
u64 *data_source;
209+
210+
/* Core PMU: use the Lion Cove table as-is. */
data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
211+
memcpy(data_source, lnc_pebs_data_source, sizeof(lnc_pebs_data_source));
212+
213+
/* Atom PMU: start from the generic table, then apply the cmt fix-ups. */
data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
214+
memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
215+
__intel_pmu_pebs_data_source_cmt(data_source);
216+
}
217+
175218
static u64 precise_store_data(u64 status)
176219
{
177220
union intel_x86_pebs_dse dse;
@@ -263,7 +306,7 @@ static u64 __grt_latency_data(struct perf_event *event, u64 status,
263306

264307
WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big);
265308

266-
dse &= PERF_PEBS_DATA_SOURCE_MASK;
309+
dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK;
267310
val = hybrid_var(event->pmu, pebs_data_source)[dse];
268311

269312
pebs_set_tlb_lock(&val, tlb, lock);
@@ -299,6 +342,51 @@ u64 cmt_latency_data(struct perf_event *event, u64 status)
299342
dse.mtl_fwd_blk);
300343
}
301344

345+
/*
 * Translate a PEBS status word in the lnc_* layout of
 * union intel_x86_pebs_dse into a perf_mem_data_src value.
 *
 * @event:  the perf event the record belongs to; selects the per-PMU
 *          pebs_data_source table and supplies the hw.flags checked below.
 * @status: raw status word carrying the data source encoding and the
 *          STLB-miss / locked / data-block / address-block bits.
 *
 * Returns the assembled perf_mem_data_src value.
 */
static u64 lnc_latency_data(struct perf_event *event, u64 status)
346+
{
347+
union intel_x86_pebs_dse dse;
348+
union perf_mem_data_src src;
349+
u64 val;
350+
351+
dse.val = status;
352+
353+
/* LNC core latency data */
354+
val = hybrid_var(event->pmu, pebs_data_source)[status & PERF_PEBS_DATA_SOURCE_MASK];
355+
/* A zero table entry (reserved/unpopulated encoding) is reported as a load with level and snoop N/A. */
if (!val)
356+
val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);
357+
358+
/* TLB info: the miss case sets MISS|L2, otherwise HIT|L1|L2. */
if (dse.lnc_stlb_miss)
359+
val |= P(TLB, MISS) | P(TLB, L2);
360+
else
361+
val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
362+
363+
if (dse.lnc_locked)
364+
val |= P(LOCK, LOCKED);
365+
366+
/* Block info: data and address blocks may both be set; N/A only when neither is. */
if (dse.lnc_data_blk)
367+
val |= P(BLK, DATA);
368+
if (dse.lnc_addr_blk)
369+
val |= P(BLK, ADDR);
370+
if (!dse.lnc_data_blk && !dse.lnc_addr_blk)
371+
val |= P(BLK, NA);
372+
373+
src.val = val;
374+
/* The table entries encode loads; events flagged PERF_X86_EVENT_PEBS_ST_HSW get mem_op overwritten with STORE. */
if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
375+
src.mem_op = P(OP, STORE);
376+
377+
return src.val;
378+
}
379+
380+
/*
 * Dispatch PEBS latency data decoding by the type of the hybrid PMU
 * that owns @event: a hybrid_small PMU is decoded by cmt_latency_data(),
 * every other PMU type by lnc_latency_data().
 *
 * Returns whatever the selected decoder returns for @status.
 */
u64 lnl_latency_data(struct perf_event *event, u64 status)
381+
{
382+
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
383+
384+
if (pmu->pmu_type == hybrid_small)
385+
return cmt_latency_data(event, status);
386+
387+
return lnc_latency_data(event, status);
388+
}
389+
302390
static u64 load_latency_data(struct perf_event *event, u64 status)
303391
{
304392
union intel_x86_pebs_dse dse;
@@ -1089,6 +1177,8 @@ struct event_constraint intel_lnc_pebs_event_constraints[] = {
10891177
INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
10901178
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
10911179

1180+
INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3ff),
1181+
INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3),
10921182
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
10931183
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
10941184
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */

arch/x86/events/perf_event.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,14 @@ struct cpu_hw_events {
476476
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
477477
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID)
478478

479+
/*
 * Hybrid latency constraint for a mem load event: in addition to
 * PERF_X86_EVENT_PEBS_LAT_HYBRID it sets PERF_X86_EVENT_PEBS_LD_HSW.
 */
#define INTEL_HYBRID_LDLAT_CONSTRAINT(c, n) \
480+
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
481+
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_LD_HSW)
482+
483+
/*
 * Hybrid latency constraint for a mem store event: in addition to
 * PERF_X86_EVENT_PEBS_LAT_HYBRID it sets PERF_X86_EVENT_PEBS_ST_HSW,
 * the flag lnc_latency_data() checks before reporting the op as STORE.
 */
#define INTEL_HYBRID_STLAT_CONSTRAINT(c, n) \
484+
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
485+
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_ST_HSW)
486+
479487
/* Event constraint, but match on all event flags too. */
480488
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
481489
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)
@@ -655,8 +663,10 @@ enum {
655663
x86_lbr_exclusive_max,
656664
};
657665

658-
#define PERF_PEBS_DATA_SOURCE_MAX 0x10
666+
/*
 * PERF_PEBS_DATA_SOURCE_MAX sizes the pebs_data_source tables; 0x100
 * entries cover an 8-bit data source encoding, so the derived mask is 0xff.
 * The _GRT_ variants keep the 0x10-entry / 0xf-mask range that
 * __grt_latency_data() uses to index the table.
 */
#define PERF_PEBS_DATA_SOURCE_MAX 0x100
659667
#define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1)
668+
#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10
669+
#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)
660670

661671
enum hybrid_cpu_type {
662672
HYBRID_INTEL_NONE,
@@ -1539,6 +1549,8 @@ u64 grt_latency_data(struct perf_event *event, u64 status);
15391549

15401550
u64 cmt_latency_data(struct perf_event *event, u64 status);
15411551

1552+
u64 lnl_latency_data(struct perf_event *event, u64 status);
1553+
15421554
extern struct event_constraint intel_core2_pebs_event_constraints[];
15431555

15441556
extern struct event_constraint intel_atom_pebs_event_constraints[];
@@ -1660,6 +1672,8 @@ void intel_pmu_pebs_data_source_mtl(void);
16601672

16611673
void intel_pmu_pebs_data_source_cmt(void);
16621674

1675+
void intel_pmu_pebs_data_source_lnl(void);
1676+
16631677
int intel_pmu_setup_lbr_filter(struct perf_event *event);
16641678

16651679
void intel_pt_interrupt(void);

include/uapi/linux/perf_event.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1349,12 +1349,14 @@ union perf_mem_data_src {
13491349
#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
13501350
#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
13511351
#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
1352-
/* 5-0x7 available */
1352+
#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */
1353+
#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */
1354+
/* 0x7 available */
13531355
#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */
13541356
#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */
13551357
#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
13561358
#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
1357-
#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */
1359+
#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */
13581360
#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
13591361
#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */
13601362
#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */

0 commit comments

Comments
 (0)