Skip to content

Commit 7b2e808

Browse files
committed
Merge: perf: Backport patches to refactor data source encoding for NVIDIA Grace
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/7046 JIRA: https://issues.redhat.com/browse/RHEL-60216 This patch set extends support for the Arm SPE data source packet encoding for Neoverse CPUs to other Arm CPU variants. As part of this series, this support is extended to the Neoverse-V2 CPU. Signed-off-by: Anubhav Shelat <ashelat@redhat.com> Approved-by: Michael Petlan <mpetlan@redhat.com> Approved-by: jbrnak <jbrnak@redhat.com> Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> Merged-by: Augusto Caringi <acaringi@redhat.com>
2 parents acf9736 + 826f39a commit 7b2e808

File tree

5 files changed

+434
-48
lines changed

5 files changed

+434
-48
lines changed

tools/arch/arm64/include/asm/cputype.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,14 @@
8686
#define ARM_CPU_PART_CORTEX_X2 0xD48
8787
#define ARM_CPU_PART_NEOVERSE_N2 0xD49
8888
#define ARM_CPU_PART_CORTEX_A78C 0xD4B
89+
#define ARM_CPU_PART_CORTEX_X1C 0xD4C
90+
#define ARM_CPU_PART_CORTEX_X3 0xD4E
91+
#define ARM_CPU_PART_NEOVERSE_V2 0xD4F
92+
#define ARM_CPU_PART_CORTEX_A720 0xD81
93+
#define ARM_CPU_PART_CORTEX_X4 0xD82
94+
#define ARM_CPU_PART_NEOVERSE_V3 0xD84
95+
#define ARM_CPU_PART_CORTEX_X925 0xD85
96+
#define ARM_CPU_PART_CORTEX_A725 0xD87
8997

9098
#define APM_CPU_PART_XGENE 0x000
9199
#define APM_CPU_VAR_POTENZA 0x00
@@ -160,6 +168,14 @@
160168
#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
161169
#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
162170
#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C)
171+
#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C)
172+
#define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3)
173+
#define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2)
174+
#define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720)
175+
#define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4)
176+
#define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3)
177+
#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925)
178+
#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725)
163179
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
164180
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
165181
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)

tools/perf/arch/arm64/util/arm-spe.c

Lines changed: 117 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#include "../../../util/arm-spe.h"
2727
#include <tools/libc_compat.h> // reallocarray
2828

29+
#define ARM_SPE_CPU_MAGIC 0x1010101010101010ULL
30+
2931
#define KiB(x) ((x) * 1024)
3032
#define MiB(x) ((x) * 1024 * 1024)
3133

@@ -37,32 +39,142 @@ struct arm_spe_recording {
3739
bool *wrapped;
3840
};
3941

42+
/*
43+
* arm_spe_find_cpus() returns a new cpu map, and the caller should invoke
44+
* perf_cpu_map__put() to release the map after use.
45+
*/
46+
static struct perf_cpu_map *arm_spe_find_cpus(struct evlist *evlist)
47+
{
48+
struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
49+
struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
50+
struct perf_cpu_map *intersect_cpus;
51+
52+
/* cpu map is not "any" CPU, we have specific CPUs to work with */
53+
if (!perf_cpu_map__has_any_cpu(event_cpus)) {
54+
intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
55+
perf_cpu_map__put(online_cpus);
56+
/* Event can be "any" CPU so count all CPUs. */
57+
} else {
58+
intersect_cpus = online_cpus;
59+
}
60+
61+
return intersect_cpus;
62+
}
63+
4064
static size_t
4165
arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
42-
struct evlist *evlist __maybe_unused)
66+
struct evlist *evlist)
4367
{
44-
return ARM_SPE_AUXTRACE_PRIV_SIZE;
68+
struct perf_cpu_map *cpu_map = arm_spe_find_cpus(evlist);
69+
size_t size;
70+
71+
if (!cpu_map)
72+
return 0;
73+
74+
size = ARM_SPE_AUXTRACE_PRIV_MAX +
75+
ARM_SPE_CPU_PRIV_MAX * perf_cpu_map__nr(cpu_map);
76+
size *= sizeof(u64);
77+
78+
perf_cpu_map__put(cpu_map);
79+
return size;
80+
}
81+
82+
static int arm_spe_save_cpu_header(struct auxtrace_record *itr,
83+
struct perf_cpu cpu, __u64 data[])
84+
{
85+
struct arm_spe_recording *sper =
86+
container_of(itr, struct arm_spe_recording, itr);
87+
struct perf_pmu *pmu = NULL;
88+
struct perf_pmu tmp_pmu;
89+
char cpu_id_str[16];
90+
char *cpuid = NULL;
91+
u64 val;
92+
93+
snprintf(cpu_id_str, sizeof(cpu_id_str), "%d", cpu.cpu);
94+
tmp_pmu.cpus = perf_cpu_map__new(cpu_id_str);
95+
if (!tmp_pmu.cpus)
96+
return -ENOMEM;
97+
98+
/* Read CPU MIDR */
99+
cpuid = perf_pmu__getcpuid(&tmp_pmu);
100+
101+
/* The CPU map will not be used anymore, release it */
102+
perf_cpu_map__put(tmp_pmu.cpus);
103+
104+
if (!cpuid)
105+
return -ENOMEM;
106+
val = strtol(cpuid, NULL, 16);
107+
108+
data[ARM_SPE_MAGIC] = ARM_SPE_CPU_MAGIC;
109+
data[ARM_SPE_CPU] = cpu.cpu;
110+
data[ARM_SPE_CPU_NR_PARAMS] = ARM_SPE_CPU_PRIV_MAX - ARM_SPE_CPU_MIDR;
111+
data[ARM_SPE_CPU_MIDR] = val;
112+
113+
/* Find the associated Arm SPE PMU for the CPU */
114+
if (perf_cpu_map__has(sper->arm_spe_pmu->cpus, cpu))
115+
pmu = sper->arm_spe_pmu;
116+
117+
if (!pmu) {
118+
/* No Arm SPE PMU is found */
119+
data[ARM_SPE_CPU_PMU_TYPE] = ULLONG_MAX;
120+
data[ARM_SPE_CAP_MIN_IVAL] = 0;
121+
} else {
122+
data[ARM_SPE_CPU_PMU_TYPE] = pmu->type;
123+
124+
if (perf_pmu__scan_file(pmu, "caps/min_interval", "%lu", &val) != 1)
125+
val = 0;
126+
data[ARM_SPE_CAP_MIN_IVAL] = val;
127+
}
128+
129+
free(cpuid);
130+
return ARM_SPE_CPU_PRIV_MAX;
45131
}
46132

47133
static int arm_spe_info_fill(struct auxtrace_record *itr,
48134
struct perf_session *session,
49135
struct perf_record_auxtrace_info *auxtrace_info,
50136
size_t priv_size)
51137
{
138+
int i, ret;
139+
size_t offset;
52140
struct arm_spe_recording *sper =
53141
container_of(itr, struct arm_spe_recording, itr);
54142
struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
143+
struct perf_cpu_map *cpu_map;
144+
struct perf_cpu cpu;
145+
__u64 *data;
55146

56-
if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE)
147+
if (priv_size != arm_spe_info_priv_size(itr, session->evlist))
57148
return -EINVAL;
58149

59150
if (!session->evlist->core.nr_mmaps)
60151
return -EINVAL;
61152

153+
cpu_map = arm_spe_find_cpus(session->evlist);
154+
if (!cpu_map)
155+
return -EINVAL;
156+
62157
auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
63-
auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type;
158+
auxtrace_info->priv[ARM_SPE_HEADER_VERSION] = ARM_SPE_HEADER_CURRENT_VERSION;
159+
auxtrace_info->priv[ARM_SPE_HEADER_SIZE] =
160+
ARM_SPE_AUXTRACE_PRIV_MAX - ARM_SPE_HEADER_VERSION;
161+
auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2] = arm_spe_pmu->type;
162+
auxtrace_info->priv[ARM_SPE_CPUS_NUM] = perf_cpu_map__nr(cpu_map);
163+
164+
offset = ARM_SPE_AUXTRACE_PRIV_MAX;
165+
perf_cpu_map__for_each_cpu(cpu, i, cpu_map) {
166+
assert(offset < priv_size);
167+
data = &auxtrace_info->priv[offset];
168+
ret = arm_spe_save_cpu_header(itr, cpu, data);
169+
if (ret < 0)
170+
goto out;
171+
offset += ret;
172+
}
64173

65-
return 0;
174+
ret = 0;
175+
out:
176+
perf_cpu_map__put(cpu_map);
177+
return ret;
66178
}
67179

68180
static void

tools/perf/util/arm-spe-decoder/arm-spe-decoder.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -56,15 +56,15 @@ enum arm_spe_op_type {
5656
ARM_SPE_OP_BR_INDIRECT = 1 << 17,
5757
};
5858

59-
enum arm_spe_neoverse_data_source {
60-
ARM_SPE_NV_L1D = 0x0,
61-
ARM_SPE_NV_L2 = 0x8,
62-
ARM_SPE_NV_PEER_CORE = 0x9,
63-
ARM_SPE_NV_LOCAL_CLUSTER = 0xa,
64-
ARM_SPE_NV_SYS_CACHE = 0xb,
65-
ARM_SPE_NV_PEER_CLUSTER = 0xc,
66-
ARM_SPE_NV_REMOTE = 0xd,
67-
ARM_SPE_NV_DRAM = 0xe,
59+
enum arm_spe_common_data_source {
60+
ARM_SPE_COMMON_DS_L1D = 0x0,
61+
ARM_SPE_COMMON_DS_L2 = 0x8,
62+
ARM_SPE_COMMON_DS_PEER_CORE = 0x9,
63+
ARM_SPE_COMMON_DS_LOCAL_CLUSTER = 0xa,
64+
ARM_SPE_COMMON_DS_SYS_CACHE = 0xb,
65+
ARM_SPE_COMMON_DS_PEER_CLUSTER = 0xc,
66+
ARM_SPE_COMMON_DS_REMOTE = 0xd,
67+
ARM_SPE_COMMON_DS_DRAM = 0xe,
6868
};
6969

7070
struct arm_spe_record {

0 commit comments

Comments
 (0)