
Commit 996297b

Merge: perf/x86: various intel updates
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/4998
JIRA: https://issues.redhat.com/browse/RHEL-20057
JIRA: https://issues.redhat.com/browse/RHEL-20059

To keep the code clean, I have taken part of Tony Luck's large patchset that introduces a new macro scheme for Intel CPU model matching, which starts with:

f055b62 x86/cpu/vfm: Update arch/x86/include/asm/intel-family.h
e6dfdc2 x86/cpu/vfm: Add new macros to work with (vendor/family/model) values
a9d0adc x86/cpu/vfm: Add/initialize x86_vfm field to struct cpuinfo_x86

A large patchset then follows that replaces the macros in various areas of the kernel, but it is not a one-shot replacement, as Tony Luck performs it in batches. I have therefore taken the three patches above (which introduce the new macros) plus the perf part of the replacement patches, as a prerequisite for the patches needed for bug RHEL-20059. The rest will be taken later by others, when they need it.

There was another option: adjust patches like a310007 ("perf/x86/intel/cstate: Add Arrowlake support") to the legacy naming scheme and take the new scheme later, but that would break future backports of both other Intel patches and Tony Luck's patchset. I have therefore preferred the first approach.

Signed-off-by: Michael Petlan <mpetlan@redhat.com>
Approved-by: David Arcari <darcari@redhat.com>
Approved-by: Artem Savkov <asavkov@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: Lucas Zampieri <lzampier@redhat.com>
2 parents 9e1d54c + f47eff1 commit 996297b
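The new scheme referenced above replaces separate vendor/family/model checks with a single packed value (the x86_vfm field added to struct cpuinfo_x86), so a CPU model match becomes one integer comparison. Below is a minimal userspace C sketch of that idea only; the bit layout, the pack_vfm() helper and the MODEL_EXAMPLE constant are made up for illustration and are not the kernel's actual macros from intel-family.h or the vfm patches.

#include <stdint.h>
#include <stdio.h>

/* Illustrative packing only: the real kernel macros (commit e6dfdc2)
 * may lay the bits out differently. */
static inline uint32_t pack_vfm(uint8_t vendor, uint8_t family, uint8_t model)
{
        return ((uint32_t)vendor << 16) | ((uint32_t)family << 8) | model;
}

#define VENDOR_INTEL    0
#define MODEL_EXAMPLE   pack_vfm(VENDOR_INTEL, 6, 0x97)   /* made-up part */

int main(void)
{
        /* As if filled from CPUID, like the x86_vfm field in cpuinfo_x86. */
        uint32_t cpu_vfm = pack_vfm(VENDOR_INTEL, 6, 0x97);

        /* One compare instead of separate vendor + family + model tests. */
        if (cpu_vfm == MODEL_EXAMPLE)
                printf("model matched\n");
        return 0;
}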

26 files changed: 1118 additions, 507 deletions


arch/x86/events/amd/core.c

Lines changed: 15 additions & 13 deletions
@@ -432,8 +432,10 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
          * be removed on one CPU at a time AND PMU is disabled
          * when we come here
          */
-        for (i = 0; i < x86_pmu.num_counters; i++) {
-                if (cmpxchg(nb->owners + i, event, NULL) == event)
+        for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
+                struct perf_event *tmp = event;
+
+                if (try_cmpxchg(nb->owners + i, &tmp, NULL))
                         break;
         }
 }
@@ -499,7 +501,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
          * because of successive calls to x86_schedule_events() from
          * hw_perf_group_sched_in() without hw_perf_enable()
          */
-        for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
+        for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) {
                 if (new == -1 || hwc->idx == idx)
                         /* assign free slot, prefer hwc->idx */
                         old = cmpxchg(nb->owners + idx, NULL, event);
@@ -542,7 +544,7 @@ static struct amd_nb *amd_alloc_nb(int cpu)
         /*
          * initialize all possible NB constraints
          */
-        for (i = 0; i < x86_pmu.num_counters; i++) {
+        for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
                 __set_bit(i, nb->event_constraints[i].idxmsk);
                 nb->event_constraints[i].weight = 1;
         }
@@ -735,7 +737,7 @@ static void amd_pmu_check_overflow(void)
          * counters are always enabled when this function is called and
          * ARCH_PERFMON_EVENTSEL_INT is always set.
          */
-        for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+        for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
                 if (!test_bit(idx, cpuc->active_mask))
                         continue;
 
@@ -755,7 +757,7 @@ static void amd_pmu_enable_all(int added)
 
         amd_brs_enable_all();
 
-        for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+        for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
                 /* only activate events which are marked as active */
                 if (!test_bit(idx, cpuc->active_mask))
                         continue;
@@ -947,7 +949,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
         /* Clear any reserved bits set by buggy microcode */
         status &= amd_pmu_global_cntr_mask;
 
-        for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+        for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
                 if (!test_bit(idx, cpuc->active_mask))
                         continue;
 
@@ -1282,7 +1284,7 @@ static __initconst const struct x86_pmu amd_pmu = {
         .addr_offset            = amd_pmu_addr_offset,
         .event_map              = amd_pmu_event_map,
         .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
-        .num_counters           = AMD64_NUM_COUNTERS,
+        .cntr_mask64            = GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0),
         .add                    = amd_pmu_add_event,
         .del                    = amd_pmu_del_event,
         .cntval_bits            = 48,
@@ -1381,7 +1383,7 @@ static int __init amd_core_pmu_init(void)
          */
         x86_pmu.eventsel        = MSR_F15H_PERF_CTL;
         x86_pmu.perfctr         = MSR_F15H_PERF_CTR;
-        x86_pmu.num_counters    = AMD64_NUM_COUNTERS_CORE;
+        x86_pmu.cntr_mask64     = GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0);
 
         /* Check for Performance Monitoring v2 support */
         if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
@@ -1391,9 +1393,9 @@ static int __init amd_core_pmu_init(void)
                 x86_pmu.version = 2;
 
                 /* Find the number of available Core PMCs */
-                x86_pmu.num_counters = ebx.split.num_core_pmc;
+                x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0);
 
-                amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
+                amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64;
 
                 /* Update PMC handling functions */
                 x86_pmu.enable_all = amd_pmu_v2_enable_all;
@@ -1421,12 +1423,12 @@ static int __init amd_core_pmu_init(void)
          * even numbered counter that has a consecutive adjacent odd
          * numbered counter following it.
          */
-        for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
+        for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2)
                 even_ctr_mask |= BIT_ULL(i);
 
         pair_constraint = (struct event_constraint)
                           __EVENT_CONSTRAINT(0, even_ctr_mask, 0,
-                                             x86_pmu.num_counters / 2, 0,
+                                             x86_pmu_max_num_counters(NULL) / 2, 0,
                                              PERF_X86_EVENT_PAIR);
 
         x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
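The recurring change in these hunks is that the PMU no longer describes its counters with a plain count (num_counters) but with a bitmask (cntr_mask64 / cntr_mask), built with GENMASK_ULL() and walked with for_each_set_bit(), which also copes with non-contiguous counter indices. Below is a minimal userspace C sketch of that pattern, with GENMASK_ULL() re-implemented locally and a plain bit-test loop standing in for the kernel's for_each_set_bit(); it is illustrative only, not the kernel code.

#include <stdint.h>
#include <stdio.h>

/* Local stand-in for the kernel's GENMASK_ULL(h, l): bits l..h set. */
#define GENMASK_ULL(h, l) \
        (((~0ULL) >> (63 - (h))) & ~((1ULL << (l)) - 1))

int main(void)
{
        /* Old style: a plain count implies counters 0..n-1. */
        int num_counters = 6;

        /* New style: a mask can also describe sparse counter layouts. */
        uint64_t cntr_mask64 = GENMASK_ULL(num_counters - 1, 0);

        /* Equivalent of for_each_set_bit(): visit only the set bits. */
        for (int idx = 0; idx < 64; idx++) {
                if (!(cntr_mask64 & (1ULL << idx)))
                        continue;
                printf("counter %d is available\n", idx);
        }
        return 0;
}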

arch/x86/events/amd/uncore.c

Lines changed: 6 additions & 2 deletions
@@ -162,7 +162,9 @@ static int amd_uncore_add(struct perf_event *event, int flags)
         /* if not, take the first available counter */
         hwc->idx = -1;
         for (i = 0; i < pmu->num_counters; i++) {
-                if (cmpxchg(&ctx->events[i], NULL, event) == NULL) {
+                struct perf_event *tmp = NULL;
+
+                if (try_cmpxchg(&ctx->events[i], &tmp, event)) {
                         hwc->idx = i;
                         break;
                 }
@@ -196,7 +198,9 @@ static void amd_uncore_del(struct perf_event *event, int flags)
         event->pmu->stop(event, PERF_EF_UPDATE);
 
         for (i = 0; i < pmu->num_counters; i++) {
-                if (cmpxchg(&ctx->events[i], event, NULL) == event)
+                struct perf_event *tmp = event;
+
+                if (try_cmpxchg(&ctx->events[i], &tmp, NULL))
                         break;
         }
 