Skip to content

Commit 005ebc3

Browse files
committed
Merge: perf: Add Processor Trace Trigger Tracing and several Intel TH fixes
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/7036 JIRA: https://issues.redhat.com/browse/RHEL-45091 JIRA: https://issues.redhat.com/browse/RHEL-47424 This MR adds features requested by Intel: 1) PTTT (Processor Trace Trigger Tracing), a technology that enables the user to trigger Intel PT tracing by events, such as uprobes, so that the trace does not have to be recorded for the whole application execution time, but only for the code path of interest. 2) Several CPUs are added to the list of supported models in Intel Trace Hub. Signed-off-by: Michael Petlan <mpetlan@redhat.com> Approved-by: ashelat <ashelat@redhat.com> Approved-by: Steve Best <sbest@redhat.com> Approved-by: David Arcari <darcari@redhat.com> Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> Merged-by: Augusto Caringi <acaringi@redhat.com>
2 parents c18e5c4 + 45f706e commit 005ebc3

32 files changed

+1305
-364
lines changed

arch/x86/events/intel/core.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4004,8 +4004,8 @@ static int intel_pmu_hw_config(struct perf_event *event)
40044004

40054005
if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
40064006
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
4007-
if (!(event->attr.sample_type &
4008-
~intel_pmu_large_pebs_flags(event))) {
4007+
if (!(event->attr.sample_type & ~intel_pmu_large_pebs_flags(event)) &&
4008+
!has_aux_action(event)) {
40094009
event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
40104010
event->attach_state |= PERF_ATTACH_SCHED_CB;
40114011
}

arch/x86/events/intel/pt.c

Lines changed: 78 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,9 @@ static void pt_config_start(struct perf_event *event)
419419
struct pt *pt = this_cpu_ptr(&pt_ctx);
420420
u64 ctl = event->hw.aux_config;
421421

422+
if (READ_ONCE(event->hw.aux_paused))
423+
return;
424+
422425
ctl |= RTIT_CTL_TRACEEN;
423426
if (READ_ONCE(pt->vmx_on))
424427
perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
@@ -535,7 +538,24 @@ static void pt_config(struct perf_event *event)
535538
reg |= (event->attr.config & PT_CONFIG_MASK);
536539

537540
event->hw.aux_config = reg;
541+
542+
/*
543+
* Allow resume before starting so as not to overwrite a value set by a
544+
* PMI.
545+
*/
546+
barrier();
547+
WRITE_ONCE(pt->resume_allowed, 1);
548+
/* Configuration is complete, it is now OK to handle an NMI */
549+
barrier();
550+
WRITE_ONCE(pt->handle_nmi, 1);
551+
barrier();
538552
pt_config_start(event);
553+
barrier();
554+
/*
555+
* Allow pause after starting so its pt_config_stop() doesn't race with
556+
* pt_config_start().
557+
*/
558+
WRITE_ONCE(pt->pause_allowed, 1);
539559
}
540560

541561
static void pt_config_stop(struct perf_event *event)
@@ -829,11 +849,13 @@ static void pt_buffer_advance(struct pt_buffer *buf)
829849
buf->cur_idx++;
830850

831851
if (buf->cur_idx == buf->cur->last) {
832-
if (buf->cur == buf->last)
852+
if (buf->cur == buf->last) {
833853
buf->cur = buf->first;
834-
else
854+
buf->wrapped = true;
855+
} else {
835856
buf->cur = list_entry(buf->cur->list.next, struct topa,
836857
list);
858+
}
837859
buf->cur_idx = 0;
838860
}
839861
}
@@ -847,8 +869,11 @@ static void pt_buffer_advance(struct pt_buffer *buf)
847869
static void pt_update_head(struct pt *pt)
848870
{
849871
struct pt_buffer *buf = perf_get_aux(&pt->handle);
872+
bool wrapped = buf->wrapped;
850873
u64 topa_idx, base, old;
851874

875+
buf->wrapped = false;
876+
852877
if (buf->single) {
853878
local_set(&buf->data_size, buf->output_off);
854879
return;
@@ -866,7 +891,7 @@ static void pt_update_head(struct pt *pt)
866891
} else {
867892
old = (local64_xchg(&buf->head, base) &
868893
((buf->nr_pages << PAGE_SHIFT) - 1));
869-
if (base < old)
894+
if (base < old || (base == old && wrapped))
870895
base += buf->nr_pages << PAGE_SHIFT;
871896

872897
local_add(base - old, &buf->data_size);
@@ -1512,6 +1537,7 @@ void intel_pt_interrupt(void)
15121537
buf = perf_aux_output_begin(&pt->handle, event);
15131538
if (!buf) {
15141539
event->hw.state = PERF_HES_STOPPED;
1540+
WRITE_ONCE(pt->resume_allowed, 0);
15151541
return;
15161542
}
15171543

@@ -1520,6 +1546,7 @@ void intel_pt_interrupt(void)
15201546
ret = pt_buffer_reset_markers(buf, &pt->handle);
15211547
if (ret) {
15221548
perf_aux_output_end(&pt->handle, 0);
1549+
WRITE_ONCE(pt->resume_allowed, 0);
15231550
return;
15241551
}
15251552

@@ -1574,6 +1601,26 @@ static void pt_event_start(struct perf_event *event, int mode)
15741601
struct pt *pt = this_cpu_ptr(&pt_ctx);
15751602
struct pt_buffer *buf;
15761603

1604+
if (mode & PERF_EF_RESUME) {
1605+
if (READ_ONCE(pt->resume_allowed)) {
1606+
u64 status;
1607+
1608+
/*
1609+
* Only if the trace is not active and the error and
1610+
* stopped bits are clear, is it safe to start, but a
1611+
* PMI might have just cleared these, so resume_allowed
1612+
* must be checked again also.
1613+
*/
1614+
rdmsrl(MSR_IA32_RTIT_STATUS, status);
1615+
if (!(status & (RTIT_STATUS_TRIGGEREN |
1616+
RTIT_STATUS_ERROR |
1617+
RTIT_STATUS_STOPPED)) &&
1618+
READ_ONCE(pt->resume_allowed))
1619+
pt_config_start(event);
1620+
}
1621+
return;
1622+
}
1623+
15771624
buf = perf_aux_output_begin(&pt->handle, event);
15781625
if (!buf)
15791626
goto fail_stop;
@@ -1584,7 +1631,6 @@ static void pt_event_start(struct perf_event *event, int mode)
15841631
goto fail_end_stop;
15851632
}
15861633

1587-
WRITE_ONCE(pt->handle_nmi, 1);
15881634
hwc->state = 0;
15891635

15901636
pt_config_buffer(buf);
@@ -1602,13 +1648,28 @@ static void pt_event_stop(struct perf_event *event, int mode)
16021648
{
16031649
struct pt *pt = this_cpu_ptr(&pt_ctx);
16041650

1651+
if (mode & PERF_EF_PAUSE) {
1652+
if (READ_ONCE(pt->pause_allowed))
1653+
pt_config_stop(event);
1654+
return;
1655+
}
1656+
16051657
/*
16061658
* Protect against the PMI racing with disabling wrmsr,
16071659
* see comment in intel_pt_interrupt().
16081660
*/
16091661
WRITE_ONCE(pt->handle_nmi, 0);
16101662
barrier();
16111663

1664+
/*
1665+
* Prevent a resume from attempting to restart tracing, or a pause
1666+
* during a subsequent start. Do this after clearing handle_nmi so that
1667+
* pt_event_snapshot_aux() will not re-allow them.
1668+
*/
1669+
WRITE_ONCE(pt->pause_allowed, 0);
1670+
WRITE_ONCE(pt->resume_allowed, 0);
1671+
barrier();
1672+
16121673
pt_config_stop(event);
16131674

16141675
if (event->hw.state == PERF_HES_STOPPED)
@@ -1658,6 +1719,10 @@ static long pt_event_snapshot_aux(struct perf_event *event,
16581719
if (WARN_ON_ONCE(!buf->snapshot))
16591720
return 0;
16601721

1722+
/* Prevent pause/resume from attempting to start/stop tracing */
1723+
WRITE_ONCE(pt->pause_allowed, 0);
1724+
WRITE_ONCE(pt->resume_allowed, 0);
1725+
barrier();
16611726
/*
16621727
* There is no PT interrupt in this mode, so stop the trace and it will
16631728
* remain stopped while the buffer is copied.
@@ -1677,8 +1742,13 @@ static long pt_event_snapshot_aux(struct perf_event *event,
16771742
* Here, handle_nmi tells us if the tracing was on.
16781743
* If the tracing was on, restart it.
16791744
*/
1680-
if (READ_ONCE(pt->handle_nmi))
1745+
if (READ_ONCE(pt->handle_nmi)) {
1746+
WRITE_ONCE(pt->resume_allowed, 1);
1747+
barrier();
16811748
pt_config_start(event);
1749+
barrier();
1750+
WRITE_ONCE(pt->pause_allowed, 1);
1751+
}
16821752

16831753
return ret;
16841754
}
@@ -1794,7 +1864,9 @@ static __init int pt_init(void)
17941864
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
17951865
pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
17961866

1797-
pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
1867+
pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE |
1868+
PERF_PMU_CAP_ITRACE |
1869+
PERF_PMU_CAP_AUX_PAUSE;
17981870
pt_pmu.pmu.attr_groups = pt_attr_groups;
17991871
pt_pmu.pmu.task_ctx_nr = perf_sw_context;
18001872
pt_pmu.pmu.event_init = pt_event_init;

arch/x86/events/intel/pt.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ struct pt_pmu {
6262
* @head: logical write offset inside the buffer
6363
* @snapshot: if this is for a snapshot/overwrite counter
6464
* @single: use Single Range Output instead of ToPA
65+
* @wrapped: buffer advance wrapped back to the first topa table
6566
* @stop_pos: STOP topa entry index
6667
* @intr_pos: INT topa entry index
6768
* @stop_te: STOP topa entry pointer
@@ -79,6 +80,7 @@ struct pt_buffer {
7980
local64_t head;
8081
bool snapshot;
8182
bool single;
83+
bool wrapped;
8284
long stop_pos, intr_pos;
8385
struct topa_entry *stop_te, *intr_te;
8486
void **data_pages;
@@ -114,6 +116,8 @@ struct pt_filters {
114116
* @filters: last configured filters
115117
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
116118
* @vmx_on: 1 if VMX is ON on this cpu
119+
* @pause_allowed: PERF_EF_PAUSE is allowed to stop tracing
120+
* @resume_allowed: PERF_EF_RESUME is allowed to start tracing
117121
* @output_base: cached RTIT_OUTPUT_BASE MSR value
118122
* @output_mask: cached RTIT_OUTPUT_MASK MSR value
119123
*/
@@ -122,6 +126,8 @@ struct pt {
122126
struct pt_filters filters;
123127
int handle_nmi;
124128
int vmx_on;
129+
int pause_allowed;
130+
int resume_allowed;
125131
u64 output_base;
126132
u64 output_mask;
127133
};

drivers/hwtracing/intel_th/pci.c

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ enum {
2323
TH_PCI_RTIT_BAR = 4,
2424
};
2525

26-
#define BAR_MASK (BIT(TH_PCI_CONFIG_BAR) | BIT(TH_PCI_STH_SW_BAR))
2726

2827
#define PCI_REG_NPKDSC 0x80
2928
#define NPKDSC_TSACT BIT(5)
@@ -83,10 +82,16 @@ static int intel_th_pci_probe(struct pci_dev *pdev,
8382
if (err)
8483
return err;
8584

86-
err = pcim_iomap_regions_request_all(pdev, BAR_MASK, DRIVER_NAME);
85+
err = pcim_request_all_regions(pdev, DRIVER_NAME);
8786
if (err)
8887
return err;
8988

89+
if (!pcim_iomap(pdev, TH_PCI_CONFIG_BAR, 0))
90+
return -ENOMEM;
91+
92+
if (!pcim_iomap(pdev, TH_PCI_STH_SW_BAR, 0))
93+
return -ENOMEM;
94+
9095
if (pdev->resource[TH_PCI_RTIT_BAR].start) {
9196
resource[TH_MMIO_RTIT] = pdev->resource[TH_PCI_RTIT_BAR];
9297
r++;
@@ -100,15 +105,21 @@ static int intel_th_pci_probe(struct pci_dev *pdev,
100105
}
101106

102107
th = intel_th_alloc(&pdev->dev, drvdata, resource, r);
103-
if (IS_ERR(th))
104-
return PTR_ERR(th);
108+
if (IS_ERR(th)) {
109+
err = PTR_ERR(th);
110+
goto err_free_irq;
111+
}
105112

106113
th->activate = intel_th_pci_activate;
107114
th->deactivate = intel_th_pci_deactivate;
108115

109116
pci_set_master(pdev);
110117

111118
return 0;
119+
120+
err_free_irq:
121+
pci_free_irq_vectors(pdev);
122+
return err;
112123
}
113124

114125
static void intel_th_pci_remove(struct pci_dev *pdev)
@@ -304,15 +315,40 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
304315
.driver_data = (kernel_ulong_t)&intel_th_2x,
305316
},
306317
{
307-
/* Lunar Lake */
308-
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa824),
318+
/* Granite Rapids */
319+
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0963),
309320
.driver_data = (kernel_ulong_t)&intel_th_2x,
310321
},
311322
{
312323
/* Granite Rapids SOC */
313324
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3256),
314325
.driver_data = (kernel_ulong_t)&intel_th_2x,
315326
},
327+
{
328+
/* Sapphire Rapids SOC */
329+
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3456),
330+
.driver_data = (kernel_ulong_t)&intel_th_2x,
331+
},
332+
{
333+
/* Lunar Lake */
334+
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa824),
335+
.driver_data = (kernel_ulong_t)&intel_th_2x,
336+
},
337+
{
338+
/* Arrow Lake */
339+
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7724),
340+
.driver_data = (kernel_ulong_t)&intel_th_2x,
341+
},
342+
{
343+
/* Panther Lake-H */
344+
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe324),
345+
.driver_data = (kernel_ulong_t)&intel_th_2x,
346+
},
347+
{
348+
/* Panther Lake-P/U */
349+
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe424),
350+
.driver_data = (kernel_ulong_t)&intel_th_2x,
351+
},
316352
{
317353
/* Alder Lake CPU */
318354
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f),

include/linux/perf_event.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,12 @@ struct hw_perf_event {
170170
};
171171
struct { /* aux / Intel-PT */
172172
u64 aux_config;
173+
/*
174+
* For AUX area events, aux_paused cannot be a state
175+
* flag because it can be updated asynchronously to
176+
* state.
177+
*/
178+
unsigned int aux_paused;
173179
};
174180
struct { /* software */
175181
struct hrtimer hrtimer;
@@ -294,6 +300,7 @@ struct perf_event_pmu_context;
294300
#define PERF_PMU_CAP_NO_EXCLUDE 0x0040
295301
#define PERF_PMU_CAP_AUX_OUTPUT 0x0080
296302
#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
303+
#define PERF_PMU_CAP_AUX_PAUSE 0x0200
297304

298305
/**
299306
* pmu::scope
@@ -384,6 +391,8 @@ struct pmu {
384391
#define PERF_EF_START 0x01 /* start the counter when adding */
385392
#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */
386393
#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */
394+
#define PERF_EF_PAUSE 0x08 /* AUX area event, pause tracing */
395+
#define PERF_EF_RESUME 0x10 /* AUX area event, resume tracing */
387396

388397
/*
389398
* Adds/Removes a counter to/from the PMU, can be done inside a
@@ -423,6 +432,18 @@ struct pmu {
423432
*
424433
* ->start() with PERF_EF_RELOAD will reprogram the counter
425434
* value, must be preceded by a ->stop() with PERF_EF_UPDATE.
435+
*
436+
* ->stop() with PERF_EF_PAUSE will stop as simply as possible. Will not
437+
* overlap another ->stop() with PERF_EF_PAUSE nor ->start() with
438+
* PERF_EF_RESUME.
439+
*
440+
* ->start() with PERF_EF_RESUME will start as simply as possible but
441+
* only if the counter is not otherwise stopped. Will not overlap
442+
* another ->start() with PERF_EF_RESUME nor ->stop() with
443+
* PERF_EF_PAUSE.
444+
*
445+
* Notably, PERF_EF_PAUSE/PERF_EF_RESUME *can* be concurrent with other
446+
* ->stop()/->start() invocations, just not with one another.
426447
*/
427448
void (*start) (struct perf_event *event, int flags);
428449
void (*stop) (struct perf_event *event, int flags);
@@ -1687,6 +1708,13 @@ static inline bool has_aux(struct perf_event *event)
16871708
return event->pmu->setup_aux;
16881709
}
16891710

1711+
static inline bool has_aux_action(struct perf_event *event)
1712+
{
1713+
return event->attr.aux_sample_size ||
1714+
event->attr.aux_pause ||
1715+
event->attr.aux_resume;
1716+
}
1717+
16901718
static inline bool is_write_backward(struct perf_event *event)
16911719
{
16921720
return !!event->attr.write_backward;

0 commit comments

Comments
 (0)