Skip to content
This repository was archived by the owner on Jan 17, 2025. It is now read-only.
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions wperf-common/iorequest.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ struct spe_ctl_hdr
#define SPE_CTL_FLAG_VAL_MASK 0xFFFF // PMSLATFR_EL1.MINLAT is 16-bit wide
#define SPE_CTL_FLAG_VAL_12_BIT_MASK 0x0FFF // PMSLATFR_EL1.MINLAT is 12-bit wide if CountSize == 0b0010
UINT32 interval;
#define SPE_CTL_INTERVAL_VAL_MASK 0x0FFFFF // INTERVAL, bits [31:8] is interval counter reload value. NOTE(review): this mask is 20 bits wide, while a field spanning bits [31:8] is 24 bits (0xFFFFFF) - confirm the narrower limit is intended.
};

//
Expand Down
72 changes: 57 additions & 15 deletions wperf-driver/spe.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,22 +74,35 @@ VOID SPEWorkItemFunc(WDFWORKITEM WorkItem)
START_WORK_ON_CORE(context->core_idx);

_WriteStatusReg(PMBPTR_EL1, (UINT64)SpeMemoryBuffer);

__isb(_ARM64_BARRIER_SY);

/*
* Writing to PMSIRR_EL1 and PMSICR_EL1 seems to be ineffective for some reason.
* When PMSIRR_EL1 is written to, its value just goes to 0 and PMSICR_EL1 seems to be unchanged.
* At least this is what can be seen in the logs.
* This looks like unexpected behaviour, as the documentation seems to imply that
* PMSICR_EL1 needs to be zeroed before sampling starts and PMSIRR_EL1.Interval needs to be set.
_WriteStatusReg(PMSICR_EL1, 0);
if (context->config_flags & SPE_CTL_FLAG_RND)
* Setup `Sampling Interval Reload Register`
*/
UINT64 pmsirr = 0x00;
UINT32 interval = (context->interval & SPE_CTL_INTERVAL_VAL_MASK); // Controlled with period=<n>
{
_WriteStatusReg(PMSIRR_EL1, PMSIRR_EL1_RND | ((UINT64)context->interval << 8));
}
else {
_WriteStatusReg(PMSIRR_EL1, (UINT64)context->interval << 8);
UINT32 min_interval = spe_recommended_min_sampling_interval(_ReadStatusReg(PMSIDR_EL1));
if (interval < min_interval)
{
// Software should set this to a value GREATER
// than the minimum indicated by PMSIDR_EL1.Interval
interval = min_interval + 1;
KdPrintEx((DPFLTR_IHVDRIVER_ID, DPFLTR_INFO_LEVEL, "SPE: jitter=1, interval=%u is below recommended min sampling interval, new interval=%u \n", context->interval, interval));
}
}
KdPrintEx((DPFLTR_IHVDRIVER_ID, DPFLTR_INFO_LEVEL, "SPE: interval=%u \n", interval));
pmsirr |= (UINT64)interval << 8;

/*
* Setup `jitter`, Controls randomization of the sampling interval
*/
if (context->config_flags & SPE_CTL_FLAG_RND)
pmsirr |= PMSIRR_EL1_RND;

KdPrintEx((DPFLTR_IHVDRIVER_ID, DPFLTR_INFO_LEVEL, "SPE: pmsirr=0x%llX \n", pmsirr));
_WriteStatusReg(PMSIRR_EL1, pmsirr);
__isb(_ARM64_BARRIER_SY);

/*
* Setup `Sampling Filter Control Register`
Expand Down Expand Up @@ -123,20 +136,23 @@ VOID SPEWorkItemFunc(WDFWORKITEM WorkItem)
KdPrintEx((DPFLTR_IHVDRIVER_ID, DPFLTR_INFO_LEVEL, "SPE: min_latency=%u is 12-bit, min_latency is trimmed! \n", min_latency));
}
_WriteStatusReg(PMSLATFR_EL1, min_latency); // Configure PMSLATFR_EL1.MINLAT
__isb(_ARM64_BARRIER_SY);

pmsfcr |= PMSFCR_EL1_FL; // Enable Filter by latency
KdPrintEx((DPFLTR_IHVDRIVER_ID, DPFLTR_INFO_LEVEL, "SPE: min_latency=%u PMSFCR_EL1=0x%llX\n", min_latency, pmsfcr));
}

KdPrintEx((DPFLTR_IHVDRIVER_ID, DPFLTR_INFO_LEVEL, "SPE: pmsfcr=0x%llX \n", pmsfcr));
_WriteStatusReg(PMSFCR_EL1, pmsfcr);
__isb(_ARM64_BARRIER_SY);

/*
* Configure PMSCR_EL1 settings based on user-space flags. By default all settings are disabled
* (we zero the register). When user selects flag, e.g. /ts_enable=1/ we enable given setting
* (e.g. TS bit) to "ON" in this register.
*/
_WriteStatusReg(PMSCR_EL1, 0x00);
__isb(_ARM64_BARRIER_SY);
if (context->config_flags & SPE_CTL_FLAG_TS)
{
// Enable timestamps with ts_enable filter:
Expand All @@ -146,9 +162,12 @@ VOID SPEWorkItemFunc(WDFWORKITEM WorkItem)
}

_WriteStatusReg(PMBSR_EL1, _ReadStatusReg(PMBSR_EL1) & (~PMBSR_EL1_S)); // Clear PMBSR_EL1.S
__isb(_ARM64_BARRIER_SY);
//PMBPTR_EL1[63:56] must equal PMBLIMITR_EL1.LIMIT[63:56]
_WriteStatusReg(PMBLIMITR_EL1, (UINT64)SpeMemoryBufferLimit | PMBLIMITR_EL1_E); // Enable PMBLIMITR_ELI1.E
__isb(_ARM64_BARRIER_SY);
_WriteStatusReg(PMSCR_EL1, _ReadStatusReg(PMSCR_EL1) | PMSCR_EL1_E0SPE_E1SPE); // Enable PMSCR_EL1.{E0SPE,E1SPE}
__isb(_ARM64_BARRIER_SY);

KdPrintEx((DPFLTR_IHVDRIVER_ID, DPFLTR_INFO_LEVEL, "Statistical Profiling Extension: memory buffer 0x%llX\n", _ReadStatusReg(PMBPTR_EL1)));
KdPrintEx((DPFLTR_IHVDRIVER_ID, DPFLTR_INFO_LEVEL, "Statistical Profiling Extension: memory buffer limit address %llX\n", _ReadStatusReg(PMBLIMITR_EL1)));
Expand Down Expand Up @@ -180,10 +199,12 @@ VOID SPEWorkItemFunc(WDFWORKITEM WorkItem)
START_WORK_ON_CORE(context->core_idx);

_WriteStatusReg(PMBLIMITR_EL1, 0); // Disable PMBLIMITR_ELI1.E
__isb(_ARM64_BARRIER_SY);
_WriteStatusReg(PMSCR_EL1, _ReadStatusReg(PMSCR_EL1) & (~PMSCR_EL1_E0SPE_E1SPE)); // Disable PMSCR_EL1.{E0SPE,E1SPE}

__isb(_ARM64_BARRIER_SY);
_WriteStatusReg(PMBSR_EL1, _ReadStatusReg(PMBSR_EL1) & (~PMBSR_EL1_S)); // Clear PMBSR_EL1.S

__isb(_ARM64_BARRIER_SY);

STOP_WORK_ON_CORE();

KdPrintEx((DPFLTR_IHVDRIVER_ID, DPFLTR_INFO_LEVEL, "Statistical Profiling Extension: memory buffer 0x%llX\n", _ReadStatusReg(PMBPTR_EL1)));
Expand Down Expand Up @@ -214,8 +235,11 @@ static VOID dpc_spe_overflow(struct _KDPC* dpc, PVOID ctx, PVOID sys_arg1, PVOID
KdPrintEx((DPFLTR_IHVDRIVER_ID, DPFLTR_INFO_LEVEL, "SPE_DPC profiling buffer full\n"));
//Disable sampling
_WriteStatusReg(PMBLIMITR_EL1, 0); // Disable PMBLIMITR_ELI1.E
__isb(_ARM64_BARRIER_SY);
_WriteStatusReg(PMBSR_EL1, _ReadStatusReg(PMBSR_EL1) & (~PMBSR_EL1_S)); // Clear PMBSR_EL1.S
__isb(_ARM64_BARRIER_SY);
_WriteStatusReg(PMSCR_EL1, _ReadStatusReg(PMSCR_EL1) & (~PMSCR_EL1_E0SPE_E1SPE)); // Disable PMSCR_EL1.{E0SPE,E1SPE}
__isb(_ARM64_BARRIER_SY);
spu->profiling_running = FALSE;
}
}
Expand Down Expand Up @@ -384,4 +408,22 @@ void spe_stop(WDFWORKITEM* workItem, UINT32 core_idx)
UNREFERENCED_PARAMETER(workItem);
UNREFERENCED_PARAMETER(core_idx);
#endif
}
}

/*
 * Translate the PMSIDR_EL1.Interval field into the recommended minimum
 * sampling interval.
 *
 * pmsidr_el1_value: raw value read from PMSIDR_EL1. Only the bits selected by
 *                   PMSIDR_EL1_Interval_MASK are used; they are shifted down
 *                   by 8 to obtain the 4-bit encoding.
 *
 * Returns the interval associated with the encoding. Encodings without a
 * dedicated entry (reserved values) yield 4096, the same as encoding 0b1000.
 */
UINT32 spe_recommended_min_sampling_interval(UINT64 pmsidr_el1_value)
{
    // One slot per possible 4-bit encoding; reserved encodings hold 4096.
    static const UINT32 min_interval_by_encoding[16] =
    {
        256,    // 0b0000
        4096,   // 0b0001 (reserved)
        512,    // 0b0010
        768,    // 0b0011
        1024,   // 0b0100
        1536,   // 0b0101
        2048,   // 0b0110
        3072,   // 0b0111
        4096,   // 0b1000
        4096,   // 0b1001 (reserved)
        4096,   // 0b1010 (reserved)
        4096,   // 0b1011 (reserved)
        4096,   // 0b1100 (reserved)
        4096,   // 0b1101 (reserved)
        4096,   // 0b1110 (reserved)
        4096    // 0b1111 (reserved)
    };

    const UINT64 encoding = (pmsidr_el1_value & PMSIDR_EL1_Interval_MASK) >> 8;

    // The extra 4-bit clamp keeps the index provably inside the table.
    return min_interval_by_encoding[encoding & 0x0F];
}
6 changes: 4 additions & 2 deletions wperf-driver/spe.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@
#define PMSIDR_EL1_CountSize_MASK (0x0F << 16) // CountSize, bits [19:16]
#define PMSIDR_EL1_CountSize_12Bit 0b0010 // 12-bit saturating counters.
#define PMSIDR_EL1_CountSize_16Bit 0b0011 // 16-bit saturating counters.
#define PMSIDR_EL1_Interval_MASK (0x0F << 8) // Recommended minimum sampling interval.

#define PMSIRR_EL1_RND BIT(0)
#define PMSIRR_EL1_RND BIT(0) // Add (pseudo-)random jitter to sampling interval.
#define PMBLIMITR_EL1_LIMIT_MASK (~((UINT64)0xFFF)) // PMBLIMITR.LIMIT, bits [63:12]

#define SPE_MEMORY_BUFFER_SIZE (PAGE_SIZE*128) // PAGE_SIZE is defined in WDM.h
Expand Down Expand Up @@ -111,4 +112,5 @@ void spe_stop(WDFWORKITEM* workItem, UINT32 core_idx);
void spe_destroy();

NTSTATUS spe_setup(ULONG numCores);
void spe_destroy();
void spe_destroy();
UINT32 spe_recommended_min_sampling_interval(UINT64 pmsidr_el1_value);
5 changes: 4 additions & 1 deletion wperf/pmu_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ void pmu_device::spe_start(const std::map<std::wstring, uint64_t>& flags)
ctl.event_filter = 0;
UINT8 opfilter = 0;
UINT64 config_flags = 0;
UINT32 interval = 0; // 0 will force minimum indicated by PMSIDR_EL1.Interval.
/*
* `config_flags` stores multiple values:
*
Expand All @@ -364,6 +365,8 @@ void pmu_device::spe_start(const std::map<std::wstring, uint64_t>& flags)
if (spe_device::get_filter_name(key) == L"store_filter" && val) opfilter |= SPE_OPERATON_FILTER_ST;
if (spe_device::get_filter_name(key) == L"branch_filter" && val) opfilter |= SPE_OPERATON_FILTER_B;
if (spe_device::get_filter_name(key) == L"ts_enable" && val) config_flags |= SPE_CTL_FLAG_TS;
if (spe_device::get_filter_name(key) == L"jitter" && val) config_flags |= SPE_CTL_FLAG_RND;
if (spe_device::get_filter_name(key) == L"period" && val) interval = val & SPE_CTL_INTERVAL_VAL_MASK;
if (spe_device::get_filter_name(key) == L"min_latency" && val)
{
UINT64 minlat = val & SPE_CTL_FLAG_VAL_MASK; // PMSLATFR_EL1.MINLAT is 16 - bit value
Expand All @@ -372,7 +375,7 @@ void pmu_device::spe_start(const std::map<std::wstring, uint64_t>& flags)
}
}
ctl.operation_filter = opfilter;
ctl.interval = 1024;
ctl.interval = interval;
ctl.config_flags = config_flags;

BOOL status = DeviceAsyncIoControl(m_device_handle, PMU_CTL_SPE_START, &ctl, sizeof(struct spe_ctl_hdr), NULL, 0, &res_len);
Expand Down
12 changes: 9 additions & 3 deletions wperf/spe_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,9 @@ const std::vector<std::wstring> spe_device::m_filter_names = {
L"store_filter",
L"branch_filter",
L"ts_enable",
L"min_latency"
L"min_latency",
L"jitter",
L"period"
};

// Filters also have aliases, this structure helps to translate alias to filter name
Expand All @@ -418,7 +420,9 @@ const std::map<std::wstring, std::wstring> spe_device::m_filter_names_aliases =
{ L"st", L"store_filter" },
{ L"b" , L"branch_filter" },
{ L"ts", L"ts_enable" },
{ L"min", L"min_latency" }
{ L"min", L"min_latency" },
{ L"j", L"jitter" },
{ L"per", L"period" }
};

// Filters also have descriptions, this structure maps a filter name to its description
Expand All @@ -427,7 +431,9 @@ const std::map<std::wstring, std::wstring> spe_device::m_filter_names_descriptio
{ L"store_filter", L"Enables collection of store sampled operations, including all atomic operations." },
{ L"branch_filter", L"Enables collection of branch sampled operations, including direct and indirect branches and exception returns." },
{ L"ts_enable", L"Enables timestamping with value of generic timer." },
{ L"min_latency", L"Collect only samples with this latency or higher." }
{ L"min_latency", L"Collect only samples with this latency or higher." },
{ L"jitter", L"Use jitter to avoid resonance when sampling." },
// Key must be spelled exactly like the filter name ("period") so the description can be looked up.
{ L"period", L"Use period to set interval counter reload value. The minimum interval is used by default." },
};

spe_device::spe_device() {}
Expand Down
2 changes: 2 additions & 0 deletions wperf/spe_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ class spe_device
{
if (get_filter_name(fname) == L"min_latency")
return SPE_CTL_FLAG_VAL_MASK; // PMSLATFR_EL1, Sampling Latency Filter Register, MINLAT, bits [15:0]
else if (get_filter_name(fname) == L"period")
return SPE_CTL_INTERVAL_VAL_MASK; // PMSIRR_EL1, Sampling Interval Reload Register, INTERVAL, bits [31:8]
return 1;
}

Expand Down