Skip to content

Commit ee86ad0

Browse files
feature: Add Support for 64 bit aggregated read/write counters
Related-To: LOCI-4529 Signed-off-by: Mayank Raghuwanshi <mayank.raghuwanshi@intel.com> Source: d028857
1 parent c401011 commit ee86ad0

File tree

5 files changed

+422
-5
lines changed

5 files changed

+422
-5
lines changed

level_zero/tools/source/sysman/linux/pmt/pmt_xml_offsets.h

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2022 Intel Corporation
2+
* Copyright (C) 2022-2023 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -207,7 +207,15 @@ const std::map<std::string, std::map<std::string, uint64_t>> guidToKeyOffsetMap
207207
{"VF1_HBM2_READ", 344},
208208
{"VF1_HBM2_WRITE", 348},
209209
{"VF1_HBM3_READ", 360},
210-
{"VF1_HBM3_WRITE", 364}}},
210+
{"VF1_HBM3_WRITE", 364},
211+
{"VF0_HBM_READ_L", 384},
212+
{"VF0_HBM_READ_H", 388},
213+
{"VF0_HBM_WRITE_L", 392},
214+
{"VF0_HBM_WRITE_H", 396},
215+
{"VF1_HBM_READ_L", 400},
216+
{"VF1_HBM_READ_H", 404},
217+
{"VF1_HBM_WRITE_L", 408},
218+
{"VF1_HBM_WRITE_H", 412}}},
211219
{"0xb15a0edd", // For PVC device
212220
{{"HBM0MaxDeviceTemperature", 28},
213221
{"HBM1MaxDeviceTemperature", 36},
@@ -238,7 +246,54 @@ const std::map<std::string, std::map<std::string, uint64_t>> guidToKeyOffsetMap
238246
{"VF1_HBM2_READ", 344},
239247
{"VF1_HBM2_WRITE", 348},
240248
{"VF1_HBM3_READ", 360},
241-
{"VF1_HBM3_WRITE", 364}}},
249+
{"VF1_HBM3_WRITE", 364},
250+
{"VF0_HBM_READ_L", 384},
251+
{"VF0_HBM_READ_H", 388},
252+
{"VF0_HBM_WRITE_L", 392},
253+
{"VF0_HBM_WRITE_H", 396},
254+
{"VF1_HBM_READ_L", 400},
255+
{"VF1_HBM_READ_H", 404},
256+
{"VF1_HBM_WRITE_L", 408},
257+
{"VF1_HBM_WRITE_H", 412}}},
258+
{"0xb15a0ede", // For PVC device
259+
{{"HBM0MaxDeviceTemperature", 28},
260+
{"HBM1MaxDeviceTemperature", 36},
261+
{"TileMinTemperature", 40},
262+
{"TileMaxTemperature", 44},
263+
{"GTMinTemperature", 48},
264+
{"GTMaxTemperature", 52},
265+
{"VF0_VFID", 88},
266+
{"VF0_HBM0_READ", 92},
267+
{"VF0_HBM0_WRITE", 96},
268+
{"VF0_HBM1_READ", 104},
269+
{"VF0_HBM1_WRITE", 108},
270+
{"VF0_TIMESTAMP_L", 168},
271+
{"VF0_TIMESTAMP_H", 172},
272+
{"VF1_VFID", 176},
273+
{"VF1_HBM0_READ", 180},
274+
{"VF1_HBM0_WRITE", 184},
275+
{"VF1_HBM1_READ", 192},
276+
{"VF1_HBM1_WRITE", 196},
277+
{"VF1_TIMESTAMP_L", 256},
278+
{"VF1_TIMESTAMP_H", 260},
279+
{"HBM2MaxDeviceTemperature", 300},
280+
{"HBM3MaxDeviceTemperature", 308},
281+
{"VF0_HBM2_READ", 312},
282+
{"VF0_HBM2_WRITE", 316},
283+
{"VF0_HBM3_READ", 328},
284+
{"VF0_HBM3_WRITE", 332},
285+
{"VF1_HBM2_READ", 344},
286+
{"VF1_HBM2_WRITE", 348},
287+
{"VF1_HBM3_READ", 360},
288+
{"VF1_HBM3_WRITE", 364},
289+
{"VF0_HBM_READ_L", 384},
290+
{"VF0_HBM_READ_H", 388},
291+
{"VF0_HBM_WRITE_L", 392},
292+
{"VF0_HBM_WRITE_H", 396},
293+
{"VF1_HBM_READ_L", 400},
294+
{"VF1_HBM_READ_H", 404},
295+
{"VF1_HBM_WRITE_L", 408},
296+
{"VF1_HBM_WRITE_H", 412}}},
242297
{"0x41fe79a5", // For PVC root device
243298
{{"PPIN", 152},
244299
{"BoardNumber", 72}}}};

level_zero/tools/source/sysman/memory/linux/os_memory_imp_prelim.cpp

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,75 @@ ze_result_t LinuxMemoryImp::getHbmBandwidth(uint32_t numHbmModules, zes_mem_band
266266
return result;
267267
}
268268

269+
// Fills pBandwidth for PVC from the aggregated HBM counters that PMT exposes as
// 32-bit pairs under the keys "<vfId>_HBM_READ_L/_H" and "<vfId>_HBM_WRITE_L/_H".
//
// numHbmModules  multiplier used only when computing maxBandwidth.
// pBandwidth     output; every field is zeroed on entry and is populated only
//                after all four counter reads have succeeded, so a failing call
//                never leaves a partially filled structure behind.
//
// Returns the status reported by getVFIDString() or by the first failing
// pPmt->readValue() call; otherwise the status of the last (successful) read.
ze_result_t LinuxMemoryImp::getHbmBandwidthPVC(uint32_t numHbmModules, zes_mem_bandwidth_t *pBandwidth) {
    pBandwidth->readCounter = 0;
    pBandwidth->writeCounter = 0;
    pBandwidth->timestamp = 0;
    pBandwidth->maxBandwidth = 0;

    std::string vfId;
    ze_result_t result = getVFIDString(vfId);
    if (result != ZE_RESULT_SUCCESS) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s():getVFIDString returning error:0x%x while retriving VFID string \n", __FUNCTION__, result);
        return result;
    }

    auto &hwInfo = pDevice->getNEODevice()->getHardwareInfo();
    auto productFamily = hwInfo.platform.eProductFamily;
    auto &productHelper = pDevice->getNEODevice()->getProductHelper();
    auto stepping = productHelper.getSteppingFromHwRevId(hwInfo);

    // Joins the high/low halves into one 64-bit count and scales it by the
    // transaction size (32; presumably bytes per HBM transaction - TODO confirm).
    const auto toScaledCounter = [](uint32_t high, uint32_t low) -> uint64_t {
        constexpr uint64_t transactionSize = 32;
        return ((static_cast<uint64_t>(high) << 32) | static_cast<uint64_t>(low)) * transactionSize;
    };

    // NOTE(review): the low and high halves are fetched with separate reads; if
    // the underlying counter can advance between them the combined value may be
    // inconsistent. Confirm whether the PMT sample is latched.
    uint32_t readCounterL = 0;
    result = pPmt->readValue(vfId + "_HBM_READ_L", readCounterL);
    if (result != ZE_RESULT_SUCCESS) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s():readValue for readCounterL returning error:0x%x \n", __FUNCTION__, result);
        return result;
    }

    uint32_t readCounterH = 0;
    result = pPmt->readValue(vfId + "_HBM_READ_H", readCounterH);
    if (result != ZE_RESULT_SUCCESS) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s():readValue for readCounterH returning error:0x%x \n", __FUNCTION__, result);
        return result;
    }

    uint32_t writeCounterL = 0;
    result = pPmt->readValue(vfId + "_HBM_WRITE_L", writeCounterL);
    if (result != ZE_RESULT_SUCCESS) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s():readValue for writeCounterL returning error:0x%x \n", __FUNCTION__, result);
        return result;
    }

    uint32_t writeCounterH = 0;
    result = pPmt->readValue(vfId + "_HBM_WRITE_H", writeCounterH);
    if (result != ZE_RESULT_SUCCESS) {
        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s():readValue for writeCounterH returning error:0x%x \n", __FUNCTION__, result);
        return result;
    }

    uint64_t timeStampVal = 0;
    memoryGetTimeStamp(timeStampVal);

    uint64_t hbmFrequency = 0;
    getHbmFrequency(productFamily, stepping, hbmFrequency);

    // All inputs gathered successfully: publish the results in one place.
    pBandwidth->readCounter = toScaledCounter(readCounterH, readCounterL);
    pBandwidth->writeCounter = toScaledCounter(writeCounterH, writeCounterL);
    pBandwidth->timestamp = timeStampVal;
    pBandwidth->maxBandwidth = memoryBusWidth * hbmFrequency * numHbmModules; // Value in bytes/secs
    return result;
}
337+
269338
ze_result_t LinuxMemoryImp::getBandwidth(zes_mem_bandwidth_t *pBandwidth) {
270339
if (pPmt == nullptr) {
271340
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
@@ -284,7 +353,7 @@ ze_result_t LinuxMemoryImp::getBandwidth(zes_mem_bandwidth_t *pBandwidth) {
284353
break;
285354
case IGFX_PVC:
286355
numHbmModules = 4u;
287-
result = getHbmBandwidth(numHbmModules, pBandwidth);
356+
result = getHbmBandwidthPVC(numHbmModules, pBandwidth);
288357
break;
289358
default:
290359
result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;

level_zero/tools/source/sysman/memory/linux/os_memory_imp_prelim.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ class LinuxMemoryImp : public OsMemory, NEO::NonCopyableOrMovableClass {
4444
ze_result_t getVFIDString(std::string &vfID);
4545
ze_result_t getBandwidthForDg2(zes_mem_bandwidth_t *pBandwidth);
4646
ze_result_t getHbmBandwidth(uint32_t numHbmModules, zes_mem_bandwidth_t *pBandwidth);
47+
ze_result_t getHbmBandwidthPVC(uint32_t numHbmModules, zes_mem_bandwidth_t *pBandwidth);
4748
ze_result_t getHbmBandwidthEx(uint32_t numHbmModules, uint32_t counterMaxValue, uint64_t *pReadCounters, uint64_t *pWriteCounters, uint64_t *pMaxBandwidth, uint64_t timeout);
4849
static const std::string deviceMemoryHealth;
4950
bool isSubdevice = false;

level_zero/tools/test/unit_tests/sources/sysman/memory/linux/mock_memory_prelim.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,14 @@ constexpr uint64_t unallocatedSizeRegionTwo = 25 * GB;
3030
constexpr uint64_t unallocatedSizeRegionThree = 3 * GB;
3131
constexpr uint64_t unallocatedSizeRegionFour = 4 * GB;
3232

33+
// Canned 32-bit halves handed back by the MockMemoryPmt key lookup for the
// "VF0_"/"VF1_" + "HBM_READ_L", "HBM_READ_H", "HBM_WRITE_L" and "HBM_WRITE_H" keys.
// L/H are presumably the low/high words of one 64-bit aggregated counter.
constexpr uint32_t VF0HbmLRead = 16;
34+
constexpr uint32_t VF0HbmHRead = 2;
35+
constexpr uint32_t VF0HbmLWrite = 8;
36+
constexpr uint32_t VF0HbmHWrite = 2;
37+
constexpr uint32_t VF1HbmLRead = 16;
38+
constexpr uint32_t VF1HbmHRead = 2;
39+
constexpr uint32_t VF1HbmLWrite = 8;
40+
constexpr uint32_t VF1HbmHWrite = 2;
3341
constexpr uint16_t vF0VfidIndex = 88;
3442
constexpr uint16_t vF0Hbm0ReadIndex = 92;
3543
constexpr uint16_t vF0Hbm0WriteIndex = 96;
@@ -332,6 +340,14 @@ struct MockMemoryPmt : public PlatformMonitoringTech {
332340
val = vF0Hbm3ReadValue;
333341
} else if (key.compare("VF0_HBM3_WRITE") == 0) {
334342
val = vF0Hbm3WriteValue;
343+
} else if (key.compare("VF0_HBM_READ_L") == 0) {
344+
val = VF0HbmLRead;
345+
} else if (key.compare("VF0_HBM_READ_H") == 0) {
346+
val = VF0HbmHRead;
347+
} else if (key.compare("VF0_HBM_WRITE_L") == 0) {
348+
val = VF0HbmLWrite;
349+
} else if (key.compare("VF0_HBM_WRITE_H") == 0) {
350+
val = VF0HbmHWrite;
335351
} else {
336352
return ZE_RESULT_ERROR_NOT_AVAILABLE;
337353
}
@@ -363,6 +379,14 @@ struct MockMemoryPmt : public PlatformMonitoringTech {
363379
val = vF1Hbm3ReadValue;
364380
} else if (key.compare("VF1_HBM3_WRITE") == 0) {
365381
val = vF1Hbm3WriteValue;
382+
} else if (key.compare("VF1_HBM_READ_L") == 0) {
383+
val = VF1HbmLRead;
384+
} else if (key.compare("VF1_HBM_READ_H") == 0) {
385+
val = VF1HbmHRead;
386+
} else if (key.compare("VF1_HBM_WRITE_L") == 0) {
387+
val = VF1HbmLWrite;
388+
} else if (key.compare("VF1_HBM_WRITE_H") == 0) {
389+
val = VF1HbmHWrite;
366390
} else {
367391
return ZE_RESULT_ERROR_NOT_AVAILABLE;
368392
}

0 commit comments

Comments
 (0)