Skip to content

Commit a9eae78

Browse files
fix: Add guid check while calculating Sysman memory Bandwidth
Related-To: LOCI-4597
Signed-off-by: Mayank Raghuwanshi <mayank.raghuwanshi@intel.com>
Source: c3e2e14
1 parent ee86ad0 commit a9eae78

File tree

6 files changed: 43 additions and 7 deletions.

level_zero/tools/source/sysman/linux/pmt/pmt.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,9 @@ const std::string PlatformMonitoringTech::baseTelemSysFS("/sys/class/intel_pmt")
2222
const std::string PlatformMonitoringTech::telem("telem");
2323
uint32_t PlatformMonitoringTech::rootDeviceTelemNodeIndex = 0;
2424

25+
std::string PlatformMonitoringTech::getGuid() {
26+
return guid;
27+
}
2528
ze_result_t PlatformMonitoringTech::readValue(const std::string key, uint32_t &value) {
2629
auto offset = keyOffsetMap.find(key);
2730
if (offset == keyOffsetMap.end()) {
@@ -146,7 +149,6 @@ ze_result_t PlatformMonitoringTech::init(FsAccess *pFsAccess, const std::string
146149
return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
147150
}
148151

149-
std::string guid;
150152
std::string guidPath = baseTelemSysFSNode + std::string("/guid");
151153
ze_result_t result = pFsAccess->read(guidPath, guid);
152154
if (ZE_RESULT_SUCCESS != result) {

level_zero/tools/source/sysman/linux/pmt/pmt.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2021-2022 Intel Corporation
2+
* Copyright (C) 2021-2023 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -28,6 +28,7 @@ class PlatformMonitoringTech : NEO::NonCopyableOrMovableClass {
2828
virtual ze_result_t readValue(const std::string key, uint32_t &value);
2929
virtual ze_result_t readValue(const std::string key, uint64_t &value);
3030
static ze_result_t enumerateRootTelemIndex(FsAccess *pFsAccess, std::string &gpuUpstreamPortPath);
31+
std::string getGuid();
3132
static void create(const std::vector<ze_device_handle_t> &deviceHandles,
3233
FsAccess *pFsAccess, std::string &gpuUpstreamPortPath,
3334
std::map<uint32_t, L0::PlatformMonitoringTech *> &mapOfSubDeviceIdToPmtObject);
@@ -37,6 +38,7 @@ class PlatformMonitoringTech : NEO::NonCopyableOrMovableClass {
3738
static uint32_t rootDeviceTelemNodeIndex;
3839
std::string telemetryDeviceEntry{};
3940
std::map<std::string, uint64_t> keyOffsetMap;
41+
std::string guid;
4042
ze_result_t init(FsAccess *pFsAccess, const std::string &gpuUpstreamPortPath, PRODUCT_FAMILY productFamily);
4143
static void doInitPmtObject(FsAccess *pFsAccess, uint32_t subdeviceId, PlatformMonitoringTech *pPmt, const std::string &gpuUpstreamPortPath,
4244
std::map<uint32_t, L0::PlatformMonitoringTech *> &mapOfSubDeviceIdToPmtObject, PRODUCT_FAMILY productFamily);

level_zero/tools/source/sysman/memory/linux/os_memory_imp_prelim.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,10 @@ ze_result_t LinuxMemoryImp::getHbmBandwidth(uint32_t numHbmModules, zes_mem_band
267267
}
268268

269269
ze_result_t LinuxMemoryImp::getHbmBandwidthPVC(uint32_t numHbmModules, zes_mem_bandwidth_t *pBandwidth) {
270+
std::string guid = pPmt->getGuid();
271+
if (guid != guid64BitMemoryCounters) {
272+
return getHbmBandwidth(numHbmModules, pBandwidth);
273+
}
270274
pBandwidth->readCounter = 0;
271275
pBandwidth->writeCounter = 0;
272276
pBandwidth->timestamp = 0;

level_zero/tools/source/sysman/sysman_const.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
const std::string vendorIntel("Intel(R) Corporation");
1212
const std::string unknown("unknown");
1313
const std::string intelPciId("0x8086");
14+
const std::string guid64BitMemoryCounters("0xb15a0ede");
1415
constexpr uint32_t MbpsToBytesPerSecond = 125000;
1516
constexpr double milliVoltsFactor = 1000.0;
1617
constexpr uint32_t maxRasErrorCategoryCount = 7;

level_zero/tools/test/unit_tests/sources/sysman/memory/linux/mock_memory_prelim.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ struct MockMemoryNeoDrm : public Drm {
276276

277277
struct MockMemoryPmt : public PlatformMonitoringTech {
278278

279+
using PlatformMonitoringTech::guid;
279280
using PlatformMonitoringTech::keyOffsetMap;
280281
std::vector<ze_result_t> mockReadValueReturnStatus{};
281282
std::vector<uint32_t> mockReadArgumentValue{};
@@ -286,7 +287,9 @@ struct MockMemoryPmt : public PlatformMonitoringTech {
286287
bool mockVfid0Status = false;
287288
bool mockVfid1Status = false;
288289
bool isRepeated = false;
289-
290+
void setGuid(std::string guid) {
291+
this->guid = guid;
292+
}
290293
MockMemoryPmt(FsAccess *pFsAccess, ze_bool_t onSubdevice, uint32_t subdeviceId) : PlatformMonitoringTech(pFsAccess, onSubdevice, subdeviceId) {}
291294
ze_result_t readValue(const std::string key, uint32_t &val) override {
292295
ze_result_t result = ZE_RESULT_SUCCESS;

level_zero/tools/test/unit_tests/sources/sysman/memory/linux/test_sysman_memory_prelim.cpp

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanM
406406
auto &productHelper = pLinuxSysmanImp->getDeviceHandle()->getNEODevice()->getProductHelper();
407407
hwInfo->platform.usRevId = productHelper.getHwRevIdFromStepping(REVISION_B, *hwInfo);
408408
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
409-
409+
pPmt->setGuid(guid64BitMemoryCounters);
410410
pPmt->mockVfid0Status = true;
411411
pSysfsAccess->mockReadUInt64Value.push_back(hbmRP0Frequency);
412412
pSysfsAccess->mockReadReturnStatus.push_back(ZE_RESULT_SUCCESS);
@@ -440,7 +440,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanM
440440
auto &productHelper = pLinuxSysmanImp->getDeviceHandle()->getNEODevice()->getProductHelper();
441441
hwInfo->platform.usRevId = productHelper.getHwRevIdFromStepping(REVISION_B, *hwInfo);
442442
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
443-
443+
pPmt->setGuid(guid64BitMemoryCounters);
444444
pPmt->mockVfid1Status = true;
445445
pSysfsAccess->mockReadUInt64Value.push_back(hbmRP0Frequency);
446446
pSysfsAccess->mockReadReturnStatus.push_back(ZE_RESULT_SUCCESS);
@@ -470,6 +470,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanM
470470
zes_mem_bandwidth_t bandwidth;
471471

472472
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
473+
pPmt->setGuid(guid64BitMemoryCounters);
473474
pPmt->mockReadArgumentValue.push_back(1);
474475
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_SUCCESS); // Return success after reading VF0_VFID
475476
pPmt->mockReadArgumentValue.push_back(0);
@@ -480,6 +481,24 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanM
480481
}
481482
}
482483

484+
HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanMemoryGetBandwidthAndVF0_VFIDFailsForOldGuidThenFailureIsReturned, IsPVC) {
485+
setLocalSupportedAndReinit(true);
486+
auto handles = getMemoryHandles(memoryHandleComponentCount);
487+
488+
for (auto &handle : handles) {
489+
zes_mem_properties_t properties = {};
490+
zesMemoryGetProperties(handle, &properties);
491+
492+
zes_mem_bandwidth_t bandwidth;
493+
494+
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
495+
pPmt->setGuid("0xb15a0edd");
496+
pPmt->mockReadArgumentValue.push_back(1);
497+
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE);
498+
EXPECT_EQ(zesMemoryGetBandwidth(handle, &bandwidth), ZE_RESULT_ERROR_UNSUPPORTED_FEATURE);
499+
}
500+
}
501+
483502
HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanMemoryGetBandwidthAndVF0_HBM_READ_HFailsThenFailureIsReturned, IsPVC) {
484503
setLocalSupportedAndReinit(true);
485504
auto handles = getMemoryHandles(memoryHandleComponentCount);
@@ -491,6 +510,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanM
491510
zes_mem_bandwidth_t bandwidth;
492511

493512
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
513+
pPmt->setGuid(guid64BitMemoryCounters);
494514
pPmt->mockReadArgumentValue.push_back(1);
495515
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_SUCCESS); // Return success after reading VF0_VFID
496516
pPmt->mockReadArgumentValue.push_back(0);
@@ -514,6 +534,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanM
514534
zes_mem_bandwidth_t bandwidth;
515535

516536
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
537+
pPmt->setGuid(guid64BitMemoryCounters);
517538
pPmt->mockReadArgumentValue.push_back(1);
518539
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_SUCCESS); // Return success after reading VF0_VFID
519540
pPmt->mockReadArgumentValue.push_back(0);
@@ -539,6 +560,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanM
539560
zes_mem_bandwidth_t bandwidth;
540561

541562
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
563+
pPmt->setGuid(guid64BitMemoryCounters);
542564
pPmt->mockReadArgumentValue.push_back(1);
543565
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_SUCCESS); // Return success after reading VF0_VFID
544566
pPmt->mockReadArgumentValue.push_back(0);
@@ -732,7 +754,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingGetBandwid
732754
auto &productHelper = pLinuxSysmanImp->getDeviceHandle()->getNEODevice()->getProductHelper();
733755
hwInfo->platform.usRevId = productHelper.getHwRevIdFromStepping(REVISION_B, *hwInfo);
734756
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(0));
735-
757+
pPmt->setGuid(guid64BitMemoryCounters);
736758
pPmt->mockVfid1Status = true;
737759
pSysfsAccess->mockReadUInt64Value.push_back(hbmRP0Frequency);
738760
pSysfsAccess->mockReadReturnStatus.push_back(ZE_RESULT_SUCCESS);
@@ -754,7 +776,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingGetBandwid
754776
auto &productHelper = pLinuxSysmanImp->getDeviceHandle()->getNEODevice()->getProductHelper();
755777
hwInfo->platform.usRevId = productHelper.getHwRevIdFromStepping(REVISION_B, *hwInfo);
756778
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(0));
757-
779+
pPmt->setGuid(guid64BitMemoryCounters);
758780
pPmt->mockVfid0Status = true;
759781
pSysfsAccess->mockReadUInt64Value.push_back(hbmRP0Frequency);
760782
pSysfsAccess->mockReadReturnStatus.push_back(ZE_RESULT_SUCCESS);
@@ -813,6 +835,7 @@ HWTEST2_F(SysmanDeviceMemoryFixture, GivenValidUsRevIdForRevisionBWhenCallingzes
813835
hwInfo->platform.usRevId = productHelper.getHwRevIdFromStepping(REVISION_B, *hwInfo);
814836
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
815837
pPmt->mockVfid1Status = true;
838+
pPmt->setGuid(guid64BitMemoryCounters);
816839
pSysfsAccess->mockReadUInt64Value.push_back(hbmRP0Frequency);
817840
pSysfsAccess->mockReadReturnStatus.push_back(ZE_RESULT_SUCCESS);
818841

@@ -1027,6 +1050,7 @@ TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenBothVfid0AndVfid1Are
10271050
zes_mem_bandwidth_t bandwidth;
10281051

10291052
auto pPmt = static_cast<MockMemoryPmt *>(pLinuxSysmanImp->getPlatformMonitoringTechAccess(properties.subdeviceId));
1053+
pPmt->setGuid(guid64BitMemoryCounters);
10301054
pPmt->mockReadArgumentValue.push_back(0);
10311055
pPmt->mockReadValueReturnStatus.push_back(ZE_RESULT_SUCCESS); // Return success after reading VF0_VFID
10321056
pPmt->mockReadArgumentValue.push_back(0);

0 commit comments

Comments
 (0)