Skip to content

Commit 64a80ae

Browse files
fix: Correct alignment check for immediate fill
Resolves: HSD-18042728025
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
1 parent af6ac59 commit 64a80ae

File tree

2 files changed

+9
-3
lines changed

2 files changed

+9
-3
lines changed

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2382,7 +2382,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
23822382

23832383
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
23842384

2385-
bool useImmediateFill = patternSize == 1 || (patternSize <= 4 && isAligned<sizeof(uint32_t)>(dstAllocation.offset) && isAligned<sizeof(uint32_t) * 4>(size));
2385+
const auto maxWgSize = this->device->getDeviceInfo().maxWorkGroupSize;
2386+
bool useImmediateFill = patternSize == 1 || (patternSize <= 4 && isAligned<sizeof(uint32_t)>(dstAllocation.offset) && isAligned<sizeof(uint32_t) * 4>(size) && (size <= maxWgSize || isAligned(size, maxWgSize)));
23862387
auto builtin = useImmediateFill
23872388
? BuiltinTypeHelper::adjustBuiltinType<Builtin::fillBufferImmediate>(isStateless, isHeapless)
23882389
: BuiltinTypeHelper::adjustBuiltinType<Builtin::fillBufferMiddle>(isStateless, isHeapless);
@@ -2415,6 +2416,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
24152416
if (useImmediateFill) {
24162417
launchParams.numKernelsInSplitLaunch++;
24172418
if (fillArguments.leftRemainingBytes > 0) {
2419+
DEBUG_BREAK_IF(useImmediateFill && patternSize > 1u);
24182420
res = appendUnalignedFillKernel(isStateless, fillArguments.leftRemainingBytes, dstAllocation, pattern, signalEvent, launchParams);
24192421
if (res) {
24202422
return res;
@@ -2459,6 +2461,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
24592461
launchParams.numKernelsExecutedInSplitLaunch++;
24602462

24612463
if (fillArguments.rightRemainingBytes > 0) {
2464+
DEBUG_BREAK_IF(useImmediateFill && patternSize > 1u);
24622465
dstAllocation.offset = fillArguments.rightOffset;
24632466
res = appendUnalignedFillKernel(isStateless, fillArguments.rightRemainingBytes, dstAllocation, pattern, signalEvent, launchParams);
24642467
if (res) {
@@ -4075,7 +4078,8 @@ void CommandListCoreFamily<gfxCoreFamily>::setupFillKernelArguments(size_t baseO
40754078
CmdListFillKernelArguments &outArguments,
40764079
Kernel *kernel) {
40774080
constexpr auto dataTypeSize = sizeof(uint32_t) * 4;
4078-
if (patternSize == 1 || (patternSize <= 4 && isAligned<sizeof(uint32_t)>(baseOffset) && isAligned<dataTypeSize>(dstSize))) {
4081+
const auto maxWgSize = this->device->getDeviceInfo().maxWorkGroupSize;
4082+
if (patternSize == 1 || (patternSize <= 4 && isAligned<sizeof(uint32_t)>(baseOffset) && isAligned<dataTypeSize>(dstSize) && (dstSize <= maxWgSize || isAligned(dstSize, maxWgSize)))) {
40794083
size_t middleSize = dstSize;
40804084
outArguments.mainOffset = baseOffset;
40814085
outArguments.leftRemainingBytes = sizeof(uint32_t) - (baseOffset % sizeof(uint32_t));
@@ -4087,7 +4091,7 @@ void CommandListCoreFamily<gfxCoreFamily>::setupFillKernelArguments(size_t baseO
40874091
}
40884092

40894093
size_t adjustedSize = middleSize / dataTypeSize;
4090-
outArguments.mainGroupSize = this->device->getDeviceInfo().maxWorkGroupSize;
4094+
outArguments.mainGroupSize = maxWgSize;
40914095
if (outArguments.mainGroupSize > adjustedSize && adjustedSize > 0) {
40924096
outArguments.mainGroupSize = adjustedSize;
40934097
}

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_8.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1371,6 +1371,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenUnalignePtrToFillWhenSettingFillPro
13711371
auto queue = std::make_unique<Mock<CommandQueue>>(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &queueDesc);
13721372
MockCommandListImmediateHw<FamilyType::gfxCoreFamily> cmdList;
13731373
cmdList.cmdQImmediate = queue.get();
1374+
cmdList.device = device;
13741375
auto unalignedOffset = 2u;
13751376
auto patternSize = 8u;
13761377
auto sizeToFill = 599u * patternSize;
@@ -1385,6 +1386,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenAlignePtrToFillWhenSettingFillPrope
13851386
auto queue = std::make_unique<Mock<CommandQueue>>(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &queueDesc);
13861387
MockCommandListImmediateHw<FamilyType::gfxCoreFamily> cmdList;
13871388
cmdList.cmdQImmediate = queue.get();
1389+
cmdList.device = device;
13881390
auto unalignedOffset = 4u;
13891391
auto patternSize = 8u;
13901392
auto sizeToFill = 599u * patternSize;

0 commit comments

Comments
 (0)