@@ -1056,15 +1056,18 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local
10561056 hardwareInfo.gtSystemInfo .EUCount , hardwareInfo.gtSystemInfo .ThreadCount / hardwareInfo.gtSystemInfo .EUCount );
10571057
10581058 auto barrierCount = kernelDescriptor.kernelAttributes .barrierCount ;
1059- return KernelHelper::getMaxWorkGroupCount (kernelInfo.getMaxSimdSize (),
1060- availableThreadCount,
1061- dssCount,
1062- dssCount * KB * hardwareInfo.capabilityTable .slmSize ,
1063- hwHelper.alignSlmSize (slmTotalSize),
1064- static_cast <uint32_t >(hwHelper.getMaxBarrierRegisterPerSlice ()),
1065- hwHelper.getBarriersCountFromHasBarriers (barrierCount),
1066- workDim,
1067- localWorkSize);
1059+ auto maxWorkGroupCount = KernelHelper::getMaxWorkGroupCount (kernelInfo.getMaxSimdSize (),
1060+ availableThreadCount,
1061+ dssCount,
1062+ dssCount * KB * hardwareInfo.capabilityTable .slmSize ,
1063+ hwHelper.alignSlmSize (slmTotalSize),
1064+ static_cast <uint32_t >(hwHelper.getMaxBarrierRegisterPerSlice ()),
1065+ hwHelper.getBarriersCountFromHasBarriers (barrierCount),
1066+ workDim,
1067+ localWorkSize);
1068+ auto isEngineInstanced = commandQueue->getCommandStreamReceiver (false ).getOsContext ().isEngineInstanced ();
1069+ maxWorkGroupCount = hwHelper.adjustMaxWorkGroupCount (maxWorkGroupCount, engineGroupType, hardwareInfo, isEngineInstanced);
1070+ return maxWorkGroupCount;
10681071}
10691072
10701073inline void Kernel::makeArgsResident (CommandStreamReceiver &commandStreamReceiver) {
0 commit comments