Skip to content

Commit de1e4e0

Browse files
Add adjustMaxWorkGroupCount helper
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
1 parent: 8d60fb2; commit: de1e4e0

File tree

9 files changed

+34
-15
lines changed

9 files changed

+34
-15
lines changed

level_zero/api/core/ze_module.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,7 @@ ZE_APIEXPORT ze_result_t ZE_APICALL
111111
zeKernelSuggestMaxCooperativeGroupCount(
112112
ze_kernel_handle_t hKernel,
113113
uint32_t *totalGroupCount) {
114-
return L0::Kernel::fromHandle(hKernel)->suggestMaxCooperativeGroupCount(totalGroupCount);
114+
return L0::Kernel::fromHandle(hKernel)->suggestMaxCooperativeGroupCount(totalGroupCount, NEO::EngineGroupType::Compute, false);
115115
}
116116

117117
ZE_APIEXPORT ze_result_t ZE_APICALL

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1880,7 +1880,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::programSyncBuffer(Kernel &kern
18801880
}
18811881

18821882
uint32_t maximalNumberOfWorkgroupsAllowed;
1883-
auto ret = kernel.suggestMaxCooperativeGroupCount(&maximalNumberOfWorkgroupsAllowed);
1883+
auto ret = kernel.suggestMaxCooperativeGroupCount(&maximalNumberOfWorkgroupsAllowed, this->engineGroupType,
1884+
device.getDefaultEngine().osContext->isEngineInstanced());
18841885
UNRECOVERABLE_IF(ret != ZE_RESULT_SUCCESS);
18851886
size_t requestedNumberOfWorkgroups = (pThreadGroupDimensions->groupCountX * pThreadGroupDimensions->groupCountY *
18861887
pThreadGroupDimensions->groupCountZ);

level_zero/core/source/kernel/kernel.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,8 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI {
118118

119119
virtual void patchWorkDim(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) = 0;
120120

121-
virtual ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount) = 0;
121+
virtual ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount, NEO::EngineGroupType engineGroupType,
122+
bool isEngineInstanced) = 0;
122123
virtual ze_result_t setCacheConfig(ze_cache_config_flags_t flags) = 0;
123124

124125
virtual ze_result_t getProfileInfo(zet_profile_properties_t *pProfileProperties) = 0;

level_zero/core/source/kernel/kernel_imp.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -393,7 +393,8 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz
393393
return ZE_RESULT_SUCCESS;
394394
}
395395

396-
ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount) {
396+
ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount, NEO::EngineGroupType engineGroupType,
397+
bool isEngineInstanced) {
397398
UNRECOVERABLE_IF(0 == groupSize[0]);
398399
UNRECOVERABLE_IF(0 == groupSize[1]);
399400
UNRECOVERABLE_IF(0 == groupSize[2]);
@@ -423,6 +424,7 @@ ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount
423424
hwHelper.getBarriersCountFromHasBarriers(barrierCount),
424425
workDim,
425426
localWorkSize);
427+
*totalGroupCount = hwHelper.adjustMaxWorkGroupCount(*totalGroupCount, engineGroupType, hardwareInfo, isEngineInstanced);
426428
return ZE_RESULT_SUCCESS;
427429
}
428430

level_zero/core/source/kernel/kernel_imp.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,8 @@ struct KernelImp : Kernel {
4545

4646
ze_result_t getKernelName(size_t *pSize, char *pName) override;
4747

48-
ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount) override;
48+
ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount, NEO::EngineGroupType engineGroupType,
49+
bool isEngineInstanced) override;
4950

5051
const uint8_t *getCrossThreadData() const override { return crossThreadData.get(); }
5152
uint32_t getCrossThreadDataSize() const override { return crossThreadDataSize; }

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1038,7 +1038,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
10381038
{
10391039
VariableBackup<uint32_t> groupCountX{&groupCount.groupCountX};
10401040
uint32_t maximalNumberOfWorkgroupsAllowed;
1041-
kernel.suggestMaxCooperativeGroupCount(&maximalNumberOfWorkgroupsAllowed);
1041+
kernel.suggestMaxCooperativeGroupCount(&maximalNumberOfWorkgroupsAllowed, NEO::EngineGroupType::Compute, false);
10421042
groupCountX = maximalNumberOfWorkgroupsAllowed + 1;
10431043
pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
10441044
pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);

opencl/source/kernel/kernel.cpp

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1056,15 +1056,18 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local
10561056
hardwareInfo.gtSystemInfo.EUCount, hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount);
10571057

10581058
auto barrierCount = kernelDescriptor.kernelAttributes.barrierCount;
1059-
return KernelHelper::getMaxWorkGroupCount(kernelInfo.getMaxSimdSize(),
1060-
availableThreadCount,
1061-
dssCount,
1062-
dssCount * KB * hardwareInfo.capabilityTable.slmSize,
1063-
hwHelper.alignSlmSize(slmTotalSize),
1064-
static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice()),
1065-
hwHelper.getBarriersCountFromHasBarriers(barrierCount),
1066-
workDim,
1067-
localWorkSize);
1059+
auto maxWorkGroupCount = KernelHelper::getMaxWorkGroupCount(kernelInfo.getMaxSimdSize(),
1060+
availableThreadCount,
1061+
dssCount,
1062+
dssCount * KB * hardwareInfo.capabilityTable.slmSize,
1063+
hwHelper.alignSlmSize(slmTotalSize),
1064+
static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice()),
1065+
hwHelper.getBarriersCountFromHasBarriers(barrierCount),
1066+
workDim,
1067+
localWorkSize);
1068+
auto isEngineInstanced = commandQueue->getCommandStreamReceiver(false).getOsContext().isEngineInstanced();
1069+
maxWorkGroupCount = hwHelper.adjustMaxWorkGroupCount(maxWorkGroupCount, engineGroupType, hardwareInfo, isEngineInstanced);
1070+
return maxWorkGroupCount;
10681071
}
10691072

10701073
inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceiver) {

shared/source/helpers/hw_helper.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,8 @@ class HwHelper {
126126
virtual bool useSystemMemoryPlacementForISA(const HardwareInfo &hwInfo) const = 0;
127127
virtual bool packedFormatsSupported() const = 0;
128128
virtual bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType) const = 0;
129+
virtual uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
130+
const HardwareInfo &hwInfo, bool isEngineInstanced) const = 0;
129131
virtual size_t getMaxFillPaternSizeForCopyEngine() const = 0;
130132
virtual bool isCopyOnlyEngineType(EngineGroupType type) const = 0;
131133
virtual bool isSipWANeeded(const HardwareInfo &hwInfo) const = 0;
@@ -334,6 +336,9 @@ class HwHelperHw : public HwHelper {
334336

335337
bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType) const override;
336338

339+
uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
340+
const HardwareInfo &hwInfo, bool isEngineInstanced) const override;
341+
337342
size_t getMaxFillPaternSizeForCopyEngine() const override;
338343

339344
bool isKmdMigrationSupported(const HardwareInfo &hwInfo) const override;

shared/source/helpers/hw_helper_base.inl

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -596,6 +596,12 @@ bool HwHelperHw<GfxFamily>::isCooperativeDispatchSupported(const EngineGroupType
596596
return true;
597597
}
598598

599+
template <typename GfxFamily>
600+
uint32_t HwHelperHw<GfxFamily>::adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
601+
const HardwareInfo &hwInfo, bool isEngineInstanced) const {
602+
return maxWorkGroupCount;
603+
}
604+
599605
template <typename GfxFamily>
600606
bool HwHelperHw<GfxFamily>::isKmdMigrationSupported(const HardwareInfo &hwInfo) const {
601607
return false;

0 commit comments

Comments
 (0)