@@ -58,6 +58,7 @@ size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredDSH(
5858
// getSizeRequiredIOH: returns the kernel's cross-thread data size for
// `rootDeviceIndex` plus the result of getPerThreadDataSizeTotal(), with the
// sum passed through alignUp() using
// WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE as the alignment argument.
//
// Parameters (as visible in this hunk):
//   rootDeviceIndex - forwarded to kernel.getCrossThreadDataSize(); added by
//                     this change (the line marked `+`).
//   kernel          - queried for cross-thread data size and, through
//                     getKernelInfo(), for getMaxSimdSize().
//   localWorkSize   - forwarded unchanged to getPerThreadDataSizeTotal().
//
// NOTE(review): `threadPayload` is defined on lines that fall between the two
// hunks and are not shown here - confirm its origin against the full file.
5959template <typename GfxFamily>
6060size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredIOH(
61+ uint32_t rootDeviceIndex,
6162 const Kernel &kernel,
6263 size_t localWorkSize) {
6364 typedef typename GfxFamily::WALKER_TYPE WALKER_TYPE;
@@ -67,7 +68,7 @@ size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredIOH(
6768
6869 auto numChannels = PerThreadDataHelper::getNumLocalIdChannels (*threadPayload);
6970 uint32_t grfSize = sizeof (typename GfxFamily::GRF);
70- return alignUp ((kernel.getCrossThreadDataSize () +
71+ return alignUp ((kernel.getCrossThreadDataSize (rootDeviceIndex ) +
7172 getPerThreadDataSizeTotal (kernel.getKernelInfo ().getMaxSimdSize (), grfSize, numChannels, localWorkSize)),
7273 WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
7374}
@@ -102,7 +103,10 @@ size_t HardwareCommandsHelper<GfxFamily>::getTotalSizeRequiredDSH(
// getTotalSizeRequiredIOH: returns getSizeRequired(multiDispatchInfo, callback).
// The callback receives one DispatchInfo and returns getSizeRequiredIOH() for
// it, passing:
//   - the root device index taken from dispatchInfo.getClDevice(),
//   - the kernel obtained by dereferencing dispatchInfo.getKernel(),
//   - Math::computeTotalElementsCount() of the dispatch's local work-group size.
//
// The line marked `-` is the previous form, which called getSizeRequiredIOH()
// without a root device index; the lines marked `+` replace it.
// NOTE(review): how getSizeRequired() combines the per-dispatch results is not
// visible in this hunk - presumably it accumulates them; confirm in its definition.
102103template <typename GfxFamily>
103104size_t HardwareCommandsHelper<GfxFamily>::getTotalSizeRequiredIOH(
104105 const MultiDispatchInfo &multiDispatchInfo) {
105- return getSizeRequired (multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredIOH (*dispatchInfo.getKernel (), Math::computeTotalElementsCount (dispatchInfo.getLocalWorkgroupSize ())); });
106+ return getSizeRequired (multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredIOH (
107+ dispatchInfo.getClDevice ().getRootDeviceIndex (),
108+ *dispatchInfo.getKernel (),
109+ Math::computeTotalElementsCount (dispatchInfo.getLocalWorkgroupSize ())); });
106110}
107111
108112template <typename GfxFamily>
@@ -215,10 +219,13 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
215219 WALKER_TYPE<GfxFamily> *walkerCmd,
216220 INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
217221 bool localIdsGenerationByRuntime,
218- const HardwareInfo &hardwareInfo ) {
222+ const Device &device ) {
219223
220224 using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
221225
226+ auto &hardwareInfo = device.getHardwareInfo ();
227+ auto rootDeviceIndex = device.getRootDeviceIndex ();
228+
222229 DEBUG_BREAK_IF (simd != 1 && simd != 8 && simd != 16 && simd != 32 );
223230 auto inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired (kernel);
224231
@@ -227,7 +234,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
227234 const auto &patchInfo = kernelInfo.patchInfo ;
228235
229236 ssh.align (BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
230- kernel.patchBindlessSurfaceStateOffsets (ssh.getUsed ());
237+ kernel.patchBindlessSurfaceStateOffsets (device, ssh.getUsed ());
231238
232239 auto dstBindingTablePointer = EncodeSurfaceState<GfxFamily>::pushBindingTableAndSurfaceStates (ssh, (kernelInfo.patchInfo .bindingTableState != nullptr ) ? kernelInfo.patchInfo .bindingTableState ->Count : 0 ,
233240 kernel.getSurfaceStateHeap (), kernel.getSurfaceStateHeapSize (),
@@ -248,11 +255,11 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
248255 auto threadsPerThreadGroup = static_cast <uint32_t >(getThreadsPerWG (simd, localWorkItems));
249256 auto numChannels = PerThreadDataHelper::getNumLocalIdChannels (*threadPayload);
250257
251- uint32_t sizeCrossThreadData = kernel.getCrossThreadDataSize ();
258+ uint32_t sizeCrossThreadData = kernel.getCrossThreadDataSize (rootDeviceIndex );
252259
253260 size_t offsetCrossThreadData = HardwareCommandsHelper<GfxFamily>::sendCrossThreadData (
254261 ioh, kernel, inlineDataProgrammingRequired,
255- walkerCmd, sizeCrossThreadData);
262+ walkerCmd, sizeCrossThreadData, rootDeviceIndex );
256263
257264 size_t sizePerThreadDataTotal = 0 ;
258265 size_t sizePerThreadData = 0 ;
0 commit comments