11/*
2- * Copyright (C) 2018 Intel Corporation
2+ * Copyright (C) 2018-2019 Intel Corporation
33 *
44 * SPDX-License-Identifier: MIT
55 *
1212
1313namespace OCLRT {
1414
15+ template <typename GfxFamily>
16+ inline WALKER_TYPE<GfxFamily> *HardwareInterface<GfxFamily>::allocateWalkerSpace(LinearStream &commandStream,
17+ const Kernel &kernel) {
18+ auto walkerCmd = static_cast <WALKER_TYPE<GfxFamily> *>(commandStream.getSpace (sizeof (WALKER_TYPE<GfxFamily>)));
19+ *walkerCmd = GfxFamily::cmdInitGpgpuWalker;
20+ return walkerCmd;
21+ }
22+
1523template <typename GfxFamily>
1624void HardwareInterface<GfxFamily>::dispatchWalker(
1725 CommandQueue &commandQueue,
@@ -126,9 +134,6 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
126134 DEBUG_BREAK_IF (!(dispatchInfo.getOffset ().z == 0 || dispatchInfo.getDim () == 3 ));
127135 DEBUG_BREAK_IF (!(dispatchInfo.getOffset ().y == 0 || dispatchInfo.getDim () >= 2 ));
128136
129- // Determine SIMD size
130- uint32_t simd = kernel.getKernelInfo ().getMaxSimdSize ();
131-
132137 // If we don't have a required WGS, compute one opportunistically
133138 auto maxWorkGroupSize = static_cast <uint32_t >(commandQueue.getDevice ().getDeviceInfo ().maxWorkGroupSize );
134139 if (commandType == CL_COMMAND_NDRANGE_KERNEL) {
@@ -148,7 +153,6 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
148153 // Compute number of work groups
149154 Vec3<size_t > twgs = (dispatchInfo.getTotalNumberOfWorkgroups ().x > 0 ) ? dispatchInfo.getTotalNumberOfWorkgroups ()
150155 : generateWorkgroupsNumber (gws, lws);
151- Vec3<size_t > nwgs = (dispatchInfo.getNumberOfWorkgroups ().x > 0 ) ? dispatchInfo.getNumberOfWorkgroups () : twgs;
152156
153157 // Patch our kernel constants
154158 *kernel.globalWorkOffsetX = static_cast <uint32_t >(offset.x );
@@ -183,7 +187,6 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
183187
184188 // Send our indirect object data
185189 size_t localWorkSizes[3 ] = {lws.x , lws.y , lws.z };
186- size_t globalWorkSizes[3 ] = {gws.x , gws.y , gws.z };
187190
188191 dispatchProfilingPerfStartCommands (dispatchInfo, multiDispatchInfo, hwTimeStamps,
189192 hwPerfCounter, commandStream, commandQueue);
@@ -195,47 +198,8 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
195198 GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket (commandStream, nullptr , timestampPacket, TimestampPacket::WriteOperationType::BeforeWalker);
196199 }
197200
198- // Program the walker. Invokes execution so all state should already be programmed
199- auto walkerCmd = allocateWalkerSpace (*commandStream, kernel);
200-
201- KernelCommandsHelper<GfxFamily>::programCacheFlushAfterWalkerCommand (commandStream, &kernel);
202-
203- if (currentTimestampPacketNodes && commandQueue.getCommandStreamReceiver ().peekTimestampPacketWriteEnabled ()) {
204- auto timestampPacket = currentTimestampPacketNodes->peekNodes ().at (currentDispatchIndex)->tag ;
205- GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket (commandStream, walkerCmd, timestampPacket, TimestampPacket::WriteOperationType::AfterWalker);
206- }
207-
208- auto idd = obtainInterfaceDescriptorData (walkerCmd);
209-
210- bool localIdsGenerationByRuntime = KernelCommandsHelper<GfxFamily>::isRuntimeLocalIdsGenerationRequired (dim, globalWorkSizes, localWorkSizes);
211- bool inlineDataProgrammingRequired = KernelCommandsHelper<GfxFamily>::inlineDataProgrammingRequired (kernel);
212- bool kernelUsesLocalIds = KernelCommandsHelper<GfxFamily>::kernelUsesLocalIds (kernel);
213- KernelCommandsHelper<GfxFamily>::sendIndirectState (
214- *commandStream,
215- *dsh,
216- *ioh,
217- *ssh,
218- kernel,
219- simd,
220- localWorkSizes,
221- offsetInterfaceDescriptorTable,
222- interfaceDescriptorIndex,
223- preemptionMode,
224- walkerCmd,
225- idd,
226- localIdsGenerationByRuntime,
227- kernelUsesLocalIds,
228- inlineDataProgrammingRequired);
229-
230- size_t globalOffsets[3 ] = {offset.x , offset.y , offset.z };
231- size_t startWorkGroups[3 ] = {swgs.x , swgs.y , swgs.z };
232- size_t numWorkGroups[3 ] = {nwgs.x , nwgs.y , nwgs.z };
233- GpgpuWalkerHelper<GfxFamily>::setGpgpuWalkerThreadData (walkerCmd, globalOffsets, startWorkGroups,
234- numWorkGroups, localWorkSizes, simd, dim,
235- localIdsGenerationByRuntime, inlineDataProgrammingRequired,
236- *kernel.getKernelInfo ().patchInfo .threadPayload );
237-
238- GpgpuWalkerHelper<GfxFamily>::adjustWalkerData (commandStream, walkerCmd, kernel, dispatchInfo);
201+ programWalker (*commandStream, kernel, commandQueue, currentTimestampPacketNodes, *dsh, *ioh, *ssh,
202+ localWorkSizes, preemptionMode, currentDispatchIndex, interfaceDescriptorIndex, dispatchInfo, offsetInterfaceDescriptorTable);
239203
240204 dispatchWorkarounds (commandStream, commandQueue, kernel, false );
241205 if (dispatchInfo.isPipeControlRequired ()) {
@@ -244,6 +208,8 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
244208 *pPipeControlCmd = GfxFamily::cmdInitPipeControl;
245209 pPipeControlCmd->setCommandStreamerStallEnable (true );
246210 }
211+ KernelCommandsHelper<GfxFamily>::programCacheFlushAfterWalkerCommand (commandStream, &kernel);
212+
247213 currentDispatchIndex++;
248214 }
249215 dispatchProfilingPerfEndCommands (hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
0 commit comments