Skip to content

Commit 9fe2ddd

Browse files
Estimate command stream size for marker profiling
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
1 parent 9c181df commit 9fe2ddd

File tree

13 files changed: +48 additions, -40 deletions (lines changed)

13 files changed: +48 additions, -40 deletions (lines changed)

opencl/source/command_queue/command_queue_hw.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -452,7 +452,7 @@ class CommandQueueHw : public CommandQueue {
452452
LinearStream *obtainCommandStream(const CsrDependencies &csrDependencies, bool blitEnqueue, bool blockedQueue,
453453
const MultiDispatchInfo &multiDispatchInfo, const EventsRequest &eventsRequest,
454454
std::unique_ptr<KernelOperation> &blockedCommandsData,
455-
Surface **surfaces, size_t numSurfaces) {
455+
Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling) {
456456
LinearStream *commandStream = nullptr;
457457

458458
bool profilingRequired = (this->isProfilingEnabled() && eventsRequest.outEvent);
@@ -469,7 +469,7 @@ class CommandQueueHw : public CommandQueue {
469469
blockedCommandsData = std::make_unique<KernelOperation>(commandStream, *gpgpuCsr.getInternalAllocationStorage());
470470
} else {
471471
commandStream = &getCommandStream<GfxFamily, commandType>(*this, csrDependencies, profilingRequired, perfCountersRequired,
472-
blitEnqueue, multiDispatchInfo, surfaces, numSurfaces);
472+
blitEnqueue, multiDispatchInfo, surfaces, numSurfaces, isMarkerWithProfiling);
473473
}
474474
return commandStream;
475475
}

opencl/source/command_queue/enqueue_common.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -228,7 +228,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
228228
}
229229

230230
auto &commandStream = *obtainCommandStream<commandType>(csrDeps, false, blockQueue, multiDispatchInfo, eventsRequest,
231-
blockedCommandsData, surfacesForResidency, numSurfaceForResidency);
231+
blockedCommandsData, surfacesForResidency, numSurfaceForResidency, isMarkerWithProfiling);
232232
auto commandStreamStart = commandStream.getUsed();
233233

234234
if (this->context->getRootDeviceIndices().size() > 1) {
@@ -1177,7 +1177,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
11771177
LinearStream *gpgpuCommandStream = {};
11781178
size_t gpgpuCommandStreamStart = {};
11791179
if (isGpgpuSubmissionForBcsRequired(blockQueue)) {
1180-
gpgpuCommandStream = obtainCommandStream<cmdType>(csrDeps, true, blockQueue, multiDispatchInfo, eventsRequest, blockedCommandsData, nullptr, 0);
1180+
gpgpuCommandStream = obtainCommandStream<cmdType>(csrDeps, true, blockQueue, multiDispatchInfo, eventsRequest, blockedCommandsData, nullptr, 0, false);
11811181
gpgpuCommandStreamStart = gpgpuCommandStream->getUsed();
11821182
}
11831183

opencl/source/command_queue/gpgpu_walker.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -167,7 +167,7 @@ class GpgpuWalkerHelper {
167167
template <typename GfxFamily>
168168
struct EnqueueOperation {
169169
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
170-
static size_t getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo);
170+
static size_t getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling);
171171
static size_t getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo);
172172
static size_t getSizeRequiredForTimestampPacketWrite();
173173
static size_t getSizeForCacheFlushAfterWalkerCommands(const Kernel &kernel, const CommandQueue &commandQueue);
@@ -180,8 +180,8 @@ struct EnqueueOperation {
180180
template <typename GfxFamily, uint32_t eventType>
181181
LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace,
182182
bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo,
183-
Surface **surfaces, size_t numSurfaces) {
184-
size_t expectedSizeCS = EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(eventType, csrDeps, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, blitEnqueue, commandQueue, multiDispatchInfo);
183+
Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling) {
184+
size_t expectedSizeCS = EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(eventType, csrDeps, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, blitEnqueue, commandQueue, multiDispatchInfo, isMarkerWithProfiling);
185185
return commandQueue.getCS(expectedSizeCS);
186186
}
187187

opencl/source/command_queue/gpgpu_walker_base.inl

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
*/
77

88
#pragma once
9+
#include "shared/source/command_container/command_encoder.h"
910
#include "shared/source/command_stream/command_stream_receiver.h"
1011
#include "shared/source/helpers/aligned_memory.h"
1112
#include "shared/source/helpers/debug_helpers.h"
@@ -177,7 +178,7 @@ size_t GpgpuWalkerHelper<GfxFamily>::getSizeForWaDisableRccRhwoOptimization(cons
177178
}
178179

179180
template <typename GfxFamily>
180-
size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo) {
181+
size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling) {
181182
size_t expectedSizeCS = 0;
182183
auto &hwInfo = commandQueue.getDevice().getHardwareInfo();
183184
auto &commandQueueHw = static_cast<CommandQueueHw<GfxFamily> &>(commandQueue);
@@ -205,8 +206,15 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
205206
if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
206207
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDeps);
207208
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();
209+
if (isMarkerWithProfiling) {
210+
expectedSizeCS += 4 * EncodeStoreMMIO<GfxFamily>::size;
211+
}
212+
} else if (isMarkerWithProfiling) {
213+
expectedSizeCS += 2 * MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl();
214+
if (!HwHelper::get(hwInfo.platform.eRenderCoreFamily).useOnlyGlobalTimestamps()) {
215+
expectedSizeCS += 2 * EncodeStoreMMIO<GfxFamily>::size;
216+
}
208217
}
209-
210218
if (multiDispatchInfo.peekMainKernel()) {
211219
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeForCacheFlushAfterWalkerCommands(*multiDispatchInfo.peekMainKernel(), commandQueue);
212220
}

opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -829,7 +829,7 @@ HWTEST_F(DispatchWalkerTest, givenBlockedEnqueueWhenObtainingCommandStreamThenAl
829829
EventsRequest eventsRequest(0, nullptr, nullptr);
830830
auto cmdStream = mockCmdQ.template obtainCommandStream<CL_COMMAND_NDRANGE_KERNEL>(csrDependencies, false, true,
831831
multiDispatchInfo, eventsRequest, blockedKernelData,
832-
nullptr, 0u);
832+
nullptr, 0u, false);
833833

834834
EXPECT_EQ(expectedSizeCS, cmdStream->getMaxAvailableSpace());
835835
EXPECT_EQ(expectedSizeCSAllocation, cmdStream->getGraphicsAllocation()->getUnderlyingBufferSize());

opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1711,10 +1711,10 @@ HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenEstimatingCommandStream
17111711
dispatchInfo.setKernel(mockKernel.mockKernel);
17121712
multiDispatchInfo.push(dispatchInfo);
17131713

1714-
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo);
1714+
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false);
17151715
DebugManager.flags.GpuScratchRegWriteAfterWalker.set(1);
17161716

1717-
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo);
1717+
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false);
17181718

17191719
EXPECT_EQ(baseCommandStreamSize + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM), extendedCommandStreamSize);
17201720
}

opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenFillingBufferThenHeapsAndCommandBufferCo
9898
auto usedAfterSSH = ssh.getUsed();
9999

100100
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_FILL_BUFFER, CsrDependencies(), false, false,
101-
false, *pCmdQ, multiDispatchInfo);
101+
false, *pCmdQ, multiDispatchInfo, false);
102102
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
103103
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
104104
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
@@ -151,7 +151,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenCopyingBufferThenHeapsAndCommandBufferCo
151151
auto usedAfterSSH = ssh.getUsed();
152152

153153
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_COPY_BUFFER, CsrDependencies(), false, false,
154-
false, *pCmdQ, multiDispatchInfo);
154+
false, *pCmdQ, multiDispatchInfo, false);
155155
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
156156
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
157157
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
@@ -205,7 +205,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenReadingBufferNonBlockingThenHeapsAndComm
205205
auto usedAfterSSH = ssh.getUsed();
206206

207207
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, CsrDependencies(), false, false,
208-
false, *pCmdQ, multiDispatchInfo);
208+
false, *pCmdQ, multiDispatchInfo, false);
209209
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
210210
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
211211
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
@@ -260,7 +260,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenReadingBufferBlockingThenThenHeapsAndCom
260260
auto usedAfterSSH = ssh.getUsed();
261261

262262
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, CsrDependencies(), false, false,
263-
false, *pCmdQ, multiDispatchInfo);
263+
false, *pCmdQ, multiDispatchInfo, false);
264264
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
265265
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
266266
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
@@ -315,7 +315,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferNonBlockingThenHeapsAndComm
315315
auto usedAfterSSH = ssh.getUsed();
316316

317317
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, CsrDependencies(), false, false,
318-
false, *pCmdQ, multiDispatchInfo);
318+
false, *pCmdQ, multiDispatchInfo, false);
319319
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
320320
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
321321
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
@@ -367,7 +367,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferBlockingThenHeapsAndCommand
367367
auto usedAfterSSH = ssh.getUsed();
368368

369369
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, CsrDependencies(), false, false,
370-
false, *pCmdQ, multiDispatchInfo);
370+
false, *pCmdQ, multiDispatchInfo, false);
371371
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
372372
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
373373
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);

opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -223,7 +223,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandStreamFixture, GivenDispatchInfoW
223223
size_t totalKernelSize = alignUp(numOfKernels * size, MemoryConstants::pageSize);
224224

225225
LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(), false, false,
226-
false, multiDispatchInfo, nullptr, 0);
226+
false, multiDispatchInfo, nullptr, 0, false);
227227

228228
EXPECT_LT(totalKernelSize, commandStream.getMaxAvailableSpace());
229229

opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched
5858
MultiDispatchInfo multiDispatchinfo(&scheduler);
5959
LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(),
6060
false, false, false, multiDispatchinfo,
61-
nullptr, 0);
61+
nullptr, 0, false);
6262
pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);
6363

6464
GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
@@ -174,7 +174,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched
174174

175175
MultiDispatchInfo multiDispatchinfo(&scheduler);
176176
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(), false, false, false, multiDispatchinfo,
177-
nullptr, 0);
177+
nullptr, 0, false);
178178
pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);
179179

180180
GpgpuWalkerHelper<FamilyType>::dispatchScheduler(
@@ -209,7 +209,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, GivenEarlyReturnSet
209209
MultiDispatchInfo multiDispatchinfo(&scheduler);
210210
LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(),
211211
false, false, false, multiDispatchinfo,
212-
nullptr, 0);
212+
nullptr, 0, false);
213213
pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);
214214

215215
GpgpuWalkerHelper<FamilyType>::dispatchScheduler(

opencl/test/unit_test/gen8/scheduler_dispatch_tests_gen8.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ BDWTEST_F(BdwSchedulerTest, givenCallToDispatchSchedulerWhenPipeControlWithCSSta
3737
MultiDispatchInfo multiDispatchinfo(&scheduler);
3838
LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(),
3939
false, false, false, multiDispatchinfo,
40-
nullptr, 0);
40+
nullptr, 0, false);
4141
pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, minRequiredSizeForSchedulerSSH);
4242

4343
GpgpuWalkerHelper<FamilyType>::dispatchScheduler(

0 commit comments

Comments
 (0)