Skip to content

Commit ca39301

Browse files
Reserve SSH space for bindless mode
Related-To: NEO-4767 Change-Id: Id7876ea5a5a4fc12ab0b3192548b006fc1eea75c Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
1 parent b3813d9 commit ca39301

File tree

12 files changed

+148
-42
lines changed

12 files changed

+148
-42
lines changed

opencl/source/command_queue/hardware_interface_base.inl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,9 @@ void HardwareInterface<GfxFamily>::obtainIndirectHeaps(CommandQueue &commandQueu
257257
} else {
258258
if (parentKernel && (commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, 0).getUsed() > 0)) {
259259
commandQueue.releaseIndirectHeap(IndirectHeap::SURFACE_STATE);
260+
// clean reserved bindless offsets
261+
ssh = &getIndirectHeap<GfxFamily, IndirectHeap::SURFACE_STATE>(commandQueue, multiDispatchInfo);
262+
ssh->replaceBuffer(ssh->getCpuBase(), ssh->getMaxAvailableSpace());
260263
}
261264
dsh = &getIndirectHeap<GfxFamily, IndirectHeap::DYNAMIC_STATE>(commandQueue, multiDispatchInfo);
262265
ioh = &getIndirectHeap<GfxFamily, IndirectHeap::INDIRECT_OBJECT>(commandQueue, multiDispatchInfo);

opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,20 @@ HWTEST_F(CommandStreamReceiverTest, givenCsrWhenAllocateHeapMemoryIsCalledThenHe
251251
delete dsh;
252252
}
253253

254+
// Checks that a SURFACE_STATE heap returned by allocateHeapMemory() already reports
// UnitTestHelper<FamilyType>::getDefaultSshUsage() bytes as used.
HWTEST_F(CommandStreamReceiverTest, givenSurfaceStateHeapTypeWhenAllocateHeapMemoryIsCalledThenSSHHasInitialSpaceReserevedForBindlessOffsets) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    IndirectHeap *ssh = nullptr;
    csr.allocateHeapMemory(IndirectHeap::SURFACE_STATE, 4096u, ssh);

    // ASSERT rather than EXPECT: every statement below dereferences ssh, so a
    // failed allocation has to stop the test here instead of crashing the binary.
    ASSERT_NE(nullptr, ssh);
    ASSERT_NE(nullptr, ssh->getGraphicsAllocation());

    auto sshReservedSize = UnitTestHelper<FamilyType>::getDefaultSshUsage();
    EXPECT_EQ(sshReservedSize, ssh->getUsed());

    // The test owns both the allocation and the heap object, so release both.
    csr.getMemoryManager()->freeGraphicsMemory(ssh->getGraphicsAllocation());
    delete ssh;
}
267+
254268
TEST(CommandStreamReceiverSimpleTest, givenCsrWithoutTagAllocationWhenGetTagAllocationIsCalledThenNullptrIsReturned) {
255269
MockExecutionEnvironment executionEnvironment;
256270
executionEnvironment.prepareRootDeviceEnvironments(1);

opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,25 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
282282
}
283283
}
284284

285+
// Enqueues a parent kernel while the surface state heap reports no usage and checks
// that the queue still hands out the same heap object and graphics allocation.
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelAndNotUsedSSHWhenEnqueuedThenSSHIsNotReallocated) {
    const std::string clVersion(pPlatform->getClDevice(0)->getDeviceInfo().clVersion);
    if (clVersion.find("OpenCL 2.") == std::string::npos) {
        // The scenario is only exercised when the device reports an OpenCL 2.x version.
        return;
    }

    const size_t globalOffsets[3] = {0, 0, 0};
    const size_t workItems[3] = {1, 1, 1};

    pKernel->createReflectionSurface();
    MockMultiDispatchInfo multiDispatchInfo(pKernel);

    auto sshBefore = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
    // Re-attach the heap to its own buffer. Presumably this resets the used size to
    // zero (dropping the reserved bindless offset) - confirm against IndirectHeap::replaceBuffer.
    auto heapBase = sshBefore->getCpuBase();
    auto heapSize = sshBefore->getMaxAvailableSpace();
    sshBefore->replaceBuffer(heapBase, heapSize);

    pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);

    auto sshAfter = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
    EXPECT_EQ(sshBefore, sshAfter);
    EXPECT_EQ(sshBefore->getGraphicsAllocation(), sshAfter->getGraphicsAllocation());
}
303+
285304
HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueuedThenBlocksSurfaceStatesAreCopied) {
286305
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
287306
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
@@ -307,10 +326,10 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
307326
// will be copies
308327
ssh->align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
309328

310-
// mark the assumed place for surface states
311-
size_t parentSshOffset = ssh->getUsed();
312-
313329
pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr);
330+
// mark the assumed place for surface states
331+
size_t parentSshOffset = 0;
332+
ssh = &getIndirectHeap<FamilyType, IndirectHeap::SURFACE_STATE>(*pCmdQ, multiDispatchInfo);
314333

315334
void *blockSSH = ptrOffset(ssh->getCpuBase(), parentSshOffset + parentKernelSSHSize); // note : unaligned at this point
316335

opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenUsedSSHHeapWhenParent
371371
nullptr,
372372
CL_COMMAND_NDRANGE_KERNEL);
373373

374-
EXPECT_EQ(UnitTestHelper<FamilyType>::getDefaultSshUsage(), ssh.getUsed());
374+
EXPECT_EQ(0u, ssh.getUsed());
375375

376376
delete mockParentKernel;
377377
}

opencl/test/unit_test/execution_model/submit_blocked_parent_kernel_tests.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh
297297
delete parentKernel;
298298
}
299299

300-
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenUsedCommandQueueHeapshenParentKernelIsSubmittedThenQueueHeapsAreNotUsed) {
300+
HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenUsedCommandQueueHeapsWhenParentKernelIsSubmittedThenQueueHeapsAreNotUsed) {
301301
REQUIRE_DEVICE_ENQUEUE_OR_SKIP(device);
302302

303303
cl_queue_properties properties[3] = {0};
@@ -330,6 +330,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenUsedCommandQue
330330
queueDsh.getSpace(usedSize);
331331
queueIoh.getSpace(usedSize);
332332

333+
auto intialSshUsed = queueSsh.getUsed();
334+
333335
auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()});
334336
auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
335337
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
@@ -345,7 +347,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenUsedCommandQue
345347
EXPECT_FALSE(cmdQ.releaseIndirectHeapCalled);
346348
EXPECT_EQ(usedSize, queueDsh.getUsed());
347349
EXPECT_EQ(usedSize, queueIoh.getUsed());
348-
EXPECT_EQ(usedSize, queueSsh.getUsed());
350+
EXPECT_EQ(intialSshUsed, queueSsh.getUsed());
349351

350352
delete cmdComputeKernel;
351353
delete parentKernel;
@@ -372,8 +374,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenNotUsedSSHWhen
372374
pCmdQ->allocateHeapMemory(IndirectHeap::SURFACE_STATE, sshSize, ssh);
373375
dsh->getSpace(mockDevQueue.getDshOffset());
374376

375-
EXPECT_EQ(0u, ssh->getUsed());
376-
377377
pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, sshSize);
378378

379379
void *sshBuffer = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getCpuBase();

opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenSendingIndirectStateThenBi
614614
auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192);
615615
auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::SURFACE_STATE, 8192);
616616

617+
auto sshUsed = ssh.getUsed();
618+
617619
// Obtain where the pointers will be stored
618620
const auto &kernelInfo = kernel->getKernelInfo();
619621
auto numSurfaceStates = kernelInfo.patchInfo.statelessGlobalMemObjKernelArgs.size() +
@@ -648,8 +650,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenSendingIndirectStateThenBi
648650
nullptr,
649651
true);
650652

651-
EXPECT_EQ(0x00000000u, *(&bindingTableStatesPointers[0]));
652-
EXPECT_EQ(0x00000040u, *(&bindingTableStatesPointers[1]));
653+
EXPECT_EQ(sshUsed + 0x00000000u, *(&bindingTableStatesPointers[0]));
654+
EXPECT_EQ(sshUsed + 0x00000040u, *(&bindingTableStatesPointers[1]));
653655
}
654656

655657
HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateThenSurfaceStatePointersAreCorrect) {

opencl/test/unit_test/helpers/unit_test_helper.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ bool UnitTestHelper<GfxFamily>::isExpectMemoryNotEqualSupported() {
3737

3838
template <typename GfxFamily>
3939
uint32_t UnitTestHelper<GfxFamily>::getDefaultSshUsage() {
40-
return 0;
40+
return sizeof(typename GfxFamily::RENDER_SURFACE_STATE);
4141
}
4242

4343
template <typename GfxFamily>

shared/source/command_container/cmdcontainer.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "shared/source/device/device.h"
1414
#include "shared/source/helpers/debug_helpers.h"
1515
#include "shared/source/helpers/heap_helper.h"
16+
#include "shared/source/helpers/hw_helper.h"
1617
#include "shared/source/indirect_heap/indirect_heap.h"
1718
#include "shared/source/memory_manager/memory_manager.h"
1819

@@ -84,6 +85,8 @@ bool CommandContainer::initialize(Device *device) {
8485

8586
instructionHeapBaseAddress = device->getMemoryManager()->getInternalHeapBaseAddress(device->getRootDeviceIndex(), allocationIndirectHeaps[IndirectHeap::Type::INDIRECT_OBJECT]->isAllocatedInLocalMemoryPool());
8687

88+
reserveBindlessOffsets(*indirectHeaps[IndirectHeap::Type::SURFACE_STATE]);
89+
8790
iddBlock = nullptr;
8891
nextIddInBlock = this->getNumIddPerBlock();
8992

@@ -123,6 +126,8 @@ void CommandContainer::reset() {
123126
indirectHeap->getMaxAvailableSpace());
124127
addToResidencyContainer(indirectHeap->getGraphicsAllocation());
125128
}
129+
130+
reserveBindlessOffsets(*indirectHeaps[HeapType::SURFACE_STATE]);
126131
}
127132

128133
void *CommandContainer::getHeapSpaceAllowGrow(HeapType heapType,
@@ -172,11 +177,15 @@ IndirectHeap *CommandContainer::getHeapWithRequiredSizeAndAlignment(HeapType hea
172177
getDeallocationContainer().push_back(oldAlloc);
173178
setIndirectHeapAllocation(heapType, newAlloc);
174179
setHeapDirty(heapType);
180+
if (heapType == HeapType::SURFACE_STATE) {
181+
reserveBindlessOffsets(*indirectHeap);
182+
}
175183
}
176184

177185
if (alignment) {
178186
indirectHeap->align(alignment);
179187
}
188+
180189
return indirectHeap;
181190
}
182191

@@ -201,4 +210,11 @@ void CommandContainer::allocateNextCommandBuffer() {
201210
addToResidencyContainer(cmdBufferAllocation);
202211
}
203212

213+
// Takes getRenderSurfaceStateSize() bytes out of the given surface state heap via
// getSpace(). The heap is required to be unused on entry; a non-zero getUsed()
// trips UNRECOVERABLE_IF.
void CommandContainer::reserveBindlessOffsets(IndirectHeap &sshHeap) {
    UNRECOVERABLE_IF(sshHeap.getUsed() > 0);

    const auto coreFamily = getDevice()->getHardwareInfo().platform.eRenderCoreFamily;
    sshHeap.getSpace(HwHelper::get(coreFamily).getRenderSurfaceStateSize());
}
219+
204220
} // namespace NEO

shared/source/command_container/cmdcontainer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ class CommandContainer : public NonCopyableOrMovableClass {
9494
uint32_t getNumIddPerBlock() const { return numIddsPerBlock; }
9595

9696
protected:
97+
void reserveBindlessOffsets(IndirectHeap &sshHeap);
9798
void *iddBlock = nullptr;
9899
Device *device = nullptr;
99100
std::unique_ptr<HeapHelper> heapHelper;

shared/source/command_stream/scratch_space_controller_base.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,11 @@ uint64_t ScratchSpaceControllerBase::getScratchPatchAddress() {
7373
}
7474

7575
// For a SURFACE_STATE heap, takes getRenderSurfaceStateSize() bytes from the heap
// via getSpace(); any other heap type is left untouched.
void ScratchSpaceControllerBase::reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) {
    if (heapType != IndirectHeap::SURFACE_STATE) {
        return;
    }

    const auto *hwInfo = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo();
    auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily);
    indirectHeap->getSpace(hwHelper.getRenderSurfaceStateSize());
}
7782

7883
} // namespace NEO

0 commit comments

Comments
 (0)