Skip to content

Commit e1bcad5

Browse files
Fix makeNonResident for csr residency allocations
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
1 parent b363a4f commit e1bcad5

File tree

13 files changed

+72
-40
lines changed

13 files changed

+72
-40
lines changed

level_zero/core/source/cmdqueue/cmdqueue.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ void CommandQueueImp::submitBatchBuffer(size_t offset, NEO::ResidencyContainer &
6262
NEO::QueueThrottle::HIGH, NEO::QueueSliceCount::defaultSliceCount,
6363
commandStream->getUsed(), commandStream, endingCmdPtr, false);
6464

65-
csr->submitBatchBuffer(batchBuffer, residencyContainer);
65+
csr->submitBatchBuffer(batchBuffer, csr->getResidencyAllocations());
6666
buffers.setCurrentFlushStamp(csr->obtainCurrentFlushStamp());
6767
}
6868

level_zero/core/source/cmdqueue/cmdqueue_hw.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -46,8 +46,7 @@ struct CommandQueueHw : public CommandQueueImp {
4646
size_t estimatePipelineSelect();
4747
void programPipelineSelect(NEO::LinearStream &commandStream);
4848

49-
MOCKABLE_VIRTUAL void handleScratchSpace(NEO::ResidencyContainer &residency,
50-
NEO::HeapContainer &heapContainer,
49+
MOCKABLE_VIRTUAL void handleScratchSpace(NEO::HeapContainer &heapContainer,
5150
NEO::ScratchSpaceController *scratchController,
5251
bool &gsbaState, bool &frontEndState,
5352
uint32_t perThreadScratchSpaceSize);

level_zero/core/source/cmdqueue/cmdqueue_hw.inl

Lines changed: 17 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -183,13 +183,12 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
183183

184184
spaceForResidency += residencyContainerSpaceForTagWrite;
185185

186-
residencyContainer.reserve(spaceForResidency);
186+
csr->getResidencyAllocations().reserve(spaceForResidency);
187187

188188
auto scratchSpaceController = csr->getScratchSpaceController();
189189
bool gsbaStateDirty = false;
190190
bool frontEndStateDirty = false;
191-
handleScratchSpace(residencyContainer,
192-
heapContainer,
191+
handleScratchSpace(heapContainer,
193192
scratchSpaceController,
194193
gsbaStateDirty, frontEndStateDirty,
195194
perThreadScratchSpaceSize);
@@ -223,26 +222,26 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
223222

224223
const auto globalFenceAllocation = csr->getGlobalFenceAllocation();
225224
if (globalFenceAllocation) {
226-
residencyContainer.push_back(globalFenceAllocation);
225+
csr->makeResident(*globalFenceAllocation);
227226
}
228227
const auto workPartitionAllocation = csr->getWorkPartitionAllocation();
229228
if (workPartitionAllocation) {
230-
residencyContainer.push_back(workPartitionAllocation);
229+
csr->makeResident(*workPartitionAllocation);
231230
}
232231

233232
if (NEO::DebugManager.flags.EnableSWTags.get()) {
234233
NEO::SWTagsManager *tagsManager = neoDevice->getRootDeviceEnvironment().tagsManager.get();
235234
UNRECOVERABLE_IF(tagsManager == nullptr);
236-
residencyContainer.push_back(tagsManager->getBXMLHeapAllocation());
237-
residencyContainer.push_back(tagsManager->getSWTagHeapAllocation());
235+
csr->makeResident(*tagsManager->getBXMLHeapAllocation());
236+
csr->makeResident(*tagsManager->getSWTagHeapAllocation());
238237
tagsManager->insertBXMLHeapAddress<GfxFamily>(child);
239238
tagsManager->insertSWTagHeapAddress<GfxFamily>(child);
240239
}
241240

242241
csr->programHardwareContext(child);
243242

244243
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) {
245-
residencyContainer.push_back(device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId()));
244+
csr->makeResident(*device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId()));
246245
}
247246

248247
if (!isCopyOnlyCommandQueue) {
@@ -281,17 +280,17 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
281280
(neoDevice->getDebugger() != nullptr && NEO::Debugger::isDebugEnabled(internalUsage));
282281

283282
if (devicePreemption == NEO::PreemptionMode::MidThread) {
284-
residencyContainer.push_back(csr->getPreemptionAllocation());
283+
csr->makeResident(*csr->getPreemptionAllocation());
285284
}
286285

287286
if (sipKernelUsed) {
288287
auto sipIsa = NEO::SipKernel::getSipKernel(*neoDevice).getSipAllocation();
289-
residencyContainer.push_back(sipIsa);
288+
csr->makeResident(*sipIsa);
290289
}
291290

292291
if (NEO::Debugger::isDebugEnabled(internalUsage) && neoDevice->getDebugger()) {
293292
UNRECOVERABLE_IF(device->getDebugSurface() == nullptr);
294-
residencyContainer.push_back(device->getDebugSurface());
293+
csr->makeResident(*device->getDebugSurface());
295294
}
296295
}
297296

@@ -355,9 +354,9 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
355354
commandList->getPrintfFunctionContainer().end());
356355

357356
for (auto alloc : commandList->commandContainer.getResidencyContainer()) {
358-
if (residencyContainer.end() ==
359-
std::find(residencyContainer.begin(), residencyContainer.end(), alloc)) {
360-
residencyContainer.push_back(alloc);
357+
if (csr->getResidencyAllocations().end() ==
358+
std::find(csr->getResidencyAllocations().begin(), csr->getResidencyAllocations().end(), alloc)) {
359+
csr->makeResident(*alloc);
361360

362361
if (performMigration) {
363362
if (alloc &&
@@ -385,7 +384,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
385384
commandQueuePreemptionMode = statePreemption;
386385

387386
if (hFence) {
388-
residencyContainer.push_back(&fence->getAllocation());
387+
csr->makeResident(fence->getAllocation());
389388
if (isCopyOnlyCommandQueue) {
390389
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(child, fence->getGpuAddress(), Fence::STATE_SIGNALED, false, true);
391390
} else {
@@ -401,7 +400,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
401400

402401
dispatchTaskCountWrite(child, true);
403402

404-
residencyContainer.push_back(csr->getTagAllocation());
403+
csr->makeResident(*csr->getTagAllocation());
405404
void *endingCmd = nullptr;
406405
if (directSubmissionEnabled) {
407406
endingCmd = child.getSpace(0);
@@ -417,17 +416,16 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
417416
memset(paddingPtr, 0, padding);
418417
}
419418

420-
submitBatchBuffer(ptrDiff(child.getCpuBase(), commandStream->getCpuBase()), residencyContainer, endingCmd);
419+
submitBatchBuffer(ptrDiff(child.getCpuBase(), commandStream->getCpuBase()), csr->getResidencyAllocations(), endingCmd);
421420

422421
this->taskCount = csr->peekTaskCount();
423422

424-
csr->makeSurfacePackNonResident(residencyContainer);
423+
csr->makeSurfacePackNonResident(csr->getResidencyAllocations());
425424

426425
if (getSynchronousMode() == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS) {
427426
this->synchronize(std::numeric_limits<uint64_t>::max());
428427
}
429428

430-
this->residencyContainer.clear();
431429
this->heapContainer.clear();
432430

433431
return ZE_RESULT_SUCCESS;

level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -104,8 +104,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateStateBaseAddressCmdSize() {
104104
}
105105

106106
template <GFXCORE_FAMILY gfxCoreFamily>
107-
void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::ResidencyContainer &residency,
108-
NEO::HeapContainer &heapContainer,
107+
void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &heapContainer,
109108
NEO::ScratchSpaceController *scratchController,
110109
bool &gsbaState, bool &frontEndState,
111110
uint32_t perThreadScratchSpaceSize) {
@@ -114,7 +113,7 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::ResidencyContainer &
114113
scratchController->setRequiredScratchSpace(nullptr, 0u, perThreadScratchSpaceSize, 0u, csr->peekTaskCount(),
115114
csr->getOsContext(), gsbaState, frontEndState);
116115
auto scratchAllocation = scratchController->getScratchSpaceAllocation();
117-
residency.push_back(scratchAllocation);
116+
csr->makeResident(*scratchAllocation);
118117
}
119118
}
120119

level_zero/core/source/cmdqueue/cmdqueue_imp.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,6 @@ struct CommandQueueImp : public CommandQueue {
9696
std::vector<Kernel *> printfFunctionContainer;
9797
bool gpgpuEnabled = false;
9898
CommandBufferManager buffers;
99-
NEO::ResidencyContainer residencyContainer;
10099
NEO::HeapContainer heapContainer;
101100
};
102101

level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
2525
using BaseClass::device;
2626
using BaseClass::preemptionCmdSyncProgramming;
2727
using BaseClass::printfFunctionContainer;
28+
using BaseClass::submitBatchBuffer;
2829
using BaseClass::synchronizeByPollingForTaskCount;
2930
using CommandQueue::commandQueuePreemptionMode;
3031
using CommandQueue::internalUsage;
@@ -85,6 +86,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
8586
using BaseClass::printfFunctionContainer;
8687
using L0::CommandQueue::internalUsage;
8788
using L0::CommandQueue::preemptionCmdSyncProgramming;
89+
using L0::CommandQueueImp::csr;
8890

8991
MockCommandQueueHw(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : L0::CommandQueueHw<gfxCoreFamily>(device, csr, desc) {
9092
}

level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp

Lines changed: 37 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -217,6 +217,24 @@ HWTEST_F(CommandQueueCreate, given100CmdListsWhenExecutingThenCommandStreamIsNot
217217
commandQueue->destroy();
218218
}
219219

220+
// Creates a command queue on top of a MockCommandStreamReceiver, calls
// submitBatchBuffer() with an empty ResidencyContainer, and expects the mock's
// makeResidentCalledTimes counter to still be zero afterwards.
// The queue is obtained through whitebox_cast so that submitBatchBuffer can be
// called directly from the test; it is destroyed explicitly at the end.
HWTEST_F(CommandQueueCreate, givenContainerWithAllocationsWhenResidencyContainerIsEmptyThenMakeResidentWasNotCalled) {
221+
auto csr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
222+
csr->setupContext(*neoDevice->getDefaultEngine().osContext);
223+
const ze_command_queue_desc_t desc = {};
224+
ze_result_t returnValue;
225+
auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
226+
device,
227+
csr.get(),
228+
&desc,
229+
false,
230+
false,
231+
returnValue));
232+
ResidencyContainer container; // intentionally left empty
233+
commandQueue->submitBatchBuffer(0, container, nullptr);
234+
EXPECT_EQ(csr->makeResidentCalledTimes, 0u);
235+
commandQueue->destroy();
236+
}
237+
220238
TEST_F(CommandQueueCreate, whenCommandQueueCreatedThenExpectLinearStreamInitializedWithExpectedSize) {
221239
const ze_command_queue_desc_t desc = {};
222240
ze_result_t returnValue;
@@ -580,7 +598,19 @@ HWTEST_F(CommandQueueCommandsSingleTile, givenCommandQueueWhenExecutingCommandLi
580598
}
581599

582600
HWTEST_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecutingCommandListsThenWorkPartitionAllocationIsMadeResident) {
583-
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
601+
// Test-local CSR mock derived from MockCsrHw2<FamilyType>, inheriting its
// constructors. Its makeResident() override only compares the address of the
// passed allocation with expectedGa and latches expectedGAWasMadeResident on a
// match; it does not forward to the base-class makeResident().
class MyCsrMock : public MockCsrHw2<FamilyType> {
602+
using MockCsrHw2<FamilyType>::MockCsrHw2;
603+
604+
public:
605+
void makeResident(GraphicsAllocation &graphicsAllocation) override {
606+
if (expectedGa == &graphicsAllocation) {
607+
expectedGAWasMadeResident = true;
608+
}
609+
}
610+
GraphicsAllocation *expectedGa = nullptr; // allocation the test wants to observe; set by the test before execution
611+
bool expectedGAWasMadeResident = false; // becomes true once makeResident() receives expectedGa
612+
};
613+
MyCsrMock csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
584614
csr.initializeTagAllocation();
585615
csr.createWorkPartitionAllocation(*neoDevice);
586616
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
@@ -597,12 +627,13 @@ HWTEST_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecutin
597627

598628
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, returnValue));
599629
auto commandListHandle = commandList->toHandle();
630+
auto workPartitionAllocation = csr.getWorkPartitionAllocation();
631+
csr.expectedGa = workPartitionAllocation;
600632
auto status = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
601633
EXPECT_EQ(status, ZE_RESULT_SUCCESS);
602634

603-
auto workPartitionAllocation = csr.getWorkPartitionAllocation();
604635
ASSERT_NE(nullptr, workPartitionAllocation);
605-
EXPECT_TRUE(isAllocationInResidencyContainer(csr, workPartitionAllocation));
636+
EXPECT_TRUE(csr.expectedGAWasMadeResident);
606637

607638
commandQueue->destroy();
608639
}
@@ -877,12 +908,11 @@ class MockCommandQueue : public L0::CommandQueueHw<gfxCoreFamily> {
877908
MockCommandQueue(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : L0::CommandQueueHw<gfxCoreFamily>(device, csr, desc) {}
878909
using BaseClass = ::L0::CommandQueueHw<gfxCoreFamily>;
879910

911+
using BaseClass::csr;
880912
using BaseClass::heapContainer;
881-
using BaseClass::residencyContainer;
882913

883914
NEO::HeapContainer mockHeapContainer;
884-
void handleScratchSpace(NEO::ResidencyContainer &residency,
885-
NEO::HeapContainer &heapContainer,
915+
void handleScratchSpace(NEO::HeapContainer &heapContainer,
886916
NEO::ScratchSpaceController *scratchController,
887917
bool &gsbaState, bool &frontEndState,
888918
uint32_t perThreadScratchSpaceSize) override {
@@ -966,7 +996,7 @@ HWTEST2_F(ExecuteCommandListTests, givenExecuteCommandListWhenItReturnsThenConta
966996

967997
commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
968998

969-
EXPECT_EQ(0u, commandQueue->residencyContainer.size());
999+
EXPECT_EQ(0u, commandQueue->csr->getResidencyAllocations().size());
9701000
EXPECT_EQ(0u, commandQueue->heapContainer.size());
9711001

9721002
commandQueue->destroy();

opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -552,7 +552,7 @@ struct MockScratchController : public ScratchSpaceController {
552552
OsContext &osContext,
553553
bool &stateBaseAddressDirty,
554554
bool &vfeStateDirty,
555-
NEO::ResidencyContainer &residency) override {
555+
NEO::CommandStreamReceiver *csr) override {
556556
}
557557
void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) override{};
558558
};

shared/source/command_stream/scratch_space_controller.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ class InternalAllocationStorage;
2020
class MemoryManager;
2121
struct HardwareInfo;
2222
class OsContext;
23+
class CommandStreamReceiver;
2324

2425
namespace ScratchSpaceConstants {
2526
constexpr size_t scratchSpaceOffsetFor64Bit = 4096u;
@@ -69,7 +70,7 @@ class ScratchSpaceController {
6970
OsContext &osContext,
7071
bool &stateBaseAddressDirty,
7172
bool &vfeStateDirty,
72-
ResidencyContainer &residency) = 0;
73+
CommandStreamReceiver *csr) = 0;
7374

7475
protected:
7576
MemoryManager *getMemoryManager() const;

shared/source/command_stream/scratch_space_controller_base.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,6 @@ void ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(BindlessH
9898
OsContext &osContext,
9999
bool &stateBaseAddressDirty,
100100
bool &vfeStateDirty,
101-
ResidencyContainer &residency) {
101+
NEO::CommandStreamReceiver *csr) {
102102
}
103103
} // namespace NEO

0 commit comments

Comments
 (0)