Skip to content

Commit 4010ff6

Browse files
performance: Use tag allocator for fill pattern
Resolves: NEO-9729
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
1 parent 0ff9f65 commit 4010ff6

File tree

13 files changed

+143
-35
lines changed

13 files changed

+143
-35
lines changed

level_zero/core/source/cmdlist/cmdlist.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,17 @@ void CommandList::removeMemoryPrefetchAllocations() {
8888
}
8989
}
9090

91+
// Releases every fill-pattern resource tracked by this command list:
//  - each entry of patternAllocations is passed to device->storeReusableAllocation(),
//  - each entry of patternTags is released with returnTag(),
// and both containers are emptied afterwards.
// In this commit it replaces the loops previously duplicated in the
// CommandListCoreFamily destructor, reset() and the immediate command list's hostSynchronize().
void CommandList::storeFillPatternResourcesForReuse() {
92+
for (auto &patternAlloc : this->patternAllocations) {
93+
device->storeReusableAllocation(*patternAlloc);
94+
}
95+
this->patternAllocations.clear();
96+
for (auto &patternTag : this->patternTags) {
97+
// presumably hands the node back to the allocator it came from — confirm against TagNodeBase
patternTag->returnTag();
98+
}
99+
this->patternTags.clear();
100+
}
101+
91102
NEO::GraphicsAllocation *CommandList::getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize, bool copyOffload) {
92103
auto allocation = hostPtrMap.lower_bound(buffer);
93104
if (allocation != hostPtrMap.end()) {

level_zero/core/source/cmdlist/cmdlist.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ static_assert(IsCompliantWithDdiHandlesExt<_ze_command_list_handle_t>);
3737

3838
namespace NEO {
3939
class ScratchSpaceController;
40+
class TagNodeBase;
4041
struct EncodeDispatchKernelArgs;
4142
} // namespace NEO
4243

@@ -279,6 +280,7 @@ struct CommandList : _ze_command_list_handle_t {
279280
void removeDeallocationContainerData();
280281
void removeHostPtrAllocations();
281282
void removeMemoryPrefetchAllocations();
283+
void storeFillPatternResourcesForReuse();
282284
void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation);
283285
void eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation);
284286
bool isCopyOnly(bool copyOffloadOperation) const {
@@ -458,6 +460,7 @@ struct CommandList : _ze_command_list_handle_t {
458460
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
459461
NEO::PrivateAllocsToReuseContainer ownedPrivateAllocations;
460462
std::vector<NEO::GraphicsAllocation *> patternAllocations;
463+
std::vector<NEO::TagNodeBase *> patternTags;
461464
std::vector<std::weak_ptr<Kernel>> printfKernelContainer;
462465

463466
NEO::CommandContainer commandContainer;

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,7 @@ CommandListCoreFamily<gfxCoreFamily>::~CommandListCoreFamily() {
8484
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(alloc.second);
8585
}
8686
this->ownedPrivateAllocations.clear();
87-
for (auto &patternAlloc : this->patternAllocations) {
88-
device->storeReusableAllocation(*patternAlloc);
89-
}
90-
this->patternAllocations.clear();
87+
this->storeFillPatternResourcesForReuse();
9188
}
9289

9390
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -112,11 +109,7 @@ void CommandListCoreFamily<gfxCoreFamily>::postInitComputeSetup() {
112109

113110
template <GFXCORE_FAMILY gfxCoreFamily>
114111
ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
115-
for (auto &patternAlloc : this->patternAllocations) {
116-
device->storeReusableAllocation(*patternAlloc);
117-
}
118-
this->patternAllocations.clear();
119-
112+
this->storeFillPatternResourcesForReuse();
120113
removeDeallocationContainerData();
121114
removeHostPtrAllocations();
122115
removeMemoryPrefetchAllocations();
@@ -2476,18 +2469,30 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
24762469
} else {
24772470
builtinKernel->setGroupSize(static_cast<uint32_t>(fillArguments.mainGroupSize), 1, 1);
24782471

2472+
NEO::GraphicsAllocation *patternGfxAlloc = nullptr;
2473+
void *patternGfxAllocPtr = nullptr;
24792474
size_t patternAllocationSize = alignUp(patternSize, MemoryConstants::cacheLineSize);
2480-
auto patternGfxAlloc = device->obtainReusableAllocation(patternAllocationSize, NEO::AllocationType::fillPattern);
2481-
if (patternGfxAlloc == nullptr) {
2482-
NEO::AllocationProperties allocationProperties{device->getNEODevice()->getRootDeviceIndex(),
2483-
patternAllocationSize,
2484-
NEO::AllocationType::fillPattern,
2485-
device->getNEODevice()->getDeviceBitfield()};
2486-
allocationProperties.alignment = MemoryConstants::pageSize;
2487-
patternGfxAlloc = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
2475+
2476+
if (patternAllocationSize > MemoryConstants::cacheLineSize) {
2477+
patternGfxAlloc = device->obtainReusableAllocation(patternAllocationSize, NEO::AllocationType::fillPattern);
2478+
if (patternGfxAlloc == nullptr) {
2479+
NEO::AllocationProperties allocationProperties{device->getNEODevice()->getRootDeviceIndex(),
2480+
patternAllocationSize,
2481+
NEO::AllocationType::fillPattern,
2482+
device->getNEODevice()->getDeviceBitfield()};
2483+
allocationProperties.alignment = MemoryConstants::pageSize;
2484+
patternGfxAlloc = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
2485+
}
2486+
patternGfxAllocPtr = patternGfxAlloc->getUnderlyingBuffer();
2487+
patternAllocations.push_back(patternGfxAlloc);
2488+
} else {
2489+
auto patternTag = device->getFillPatternAllocator()->getTag();
2490+
patternGfxAllocPtr = patternTag->getCpuBase();
2491+
patternGfxAlloc = patternTag->getBaseGraphicsAllocation()->getGraphicsAllocation(device->getRootDeviceIndex());
2492+
this->patternTags.push_back(patternTag);
2493+
commandContainer.addToResidencyContainer(patternGfxAlloc);
24882494
}
2489-
void *patternGfxAllocPtr = patternGfxAlloc->getUnderlyingBuffer();
2490-
patternAllocations.push_back(patternGfxAlloc);
2495+
24912496
uint64_t patternAllocPtr = reinterpret_cast<uintptr_t>(patternGfxAllocPtr);
24922497
uint64_t patternAllocOffset = 0;
24932498
uint64_t patternSizeToCopy = patternSize;

level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1198,10 +1198,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint6
11981198
inOrderExecInfo->releaseNotUsedTempTimestampNodes(false);
11991199
}
12001200

1201-
for (auto &patternAlloc : this->patternAllocations) {
1202-
this->device->storeReusableAllocation(*patternAlloc);
1203-
}
1204-
this->patternAllocations.clear();
1201+
this->storeFillPatternResourcesForReuse();
12051202
}
12061203

12071204
bool hangDetected = status == ZE_RESULT_ERROR_DEVICE_LOST;

level_zero/core/source/device/device.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#include "shared/source/device/device.h"
1111
#include "shared/source/helpers/aligned_memory.h"
12+
#include "shared/source/helpers/fill_pattern_tag_node.h"
1213
#include "shared/source/helpers/gfx_core_helper.h"
1314
#include "shared/source/helpers/in_order_cmd_helpers.h"
1415

@@ -71,6 +72,21 @@ NEO::TagAllocatorBase *Device::getInOrderTimestampAllocator() {
7172
return inOrderTimestampAllocator.get();
7273
}
7374

75+
// Returns the device's fill-pattern tag allocator, creating it on the first call.
// The returned pointer is non-owning; the allocator is held by the
// fillPatternAllocator unique_ptr member.
NEO::TagAllocatorBase *Device::getFillPatternAllocator() {
76+
// First check is done without holding the lock; creation is re-checked under the lock below.
// NOTE(review): this unlocked read of the unique_ptr is not synchronized with the
// assignment made under the mutex — confirm this is acceptable for concurrent callers.
if (!this->fillPatternAllocator.get()) {
77+
// Function-local static: a single mutex shared by every Device instance.
static std::mutex mtx;
78+
std::unique_lock<std::mutex> lock(mtx);
79+
80+
if (!this->fillPatternAllocator.get()) {
81+
RootDeviceIndicesContainer rootDeviceIndices = {getNEODevice()->getRootDeviceIndex()};
82+
// Arguments after the memory manager: pageSize2M / cacheLineSize, then cacheLineSize twice.
// presumably tag count, tag alignment and tag size (i.e. cache-line-sized tags filling
// one 2MB page) — TODO confirm against the TagAllocator constructor.
fillPatternAllocator = std::make_unique<NEO::TagAllocator<NEO::FillPaternNodeType>>(rootDeviceIndices, getNEODevice()->getMemoryManager(), MemoryConstants::pageSize2M / MemoryConstants::cacheLineSize,
83+
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, 0, false, false, getNEODevice()->getDeviceBitfield());
84+
}
85+
}
86+
87+
return this->fillPatternAllocator.get();
88+
}
89+
7490
uint32_t Device::getNextSyncDispatchQueueId() {
7591
auto newValue = syncDispatchQueueIdAllocator.fetch_add(1);
7692

level_zero/core/source/device/device.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ struct Device : _ze_device_handle_t {
153153
NEO::TagAllocatorBase *getDeviceInOrderCounterAllocator();
154154
NEO::TagAllocatorBase *getHostInOrderCounterAllocator();
155155
NEO::TagAllocatorBase *getInOrderTimestampAllocator();
156+
NEO::TagAllocatorBase *getFillPatternAllocator();
156157
NEO::GraphicsAllocation *getSyncDispatchTokenAllocation() const { return syncDispatchTokenAllocation; }
157158
uint32_t getNextSyncDispatchQueueId();
158159
void ensureSyncDispatchTokenAllocation();
@@ -166,6 +167,7 @@ struct Device : _ze_device_handle_t {
166167
std::unique_ptr<NEO::TagAllocatorBase> deviceInOrderCounterAllocator;
167168
std::unique_ptr<NEO::TagAllocatorBase> hostInOrderCounterAllocator;
168169
std::unique_ptr<NEO::TagAllocatorBase> inOrderTimestampAllocator;
170+
std::unique_ptr<NEO::TagAllocatorBase> fillPatternAllocator;
169171
NEO::GraphicsAllocation *syncDispatchTokenAllocation = nullptr;
170172
std::mutex inOrderAllocatorMutex;
171173
std::mutex syncDispatchTokenMutex;

level_zero/core/source/device/device_imp.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1755,6 +1755,7 @@ void DeviceImp::releaseResources() {
17551755
deviceInOrderCounterAllocator.reset();
17561756
hostInOrderCounterAllocator.reset();
17571757
inOrderTimestampAllocator.reset();
1758+
fillPatternAllocator.reset();
17581759

17591760
if (allocationsForReuse.get()) {
17601761
allocationsForReuse->freeAllGraphicsAllocations(neoDevice);

level_zero/core/test/unit_tests/mocks/mock_cmdlist.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
112112
using BaseClass::obtainKernelPreemptionMode;
113113
using BaseClass::partitionCount;
114114
using BaseClass::patternAllocations;
115+
using BaseClass::patternTags;
115116
using BaseClass::pipeControlMultiKernelEventSync;
116117
using BaseClass::pipelineSelectStateTracking;
117118
using BaseClass::requiredStreamState;

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,17 @@ HWTEST_F(AppendFillTest, givenCallToAppendMemoryFillWithPatternSizeLessOrEqualTh
5353
commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);
5454

5555
for (const auto patternSize : {1, 2, 4}) {
56-
size_t patternAllocationsVectorSizeBefore = commandList->patternAllocations.size();
56+
size_t patternTagsVectorSizeBefore = commandList->patternTags.size();
5757
CmdListMemoryCopyParams copyParams = {};
5858
ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr, copyParams);
5959
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
60-
size_t patternAllocationsVectorSize = commandList->patternAllocations.size();
60+
size_t patternTagsVectorSize = commandList->patternTags.size();
6161
if (patternSize == 1) {
62-
EXPECT_EQ(patternAllocationsVectorSize, patternAllocationsVectorSizeBefore);
62+
EXPECT_EQ(patternTagsVectorSize, patternTagsVectorSizeBefore);
6363
} else {
64-
EXPECT_NE(patternAllocationsVectorSize, patternAllocationsVectorSizeBefore);
64+
EXPECT_NE(patternTagsVectorSize, patternTagsVectorSizeBefore);
6565
}
66+
EXPECT_EQ(0u, commandList->patternAllocations.size());
6667
}
6768
}
6869

@@ -76,6 +77,7 @@ HWTEST_F(AppendFillTest, givenCallToAppendMemoryFillWithPatternSizeLessOrEqualTh
7677
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
7778
size_t patternAllocationsVectorSize = commandList->patternAllocations.size();
7879
EXPECT_EQ(patternAllocationsVectorSize, 0u);
80+
EXPECT_EQ(0u, commandList->patternTags.size());
7981
}
8082
}
8183

@@ -84,13 +86,14 @@ HWTEST_F(AppendFillTest, givenTwoCallsToAppendMemoryFillWithSamePatternThenAlloc
8486
commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);
8587

8688
CmdListMemoryCopyParams copyParams = {};
87-
ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 8, allocSize, nullptr, 0, nullptr, copyParams);
89+
char pattern[65] = {};
90+
ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, sizeof(pattern), allocSize, nullptr, 0, nullptr, copyParams);
8891
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
8992
size_t patternAllocationsVectorSize = commandList->patternAllocations.size();
9093
EXPECT_EQ(patternAllocationsVectorSize, 1u);
9194

9295
uint8_t *newDstPtr = new uint8_t[allocSize];
93-
result = commandList->appendMemoryFill(newDstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr, copyParams);
96+
result = commandList->appendMemoryFill(newDstPtr, pattern, sizeof(pattern), allocSize, nullptr, 0, nullptr, copyParams);
9497
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
9598
size_t newPatternAllocationsVectorSize = commandList->patternAllocations.size();
9699

@@ -104,19 +107,58 @@ HWTEST_F(AppendFillTest, givenTwoCallsToAppendMemoryFillWithDifferentPatternsThe
104107
commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);
105108

106109
CmdListMemoryCopyParams copyParams = {};
107-
ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 8, allocSize, nullptr, 0, nullptr, copyParams);
110+
char pattern[65] = {};
111+
ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, sizeof(pattern), allocSize, nullptr, 0, nullptr, copyParams);
108112
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
109113
size_t patternAllocationsVectorSize = commandList->patternAllocations.size();
110114
EXPECT_EQ(patternAllocationsVectorSize, 1u);
111115

112-
uint8_t newPattern[patternSize] = {1, 2, 3, 4};
113-
result = commandList->appendMemoryFill(dstPtr, newPattern, patternSize, allocSize, nullptr, 0, nullptr, copyParams);
116+
char newPattern[66] = {};
117+
result = commandList->appendMemoryFill(dstPtr, newPattern, sizeof(newPattern), allocSize, nullptr, 0, nullptr, copyParams);
114118
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
115119
size_t newPatternAllocationsVectorSize = commandList->patternAllocations.size();
116120

117121
EXPECT_EQ(patternAllocationsVectorSize + 1u, newPatternAllocationsVectorSize);
118122
}
119123

124+
// Checks that two appendMemoryFill calls using the same pattern buffer each add an
// entry to patternTags: the vector holds one tag after the first call and more after the second.
// dstPtr, pattern, patternSize and allocSize come from the AppendFillTest fixture (not visible here).
HWTEST_F(AppendFillTest, givenTwoCallsToAppendMemoryFillWithSamePatternThenTagIsCreatedForEachCall) {
125+
auto commandList = std::make_unique<WhiteBox<MockCommandList<FamilyType::gfxCoreFamily>>>();
126+
commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);
127+
128+
CmdListMemoryCopyParams copyParams = {};
129+
// NOTE(review): this call passes a literal 8 as the pattern size while the second call
// passes patternSize — presumably the same value; confirm against the fixture.
ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 8, allocSize, nullptr, 0, nullptr, copyParams);
130+
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
131+
size_t patternTagsVectorSize = commandList->patternTags.size();
132+
EXPECT_EQ(patternTagsVectorSize, 1u);
133+
134+
// Separate destination buffer for the second fill; released at the end of the test.
uint8_t *newDstPtr = new uint8_t[allocSize];
135+
result = commandList->appendMemoryFill(newDstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr, copyParams);
136+
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
137+
size_t newPatternTagsVectorSize = commandList->patternTags.size();
138+
139+
EXPECT_GT(newPatternTagsVectorSize, patternTagsVectorSize);
140+
141+
delete[] newDstPtr;
142+
}
143+
144+
// Checks that filling the same destination with two different pattern buffers adds
// exactly one patternTags entry per call: one after the first call, two after the second.
// dstPtr, pattern, patternSize and allocSize come from the AppendFillTest fixture (not visible here).
HWTEST_F(AppendFillTest, givenTwoCallsToAppendMemoryFillWithDifferentPatternsThenTagIsCreatedForEachPattern) {
145+
auto commandList = std::make_unique<WhiteBox<MockCommandList<FamilyType::gfxCoreFamily>>>();
146+
commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);
147+
148+
CmdListMemoryCopyParams copyParams = {};
149+
// NOTE(review): literal 8 here versus patternSize in the second call —
// presumably equal; confirm against the fixture.
ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 8, allocSize, nullptr, 0, nullptr, copyParams);
150+
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
151+
size_t patternTagsVectorSize = commandList->patternTags.size();
152+
EXPECT_EQ(patternTagsVectorSize, 1u);
153+
154+
// Second pattern lives in a local buffer; elements beyond the four listed are zero-initialized.
uint8_t newPattern[patternSize] = {1, 2, 3, 4};
155+
result = commandList->appendMemoryFill(dstPtr, newPattern, patternSize, allocSize, nullptr, 0, nullptr, copyParams);
156+
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
157+
size_t newPatternTagsVectorSize = commandList->patternTags.size();
158+
159+
EXPECT_EQ(patternTagsVectorSize + 1u, newPatternTagsVectorSize);
160+
}
161+
120162
HWTEST_F(AppendFillTest, givenAppendMemoryFillWhenPatternSizeIsOneThenDispatchOneKernel) {
121163
auto commandList = std::make_unique<WhiteBox<MockCommandList<FamilyType::gfxCoreFamily>>>();
122164
commandList->initialize(device, NEO::EngineGroupType::compute, 0u);

level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1871,7 +1871,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenAppendMemoryFillWhenHostSynchronizeTh
18711871

18721872
constexpr size_t size = 128 * sizeof(uint32_t);
18731873
auto data = allocDeviceMem(size);
1874-
uint64_t pattern = 0u;
1874+
char pattern[65] = {};
18751875

18761876
immCmdList->appendMemoryFill(data, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, copyParams);
18771877
EXPECT_EQ(immCmdList->patternAllocations.size(), 1u);
@@ -1891,7 +1891,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenAppendMemoryFillWhenResetThenStoreFil
18911891

18921892
constexpr size_t size = 128 * sizeof(uint32_t);
18931893
auto data = allocDeviceMem(size);
1894-
uint64_t pattern = 0u;
1894+
char pattern[65] = {};
18951895

18961896
regularCmdList->appendMemoryFill(data, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, copyParams);
18971897
EXPECT_EQ(regularCmdList->patternAllocations.size(), 1u);

0 commit comments

Comments
 (0)