Skip to content

Commit cd39302

Browse files
Align sync buffer address to max atomic type size
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
1 parent 6ece353 commit cd39302

File tree

3 files changed: 18 additions and 7 deletions.

opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -104,10 +104,9 @@ class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest {
104104

105105
const cl_uint workDim = 1;
106106
const size_t gwOffset[3] = {0, 0, 0};
107-
const size_t lws[3] = {10, 1, 1};
108-
size_t workgroupCount[3] = {10, 1, 1};
109-
size_t globalWorkSize[3] = {100, 1, 1};
110-
size_t workItemsCount = 10;
107+
const size_t workItemsCount = 16;
108+
const size_t lws[3] = {workItemsCount, 1, 1};
109+
size_t workgroupCount[3] = {workItemsCount, 1, 1};
111110
std::unique_ptr<MockKernelWithInternals> kernelInternals;
112111
MockKernel *kernel;
113112
MockCommandQueue *commandQueue;
@@ -128,6 +127,19 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurr
128127
static_cast<UltCommandStreamReceiver<FamilyType> *>(pCsr)->latestSentTaskCount);
129128
}
130129

130+
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferOffsetIsProperlyAligned) {
131+
patchAllocateSyncBuffer();
132+
133+
workgroupCount[0] = 1;
134+
enqueueNDCount();
135+
136+
auto syncBufferHandler = getSyncBufferHandler();
137+
EXPECT_EQ(CommonConstants::maximalSizeOfAtomicType, syncBufferHandler->usedBufferSize);
138+
139+
enqueueNDCount();
140+
EXPECT_EQ(2u * CommonConstants::maximalSizeOfAtomicType, syncBufferHandler->usedBufferSize);
141+
}
142+
131143
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithoutAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsNotCreated) {
132144
auto retVal = enqueueNDCount();
133145
EXPECT_EQ(CL_SUCCESS, retVal);
@@ -153,7 +165,6 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithAllocateSyncB
153165
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenMaxWorkgroupCountWhenEnqueuingConcurrentKernelThenSuccessIsReturned) {
154166
auto maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws, commandQueue);
155167
workgroupCount[0] = maxWorkGroupCount;
156-
globalWorkSize[0] = maxWorkGroupCount * lws[0];
157168

158169
auto retVal = enqueueNDCount();
159170
EXPECT_EQ(CL_SUCCESS, retVal);
@@ -162,7 +173,6 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenMaxWorkgroupCountWhenEnqueuingCon
162173
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenTooHighWorkgroupCountWhenEnqueuingConcurrentKernelThenErrorIsReturned) {
163174
size_t maxWorkGroupCount = kernel->getMaxWorkGroupCount(workDim, lws, commandQueue);
164175
workgroupCount[0] = maxWorkGroupCount + 1;
165-
globalWorkSize[0] = maxWorkGroupCount * lws[0];
166176

167177
auto retVal = enqueueNDCount();
168178
EXPECT_EQ(CL_INVALID_VALUE, retVal);

shared/source/helpers/constants.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,7 @@ namespace CommonConstants {
8888
constexpr uint32_t unspecifiedDeviceIndex = std::numeric_limits<uint32_t>::max();
8989
constexpr uint32_t invalidStepping = std::numeric_limits<uint32_t>::max();
9090
constexpr uint32_t maximalSimdSize = 32;
91+
constexpr uint32_t maximalSizeOfAtomicType = 8;
9192
constexpr uint32_t engineGroupCount = static_cast<uint32_t>(NEO::EngineGroupType::MaxEngineGroups);
9293
constexpr uint32_t partitionAddressOffsetDwords = 2u;
9394
constexpr uint32_t partitionAddressOffset = sizeof(uint32_t) * partitionAddressOffsetDwords;

shared/source/program/sync_buffer_handler.inl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99

1010
template <typename KernelT>
1111
void NEO::SyncBufferHandler::prepareForEnqueue(size_t workGroupsCount, KernelT &kernel) {
12-
auto requiredSize = workGroupsCount;
12+
auto requiredSize = alignUp(workGroupsCount, CommonConstants::maximalSizeOfAtomicType);
1313
std::lock_guard<std::mutex> guard(this->mutex);
1414

1515
bool isCurrentBufferFull = (usedBufferSize + requiredSize > bufferSize);

0 commit comments

Comments
 (0)