Skip to content

Commit 86dc5ba

Browse files
Optimize BCS flushing scheme [2/n]
Change-Id: I6f1e0115b9c45f89afb86f8fd2304604243541df
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
1 parent d4b12c9 commit 86dc5ba

19 files changed

+291
-134
lines changed

opencl/source/command_queue/command_queue.cpp

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -147,28 +147,28 @@ bool CommandQueue::isCompleted(uint32_t taskCount) const {
147147
return tag >= taskCount;
148148
}
149149

150-
void CommandQueue::waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
150+
void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
151151
WAIT_ENTER()
152152

153-
DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", taskCountToWait);
153+
DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", gpgpuTaskCountToWait);
154154
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", getHwTag());
155155

156156
bool forcePowerSavingMode = this->throttle == QueueThrottle::LOW;
157157

158-
getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait,
158+
getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait, flushStampToWait,
159159
useQuickKmdSleep, forcePowerSavingMode);
160-
DEBUG_BREAK_IF(getHwTag() < taskCountToWait);
160+
DEBUG_BREAK_IF(getHwTag() < gpgpuTaskCountToWait);
161161

162162
if (gtpinIsGTPinInitialized()) {
163-
gtpinNotifyTaskCompletion(taskCountToWait);
163+
gtpinNotifyTaskCompletion(gpgpuTaskCountToWait);
164164
}
165165

166166
if (auto bcsCsr = getBcsCommandStreamReceiver()) {
167-
bcsCsr->waitForTaskCountWithKmdNotifyFallback(bcsTaskCount, 0, false, false);
168-
bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(bcsTaskCount);
167+
bcsCsr->waitForTaskCountWithKmdNotifyFallback(bcsTaskCountToWait, 0, false, false);
168+
bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(bcsTaskCountToWait);
169169
}
170170

171-
getGpgpuCommandStreamReceiver().waitForTaskCountAndCleanTemporaryAllocationList(taskCountToWait);
171+
getGpgpuCommandStreamReceiver().waitForTaskCountAndCleanTemporaryAllocationList(gpgpuTaskCountToWait);
172172

173173
WAIT_LEAVE()
174174
}

opencl/source/command_queue/command_queue.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -215,7 +215,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
215215

216216
MOCKABLE_VIRTUAL bool isQueueBlocked();
217217

218-
MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep);
218+
MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep);
219219

220220
static uint32_t getTaskLevelFromWaitList(uint32_t taskLevel,
221221
cl_uint numEventsInWaitList,
@@ -299,6 +299,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
299299
}
300300

301301
void updateBcsTaskCount(uint32_t newBcsTaskCount) { this->bcsTaskCount = newBcsTaskCount; }
302+
uint32_t peekBcsTaskCount() const { return bcsTaskCount; }
302303

303304
// taskCount of last task
304305
uint32_t taskCount = 0;

opencl/source/command_queue/cpu_data_transfer_handler.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -145,7 +145,7 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
145145

146146
if (outEventObj) {
147147
outEventObj->setEndTimeStamp();
148-
outEventObj->updateTaskCount(this->taskCount);
148+
outEventObj->updateTaskCount(this->taskCount, this->bcsTaskCount);
149149
outEventObj->flushStamp->replaceStampObject(this->flushStamp->getStampReference());
150150
if (eventCompleted) {
151151
outEventObj->setStatus(CL_COMPLETE);

opencl/source/command_queue/enqueue_common.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -305,7 +305,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
305305
getGpgpuCommandStreamReceiver().setMediaVFEStateDirty(true);
306306

307307
if (devQueueHw->getSchedulerReturnInstance() > 0) {
308-
waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
308+
waitUntilComplete(completionStamp.taskCount, bcsTaskCount, completionStamp.flushStamp, false);
309309
this->runSchedulerSimulation(*devQueueHw, *parentKernel);
310310
}
311311
}
@@ -353,7 +353,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
353353
updateFromCompletionStamp(completionStamp);
354354

355355
if (eventBuilder.getEvent()) {
356-
eventBuilder.getEvent()->updateCompletionStamp(completionStamp.taskCount, completionStamp.taskLevel, completionStamp.flushStamp);
356+
eventBuilder.getEvent()->updateCompletionStamp(completionStamp.taskCount, bcsTaskCount, completionStamp.taskLevel, completionStamp.flushStamp);
357357
FileLoggerInstance().log(DebugManager.flags.EventsDebugEnable.get(), "updateCompletionStamp Event", eventBuilder.getEvent(), "taskLevel", eventBuilder.getEvent()->taskLevel.load());
358358
}
359359

@@ -382,9 +382,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
382382
if (blockQueue) {
383383
while (isQueueBlocked()) {
384384
}
385-
waitUntilComplete(taskCount, flushStamp->peekStamp(), false);
385+
waitUntilComplete(taskCount, bcsTaskCount, flushStamp->peekStamp(), false);
386386
} else {
387-
waitUntilComplete(taskCount, flushStamp->peekStamp(), false);
387+
waitUntilComplete(taskCount, bcsTaskCount, flushStamp->peekStamp(), false);
388388
if (printfHandler) {
389389
printfHandler->printEnqueueOutput();
390390
}

opencl/source/command_queue/finish.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ cl_int CommandQueueHw<GfxFamily>::finish() {
2727
auto flushStampToWaitFor = this->flushStamp->peekStamp();
2828

2929
// Stall until HW reaches CQ taskCount
30-
waitUntilComplete(taskCountToWaitFor, flushStampToWaitFor, false);
30+
waitUntilComplete(taskCountToWaitFor, this->bcsTaskCount, flushStampToWaitFor, false);
3131

3232
return CL_SUCCESS;
3333
}

opencl/source/event/event.cpp

Lines changed: 10 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -224,8 +224,9 @@ uint32_t Event::getCompletionStamp() const {
224224
return this->taskCount;
225225
}
226226

227-
void Event::updateCompletionStamp(uint32_t taskCount, uint32_t tasklevel, FlushStamp flushStamp) {
228-
this->taskCount = taskCount;
227+
void Event::updateCompletionStamp(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount, uint32_t tasklevel, FlushStamp flushStamp) {
228+
this->taskCount = gpgpuTaskCount;
229+
this->bcsTaskCount = bcsTaskCount;
229230
this->taskLevel = tasklevel;
230231
this->flushStamp->setStamp(flushStamp);
231232
}
@@ -370,7 +371,7 @@ inline bool Event::wait(bool blocking, bool useQuickKmdSleep) {
370371
}
371372
}
372373

373-
cmdQueue->waitUntilComplete(taskCount.load(), flushStamp->peekStamp(), useQuickKmdSleep);
374+
cmdQueue->waitUntilComplete(taskCount.load(), this->bcsTaskCount, flushStamp->peekStamp(), useQuickKmdSleep);
374375
updateExecutionStatus();
375376

376377
DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0);
@@ -510,11 +511,9 @@ void Event::transitionExecutionStatus(int32_t newExecutionStatus) const {
510511
void Event::submitCommand(bool abortTasks) {
511512
std::unique_ptr<Command> cmdToProcess(cmdToSubmit.exchange(nullptr));
512513
if (cmdToProcess.get() != nullptr) {
513-
std::unique_lock<CommandStreamReceiver::MutexType> lockCSR;
514-
if (this->cmdQueue) {
515-
lockCSR = this->getCommandQueue()->getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
516-
}
517-
if ((this->isProfilingEnabled()) && (this->cmdQueue != nullptr)) {
514+
auto lockCSR = getCommandQueue()->getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
515+
516+
if (this->isProfilingEnabled()) {
518517
if (timeStampNode) {
519518
this->cmdQueue->getGpgpuCommandStreamReceiver().makeResident(*timeStampNode->getBaseGraphicsAllocation());
520519
cmdToProcess->timestamp = timeStampNode;
@@ -530,10 +529,10 @@ void Event::submitCommand(bool abortTasks) {
530529
}
531530
}
532531
auto &complStamp = cmdToProcess->submit(taskLevel, abortTasks);
533-
if (profilingCpuPath && this->isProfilingEnabled() && (this->cmdQueue != nullptr)) {
532+
if (profilingCpuPath && this->isProfilingEnabled()) {
534533
setEndTimeStamp();
535534
}
536-
updateTaskCount(complStamp.taskCount);
535+
updateTaskCount(complStamp.taskCount, cmdQueue->peekBcsTaskCount());
537536
flushStamp->setStamp(complStamp.flushStamp);
538537
submittedCmd.exchange(cmdToProcess.release());
539538
} else if (profilingCpuPath && endTimeStamp == 0) {
@@ -543,7 +542,7 @@ void Event::submitCommand(bool abortTasks) {
543542
if (!this->isUserEvent() && this->eventWithoutCommand) {
544543
if (this->cmdQueue) {
545544
auto lockCSR = this->getCommandQueue()->getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
546-
updateTaskCount(this->cmdQueue->getGpgpuCommandStreamReceiver().peekTaskCount());
545+
updateTaskCount(this->cmdQueue->getGpgpuCommandStreamReceiver().peekTaskCount(), cmdQueue->peekBcsTaskCount());
547546
}
548547
}
549548
//make sure that task count is synchronized for events with kernels

opencl/source/event/event.h

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
8989
~Event() override;
9090

9191
uint32_t getCompletionStamp(void) const;
92-
void updateCompletionStamp(uint32_t taskCount, uint32_t tasklevel, FlushStamp flushStamp);
92+
void updateCompletionStamp(uint32_t taskCount, uint32_t bcsTaskCount, uint32_t tasklevel, FlushStamp flushStamp);
9393
cl_ulong getDelta(cl_ulong startTime,
9494
cl_ulong endTime);
9595
void setCPUProfilingPath(bool isCPUPath) { this->profilingCpuPath = isCPUPath; }
@@ -243,14 +243,15 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
243243

244244
virtual void unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus);
245245

246-
void updateTaskCount(uint32_t taskCount) {
247-
if (taskCount == CompletionStamp::notReady) {
246+
void updateTaskCount(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount) {
247+
if (gpgpuTaskCount == CompletionStamp::notReady) {
248248
DEBUG_BREAK_IF(true);
249249
return;
250250
}
251251

252-
uint32_t prevTaskCount = this->taskCount.exchange(taskCount);
253-
if ((prevTaskCount != CompletionStamp::notReady) && (prevTaskCount > taskCount)) {
252+
this->bcsTaskCount = bcsTaskCount;
253+
uint32_t prevTaskCount = this->taskCount.exchange(gpgpuTaskCount);
254+
if ((prevTaskCount != CompletionStamp::notReady) && (prevTaskCount > gpgpuTaskCount)) {
254255
this->taskCount = prevTaskCount;
255256
DEBUG_BREAK_IF(true);
256257
}
@@ -363,6 +364,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
363364
uint64_t startTimeStamp;
364365
uint64_t endTimeStamp;
365366
uint64_t completeTimeStamp;
367+
uint32_t bcsTaskCount = 0;
366368
bool perfCountersEnabled;
367369
TagNode<HwTimeStamps> *timeStampNode = nullptr;
368370
TagNode<HwPerfCounter> *perfCounterNode = nullptr;

opencl/source/helpers/task_information.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,7 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
8888
commandQueue.getDevice());
8989

9090
if (!memObj.isMemObjZeroCopy()) {
91-
commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
91+
commandQueue.waitUntilComplete(completionStamp.taskCount, commandQueue.peekBcsTaskCount(), completionStamp.flushStamp, false);
9292
if (operationType == MAP) {
9393
memObj.transferDataToHostPtr(copySize, copyOffset);
9494
} else if (!readOnly) {
@@ -268,7 +268,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
268268
}
269269

270270
if (printfHandler) {
271-
commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
271+
commandQueue.waitUntilComplete(completionStamp.taskCount, commandQueue.peekBcsTaskCount(), completionStamp.flushStamp, false);
272272
printfHandler.get()->printEnqueueOutput();
273273
}
274274

opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp

Lines changed: 126 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1025,6 +1025,132 @@ HWTEST_TEMPLATED_F(BlitEnqueueFlushTests, givenDebugFlagSetWhenCheckingBcsCacheF
10251025
EXPECT_TRUE(mockCommandQueue->isCacheFlushForBcsRequired());
10261026
}
10271027

1028+
using BlitEnqueueTaskCountTests = BlitEnqueueTests<1>;
1029+
1030+
HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, whenWaitUntilCompletionCalledThenWaitForSpecificBcsTaskCount) {
1031+
uint32_t gpgpuTaskCount = 123;
1032+
uint32_t bcsTaskCount = 123;
1033+
1034+
commandQueue->waitUntilComplete(gpgpuTaskCount, bcsTaskCount, 0, false);
1035+
1036+
EXPECT_EQ(gpgpuTaskCount, static_cast<UltCommandStreamReceiver<FamilyType> *>(gpgpuCsr)->latestWaitForCompletionWithTimeoutTaskCount.load());
1037+
EXPECT_EQ(bcsTaskCount, static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr)->latestWaitForCompletionWithTimeoutTaskCount.load());
1038+
}
1039+
1040+
HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenEventWhenWaitingForCompletionThenWaitForCurrentBcsTaskCount) {
1041+
auto buffer = createBuffer(1, false);
1042+
buffer->forceDisallowCPUCopy = true;
1043+
int hostPtr = 0;
1044+
1045+
auto ultGpgpuCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(gpgpuCsr);
1046+
auto ultBcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr);
1047+
1048+
cl_event outEvent1, outEvent2;
1049+
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &outEvent1);
1050+
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &outEvent2);
1051+
1052+
clWaitForEvents(1, &outEvent2);
1053+
EXPECT_EQ(2u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
1054+
EXPECT_EQ(2u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
1055+
1056+
clWaitForEvents(1, &outEvent1);
1057+
EXPECT_EQ(1u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
1058+
EXPECT_EQ(1u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
1059+
1060+
clReleaseEvent(outEvent1);
1061+
clReleaseEvent(outEvent2);
1062+
}
1063+
1064+
HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenBlockedEventWhenWaitingForCompletionThenWaitForCurrentBcsTaskCount) {
1065+
auto buffer = createBuffer(1, false);
1066+
buffer->forceDisallowCPUCopy = true;
1067+
int hostPtr = 0;
1068+
1069+
auto ultGpgpuCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(gpgpuCsr);
1070+
auto ultBcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr);
1071+
1072+
cl_event outEvent1, outEvent2;
1073+
UserEvent userEvent;
1074+
cl_event waitlist1 = &userEvent;
1075+
cl_event *waitlist2 = &outEvent1;
1076+
1077+
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, &waitlist1, &outEvent1);
1078+
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, waitlist2, &outEvent2);
1079+
1080+
userEvent.setStatus(CL_COMPLETE);
1081+
1082+
clWaitForEvents(1, &outEvent2);
1083+
EXPECT_EQ(2u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
1084+
EXPECT_EQ(2u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
1085+
1086+
clWaitForEvents(1, &outEvent1);
1087+
EXPECT_EQ(1u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
1088+
EXPECT_EQ(1u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
1089+
1090+
clReleaseEvent(outEvent1);
1091+
clReleaseEvent(outEvent2);
1092+
1093+
EXPECT_FALSE(commandQueue->isQueueBlocked());
1094+
}
1095+
1096+
HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenBlockedEnqueueWithoutKernelWhenWaitingForCompletionThenWaitForCurrentBcsTaskCount) {
1097+
auto ultGpgpuCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(gpgpuCsr);
1098+
auto ultBcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr);
1099+
1100+
cl_event outEvent1, outEvent2;
1101+
UserEvent userEvent;
1102+
cl_event waitlist1 = &userEvent;
1103+
cl_event *waitlist2 = &outEvent1;
1104+
1105+
commandQueue->enqueueMarkerWithWaitList(1, &waitlist1, &outEvent1);
1106+
commandQueue->enqueueMarkerWithWaitList(1, waitlist2, &outEvent2);
1107+
1108+
userEvent.setStatus(CL_COMPLETE);
1109+
1110+
clWaitForEvents(1, &outEvent2);
1111+
EXPECT_EQ(1u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
1112+
EXPECT_EQ(0u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
1113+
1114+
clWaitForEvents(1, &outEvent1);
1115+
EXPECT_EQ(0u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
1116+
EXPECT_EQ(0u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
1117+
1118+
clReleaseEvent(outEvent1);
1119+
clReleaseEvent(outEvent2);
1120+
1121+
EXPECT_FALSE(commandQueue->isQueueBlocked());
1122+
}
1123+
1124+
HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenEventFromCpuCopyWhenWaitingForCompletionThenWaitForCurrentBcsTaskCount) {
1125+
DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1);
1126+
auto buffer = createBuffer(1, false);
1127+
int hostPtr = 0;
1128+
1129+
auto ultGpgpuCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(gpgpuCsr);
1130+
auto ultBcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr);
1131+
1132+
ultGpgpuCsr->taskCount = 1;
1133+
commandQueue->taskCount = 1;
1134+
1135+
ultBcsCsr->taskCount = 2;
1136+
commandQueue->updateBcsTaskCount(2);
1137+
1138+
cl_event outEvent1, outEvent2;
1139+
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &outEvent1);
1140+
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &outEvent2);
1141+
1142+
clWaitForEvents(1, &outEvent2);
1143+
EXPECT_EQ(1u, static_cast<UltCommandStreamReceiver<FamilyType> *>(gpgpuCsr)->latestWaitForCompletionWithTimeoutTaskCount.load());
1144+
EXPECT_EQ(2u, static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr)->latestWaitForCompletionWithTimeoutTaskCount.load());
1145+
1146+
clWaitForEvents(1, &outEvent1);
1147+
EXPECT_EQ(1u, static_cast<UltCommandStreamReceiver<FamilyType> *>(gpgpuCsr)->latestWaitForCompletionWithTimeoutTaskCount.load());
1148+
EXPECT_EQ(2u, static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr)->latestWaitForCompletionWithTimeoutTaskCount.load());
1149+
1150+
clReleaseEvent(outEvent1);
1151+
clReleaseEvent(outEvent2);
1152+
}
1153+
10281154
using BlitEnqueueWithDisabledGpgpuSubmissionTests = BlitEnqueueTests<1>;
10291155

10301156
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenDebugFlagSetWhenDoingBcsCopyThenSubmitToGpgpuOnlyIfPreviousEnqueueWasGpgpu) {

opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -329,7 +329,7 @@ HWTEST_F(CommandQueueHwTest, GivenEventsWaitlistOnBlockingMapBufferWillWaitForEv
329329
MockEvent(Context *ctx, uint32_t updateCountBeforeCompleted)
330330
: UserEvent(ctx),
331331
updateCount(0), updateCountBeforeCompleted(updateCountBeforeCompleted) {
332-
this->updateTaskCount(0);
332+
this->updateTaskCount(0, 0);
333333
this->taskLevel = 0;
334334
}
335335

@@ -959,7 +959,7 @@ HWTEST_F(CommandQueueHwTest, givenEventWithRecordedCommandWhenSubmitCommandIsCal
959959
std::thread t([&]() {
960960
while (!go)
961961
;
962-
neoEvent.updateTaskCount(77u);
962+
neoEvent.updateTaskCount(77u, 0);
963963
});
964964

965965
neoEvent.submitCommand(false);

0 commit comments

Comments
 (0)