77
88#include " shared/source/command_stream/command_stream_receiver.h"
99#include " shared/source/memory_manager/surface.h"
10- #include " shared/source/memory_manager/unified_memory_manager .h"
10+ #include " shared/source/utilities/spinlock .h"
1111
1212#include " opencl/source/cl_device/cl_device.h"
1313#include " opencl/source/command_queue/command_queue.h"
@@ -30,15 +30,13 @@ using namespace gtpin;
3030
3131namespace NEO {
3232
33- using GTPinLockType = std::recursive_mutex;
34-
3533extern gtpin::ocl::gtpin_events_t GTPinCallbacks;
3634
3735igc_init_t *pIgcInit = nullptr ;
3836std::atomic<int > sequenceCount (1 );
3937CommandQueue *pCmdQueueForFlushTask = nullptr ;
4038std::deque<gtpinkexec_t > kernelExecQueue;
41- GTPinLockType kernelExecQueueLock;
39+ SpinLock kernelExecQueueLock;
4240
4341void gtpinNotifyContextCreate (cl_context context) {
4442 if (isGTPinInitialized) {
@@ -133,7 +131,7 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
133131 kExec .gtpinResource = (cl_mem)resource;
134132 kExec .commandBuffer = commandBuffer;
135133 kExec .pCommandQueue = (CommandQueue *)pCmdQueue;
136- std::unique_lock<GTPinLockType > lock{kernelExecQueueLock};
134+ std::unique_lock<SpinLock > lock{kernelExecQueueLock};
137135 kernelExecQueue.push_back (kExec );
138136 lock.unlock ();
139137 // Patch SSH[gtpinBTI] with GT-Pin resource
@@ -144,19 +142,10 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
144142 GTPinHwHelper &gtpinHelper = GTPinHwHelper::get (genFamily);
145143 size_t gtpinBTI = pKernel->getNumberOfBindingTableStates () - 1 ;
146144 void *pSurfaceState = gtpinHelper.getSurfaceState (pKernel, gtpinBTI);
147- if (gtpinHelper.canUseSharedAllocation (device.getHardwareInfo ())) {
148- auto allocData = reinterpret_cast <SvmAllocationData *>(resource);
149- auto gpuAllocation = allocData->gpuAllocations .getGraphicsAllocation (rootDeviceIndex);
150- size_t size = gpuAllocation->getUnderlyingBufferSize ();
151- Buffer::setSurfaceState (&device, pSurfaceState, false , false , size, gpuAllocation->getUnderlyingBuffer (), 0 , gpuAllocation, 0 , 0 ,
152- pKernel->getKernelInfo ().kernelDescriptor .kernelAttributes .flags .useGlobalAtomics , pContext->getNumDevices ());
153- pKernel->setUnifiedMemoryExecInfo (gpuAllocation);
154- } else {
155- cl_mem buffer = (cl_mem)resource;
156- auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
157- pBuffer->setArgStateful (pSurfaceState, false , false , false , false , device,
158- pKernel->getKernelInfo ().kernelDescriptor .kernelAttributes .flags .useGlobalAtomics , pContext->getNumDevices ());
159- }
145+ cl_mem buffer = (cl_mem)resource;
146+ auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
147+ pBuffer->setArgStateful (pSurfaceState, false , false , false , false , device,
148+ pKernel->getKernelInfo ().kernelDescriptor .kernelAttributes .flags .useGlobalAtomics , pContext->getNumDevices ());
160149 }
161150}
162151
@@ -168,7 +157,7 @@ void gtpinNotifyPreFlushTask(void *pCmdQueue) {
168157
169158void gtpinNotifyFlushTask (uint32_t flushedTaskCount) {
170159 if (isGTPinInitialized) {
171- std::unique_lock<GTPinLockType > lock{kernelExecQueueLock};
160+ std::unique_lock<SpinLock > lock{kernelExecQueueLock};
172161 size_t numElems = kernelExecQueue.size ();
173162 for (size_t n = 0 ; n < numElems; n++) {
174163 if ((kernelExecQueue[n].pCommandQueue == pCmdQueueForFlushTask) && !kernelExecQueue[n].isTaskCountValid ) {
@@ -184,7 +173,7 @@ void gtpinNotifyFlushTask(uint32_t flushedTaskCount) {
184173
185174void gtpinNotifyTaskCompletion (uint32_t completedTaskCount) {
186175 if (isGTPinInitialized) {
187- std::unique_lock<GTPinLockType > lock{kernelExecQueueLock};
176+ std::unique_lock<SpinLock > lock{kernelExecQueueLock};
188177 size_t numElems = kernelExecQueue.size ();
189178 for (size_t n = 0 ; n < numElems;) {
190179 if (kernelExecQueue[n].isTaskCountValid && (kernelExecQueue[n].taskCount <= completedTaskCount)) {
@@ -202,23 +191,15 @@ void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) {
202191
203192void gtpinNotifyMakeResident (void *pKernel, void *pCSR) {
204193 if (isGTPinInitialized) {
205- std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
206- Context &context = static_cast <Kernel *>(pKernel)->getContext ();
207- GTPinHwHelper &gtpinHelper = GTPinHwHelper::get (context.getDevice (0 )->getHardwareInfo ().platform .eRenderCoreFamily );
194+ std::unique_lock<SpinLock> lock{kernelExecQueueLock};
208195 size_t numElems = kernelExecQueue.size ();
209196 for (size_t n = 0 ; n < numElems; n++) {
210197 if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource ) {
211198 // It's time for kernel to make resident its GT-Pin resource
212199 CommandStreamReceiver *pCommandStreamReceiver = reinterpret_cast <CommandStreamReceiver *>(pCSR);
213- GraphicsAllocation *pGfxAlloc = nullptr ;
214- if (gtpinHelper.canUseSharedAllocation (context.getDevice (0 )->getHardwareInfo ())) {
215- auto allocData = reinterpret_cast <SvmAllocationData *>(kernelExecQueue[n].gtpinResource );
216- pGfxAlloc = allocData->gpuAllocations .getGraphicsAllocation (pCommandStreamReceiver->getRootDeviceIndex ());
217- } else {
218- cl_mem gtpinBuffer = kernelExecQueue[n].gtpinResource ;
219- auto pBuffer = castToObjectOrAbort<Buffer>(gtpinBuffer);
220- pGfxAlloc = pBuffer->getGraphicsAllocation (pCommandStreamReceiver->getRootDeviceIndex ());
221- }
200+ cl_mem gtpinBuffer = kernelExecQueue[n].gtpinResource ;
201+ auto pBuffer = castToObjectOrAbort<Buffer>(gtpinBuffer);
202+ GraphicsAllocation *pGfxAlloc = pBuffer->getGraphicsAllocation (pCommandStreamReceiver->getRootDeviceIndex ());
222203 pCommandStreamReceiver->makeResident (*pGfxAlloc);
223204 kernelExecQueue[n].isResourceResident = true ;
224205 break ;
@@ -229,7 +210,7 @@ void gtpinNotifyMakeResident(void *pKernel, void *pCSR) {
229210
230211void gtpinNotifyUpdateResidencyList (void *pKernel, void *pResVec) {
231212 if (isGTPinInitialized) {
232- std::unique_lock<GTPinLockType > lock{kernelExecQueueLock};
213+ std::unique_lock<SpinLock > lock{kernelExecQueueLock};
233214 size_t numElems = kernelExecQueue.size ();
234215 for (size_t n = 0 ; n < numElems; n++) {
235216 if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource ) {
0 commit comments