Skip to content

Commit 1dede48

Browse files
Revert "Add GTPin feature to allocate buffer in shared memory"
This reverts commit 255e85c. Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
1 parent 9b23990 commit 1dede48

File tree

7 files changed

+38
-278
lines changed

7 files changed

+38
-278
lines changed

opencl/source/gtpin/gtpin_callbacks.cpp

Lines changed: 14 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77

88
#include "shared/source/command_stream/command_stream_receiver.h"
99
#include "shared/source/memory_manager/surface.h"
10-
#include "shared/source/memory_manager/unified_memory_manager.h"
10+
#include "shared/source/utilities/spinlock.h"
1111

1212
#include "opencl/source/cl_device/cl_device.h"
1313
#include "opencl/source/command_queue/command_queue.h"
@@ -30,15 +30,13 @@ using namespace gtpin;
3030

3131
namespace NEO {
3232

33-
using GTPinLockType = std::recursive_mutex;
34-
3533
extern gtpin::ocl::gtpin_events_t GTPinCallbacks;
3634

3735
igc_init_t *pIgcInit = nullptr;
3836
std::atomic<int> sequenceCount(1);
3937
CommandQueue *pCmdQueueForFlushTask = nullptr;
4038
std::deque<gtpinkexec_t> kernelExecQueue;
41-
GTPinLockType kernelExecQueueLock;
39+
SpinLock kernelExecQueueLock;
4240

4341
void gtpinNotifyContextCreate(cl_context context) {
4442
if (isGTPinInitialized) {
@@ -133,7 +131,7 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
133131
kExec.gtpinResource = (cl_mem)resource;
134132
kExec.commandBuffer = commandBuffer;
135133
kExec.pCommandQueue = (CommandQueue *)pCmdQueue;
136-
std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
134+
std::unique_lock<SpinLock> lock{kernelExecQueueLock};
137135
kernelExecQueue.push_back(kExec);
138136
lock.unlock();
139137
// Patch SSH[gtpinBTI] with GT-Pin resource
@@ -144,19 +142,10 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
144142
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(genFamily);
145143
size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1;
146144
void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI);
147-
if (gtpinHelper.canUseSharedAllocation(device.getHardwareInfo())) {
148-
auto allocData = reinterpret_cast<SvmAllocationData *>(resource);
149-
auto gpuAllocation = allocData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
150-
size_t size = gpuAllocation->getUnderlyingBufferSize();
151-
Buffer::setSurfaceState(&device, pSurfaceState, false, false, size, gpuAllocation->getUnderlyingBuffer(), 0, gpuAllocation, 0, 0,
152-
pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices());
153-
pKernel->setUnifiedMemoryExecInfo(gpuAllocation);
154-
} else {
155-
cl_mem buffer = (cl_mem)resource;
156-
auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
157-
pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device,
158-
pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices());
159-
}
145+
cl_mem buffer = (cl_mem)resource;
146+
auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
147+
pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device,
148+
pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices());
160149
}
161150
}
162151

@@ -168,7 +157,7 @@ void gtpinNotifyPreFlushTask(void *pCmdQueue) {
168157

169158
void gtpinNotifyFlushTask(uint32_t flushedTaskCount) {
170159
if (isGTPinInitialized) {
171-
std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
160+
std::unique_lock<SpinLock> lock{kernelExecQueueLock};
172161
size_t numElems = kernelExecQueue.size();
173162
for (size_t n = 0; n < numElems; n++) {
174163
if ((kernelExecQueue[n].pCommandQueue == pCmdQueueForFlushTask) && !kernelExecQueue[n].isTaskCountValid) {
@@ -184,7 +173,7 @@ void gtpinNotifyFlushTask(uint32_t flushedTaskCount) {
184173

185174
void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) {
186175
if (isGTPinInitialized) {
187-
std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
176+
std::unique_lock<SpinLock> lock{kernelExecQueueLock};
188177
size_t numElems = kernelExecQueue.size();
189178
for (size_t n = 0; n < numElems;) {
190179
if (kernelExecQueue[n].isTaskCountValid && (kernelExecQueue[n].taskCount <= completedTaskCount)) {
@@ -202,23 +191,15 @@ void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) {
202191

203192
void gtpinNotifyMakeResident(void *pKernel, void *pCSR) {
204193
if (isGTPinInitialized) {
205-
std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
206-
Context &context = static_cast<Kernel *>(pKernel)->getContext();
207-
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(context.getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
194+
std::unique_lock<SpinLock> lock{kernelExecQueueLock};
208195
size_t numElems = kernelExecQueue.size();
209196
for (size_t n = 0; n < numElems; n++) {
210197
if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) {
211198
// It's time for kernel to make resident its GT-Pin resource
212199
CommandStreamReceiver *pCommandStreamReceiver = reinterpret_cast<CommandStreamReceiver *>(pCSR);
213-
GraphicsAllocation *pGfxAlloc = nullptr;
214-
if (gtpinHelper.canUseSharedAllocation(context.getDevice(0)->getHardwareInfo())) {
215-
auto allocData = reinterpret_cast<SvmAllocationData *>(kernelExecQueue[n].gtpinResource);
216-
pGfxAlloc = allocData->gpuAllocations.getGraphicsAllocation(pCommandStreamReceiver->getRootDeviceIndex());
217-
} else {
218-
cl_mem gtpinBuffer = kernelExecQueue[n].gtpinResource;
219-
auto pBuffer = castToObjectOrAbort<Buffer>(gtpinBuffer);
220-
pGfxAlloc = pBuffer->getGraphicsAllocation(pCommandStreamReceiver->getRootDeviceIndex());
221-
}
200+
cl_mem gtpinBuffer = kernelExecQueue[n].gtpinResource;
201+
auto pBuffer = castToObjectOrAbort<Buffer>(gtpinBuffer);
202+
GraphicsAllocation *pGfxAlloc = pBuffer->getGraphicsAllocation(pCommandStreamReceiver->getRootDeviceIndex());
222203
pCommandStreamReceiver->makeResident(*pGfxAlloc);
223204
kernelExecQueue[n].isResourceResident = true;
224205
break;
@@ -229,7 +210,7 @@ void gtpinNotifyMakeResident(void *pKernel, void *pCSR) {
229210

230211
void gtpinNotifyUpdateResidencyList(void *pKernel, void *pResVec) {
231212
if (isGTPinInitialized) {
232-
std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
213+
std::unique_lock<SpinLock> lock{kernelExecQueueLock};
233214
size_t numElems = kernelExecQueue.size();
234215
for (size_t n = 0; n < numElems; n++) {
235216
if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource) {

opencl/source/gtpin/gtpin_helpers.cpp

Lines changed: 21 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,8 @@
88
#include "gtpin_helpers.h"
99

1010
#include "shared/source/memory_manager/memory_manager.h"
11-
#include "shared/source/memory_manager/unified_memory_manager.h"
1211

13-
#include "opencl/source/api/api.h"
14-
#include "opencl/source/cl_device/cl_device.h"
1512
#include "opencl/source/context/context.h"
16-
#include "opencl/source/gtpin/gtpin_hw_helper.h"
1713
#include "opencl/source/helpers/validators.h"
1814
#include "opencl/source/mem_obj/buffer.h"
1915

@@ -31,39 +27,27 @@ GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinCreateBuffer(context_handle_t context
3127
return GTPIN_DI_ERROR_INVALID_ARGUMENT;
3228
}
3329
size_t size = alignUp(reqSize, MemoryConstants::cacheLineSize);
34-
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
35-
if (gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) {
36-
void *unfiedMemorySharedAllocation = clSharedMemAllocINTEL(pContext, pContext->getDevice(0), 0, size, 0, &diag);
37-
auto allocationsManager = pContext->getSVMAllocsManager();
38-
auto graphicsAllocation = allocationsManager->getSVMAlloc(unfiedMemorySharedAllocation);
39-
*pResource = (resource_handle_t)graphicsAllocation;
40-
} else {
41-
void *hostPtr = pContext->getMemoryManager()->allocateSystemMemory(size, MemoryConstants::pageSize);
42-
if (hostPtr == nullptr) {
43-
return GTPIN_DI_ERROR_ALLOCATION_FAILED;
44-
}
45-
cl_mem buffer = Buffer::create(pContext, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE | CL_MEM_FORCE_HOST_MEMORY_INTEL, size, hostPtr, diag);
46-
*pResource = (resource_handle_t)buffer;
30+
void *hostPtr = pContext->getMemoryManager()->allocateSystemMemory(size, MemoryConstants::pageSize);
31+
if (hostPtr == nullptr) {
32+
return GTPIN_DI_ERROR_ALLOCATION_FAILED;
4733
}
34+
cl_mem buffer = Buffer::create(pContext, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE | CL_MEM_FORCE_HOST_MEMORY_INTEL, size, hostPtr, diag);
35+
*pResource = (resource_handle_t)buffer;
4836
return GTPIN_DI_SUCCESS;
4937
}
5038

5139
GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinFreeBuffer(context_handle_t context, resource_handle_t resource) {
40+
cl_mem buffer = (cl_mem)resource;
5241
Context *pContext = castToObject<Context>((cl_context)context);
53-
if ((pContext == nullptr) || (resource == nullptr)) {
42+
if ((pContext == nullptr) || (buffer == nullptr)) {
5443
return GTPIN_DI_ERROR_INVALID_ARGUMENT;
5544
}
56-
if (pContext->getMemoryManager()->isLocalMemorySupported(pContext->getDevice(0)->getRootDeviceIndex())) {
57-
auto allocData = reinterpret_cast<SvmAllocationData *>(resource);
58-
clMemFreeINTEL(pContext, allocData->cpuAllocation->getUnderlyingBuffer());
59-
} else {
60-
auto pMemObj = castToObject<MemObj>(resource);
61-
if (pMemObj == nullptr) {
62-
return GTPIN_DI_ERROR_INVALID_ARGUMENT;
63-
}
64-
alignedFree(pMemObj->getHostPtr());
65-
pMemObj->release();
45+
auto pMemObj = castToObject<MemObj>(buffer);
46+
if (pMemObj == nullptr) {
47+
return GTPIN_DI_ERROR_INVALID_ARGUMENT;
6648
}
49+
alignedFree(pMemObj->getHostPtr());
50+
pMemObj->release();
6751
return GTPIN_DI_SUCCESS;
6852
}
6953

@@ -73,31 +57,23 @@ GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinMapBuffer(context_handle_t context, r
7357
if ((pContext == nullptr) || (buffer == nullptr) || (pAddress == nullptr)) {
7458
return GTPIN_DI_ERROR_INVALID_ARGUMENT;
7559
}
76-
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
77-
if (gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) {
78-
auto allocData = reinterpret_cast<SvmAllocationData *>(resource);
79-
*pAddress = reinterpret_cast<uint8_t *>(allocData->cpuAllocation->getUnderlyingBuffer());
80-
} else {
81-
auto pMemObj = castToObject<MemObj>(buffer);
82-
if (pMemObj == nullptr) {
83-
return GTPIN_DI_ERROR_INVALID_ARGUMENT;
84-
}
85-
*pAddress = reinterpret_cast<uint8_t *>(pMemObj->getHostPtr());
60+
auto pMemObj = castToObject<MemObj>(buffer);
61+
if (pMemObj == nullptr) {
62+
return GTPIN_DI_ERROR_INVALID_ARGUMENT;
8663
}
64+
*pAddress = (uint8_t *)pMemObj->getHostPtr();
8765
return GTPIN_DI_SUCCESS;
8866
}
8967

9068
GTPIN_DI_STATUS GTPIN_DRIVER_CALLCONV gtpinUnmapBuffer(context_handle_t context, resource_handle_t resource) {
69+
cl_mem buffer = (cl_mem)resource;
9170
Context *pContext = castToObject<Context>((cl_context)context);
92-
if ((pContext == nullptr) || (resource == nullptr)) {
71+
if ((pContext == nullptr) || (buffer == nullptr)) {
9372
return GTPIN_DI_ERROR_INVALID_ARGUMENT;
9473
}
95-
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(pContext->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
96-
if (!gtpinHelper.canUseSharedAllocation(pContext->getDevice(0)->getHardwareInfo())) {
97-
auto pMemObj = castToObject<MemObj>(resource);
98-
if (pMemObj == nullptr) {
99-
return GTPIN_DI_ERROR_INVALID_ARGUMENT;
100-
}
74+
auto pMemObj = castToObject<MemObj>(buffer);
75+
if (pMemObj == nullptr) {
76+
return GTPIN_DI_ERROR_INVALID_ARGUMENT;
10177
}
10278
return GTPIN_DI_SUCCESS;
10379
}

opencl/source/gtpin/gtpin_hw_helper.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ class GTPinHwHelper {
1717
virtual uint32_t getGenVersion() = 0;
1818
virtual bool addSurfaceState(Kernel *pKernel) = 0;
1919
virtual void *getSurfaceState(Kernel *pKernel, size_t bti) = 0;
20-
virtual bool canUseSharedAllocation(const HardwareInfo &hwInfo) const = 0;
2120

2221
protected:
2322
GTPinHwHelper(){};
@@ -33,9 +32,8 @@ class GTPinHwHelperHw : public GTPinHwHelper {
3332
uint32_t getGenVersion() override;
3433
bool addSurfaceState(Kernel *pKernel) override;
3534
void *getSurfaceState(Kernel *pKernel, size_t bti) override;
36-
bool canUseSharedAllocation(const HardwareInfo &hwInfo) const override;
3735

38-
protected:
36+
private:
3937
GTPinHwHelperHw(){};
4038
};
4139
} // namespace NEO

opencl/source/gtpin/gtpin_hw_helper.inl

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,4 @@ void *GTPinHwHelperHw<GfxFamily>::getSurfaceState(Kernel *pKernel, size_t bti) {
5656
return pSurfaceState;
5757
}
5858

59-
template <typename GfxFamily>
60-
bool GTPinHwHelperHw<GfxFamily>::canUseSharedAllocation(const HardwareInfo &hwInfo) const {
61-
bool canUseSharedAllocation = false;
62-
if (DebugManager.flags.GTPinAllocateBufferInSharedMemory.get() != -1) {
63-
canUseSharedAllocation = !!DebugManager.flags.GTPinAllocateBufferInSharedMemory.get();
64-
}
65-
canUseSharedAllocation &= hwInfo.capabilityTable.ftrSvm;
66-
return canUseSharedAllocation;
67-
}
68-
6959
} // namespace NEO

0 commit comments

Comments
 (0)