@@ -3135,18 +3135,24 @@ cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue commandQueue,
31353135 " event" , DebugManager.getEvents (reinterpret_cast <const uintptr_t *>(event), 1 ));
31363136
31373137 CommandQueue *pCommandQueue = nullptr ;
3138+ Kernel *pKernel = nullptr ;
31383139
31393140 retVal = validateObjects (
31403141 WithCastToInternal (commandQueue, &pCommandQueue),
3141- kernel,
3142+ WithCastToInternal ( kernel, &pKernel) ,
31423143 EventWaitList (numEventsInWaitList, eventWaitList));
31433144
31443145 if (CL_SUCCESS != retVal) {
31453146 TRACING_EXIT (clEnqueueNDRangeKernel, &retVal);
31463147 return retVal;
31473148 }
31483149
3149- auto pKernel = castToObjectOrAbort<Kernel>(kernel);
3150+ if (pKernel->getKernelInfo ().patchInfo .pAllocateSyncBuffer != nullptr ) {
3151+ retVal = CL_INVALID_KERNEL;
3152+ TRACING_EXIT (clEnqueueNDRangeKernel, &retVal);
3153+ return retVal;
3154+ }
3155+
31503156 TakeOwnershipWrapper<Kernel> kernelOwnership (*pKernel, gtpinIsGTPinInitialized ());
31513157 if (gtpinIsGTPinInitialized ()) {
31523158 gtpinNotifyKernelSubmit (kernel, pCommandQueue);
@@ -3947,6 +3953,7 @@ void *CL_API_CALL clGetExtensionFunctionAddress(const char *funcName) {
39473953 RETURN_FUNC_PTR_IF_EXIST (clGetDeviceFunctionPointerINTEL);
39483954 RETURN_FUNC_PTR_IF_EXIST (clGetDeviceGlobalVariablePointerINTEL);
39493955 RETURN_FUNC_PTR_IF_EXIST (clGetExecutionInfoINTEL);
3956+ RETURN_FUNC_PTR_IF_EXIST (clEnqueueNDRangeKernelINTEL);
39503957
39513958 void *ret = sharingFactory.getExtensionFunctionAddress (funcName);
39523959 if (ret != nullptr ) {
@@ -5197,3 +5204,70 @@ cl_int CL_API_CALL clGetExecutionInfoINTEL(cl_command_queue commandQueue,
51975204
51985205 return retVal;
51995206}
5207+
5208+ cl_int CL_API_CALL clEnqueueNDRangeKernelINTEL (cl_command_queue commandQueue,
5209+ cl_kernel kernel,
5210+ cl_uint workDim,
5211+ const size_t *globalWorkOffset,
5212+ const size_t *workgroupCount,
5213+ const size_t *localWorkSize,
5214+ cl_uint numEventsInWaitList,
5215+ const cl_event *eventWaitList,
5216+ cl_event *event) {
5217+ cl_int retVal = CL_SUCCESS;
5218+ API_ENTER (&retVal);
5219+ DBG_LOG_INPUTS (" commandQueue" , commandQueue, " cl_kernel" , kernel,
5220+ " globalWorkOffset[0]" , DebugManager.getInput (globalWorkOffset, 0 ),
5221+ " globalWorkOffset[1]" , DebugManager.getInput (globalWorkOffset, 1 ),
5222+ " globalWorkOffset[2]" , DebugManager.getInput (globalWorkOffset, 2 ),
5223+ " workgroupCount" , DebugManager.getSizes (workgroupCount, workDim, false ),
5224+ " localWorkSize" , DebugManager.getSizes (localWorkSize, workDim, true ),
5225+ " numEventsInWaitList" , numEventsInWaitList,
5226+ " eventWaitList" , DebugManager.getEvents (reinterpret_cast <const uintptr_t *>(eventWaitList), numEventsInWaitList),
5227+ " event" , DebugManager.getEvents (reinterpret_cast <const uintptr_t *>(event), 1 ));
5228+
5229+ CommandQueue *pCommandQueue = nullptr ;
5230+ Kernel *pKernel = nullptr ;
5231+
5232+ retVal = validateObjects (
5233+ WithCastToInternal (commandQueue, &pCommandQueue),
5234+ WithCastToInternal (kernel, &pKernel),
5235+ EventWaitList (numEventsInWaitList, eventWaitList));
5236+
5237+ if (CL_SUCCESS != retVal) {
5238+ return retVal;
5239+ }
5240+
5241+ size_t globalWorkSize[3 ];
5242+ size_t requestedNumberOfWorkgroups = 1 ;
5243+ for (size_t i = 0 ; i < workDim; i++) {
5244+ globalWorkSize[i] = workgroupCount[i] * localWorkSize[i];
5245+ requestedNumberOfWorkgroups *= workgroupCount[i];
5246+ }
5247+
5248+ size_t maximalNumberOfWorkgroupsAllowed = pKernel->getMaxWorkGroupCount (workDim, localWorkSize);
5249+ if (requestedNumberOfWorkgroups > maximalNumberOfWorkgroupsAllowed) {
5250+ retVal = CL_INVALID_VALUE;
5251+ return retVal;
5252+ }
5253+
5254+ TakeOwnershipWrapper<Kernel> kernelOwnership (*pKernel, gtpinIsGTPinInitialized ());
5255+ if (gtpinIsGTPinInitialized ()) {
5256+ gtpinNotifyKernelSubmit (kernel, pCommandQueue);
5257+ }
5258+
5259+ pCommandQueue->getDevice ().allocateSyncBufferHandler ();
5260+
5261+ retVal = pCommandQueue->enqueueKernel (
5262+ kernel,
5263+ workDim,
5264+ globalWorkOffset,
5265+ globalWorkSize,
5266+ localWorkSize,
5267+ numEventsInWaitList,
5268+ eventWaitList,
5269+ event);
5270+
5271+ DBG_LOG_INPUTS (" event" , DebugManager.getEvents (reinterpret_cast <const uintptr_t *>(event), 1u ));
5272+ return retVal;
5273+ }
0 commit comments