@@ -158,7 +158,7 @@ bool operator<(const kernel_map_key & l, const kernel_map_key & r) {
158158 *****************************************************************************/
159159// FIXME: This function should be returning an error.
160160void makeGemmKernel (
161- cl_kernel *clKernel, // ignored as input; returns as output
161+ cl_kernel *clKernel, // ignored as input; returns as output only
162162 cl_command_queue clQueue,
163163 const char *kernelSource,
164164 const char *sourceBuildOptions,
@@ -461,10 +461,10 @@ clblasGemm(
461461 size_t *colKernelBinarySize = 0 ;
462462 size_t *cornerKernelBinarySize = 0 ;
463463 const char *binaryBuildOptions = NULL ;
464- cl_kernel *tileClKernel = NULL ;
465- cl_kernel *rowClKernel = NULL ;
466- cl_kernel *colClKernel = NULL ;
467- cl_kernel *cornerClKernel = NULL ;
464+ cl_kernel *tileClKernelDummy = NULL ; // no longer used; broke thread safety
465+ cl_kernel *rowClKernelDummy = NULL ; // no longer used; broke thread safety
466+ cl_kernel *colClKernelDummy = NULL ; // no longer used; broke thread safety
467+ cl_kernel *cornerClKernelDummy = NULL ; // no longer used; broke thread safety
468468 unsigned int workGroupNumRows;
469469 unsigned int workGroupNumCols;
470470 unsigned int microTileNumRows;
@@ -489,10 +489,10 @@ clblasGemm(
489489 &colKernelBinarySize,
490490 &cornerKernelBinarySize,
491491 &binaryBuildOptions,
492- &tileClKernel ,
493- &rowClKernel ,
494- &colClKernel ,
495- &cornerClKernel ,
492+ &tileClKernelDummy ,
493+ &rowClKernelDummy ,
494+ &colClKernelDummy ,
495+ &cornerClKernelDummy ,
496496 &workGroupNumRows,
497497 &workGroupNumCols,
498498 &microTileNumRows,
@@ -530,10 +530,10 @@ clblasGemm(
530530 &colKernelBinarySize,
531531 &cornerKernelBinarySize,
532532 &binaryBuildOptions,
533- &tileClKernel ,
534- &rowClKernel ,
535- &colClKernel ,
536- &cornerClKernel ,
533+ &tileClKernelDummy ,
534+ &rowClKernelDummy ,
535+ &colClKernelDummy ,
536+ &cornerClKernelDummy ,
537537 &workGroupNumRows,
538538 &workGroupNumCols,
539539 &microTileNumRows,
@@ -567,14 +567,15 @@ clblasGemm(
567567 * Build kernels
568568 *****************************************************************************/
569569
570- tileClKernel = NULL ;
571- rowClKernel = NULL ;
572- colClKernel = NULL ;
573- cornerClKernel = NULL ;
574- if (needTileKernel) makeGemmKernel ( tileClKernel, commandQueues[0 ], tileKernelSource, sourceBuildOptions, &tileKernelBinary, tileKernelBinarySize, binaryBuildOptions);
575- if (needRowKernel) makeGemmKernel ( rowClKernel, commandQueues[0 ], rowKernelSource, sourceBuildOptions, &rowKernelBinary, rowKernelBinarySize, binaryBuildOptions);
576- if (needColKernel) makeGemmKernel ( colClKernel, commandQueues[0 ], colKernelSource, sourceBuildOptions, &colKernelBinary, colKernelBinarySize, binaryBuildOptions);
577- if (needCornerKernel) makeGemmKernel (cornerClKernel, commandQueues[0 ], cornerKernelSource, sourceBuildOptions, &cornerKernelBinary, cornerKernelBinarySize, binaryBuildOptions);
570+
571+ cl_kernel tileClKernel = NULL ;
572+ cl_kernel rowClKernel = NULL ;
573+ cl_kernel colClKernel = NULL ;
574+ cl_kernel cornerClKernel = NULL ;
575+ if (needTileKernel) makeGemmKernel ( &tileClKernel, commandQueues[0 ], tileKernelSource, sourceBuildOptions, &tileKernelBinary, tileKernelBinarySize, binaryBuildOptions);
576+ if (needRowKernel) makeGemmKernel ( &rowClKernel, commandQueues[0 ], rowKernelSource, sourceBuildOptions, &rowKernelBinary, rowKernelBinarySize, binaryBuildOptions);
577+ if (needColKernel) makeGemmKernel ( &colClKernel, commandQueues[0 ], colKernelSource, sourceBuildOptions, &colKernelBinary, colKernelBinarySize, binaryBuildOptions);
578+ if (needCornerKernel) makeGemmKernel (&cornerClKernel, commandQueues[0 ], cornerKernelSource, sourceBuildOptions, &cornerKernelBinary, cornerKernelBinarySize, binaryBuildOptions);
578579 const size_t localWorkSize[2 ] = { workGroupNumRows, workGroupNumCols };
579580 unsigned int numKernelsEnqueued = 0 ;
580581
@@ -603,7 +604,7 @@ clblasGemm(
603604 if (needTileKernel) {
604605 // printf("enqueueing tile kernel\n");
605606 size_t globalWorkSize[2 ] = {(M/macroTileNumRows)*workGroupNumRows, (N/macroTileNumCols)*workGroupNumCols };
606- err = enqueueGemmKernel ( commandQueues[numKernelsEnqueued%numCommandQueues], * tileClKernel,
607+ err = enqueueGemmKernel ( commandQueues[numKernelsEnqueued%numCommandQueues], tileClKernel,
607608 gemmKernelArgs, gemmKernelArgSizes, numGemmKernelArgs,
608609 globalWorkSize, localWorkSize,
609610 numEventsInWaitList, eventWaitList,
@@ -618,7 +619,7 @@ clblasGemm(
618619 if (needRowKernel) {
619620 // printf("enqueueing row kernel\n");
620621 size_t globalWorkSize[2 ] = {1 *workGroupNumRows, (N/macroTileNumCols)*workGroupNumCols };
621- err = enqueueGemmKernel ( commandQueues[numKernelsEnqueued%numCommandQueues], * rowClKernel,
622+ err = enqueueGemmKernel ( commandQueues[numKernelsEnqueued%numCommandQueues], rowClKernel,
622623 gemmKernelArgs, gemmKernelArgSizes, numGemmKernelArgs,
623624 globalWorkSize, localWorkSize,
624625 numEventsInWaitList, eventWaitList,
@@ -633,7 +634,7 @@ clblasGemm(
633634 if (needColKernel) {
634635 // printf("enqueueing col kernel\n");
635636 size_t globalWorkSize[2 ] = { (M/macroTileNumRows)*workGroupNumRows, 1 *workGroupNumCols };
636- err = enqueueGemmKernel ( commandQueues[numKernelsEnqueued%numCommandQueues], * colClKernel,
637+ err = enqueueGemmKernel ( commandQueues[numKernelsEnqueued%numCommandQueues], colClKernel,
637638 gemmKernelArgs, gemmKernelArgSizes, numGemmKernelArgs,
638639 globalWorkSize, localWorkSize,
639640 numEventsInWaitList, eventWaitList,
@@ -648,7 +649,7 @@ clblasGemm(
648649 if (needCornerKernel) {
649650 // printf("enqueueing corner kernel\n");
650651 size_t globalWorkSize[2 ] = { 1 *workGroupNumRows, 1 *workGroupNumCols };
651- err = enqueueGemmKernel ( commandQueues[numKernelsEnqueued%numCommandQueues], * cornerClKernel,
652+ err = enqueueGemmKernel ( commandQueues[numKernelsEnqueued%numCommandQueues], cornerClKernel,
652653 gemmKernelArgs, gemmKernelArgSizes, numGemmKernelArgs,
653654 globalWorkSize, localWorkSize,
654655 numEventsInWaitList, eventWaitList,
0 commit comments