@@ -25,6 +25,13 @@ def makeOpenCLKernelString(kernel):
2525 kStr += "/* %s */" % kernel .getName ()
2626 kStr += endLine
2727
28+ ####################################
29+ # Double precision pragma
30+ prec = kernel .getName ()[0 ].lower ()
31+ if prec == "d" or prec == "z" :
32+ kStr += endLine
33+ kStr += "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" + endLine
34+
2835 ####################################
2936 # kernel parameters
3037 kStr += endLine
@@ -168,14 +175,14 @@ def makeOpenCLKernelString(kernel):
168175 kStr += endLine
169176 kStr += "/* %dx%d micro-tile */%s" % (kernel .microTileNumRows , kernel .microTileNumCols , endLine )
170177 kStr += "#define MICRO_TILE \\ \\ " + endLine
171- for a in range (0 , kernel .microTileNumRows ):
178+ for a in range (0 , int ( kernel .microTileNumRows ) ):
172179 kStr += " rA[%d] = localA[offA + %d*WG_NUM_ROWS]; \\ \\ %s" % (a , a , endLine )
173- for b in range (0 , kernel .microTileNumCols ):
180+ for b in range (0 , int ( kernel .microTileNumCols ) ):
174181 kStr += " rB[%d] = localB[offB + %d*WG_NUM_COLS]; \\ \\ %s" % (b , b , endLine )
175182 kStr += " offA += (MACRO_TILE_NUM_ROWS+LOCAL_COL_PAD); \\ \\ " + endLine
176183 kStr += " offB += (MACRO_TILE_NUM_COLS+LOCAL_ROW_PAD); \\ \\ " + endLine
177- for a in range (0 , kernel .microTileNumRows ):
178- for b in range (0 , kernel .microTileNumCols ):
184+ for a in range (0 , int ( kernel .microTileNumRows ) ):
185+ for b in range (0 , int ( kernel .microTileNumCols ) ):
179186 kStr += " TYPE_MAD(rA[%d],rB[%d],rC[%d][%d]); \\ \\ %s" % (a , b , a , b , endLine )
180187 kStr += " mem_fence(CLK_LOCAL_MEM_FENCE);" + endLine
181188 kStr += endLine
@@ -365,7 +372,7 @@ def makeOpenCLKernelString(kernel):
365372 zeroString = "(double2)(0.0, 0.0)"
366373 else :
367374 zeroString = "0.0"
368- for a in range (0 , numALoads ):
375+ for a in range (0 , int ( numALoads ) ):
369376 kStr += " lA[ %d*localAStride ] = " % a
370377 if kernel .isRowKernel ():
371378 kStr += "( globalARow(%d) >= M) ? %s : " % ( a , zeroString )
@@ -378,7 +385,7 @@ def makeOpenCLKernelString(kernel):
378385 kStr += "A[ GET_GLOBAL_INDEX_A( globalARow(%d), globalACol(%d) ) ];%s" % (numALoads , numALoads , endLine )
379386 kStr += " }" + endLine
380387
381- for b in range (0 , numBLoads ):
388+ for b in range (0 , int ( numBLoads ) ):
382389 kStr += " lB[ %d*localBStride ] = " % b
383390 if kernel .isColKernel ():
384391 kStr += "( globalBCol(%d) >= N) ? %s : " % ( b , zeroString )
@@ -399,7 +406,7 @@ def makeOpenCLKernelString(kernel):
399406 # do mads
400407 kStr += endLine
401408 kStr += " /* do mads */" + endLine
402- for u in range (0 , kernel .unroll ):
409+ for u in range (0 , int ( kernel .unroll ) ):
403410 kStr += " MICRO_TILE" + endLine
404411
405412 ####################################
@@ -437,8 +444,8 @@ def makeOpenCLKernelString(kernel):
437444 if kernel .precision == "z" :
438445 kStr += " double type_mad_tmp;" + endLine
439446
440- for a in range (0 , kernel .microTileNumRows ):
441- for b in range (0 , kernel .microTileNumCols ):
447+ for a in range (0 , int ( kernel .microTileNumRows ) ):
448+ for b in range (0 , int ( kernel .microTileNumCols ) ):
442449 if kernel .isRowKernel ():
443450 kStr += " if (globalCRow+%d*WG_NUM_ROWS < M)" % a
444451 if kernel .isColKernel ():
@@ -534,7 +541,7 @@ def writeOpenCLKernels():
534541 cornerKernel .macroTileNumCols = 1
535542 writeOpenCLKernelToFile (cornerKernel )
536543 numKernels += 4
537- print "AutoGemm.py: generated %d kernels" % numKernels
544+ print ( "AutoGemm.py: generated %d kernels" % numKernels )
538545
539546
540547
@@ -583,5 +590,4 @@ def writeOpenCLKernels():
583590
584591 kernelName = kernel .getName ()
585592 kernelFileName = Common .getKernelSourcePath () + kernelName + "_src.cpp"
586- print "kernel \" %s\" written to %s" % (kernelName , kernelFileName )
587-
593+ print ("kernel \" %s\" written to %s" % (kernelName , kernelFileName ))
0 commit comments