2121
2222#include <string.h>
2323#include <stdio.h>
24+ #include <stdlib.h>
2425#include <assert.h>
2526
2627#include <clBLAS.h>
@@ -1219,10 +1220,11 @@ genUpdateGenericDiagTile(
12191220 // type of the vectorized coordinates
12201221 Kstring vctype ;
12211222 Kstring constOffs , constShifts , constMasks ;
1222- unsigned int i , j , nops ;
1223+ unsigned int i , j , nops , size ;
12231224 unsigned int maxFetches = 0 ;
12241225 const char * yname , * xname ;
12251226 const char * ldcName ;
1227+ char hexadec [1 ];
12261228
12271229 batch = createStmtBatch ();
12281230 if (batch == NULL ) {
@@ -1253,6 +1255,14 @@ genUpdateGenericDiagTile(
12531255 tifl = (isUpper ) ? TILE_ITER_BACKWARD_ROWS :
12541256 TILE_ITER_BACKWARD_COLS ;
12551257 iterInit (& iter , & tileTempC , 1 , tifl );
1258+ nops = 0 ;
1259+ while (!iterIsEnd (& iter )) {
1260+ nops ++ ;
1261+ size = nops / nrCols ;
1262+ iterIterate (& iter );
1263+ }
1264+
1265+ iterInit (& iter , & tileTempC , 1 , tifl );
12561266
12571267 initTmpResTile (& tileTempC , gset , true);
12581268
@@ -1316,7 +1326,7 @@ genUpdateGenericDiagTile(
13161326 maxFetches = umin (maxFetches , i );
13171327
13181328 // declare vectorized coordinates
1319- declareDiagUpresIndexedVars (ctx , vctype .buf , "cc" , tempRows );
1329+ declareDiagUpresIndexedVars (ctx , vctype .buf , "cc" , size );
13201330
13211331 /*
13221332 * real y coordinate, offset mask and
@@ -1326,8 +1336,8 @@ genUpdateGenericDiagTile(
13261336 "unsigned int mask;\n"
13271337 "int hit;\n" );
13281338 if (withBeta ) {
1329- declareDiagUpresIndexedVars (ctx , typeName , "alphaNew" , tempRows );
1330- declareDiagUpresIndexedVars (ctx , typeName , "betaNew" , tempRows );
1339+ declareDiagUpresIndexedVars (ctx , typeName , "alphaNew" , size );
1340+ declareDiagUpresIndexedVars (ctx , typeName , "betaNew" , size );
13311341 }
13321342
13331343 // declare tile
@@ -1443,7 +1453,8 @@ genUpdateGenericDiagTile(
14431453 ksprintf (& kstr , "cc%u" , i );
14441454 }
14451455 else {
1446- ksprintf (& kstr , "cc%u.s%u" , i , iter .col );
1456+ itoa (iter .col , hexadec , 16 );
1457+ ksprintf (& kstr , "cc%u.s%s" , i , hexadec );
14471458 }
14481459
14491460 // prepare multipliers and fetch
0 commit comments