@@ -1013,12 +1013,15 @@ void
10131013xGemm<cl_double>::
10141014xGemm_Function (bool flush, cl_uint apiCallCount )
10151015{
1016- clblasDgemm (order_, buffer_.trans_a_ , buffer_.trans_b_ ,
1016+ for (int i = 0 ; i < apiCallCount; i++)
1017+ {
1018+ clblasDgemm (order_, buffer_.trans_a_ , buffer_.trans_b_ ,
10171019 buffer_.m_ , buffer_.n_ , buffer_.k_ , buffer_.alpha_ ,
10181020 buffer_.buf_a_ , buffer_.offA_ , buffer_.lda_ ,
10191021 buffer_.buf_b_ , buffer_.offB_ , buffer_.ldb_ ,
10201022 buffer_.beta_ , buffer_.buf_c_ , buffer_.offC_ ,
10211023 buffer_.ldc_ , 1 , &queue_, 0 , NULL , &event_);
1024+ }
10221025 // flush==true if only the kernel time (library call) is timed
10231026 // flush==false if memory time is also timed
10241027 if (flush==true )
@@ -1032,12 +1035,15 @@ void
10321035xGemm<cl_float2>::
10331036xGemm_Function (bool flush, cl_uint apiCallCount )
10341037{
1035- clblasCgemm (order_, buffer_.trans_a_ , buffer_.trans_b_ ,
1038+ for (int i = 0 ; i < apiCallCount; i++)
1039+ {
1040+ clblasCgemm (order_, buffer_.trans_a_ , buffer_.trans_b_ ,
10361041 buffer_.m_ , buffer_.n_ , buffer_.k_ , buffer_.alpha_ ,
10371042 buffer_.buf_a_ , buffer_.offA_ , buffer_.lda_ ,
10381043 buffer_.buf_b_ , buffer_.offB_ , buffer_.ldb_ ,
10391044 buffer_.beta_ , buffer_.buf_c_ , buffer_.offC_ ,
10401045 buffer_.ldc_ , 1 , &queue_, 0 , NULL , &event_);
1046+ }
10411047 // flush==true if only the kernel time (library call) is timed
10421048 // flush==false if memory time is also timed
10431049 if (flush==true )
@@ -1051,12 +1057,15 @@ void
10511057xGemm<cl_double2>::
10521058xGemm_Function (bool flush, cl_uint apiCallCount )
10531059{
1054- clblasZgemm (order_, buffer_.trans_a_ , buffer_.trans_b_ ,
1060+ for (int i = 0 ; i < apiCallCount; i++)
1061+ {
1062+ clblasZgemm (order_, buffer_.trans_a_ , buffer_.trans_b_ ,
10551063 buffer_.m_ , buffer_.n_ , buffer_.k_ , buffer_.alpha_ ,
10561064 buffer_.buf_a_ , buffer_.offA_ , buffer_.lda_ ,
10571065 buffer_.buf_b_ , buffer_.offB_ , buffer_.ldb_ ,
10581066 buffer_.beta_ , buffer_.buf_c_ , buffer_.offC_ ,
10591067 buffer_.ldc_ , 1 , &queue_, 0 , NULL , &event_);
1068+ }
10601069 // flush==true if only the kernel time (library call) is timed
10611070 // flush==false if memory time is also timed
10621071 if (flush==true )
@@ -1070,15 +1079,15 @@ double
10701079xGemm<cl_float2>::
10711080gflops ()
10721081{
1073- return (8.0 *buffer_.m_ *buffer_.n_ *buffer_.k_ )/time_in_ns ();
1082+ return (8.0 *buffer_.m_ *buffer_.n_ *buffer_.k_ )/( time_in_ns () / buffer_. apiCallCount );
10741083}
10751084
10761085template <>
10771086double
10781087xGemm<cl_double2>::
10791088gflops ()
10801089{
1081- return (8.0 *buffer_.m_ *buffer_.n_ *buffer_.k_ )/time_in_ns ();
1090+ return (8.0 *buffer_.m_ *buffer_.n_ *buffer_.k_ )/( time_in_ns () / buffer_. apiCallCount );
10821091}
10831092
10841093template <>
0 commit comments