@@ -459,25 +459,67 @@ class MatMulBias : public TestMapper {
459459 public:
// Generates code for the TC function `fun` defined in the raw string below:
// O(i,j) accumulates A(i,k) * B(k,j) and then C(i,j) is added to it.
//
// This span is rendered as a diff: a `-` after the line number marks the
// removed version, a `+` marks the added version. In the added version the
// caller supplies the complete MappingOptions; the removed version took only
// tile sizes and built the options here (naive options, shared memory off,
// private memory on).
//
// parameters     - map from parameter name to size_t value, forwarded to
//                  makeMappedScop together with the TC text.
// mappingOptions - options forwarded unchanged to makeMappedScop.
// Returns element 0 of the tuple produced by mscop->codegen for `fun`; the
// tests in this file treat it as the generated source text and search it
// with std::string::find.
460460 std::string emitCode (
461461 const std::unordered_map<std::string, size_t >& parameters,
462- const std::vector< size_t >& tileSizes ) {
462+ const MappingOptions& mappingOptions ) {
463463 std::string tc = R"TC(
464464def fun(float(N,K) A, float(K,M) B, float(N,M) C) -> (O) {
465465 O(i,j) +=! A(i,k) * B(k,j)
466466 O(i,j) = O(i,j) + C(i,j)
467467}
468468)TC" ;
469469
470- auto mappingOptions = MappingOptions::makeNaiveMappingOptions ()
471- .tile (tileSizes)
472- .useSharedMemory (false )
473- .usePrivateMemory (true );
474470 auto mscop = makeMappedScop (tc, mappingOptions, parameters);
475471 return std::get<0 >(mscop->codegen (" fun" ));
476472 }
477473};
478474
// Checks the generated code for matmul-plus-bias when private memory is
// enabled and shared memory is disabled: the output O is expected to get a
// _O_0 copy (called the register in the failure messages) with a copy-in and
// a copy-out, while C and A are expected to have no such copy.
// All checks are textual substring searches on the emitted code.
// NOTE(review): the string literals in this rendering carry a leading space;
// presumably an extraction artifact - confirm against the real file.
479475TEST_F (MatMulBias, RegisterPromotion) {
480- emitCode ({{" N" , 42 }, {" M" , 56 }, {" K" , 37 }}, {32 , 32 , 32 });
// Naive options with 32x32x32 tiles; only private memory promotion is on.
476+ auto mappingOptions = MappingOptions::makeNaiveMappingOptions ()
477+ .tile ({32 , 32 , 32 })
478+ .useSharedMemory (false )
479+ .usePrivateMemory (true );
480+
481+ auto code = emitCode ({{" N" , 42 }, {" M" , 56 }, {" K" , 37 }}, mappingOptions);
// The three searches are chained: each one starts one character past the
// previous match, so the expected order is declaration, copy-in, copy-out.
// If a previous search returned npos, npos + 1 wraps around to 0 and the
// next search simply starts from the beginning of the text.
482+ auto declPos = code.find (" float32 _O_0" );
483+ auto copyToPos =
484+ code.find (" _O_0[0][0] = O[32*b0 + c3][t0 + 32*b1]" , declPos + 1 );
485+ auto copyFromPos =
486+ code.find (" O[32*b0 + c3][t0 + 32*b1] = _O_0[0][0]" , copyToPos + 1 );
487+
// First reference to the global O element found after the start of the
// copy-in match.
// NOTE(review): the search begins at copyToPos + 1, i.e. still inside the
// copy-in statement, whose right-hand side is this very pattern. It looks
// like the first hit is therefore always that right-hand side, which would
// make the EXPECT_NE below trivially true - confirm the intended start
// offset and whether EXPECT_EQ was meant.
488+ auto originalAccPos = code.find (" O[32*b0 + c3][t0 + 32*b1]" , copyToPos + 1 );
// Declarations that must NOT appear (checked against npos below).
489+ auto cDeclPos = code.find (" float32 _C_0" );
490+ auto aDeclPos = code.find (" float32 _A_0" );
491+
// Positive checks: the _O_0 declaration and both copies are present.
492+ EXPECT_TRUE (declPos != std::string::npos) << " no declaration of the register" ;
493+ EXPECT_TRUE (copyToPos != std::string::npos) << " expected copy to register" ;
494+ EXPECT_TRUE (copyFromPos != std::string::npos)
495+ << " expected copy from register" ;
496+
497+ EXPECT_NE (originalAccPos, copyFromPos)
498+ << " global array reference is used in main computation" ;
// Negative checks: neither C nor A may have a _C_0 / _A_0 declaration.
499+ EXPECT_TRUE (cDeclPos == std::string::npos)
500+ << " tensor C promoted to register but has no reuse" ;
501+ EXPECT_TRUE (aDeclPos == std::string::npos)
502+ << " tensor A promoted to register but has elements accessed by multiple threads" ;
503+ }
504+
// Checks the generated code for matmul-plus-bias when both shared and
// private memory are enabled (shared memory limit set to 32768): none of
// O, C or A is expected to get a _X_0[1][1] declaration. Per the failure
// message, O is expected to be promoted to shared memory instead.
// NOTE(review): the test only asserts absence of the [1][1] declarations;
// it does not positively check that a shared-memory copy of O was emitted,
// and a declaration with other extents would not be caught - confirm this
// is intended.
505+ TEST_F (MatMulBias, RegisterPromotionSharedPreference) {
// Same naive 32x32x32 tiling as RegisterPromotion, but with shared memory on.
506+ auto mappingOptions = MappingOptions::makeNaiveMappingOptions ()
507+ .tile ({32 , 32 , 32 })
508+ .maxSharedMemory (32768 )
509+ .useSharedMemory (true )
510+ .usePrivateMemory (true );
511+
512+ auto code = emitCode ({{" N" , 42 }, {" M" , 56 }, {" K" , 37 }}, mappingOptions);
// Independent searches from the start of the text, one per tensor.
513+ auto declPos = code.find (" float32 _O_0[1][1]" );
514+ auto cDeclPos = code.find (" float32 _C_0[1][1]" );
515+ auto aDeclPos = code.find (" float32 _A_0[1][1]" );
516+
// Every search must fail (npos) for the test to pass.
517+ EXPECT_TRUE (declPos == std::string::npos)
518+ << " not expected promotion to register because promoted to shared" ;
519+ EXPECT_TRUE (cDeclPos == std::string::npos)
520+ << " tensor C promoted to register but has no reuse" ;
521+ EXPECT_TRUE (aDeclPos == std::string::npos)
522+ << " tensor A promoted to register but has elements accessed by multiple threads" ;
481523}
482524
483525int main (int argc, char ** argv) {
0 commit comments