@@ -1049,8 +1049,9 @@ std::unique_ptr<MappedScop> MappedScop::makeWithOuterBlockInnerThreadStrategy(
10491049 LOG_IF (INFO, FLAGS_debug_tc_mapper) << " After mapping to blocks:" << std::endl
10501050 << *mappedScop->schedule ();
10511051
1052- // 8. Promote to shared memory below the loops mapped to blocks.
1053- // This may split the outer band, so find the new outer band after promotion.
1052+ // 8. Promote to shared memory.
1053+ // If shared promotion depth is specified in the mapping options, use the
1054+ // specified value. Otherwise, promote below the loops mapped to blocks.
10541055 if (cudaOptions.proto ().use_shared_memory ()) {
10551056 size_t sharedMemorySize = cudaOptions.proto ().has_max_shared_memory ()
10561057 ? cudaOptions.proto ().max_shared_memory ()
@@ -1069,29 +1070,24 @@ std::unique_ptr<MappedScop> MappedScop::makeWithOuterBlockInnerThreadStrategy(
10691070 sharedMemorySize -= reductionMemoryRequirement;
10701071 }
10711072
1072- auto band = outerBand->as <ScheduleTreeBand>();
1073- LOG_IF (WARNING, FLAGS_debug_tc_mapper && band->nMember () == 0 )
1074- << " Aborting memory promotion because outer band has 0 members (NYI)" ;
1075- if (band->nMember () > 0 && sharedMemorySize > 0 ) {
1073+ if (sharedMemorySize > 0 ) {
10761074 LOG_IF (
10771075 WARNING,
10781076 cudaOptions.proto ().unroll_copy_shared () &&
10791077 !generic.proto .has_unroll ())
10801078 << " requested to unroll copies to shared memory without providing the unroll size" ;
10811079
1082- promoteGreedilyAtDepth (
1080+ auto depth = cudaOptions.proto ().has_shared_depth ()
1081+ ? cudaOptions.proto ().shared_depth ()
1082+ : std::min (
1083+ outerBand->as <ScheduleTreeBand>()->nOuterCoincident (),
1084+ mappedScop->numBlocks .view .size ());
1085+ promoteToSharedAtDepth (
10831086 *mappedScop,
1084- std::min (band-> nOuterCoincident (), mappedScop-> numBlocks . view . size ()) ,
1087+ depth ,
10851088 sharedMemorySize,
10861089 cudaOptions.proto ().unroll_copy_shared () &&
10871090 generic.proto .has_unroll ());
1088-
1089- auto bands = ScheduleTree::collectDFSPreorder (
1090- scop->scheduleRoot (), ScheduleTreeType::Band);
1091- if (bands.size () == 0 ) { // Sanity check.
1092- throw NoBandsException (" no bands after promotion" );
1093- }
1094- outerBand = bands[0 ];
10951091 }
10961092 }
10971093
0 commit comments