@@ -210,25 +210,19 @@ isl::map fixOuterInputDimsAsParameters(isl::map map, int nDims) {
 }
 
 /*
- * Check if a reference group features reuse at "depth" after applying
- * "schedule". In particular, consider first depth schedule dimensions as fixed
- * by equating them to parameters and check if the resulting relation is not
- * injective.
+ * Check if a reference group features reuse within the "outer" schedule.
+ * In particular, check that for some given point in the outer schedule and
+ * some given group element, there is more than one statement instance
+ * accessing the element within the point in the outer schedule.
+ * In other words, check that the mapping from statement instances
+ * to pairs of outer schedule points and group elements is not injective.
  */
-bool hasReuse(
+bool hasReuseWithin(
     const TensorReferenceGroup& group,
-    isl::union_map schedule,
-    size_t depth) {
-  auto scheduledAccessesUMap = group.originalAccesses().apply_domain(schedule);
-  auto scheduledAccessMaps =
-      isl::UnionAsVector<isl::union_map>(scheduledAccessesUMap);
-  return std::any_of(
-      scheduledAccessMaps.begin(),
-      scheduledAccessMaps.end(),
-      [schedule, depth](isl::map access) {
-        access = fixOuterInputDimsAsParameters(access, static_cast<int>(depth));
-        return !access.is_injective();
-      });
+    isl::multi_union_pw_aff outer) {
+  auto map = isl::union_map::from(outer);
+  map = map.range_product(group.originalAccesses());
+  return !map.is_injective();
 }
 
 /*
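For intuition: `outer` maps each statement instance to its outer schedule point, and `group.originalAccesses()` maps instances to tensor elements, so their range product maps each instance to a (point, element) pair. That relation fails to be injective exactly when two distinct instances touch the same element at the same outer point, which is the notion of reuse used here. Below is a minimal standalone sketch of the same check, assuming the isl C++ bindings; the relations are illustrative toy values, not taken from this commit.

    #include <isl/cpp.h>

    // Toy case: S[i,j] reads A[i] and the outer schedule keeps only i.
    isl::ctx ctx(isl_ctx_alloc());
    isl::union_map sched(ctx, "{ S[i,j] -> [i] : 0 <= i,j < 4 }");
    isl::union_map access(ctx, "{ S[i,j] -> A[i] : 0 <= i,j < 4 }");
    // Range product: { S[i,j] -> [[i] -> A[i]] }. For a fixed i, every j
    // maps to the same ([i], A[i]) pair, so the relation is not injective
    // and the group features reuse within the outer schedule.
    auto reuse = !sched.range_product(access).is_injective(); // true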
@@ -463,6 +457,8 @@ void promoteToSharedGreedy(
   for (auto bandNode : bands) {
     auto groupMap = TensorReferenceGroup::accessedBySubtree(bandNode, scop);
     auto partialSched = partialSchedule(root, bandNode);
+    // Pure affine schedule without (mapping) filters.
+    auto partialSchedMupa = partialScheduleMupa(root, bandNode);
     auto activePoints = activeDomainPoints(root, bandNode);
 
     // Prepare groups for sorting, to have specified order necessary for
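The new helper matters because, per the comment in the hunk above, the multi_union_pw_aff form is the bare affine schedule function on the whole domain, whereas `partialSched` is a union map that presumably already carries the filters introduced by mapping to blocks and threads. A hypothetical illustration of the two shapes, with a made-up statement and filter:

    // union_map form: domain restricted by a mapping filter, e.g.
    //   { S[i,j] -> [i] : j mod 32 = t }
    // multi_union_pw_aff form: pure affine, no filter, e.g.
    //   [ { S[i,j] -> [(i)] } ]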
@@ -522,7 +518,7 @@ void promoteToSharedGreedy(
       }
       // Do not promote if the group features no reuse and is accessed in a
       // coalesced way.
-      if (!hasReuse(*group, fullSched, depth) &&
+      if (!hasReuseWithin(*group, partialSchedMupa) &&
           isCoalesced(
               threadIdxXScheduleDepthState,
               *group,
@@ -606,6 +602,8 @@ void promoteToRegistersBelowThreads(
       // per-thread-group access relations.
       auto points = activeDomainPoints(root, band);
       auto partialSched = partialSchedule(root, band);
+      // Pure affine schedule without (mapping) filters.
+      auto partialSchedMupa = partialScheduleMupa(root, band);
 
       size_t nMappedThreads = 0;
       for (int j = 0; j < points.dim(isl::dim_type::param); ++j) {
@@ -643,7 +641,7 @@ void promoteToRegistersBelowThreads(
               points)) {
         continue;
       }
-      if (!hasReuse(*group, fullSched, depth)) {
+      if (!hasReuseWithin(*group, partialSchedMupa)) {
         continue;
       }
       // TODO: if something is already in shared, but reuse it within one
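Both call sites now share the same pattern: compute the band's pure affine partial schedule once, then skip any group that shows no reuse under it. A condensed sketch of that loop shape, simplified from the two functions above (`promote` is a hypothetical stand-in for the actual promotion logic):

    auto partialSchedMupa = partialScheduleMupa(root, band);
    for (const auto& group : groups) {
      // Promotion pays off only if an element is re-accessed within
      // a single point of the outer (band) schedule.
      if (!hasReuseWithin(*group, partialSchedMupa)) {
        continue;
      }
      promote(group); // hypothetical; stands in for shared/register promotion
    }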