@@ -131,6 +131,8 @@ std::vector<T> collectBranchMarkers(T root, T node) {
   return findThreadSpecificMarkers(node);
 }
 
+struct FullSchedule;
+
 /*
  * Transform schedule bands into a union_map.
  * Takes all partial schedules at leaves as MUPAs (without accounting for
@@ -139,7 +141,8 @@ std::vector<T> collectBranchMarkers(T root, T node) {
  * current leaves and transforms them into union maps.
  * Mapping filters are ignored.
  */
-isl::union_map fullSchedule(const detail::ScheduleTree* root) {
+isl::UnionMap<Domain, FullSchedule> fullSchedule(
+    const detail::ScheduleTree* root) {
   using namespace tc::polyhedral::detail;
 
   if (!root->elemAs<ScheduleTreeElemDomain>()) {
@@ -182,7 +185,7 @@ isl::union_map fullSchedule(const detail::ScheduleTree* root) {
       throw promotion::PromotionLogicError(ss.str());
     }
   }
-  return schedule;
+  return isl::UnionMap<Domain, FullSchedule>(schedule);
 }
 
 /*
@@ -263,7 +266,7 @@ bool promotionImprovesCoalescing(
     const detail::ScheduleTree* root,
     const detail::ScheduleTree* node,
     const TensorReferenceGroup& group,
-    isl::union_map schedule) {
+    isl::UnionMap<Domain, FullSchedule> schedule) {
   auto originalAccesses = group.originalAccesses();
 
   auto markers = collectBranchMarkers(root, node);
@@ -313,6 +316,8 @@ isl::union_set collectMappingsTo(const Scop& scop) {
   return mapping;
 }
 
+struct Unrolled;
+
 /*
  * Check that only unrolled loops may appear in access subscripts.
  * Because the scoping point can be above a branching tree, descend into each
@@ -343,11 +348,12 @@ isl::union_set collectMappingsTo(const Scop& scop) {
  * different references may have different values, but all of them remain
  * independent of non-unrolled loop iterators.
  */
+template <typename Outer>
 bool accessSubscriptsAreUnrolledLoops(
     const TensorReferenceGroup& group,
     const detail::ScheduleTree* root,
     const detail::ScheduleTree* scope,
-    isl::multi_union_pw_aff outerSchedule) {
+    isl::MultiUnionPwAff<Domain, Outer> outerSchedule) {
   using namespace detail;
 
   auto nodes = ScheduleTree::collect(scope);
@@ -365,7 +371,7 @@ bool accessSubscriptsAreUnrolledLoops(
     auto subdomain = activeDomainPointsBelow(root, leaf);
 
     auto unrolledDims = isl::union_pw_aff_list(leaf->ctx_, 1);
-    for (auto node : ancestors) {
+    for (const detail::ScheduleTree* node : ancestors) {
       auto band = node->elemAs<detail::ScheduleTreeElemBand>();
       if (!band) {
         continue;
@@ -383,7 +389,8 @@ bool accessSubscriptsAreUnrolledLoops(
 
     auto space = isl::space(leaf->ctx_, 0, unrolledDims.n())
                      .align_params(subdomain.get_space());
-    auto unrolledDimsMupa = isl::multi_union_pw_aff(space, unrolledDims);
+    auto unrolledDimsMupa =
+        isl::MultiUnionPwAff<Domain, Unrolled>(space, unrolledDims);
 
     // It is possible that no loops are unrolled, in which case
     // unrolledDimsMupa is zero-dimensional and needs an explicit domain
@@ -392,10 +399,11 @@ bool accessSubscriptsAreUnrolledLoops(
         unrolledDimsMupa.intersect_domain(group.originalAccesses().domain());
 
     auto accesses = group.originalAccesses();
-    auto schedule = outerSchedule.flat_range_product(unrolledDimsMupa);
-    accesses = accesses.apply_domain(isl::union_map::from(schedule));
+    auto schedule = outerSchedule.range_product(unrolledDimsMupa);
+    auto scheduleMap = schedule.asUnionMap();
+    auto scheduledAccesses = accesses.apply_domain(scheduleMap);
 
-    if (!accesses.is_single_valued()) {
+    if (!scheduledAccesses.is_single_valued()) {
       return false;
     }
   }
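Replacing `flat_range_product` with `range_product` keeps the schedule's range as a nested space whose factors the tags can still name; `asUnionMap()` (a conversion assumed on the tagged wrapper) then drops the tags for the untyped single-valuedness test. That test, reduced to a toy access relation with plain isl types (assuming isl's `isl/cpp.h` bindings; the relations are made up for illustration):

```cpp
#include <isl/cpp.h>
#include <cassert>

int main() {
  isl::ctx ctx(isl_ctx_alloc());
  // Statement S[i, j] reads A[i]; the schedule exposes only i, as if j
  // were an unrolled dimension already accounted for separately.
  auto accesses =
      isl::union_map(ctx, "{ S[i, j] -> A[i] : 0 <= i < 8 and 0 <= j < 8 }");
  auto schedule = isl::union_map(ctx, "{ S[i, j] -> [i] }");
  auto scheduledAccesses = accesses.apply_domain(schedule);
  // Single-valued: each schedule point [i] determines exactly one element,
  // so a register copy indexed by the schedule is well-defined. With
  // accesses A[j] instead, this test would fail.
  assert(scheduledAccesses.is_single_valued());
  isl_ctx_free(ctx.release());
}
```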
@@ -415,23 +423,25 @@ bool accessSubscriptsAreUnrolledLoops(
  * thread associated to a given pair of tensor element and outer schedule
  * iteration.
  */
+template <typename Outer>
 bool isPromotableToRegistersBelow(
     const TensorReferenceGroup& group,
     const detail::ScheduleTree* root,
     const detail::ScheduleTree* scope,
-    isl::multi_union_pw_aff outer,
-    isl::multi_union_pw_aff thread) {
+    isl::MultiUnionPwAff<Domain, Outer> outer,
+    isl::MultiUnionPwAff<Domain, Thread> thread) {
   if (!accessSubscriptsAreUnrolledLoops(
-          group, root, scope, outer.flat_range_product(thread))) {
+          group, root, scope, outer.range_product(thread))) {
     return false;
   }
 
   auto originalAccesses = group.originalAccesses();
-  auto map = isl::union_map::from(outer);
-  map = map.range_product(originalAccesses);
-  map = map.apply_domain(isl::union_map::from(thread));
+  auto outerMap = isl::UnionMap<Domain, Outer>::from(outer);
+  auto pair = outerMap.range_product(originalAccesses);
+  auto threadMap = isl::UnionMap<Domain, Thread>::from(thread);
+  auto threadToPair = pair.apply_domain(threadMap);
 
-  return map.is_injective();
+  return threadToPair.is_injective();
 }
 
 /*
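The final check maps each thread to the (outer iteration, tensor element) pairs it touches and requires injectivity: no pair may belong to two threads, or a per-thread register copy would duplicate state that must stay shared. A self-contained toy version with untagged isl bindings (hypothetical relations, assuming `isl/cpp.h`):

```cpp
#include <isl/cpp.h>
#include <cassert>

int main() {
  isl::ctx ctx(isl_ctx_alloc());
  // Each thread t owns a private element: injective, so promotable.
  auto privateElems = isl::union_map(ctx, "{ T[t] -> A[t] : 0 <= t < 4 }");
  assert(privateElems.is_injective());
  // Pairs of threads share an element: not injective, so promotion to
  // per-thread registers would break the sharing.
  auto sharedElems =
      isl::union_map(ctx, "{ T[t] -> A[floord(t, 2)] : 0 <= t < 4 }");
  assert(!sharedElems.is_injective());
  isl_ctx_free(ctx.release());
}
```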
@@ -654,15 +664,15 @@ void promoteToRegistersBelow(MappedScop& mscop, detail::ScheduleTree* scope) {
   auto blockSchedule = mscop.blockMappingSchedule(mscop.schedule());
 
   // Pure affine schedule without (mapping) filters.
-  auto partialSchedMupa = partialScheduleMupa(root, scope);
+  auto partialSchedMupa = partialScheduleMupa<Scope>(root, scope);
   // Schedule with block mapping filter.
   auto partialSched =
       isl::union_map::from(partialSchedMupa).intersect_domain(blockMapping);
   // The following promotion validity and profitability checks need to be
   // performed with respect to the block mapping, so append the block schedule.
   // If the partial schedule contains it already, it will just end up with
   // identical dimensions without affecting the result of the checks.
-  partialSchedMupa = partialSchedMupa.flat_range_product(blockSchedule);
+  auto partialSchedBlockMupa = partialSchedMupa.range_product(blockSchedule);
 
   for (auto& tensorGroups : groupMap) {
     auto tensorId = tensorGroups.first;
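The comment's claim that re-appending an already-present block schedule is harmless rests on a simple fact: duplicating range dimensions preserves both injectivity and single-valuedness, which is all the subsequent checks observe. A quick demonstration with plain isl types (toy schedule, assuming `isl/cpp.h`):

```cpp
#include <isl/cpp.h>
#include <cassert>

int main() {
  isl::ctx ctx(isl_ctx_alloc());
  auto sched = isl::union_map(ctx, "{ S[i] -> [i] : 0 <= i < 16 }");
  // Appending a copy of dimensions already in the schedule changes
  // nothing that the promotion checks can observe.
  auto withDup = sched.flat_range_product(sched); // { S[i] -> [i, i] }
  assert(sched.is_injective() == withDup.is_injective());
  assert(sched.is_single_valued() == withDup.is_single_valued());
  isl_ctx_free(ctx.release());
}
```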
@@ -676,11 +686,11 @@ void promoteToRegistersBelow(MappedScop& mscop, detail::ScheduleTree* scope) {
       continue;
     }
     if (!isPromotableToRegistersBelow(
-            *group, root, scope, partialSchedMupa, threadSchedule)) {
+            *group, root, scope, partialSchedBlockMupa, threadSchedule)) {
       continue;
     }
     // Check reuse within threads.
-    auto schedule = partialSchedBlockMupa.flat_range_product(threadSchedule);
+    auto schedule = partialSchedBlockMupa.flat_range_product(threadSchedule);
     if (!hasReuseWithin(*group, schedule)) {
       continue;
     }
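Note the deliberate asymmetry this hunk keeps: `range_product` feeds the typed `isPromotableToRegistersBelow`, while plain `flat_range_product` feeds the untyped `hasReuseWithin`. The two products carry the same affine information and differ only in range structure, as this small comparison (made-up relations) shows:

```cpp
#include <isl/cpp.h>
#include <iostream>

int main() {
  isl::ctx ctx(isl_ctx_alloc());
  auto outer = isl::union_map(ctx, "{ S[i] -> B[i] }");
  auto thread = isl::union_map(ctx, "{ S[i] -> T[i mod 32] }");
  // Nested range [[B] -> [T]]: both factors remain nameable, which is
  // what the tagged wrappers rely on.
  std::cout << outer.range_product(thread) << "\n";
  // Flattened anonymous range: fine for untyped consumers, but the
  // factor spaces are no longer recoverable by name.
  std::cout << outer.flat_range_product(thread) << "\n";
  isl_ctx_free(ctx.release());
}
```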