@@ -46,7 +46,7 @@ using namespace llvm;
4646using namespace bolt ;
4747
4848namespace opts {
49- extern cl::OptionCategory BoltCategory;
49+
5050extern cl::OptionCategory BoltOptCategory;
5151extern cl::opt<bool > Verbosity;
5252
@@ -92,17 +92,6 @@ int32_t ITLBPageSize;
9292// while smaller values result in better i-cache performance
9393int32_t ITLBEntries;
9494
// Returns a string literal naming which caches are enabled, selected from the
// two flags: both caches, gain cache only, short-call cache only, or none.
// Every line of this helper carries the diff's '-' marker, i.e. the change
// shown here deletes it; the debug message in run() prints a gain-cache label
// inline instead of calling this function.
95- const char * cacheKindString (bool UseGainCache, bool UseShortCallCache) {
96- if (UseGainCache && UseShortCallCache)
97- return " gain + short call cache" ;
98- else if (UseGainCache)
99- return " gain cache" ;
100- else if (UseShortCallCache)
101- return " short call cache" ;
102- else
103- return " no cache" ;
104- }
105-
10695// This class maintains adjacency information for all Clusters being
10796// processed. It is used to invalidate cache entries when merging
10897// Clusters and for visiting all neighbors of any given Cluster.
@@ -215,17 +204,16 @@ class PrecomputedResults {
215204 Valid[Index] = true ;
216205 }
217206
// Two-argument overload, deleted by this change ('-' lines): invalidates the
// entries of C itself and then those of every cluster that
// Adjacent.forallAdjacent visits for C.
218- void invalidate (const AdjacencyMatrix &Adjacent, const Cluster *C) {
219- invalidate (C);
220- Adjacent.forallAdjacent (C, [&](const Cluster *A) { invalidate (A); });
221- }
222- private:
// Marks every cached result that involves cluster C as invalid.
223207 void invalidate (const Cluster *C) {
// Clear the run of Size entries starting at C->id() * Size; with the layout
// computed by index() these are the pairs whose first cluster is C.
// NOTE(review): assumes the two-argument reset clears the half-open range
// [begin, end), as llvm::BitVector does -- Valid's type is not visible here.
224208 Valid.reset (C->id () * Size, (C->id () + 1 ) * Size);
// Added by this change ('+' lines): also clear entry Id * Size + C->id() for
// every Id in [0, Size), i.e. the pairs whose second cluster is C.
209+ for (size_t Id = 0 ; Id < Size; Id++) {
210+ Valid.reset (Id * Size + C->id ());
211+ }
225212 }
226213
214+ private:
// Maps an ordered pair of clusters to a flat index: First's id times Size
// plus Second's id. Swapping the arguments generally yields a different index.
227215 size_t index (const Cluster *First, const Cluster *Second) const {
// The '-' line is the previous spelling with redundant parentheses; the '+'
// line computes the same value.
228- return ( First->id () * Size) + Second->id ();
216+ return First->id () * Size + Second->id ();
229217 }
230218
231219 size_t Size;
@@ -347,12 +335,6 @@ class HFSortPlus {
347335 * the same cache page
348336 */
349337 double shortCalls (const Cluster *Cluster) const {
350- if (UseShortCallCache) {
351- auto Itr = ShortCallCache.find (Cluster);
352- if (Itr != ShortCallCache.end ())
353- return Itr->second ;
354- }
355-
356338 double Calls = 0 ;
357339 for (auto TargetId : Cluster->targets ()) {
358340 for (auto Succ : Cg.successors (TargetId)) {
@@ -367,10 +349,6 @@ class HFSortPlus {
367349 }
368350 }
369351
370- if (UseShortCallCache) {
371- ShortCallCache[Cluster] = Calls;
372- }
373-
374352 return Calls;
375353 }
376354
@@ -380,11 +358,6 @@ class HFSortPlus {
380358 */
381359 double shortCalls (const Cluster *ClusterPred,
382360 const Cluster *ClusterSucc) const {
383- if (UseShortCallCache &&
384- ShortCallPairCache.contains (ClusterPred, ClusterSucc)) {
385- return ShortCallPairCache.get (ClusterPred, ClusterSucc);
386- }
387-
388361 double Calls = 0 ;
389362 for (auto TargetId : ClusterPred->targets ()) {
390363 for (auto Succ : Cg.successors (TargetId)) {
@@ -413,10 +386,6 @@ class HFSortPlus {
413386 }
414387 }
415388
416- if (UseShortCallCache) {
417- ShortCallPairCache.set (ClusterPred, ClusterSucc, Calls);
418- }
419-
420389 return Calls;
421390 }
422391
@@ -434,8 +403,8 @@ class HFSortPlus {
434403 */
435404 double mergeGain (const Cluster *ClusterPred,
436405 const Cluster *ClusterSucc) const {
437- if (UseGainCache && Cache .contains (ClusterPred, ClusterSucc)) {
438- return Cache .get (ClusterPred, ClusterSucc);
406+ if (UseGainCache && GainCache .contains (ClusterPred, ClusterSucc)) {
407+ return GainCache .get (ClusterPred, ClusterSucc);
439408 }
440409
441410 // cache misses on the first cluster
@@ -460,7 +429,7 @@ class HFSortPlus {
460429 Gain /= std::min (ClusterPred->size (), ClusterSucc->size ());
461430
462431 if (UseGainCache) {
463- Cache .set (ClusterPred, ClusterSucc, Gain);
432+ GainCache .set (ClusterPred, ClusterSucc, Gain);
464433 }
465434
466435 return Gain;
@@ -513,7 +482,7 @@ class HFSortPlus {
513482 const double ProbOut =
514483 CallsFromPred > 0 ? CallsPredSucc / CallsFromPred : 0 ;
515484 assert (0.0 <= ProbOut && ProbOut <= 1.0 && " incorrect probability" );
516-
485+
517486 // probability that the second cluster is called from the first one
518487 const double ProbIn =
519488 CallsToSucc > 0 ? CallsPredSucc / CallsToSucc : 0 ;
@@ -601,13 +570,12 @@ class HFSortPlus {
601570 */
602571 std::vector<Cluster> run () {
603572 DEBUG (dbgs () << " Starting hfsort+ w/"
604- << cacheKindString (UseGainCache, UseShortCallCache )
573+ << (UseGainCache ? " gain cache " : " no cache " )
605574 << " for " << Clusters.size () << " clusters "
606575 << " with ITLBPageSize = " << ITLBPageSize << " , "
607576 << " ITLBEntries = " << ITLBEntries << " , "
608577 << " and MergeProbability = " << opts::MergeProbability << " \n " );
609578
610-
611579 // Pass 1
612580 runPassOne ();
613581
@@ -628,19 +596,15 @@ class HFSortPlus {
628596 return Result;
629597 }
630598
// Builds the hfsort+ state for call graph Cg.
//   Cg           - the call graph; stored in the Cg member.
//   UseGainCache - stored in the member of the same name, which mergeGain
//                  checks before reading or writing GainCache.
// The '-' lines are the previous three-parameter signature and the
// initializers of the short-call cache members; the '+' lines replace them
// with a two-parameter signature and a single GainCache initializer.
631- HFSortPlus (const CallGraph &Cg,
632- bool UseGainCache,
633- bool UseShortCallCache)
599+ HFSortPlus (const CallGraph &Cg, bool UseGainCache)
634600 : Cg(Cg),
// FuncCluster and Addr are both constructed with Cg.numNodes() elements,
// filled with nullptr and InvalidAddr respectively.
635601 FuncCluster (Cg.numNodes(), nullptr),
636602 Addr(Cg.numNodes(), InvalidAddr),
637603 TotalSamples(0.0 ),
638604 Clusters(initializeClusters()),
639605 Adjacent(Cg, Clusters, FuncCluster),
640606 UseGainCache(UseGainCache),
641- UseShortCallCache(UseShortCallCache),
642- Cache(Clusters.size()),
643- ShortCallPairCache(Clusters.size()) {
// The gain cache is constructed with the current number of clusters.
607+ GainCache(Clusters.size()) {
644608 }
645609private:
646610
@@ -696,31 +660,16 @@ class HFSortPlus {
696660 CurAddr = ((CurAddr + Align - 1 ) / Align) * Align;
697661 }
698662
699- // Update caches
700- invalidateCaches (Into);
663+ // Invalidate all cache entries associated with cluster Into
664+ if (UseGainCache) {
665+ GainCache.invalidate (Into);
666+ }
701667
702668 // Remove cluster From from the list of active clusters
703669 auto Iter = std::remove (Clusters.begin (), Clusters.end (), From);
704670 Clusters.erase (Iter, Clusters.end ());
705671 }
706672
// This whole helper is deleted by the change ('-' lines). In the new code the
// merge step calls GainCache.invalidate(Into) directly when UseGainCache is
// set, and the short-call caches handled below no longer exist.
707- /*
708- * Invalidate all cache entries associated with cluster C and its neighbors.
709- */
710- void invalidateCaches (const Cluster *C) {
// Short-call caches: drop the single-cluster entry for C and for each cluster
// visited by Adjacent.forallAdjacent, then invalidate the pair cache.
711- if (UseShortCallCache) {
712- maybeErase (ShortCallCache, C);
713- Adjacent.forallAdjacent (C,
714- [this ](const Cluster *A) {
715- maybeErase (ShortCallCache, A);
716- });
717- ShortCallPairCache.invalidate (Adjacent, C);
718- }
// Gain cache: invalidated through the adjacency-aware overload.
719- if (UseGainCache) {
720- Cache.invalidate (Adjacent, C);
721- }
722- }
723-
724673 // The call graph
725674 const CallGraph &Cg;
726675
@@ -746,32 +695,21 @@ class HFSortPlus {
746695 // Use cache for mergeGain results
747696 bool UseGainCache;
748697
749- // Use caches for shortCalls results
750- bool UseShortCallCache;
751-
752698 // A cache that keeps precomputed values of mergeGain for pairs of clusters;
753699 // when a pair of clusters (x,y) gets merged, we need to invalidate the pairs
754700 // containing both x and y and all clusters adjacent to x and y (and recompute
755701 // them on the next iteration).
756- mutable PrecomputedResults Cache;
757-
758- // Cache for shortCalls for a single cluster.
759- mutable std::unordered_map<const Cluster *, double > ShortCallCache;
760-
761- // Cache for shortCalls for a pair of Clusters
762- mutable PrecomputedResults ShortCallPairCache;
702+ mutable PrecomputedResults GainCache;
763703};
764704
765705}
766706
// Entry point for hfsort+: adjusts the arc weights of Cg, then constructs an
// HFSortPlus over it and returns the clusters produced by its run().
//   Cg           - call graph; taken by non-const reference because
//                  adjustArcWeights() is invoked on it before the algorithm.
//   UseGainCache - forwarded to the HFSortPlus constructor.
// The '-' lines are the previous signature and call, which carried an extra
// UseShortCallCache argument; the '+' lines drop it.
767- std::vector<Cluster> hfsortPlus (CallGraph &Cg,
768- bool UseGainCache,
769- bool UseShortCallCache) {
707+ std::vector<Cluster> hfsortPlus (CallGraph &Cg, bool UseGainCache) {
770708 // It is required that the sum of incoming arc weights is not greater
771709 // than the number of samples for every function.
772710 // Ensuring the call graph obeys the property before running the algorithm.
773711 Cg.adjustArcWeights ();
774- return HFSortPlus (Cg, UseGainCache, UseShortCallCache ).run ();
712+ return HFSortPlus (Cg, UseGainCache).run ();
775713 }
776714
777715}}
0 commit comments