@@ -146,7 +146,7 @@ std::vector<size_t> inputDivisorsAndPowers2(
146146}
147147
148148size_t largestDim (const std::vector<const DLTensor*>& inputs) {
149- CHECK_GE (inputs.size (), 0 );
149+ CHECK_GE (inputs.size (), 0u );
150150 auto maxElement = std::max_element (
151151 inputs.begin (), inputs.end (), [](const DLTensor* a, const DLTensor* b) {
152152 return a->ndim < b->ndim ;
@@ -157,7 +157,7 @@ size_t largestDim(const std::vector<const DLTensor*>& inputs) {
157157} // namespace
158158
159159void GeneticTunerHarness::setupTuningParameters () {
160- CHECK_GT (kInputs_ .size (), 0 );
160+ CHECK_GT (kInputs_ .size (), 0u );
161161 auto range = inputDivisorsAndPowers2 (kInputs_ .begin ()->second );
162162 auto rangeUpTo64 = filterHigherThan (range, 64 );
163163
@@ -208,6 +208,9 @@ std::vector<size_t> parseGpus() {
208208 LOG (GSTREAM) << line; \
209209 }
210210
211+ constexpr size_t GeneticTunerHarness::kEarlyPruneFactor ;
212+ constexpr size_t GeneticTunerHarness::kCatastrophicPerfFactor ;
213+
211214// This function is run on a single pre-determined GPU, in a single thread
212215// It takes the input/output DLTensor objects that reside on that GPU
213216//
@@ -222,7 +225,7 @@ bool GeneticTunerHarness::warmupOrPrune(
222225 const std::vector<DLTensor*>& outputs,
223226 const std::vector<const DLTensor*>& inputs,
224227 size_t handle,
225- size_t bestTimeSoFar) {
228+ Duration bestTimeSoFar) {
226229 // Pruning based on number of threads: if you don't hit at least k warps
227230 // (default k = 8; 256 total threads, controlled by
228231 // FLAGS_tuner_min_launch_total_threads) then it's likely the kernel is not
@@ -276,10 +279,8 @@ bool GeneticTunerHarness::warmupOrPrune(
276279 }
277280
278281 // 1.b.
279- constexpr size_t kCatastrophicPerfFactor = 100 ;
280- if (bestTimeSoFar < std::numeric_limits<size_t >::max () and
281- prof >= std::chrono::microseconds (
282- (kCatastrophicPerfFactor * bestTimeSoFar))) {
282+ if (bestTimeSoFar < Duration::max () and
283+ prof >= kCatastrophicPerfFactor * bestTimeSoFar) {
283284 return true ;
284285 }
285286
@@ -291,8 +292,8 @@ bool GeneticTunerHarness::warmupOrPrune(
291292 // 2. After reasonable warmup, look at the performance and prune with
292293 // kEarlyPruneFactor
293294 prof = engine.run (handle, inputs, outputs, true );
294- if (bestTimeSoFar < std::numeric_limits< size_t > ::max () and
295- prof >= std::chrono::microseconds (( kEarlyPruneFactor * bestTimeSoFar)) ) {
295+ if (bestTimeSoFar < Duration ::max () and
296+ prof >= kEarlyPruneFactor * bestTimeSoFar) {
296297 return true ;
297298 }
298299
@@ -346,9 +347,9 @@ void GeneticTunerHarness::doGpuWork(
346347 ExecutorType& engine,
347348 Printer& printer) {
348349 WithDevice wd (gpu);
349- CHECK_EQ (1 , kInputs_ .count (gpu));
350+ CHECK_EQ (1u , kInputs_ .count (gpu));
350351 auto & inputs = kInputs_ .at (gpu);
351- CHECK_EQ (1 , outputs_.count (gpu));
352+ CHECK_EQ (1u , outputs_.count (gpu));
352353 auto & outputs = outputs_.at (gpu);
353354
354355 while (true ) {
@@ -394,7 +395,7 @@ void GeneticTunerHarness::doGpuWork(
394395
395396 std::vector<Duration> runtimes;
396397 try {
397- size_t bestTimeSoFar;
398+ Duration bestTimeSoFar;
398399 {
399400 std::lock_guard<std::mutex> lock (bestTimeMtx_);
400401 bestTimeSoFar = bestTime_;
@@ -451,8 +452,8 @@ void GeneticTunerHarness::doGpuWork(
451452 // Save best time under lock
452453 {
453454 std::lock_guard<std::mutex> lock (bestTimeMtx_);
454- if (prof_us < bestTime_) {
455- bestTime_ = prof_us ;
455+ if (prof < bestTime_) {
456+ bestTime_ = prof ;
456457 bestCudaMappingOptions_ = options;
457458 }
458459 }
@@ -484,7 +485,7 @@ void GeneticTunerHarness::runOneGeneration(size_t generation) {
484485 currentCompilationJob_.store (0 );
485486 numEvaluations_.store (0 );
486487 readyToEvaluate_.resize (0 );
487- for (int i = 0 ; i < kMaxPopulationSize ; ++i) {
488+ for (size_t i = 0 ; i < kMaxPopulationSize ; ++i) {
488489 readyToEvaluate_.emplace_back ();
489490 readyToEvaluate_[i].store (false );
490491 }
@@ -509,7 +510,7 @@ void GeneticTunerHarness::runOneGeneration(size_t generation) {
509510 cpuCompilationThread.join ();
510511 }
511512 });
512- for (int i = 0 ; i < FLAGS_tuner_threads; ++i) {
513+ for (size_t i = 0 ; i < FLAGS_tuner_threads; ++i) {
513514 cpuCompilationThreads.emplace_back (
514515 [this , &engine]() { this ->doCompile (engine); });
515516 }
0 commit comments