@@ -248,52 +248,68 @@ void TuningHarness<Backend>::runOneIteration(
248248 CHECK (executors_.empty ());
249249 CHECK (configurations_.empty ());
250250 auto & candidates = searchStrategy.candidatesOfStep (step);
251- // Initialize for this round
252- currentCompilationJob_.store (0 );
253- numEvaluations_.store (0 );
254- Printer printer (
255- iteration,
256- step,
257- candidates.size (),
258- currentCompilationJob_,
259- numEvaluations_);
260- auto logIterations = FLAGS_tuner_gen_log_generations;
261- ScopeGuard sgPrinter ([logIterations, &printer]() {
262- printer.stop ();
263- if (logIterations) {
264- printer.printAll ();
265- }
251+ auto firstNew = std::partition (
252+ candidates.begin (),
253+ candidates.end (),
254+ [](const std::unique_ptr<CandidateConfiguration>& c) {
255+ return c->runtime != Duration::zero ();
256+ });
257+ GeneticSearch::Population newCandidates (
258+ std::distance (firstNew, candidates.end ()));
259+ std::move (firstNew, candidates.end (), newCandidates.begin ());
260+ ScopeGuard candidatesSG ([&]() {
261+ std::move (newCandidates.begin (), newCandidates.end (), firstNew);
266262 });
267263
268- // Just spawn and join new threads for each iteration
269- std::vector<std::thread> cpuCompilationThreads;
270- cpuCompilationThreads.reserve (FLAGS_tuner_threads);
271- ScopeGuard sgCompilationThreads ([&cpuCompilationThreads]() {
272- for (auto & cpuCompilationThread : cpuCompilationThreads) {
273- cpuCompilationThread.join ();
274- }
275- });
276- for (size_t i = 0 ; i < FLAGS_tuner_threads; ++i) {
277- cpuCompilationThreads.emplace_back (
278- [this , &candidates]() { this ->doCompile (candidates); });
279- }
264+ if (not newCandidates.empty ()) {
265+ auto populationSize = newCandidates.size ();
266+ // Initialize for this round
267+ currentCompilationJob_.store (0 );
268+ numEvaluations_.store (0 );
269+ Printer printer (
270+ iteration,
271+ step,
272+ populationSize,
273+ currentCompilationJob_,
274+ numEvaluations_);
275+ auto logIterations = FLAGS_tuner_gen_log_generations;
276+ ScopeGuard sgPrinter ([logIterations, &printer]() {
277+ printer.stop ();
278+ if (logIterations) {
279+ printer.printAll ();
280+ }
281+ });
280282
281- // Just spawn and join new threads for each device
282- std::vector<std::thread> workerThreads;
283- workerThreads.reserve (devices.size ());
284- LOG_IF (INFO, tc::FLAGS_debug_tuner)
285- << " Start evaluation: " << devices.size () << " " << executors_.size ()
286- << " " << configurations_.size ();
287- ScopeGuard sgDeviceWorkerThreads ([&workerThreads]() {
288- for (auto & workerThread : workerThreads) {
289- workerThread.join ();
283+ // Just spawn and join new threads for each iteration
284+ std::vector<std::thread> cpuCompilationThreads;
285+ cpuCompilationThreads.reserve (FLAGS_tuner_threads);
286+ ScopeGuard sgCompilationThreads ([&cpuCompilationThreads]() {
287+ for (auto & cpuCompilationThread : cpuCompilationThreads) {
288+ cpuCompilationThread.join ();
289+ }
290+ });
291+ for (size_t i = 0 ; i < FLAGS_tuner_threads; ++i) {
292+ cpuCompilationThreads.emplace_back (
293+ [this , &newCandidates]() { this ->doCompile (newCandidates); });
290294 }
291- });
292- auto populationSize = candidates.size ();
293- for (auto device : devices) {
294- workerThreads.emplace_back ([this , device, populationSize, &printer]() {
295- this ->doEvaluate (device, populationSize, printer);
295+
296+ // Just spawn and join new threads for each device
297+ std::vector<std::thread> workerThreads;
298+ workerThreads.reserve (devices.size ());
299+ LOG_IF (INFO, tc::FLAGS_debug_tuner)
300+ << " Start evaluation: " << devices.size () << " "
301+ << executors_.size () << " " << configurations_.size ();
302+ ScopeGuard sgDeviceWorkerThreads ([&workerThreads]() {
303+ for (auto & workerThread : workerThreads) {
304+ workerThread.join ();
305+ }
296306 });
307+ for (auto device : devices) {
308+ workerThreads.emplace_back (
309+ [this , device, populationSize, &printer]() {
310+ this ->doEvaluate (device, populationSize, printer);
311+ });
312+ }
297313 }
298314 }
299315 searchStrategy.finishStep (step);
0 commit comments