@@ -142,6 +142,7 @@ def evaluate(epoch, step, val_loader, val_feat_proc, labels, model,
142142 continue
143143
144144 model.eval()
145+ torch.cuda.synchronize()
145146 start_time = time.time()
146147 agg = {'losses': [], 'preds': [], 'txts': []}
147148
@@ -166,6 +167,7 @@ def evaluate(epoch, step, val_loader, val_feat_proc, labels, model,
166167 agg['txts'] += helpers.gather_transcripts([txt], [txt_lens], labels)
167168
168169 wer, loss = process_evaluation_epoch(agg)
170+ torch.cuda.synchronize()
169171 log(() if epoch is None else (epoch,),
170172 step, subset, {'loss': loss, 'wer': 100.0 * wer,
171173 'took': time.time() - start_time})
@@ -379,11 +381,11 @@ def main():
379381 if multi_gpu and not use_dali :
380382 train_loader.sampler.set_epoch(epoch)
381383
384+ torch.cuda.synchronize()
385+ epoch_start_time = time.time()
382386 epoch_utts = 0
383387 epoch_loss = 0
384388 accumulated_batches = 0
385- epoch_start_time = time.time()
386- epoch_eval_time = 0
387389
388390 for batch in train_loader:
389391
@@ -461,7 +463,6 @@ def main():
461463 step_start_time = time.time()
462464
463465 if step % args.eval_frequency == 0:
464- tik = time.time()
465466 wer = evaluate(epoch, step, val_loader, val_feat_proc,
466467 symbols, model, ema_model, ctc_loss,
467468 greedy_decoder, args.amp, use_dali)
@@ -470,7 +471,6 @@ def main():
470471 checkpointer.save(model, ema_model, optimizer, scaler,
471472 epoch, step, best_wer, is_best=True)
472473 best_wer = wer
473- epoch_eval_time += time.time() - tik
474474
475475 step += 1
476476 accumulated_batches = 0
@@ -481,6 +481,7 @@ def main():
481481 if not use_dali and step > steps_per_epoch * epoch:
482482 break
483483
484+ torch.cuda.synchronize()
484485 epoch_time = time.time() - epoch_start_time
485486 epoch_loss /= steps_per_epoch
486487 log((epoch,), None, 'train_avg', {'throughput': epoch_utts / epoch_time,
0 commit comments