Skip to content

Commit f613b7c

Browse files
suiyoubi authored and nv-kkudrynski committed
[EffDet/PyT] Invoking CUDA synchronize() before Timing
1 parent acecffe commit f613b7c

File tree

2 files changed

+8
-0
lines changed

2 files changed

+8
-0
lines changed

PyTorch/Detection/Efficientdet/train.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -521,12 +521,14 @@ def train_epoch(
521521

522522
model.train()
523523

524+
torch.cuda.synchronize()
524525
end = time.time()
525526
last_idx = steps_per_epoch - 1
526527
num_updates = epoch * steps_per_epoch
527528
for batch_idx in range(steps_per_epoch):
528529
input, target = next(loader_iter)
529530
last_batch = batch_idx == last_idx
531+
torch.cuda.synchronize()
530532
data_time_m.update(time.time() - end)
531533

532534
with torch.cuda.amp.autocast(enabled=use_amp):
@@ -575,6 +577,7 @@ def train_epoch(
575577
if lr_scheduler is not None:
576578
lr_scheduler.step_update(num_updates=num_updates, metric=losses_m.avg)
577579

580+
torch.cuda.synchronize()
578581
end = time.time()
579582
if args.benchmark:
580583
if batch_idx >= args.benchmark_steps:
@@ -597,6 +600,7 @@ def validate(model, loader, args, evaluator=None, epoch=0, log_suffix=''):
597600

598601
model.eval()
599602

603+
torch.cuda.synchronize()
600604
end = time.time()
601605
last_idx = len(loader) - 1
602606
with torch.no_grad():

PyTorch/Detection/Efficientdet/validate.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -208,12 +208,14 @@ def validate(args):
208208
bench.eval()
209209
batch_time = AverageMeter()
210210
throughput = AverageMeter()
211+
torch.cuda.synchronize()
211212
end = time.time()
212213
total_time_start = time.time()
213214
with torch.no_grad():
214215
for i, (input, target) in enumerate(loader):
215216
with torch.cuda.amp.autocast(enabled=args.amp):
216217
output = bench(input, target['img_scale'], target['img_size'])
218+
torch.cuda.synchronize()
217219
batch_time.update(time.time() - end)
218220
throughput.update(input.size(0) / batch_time.val)
219221
evaluator.add_predictions(output, target)
@@ -235,6 +237,7 @@ def validate(args):
235237
)
236238
end = time.time()
237239

240+
torch.cuda.synchronize()
238241
dllogger_metric['total_inference_time'] = time.time() - total_time_start
239242
dllogger_metric['inference_throughput'] = throughput.avg
240243
dllogger_metric['inference_time'] = 1000 / throughput.avg
@@ -245,6 +248,7 @@ def validate(args):
245248
mean_ap = evaluator.evaluate()
246249
else:
247250
evaluator.save_predictions(args.results)
251+
torch.cuda.synchronize()
248252
dllogger_metric['map'] = mean_ap
249253
dllogger_metric['total_eval_time'] = time.time() - total_time_start
250254
else:

0 commit comments

Comments
 (0)