1717 f"\033 [0m" )
1818 sys .exit (1 )
1919
20-
21- def run_pytorch_fp32 (model_name , num_runs , timeout , ** kwargs ):
20+ def run_pytorch (model_name , num_runs , timeout , use_torch_fp16 = False ):
2221 import os
2322 import sys
2423 import torch
@@ -32,6 +31,10 @@ def run_pytorch_fp32(model_name, num_runs, timeout, **kwargs):
3231 from speech_recognition .whisper .whisper .whisper .transcribe import transcribe
3332 model = load_model (model_name )
3433 model .eval ()
34+ if use_torch_fp16 :
35+ model = model .half ()
36+ model ._encoder .half ()
37+ model ._decoder .half ()
3538
3639 def single_pass_pytorch (_runner , _librispeech ):
3740 array = _librispeech .get_input_array ()
@@ -40,15 +43,22 @@ def single_pass_pytorch(_runner, _librispeech):
4043 _runner .run (batch_size * array .shape [0 ], audio )["text" ].lstrip ().replace ("." , "" ).upper ()
4144 )
4245
46+ decode_options = {"fp16" : use_torch_fp16 }
47+
4348 def transcribe_wrapper (audio ):
44- return transcribe (model , audio , no_speech_threshold = 1.0 , verbose = None )
49+ return transcribe (model , audio , no_speech_threshold = 1.0 , verbose = None , ** decode_options )
4550
4651 runner = PyTorchRunnerV2 (transcribe_wrapper , throughput_only = True )
4752 librispeech = LibriSpeech ()
4853 print_warning_message ("Sampling rate Whisper operates at is 16,000 Hz, therefore throughput values below can be "
4954 "divided by 16,000 to derive 'seconds of processed audio per second'" )
5055 return run_model (single_pass_pytorch , runner , librispeech , batch_size , num_runs , timeout )
5156
def run_pytorch_fp32(model_name, num_runs, timeout, **kwargs):
    """Run the Whisper PyTorch benchmark in full FP32 precision.

    Thin wrapper over the shared run_pytorch driver with FP16 disabled.

    :param model_name: str, Whisper model variant to load (passed to load_model)
    :param num_runs: int, number of benchmark runs to execute
    :param timeout: benchmark timeout (forwarded to run_model)
    :param kwargs: ignored; accepted so surplus CLI-parsed arguments
        (e.g. batch_size) don't break the call, matching run_pytorch_cuda's
        **kwargs-tolerant signature and this function's original interface
    :return: whatever run_model returns for the completed benchmark
    """
    return run_pytorch(model_name, num_runs, timeout, use_torch_fp16=False)
59+
def run_pytorch_fp16(model_name, num_runs, timeout, **kwargs):
    """Run the Whisper PyTorch benchmark in FP16 (half) precision.

    Thin wrapper over the shared run_pytorch driver with FP16 enabled
    (the model is converted via .half() and transcribe receives fp16=True).

    :param model_name: str, Whisper model variant to load (passed to load_model)
    :param num_runs: int, number of benchmark runs to execute
    :param timeout: benchmark timeout (forwarded to run_model)
    :param kwargs: ignored; accepted so surplus CLI-parsed arguments don't
        break the call, keeping the signature consistent with
        run_pytorch_fp32 and run_pytorch_cuda
    :return: whatever run_model returns for the completed benchmark
    """
    return run_pytorch(model_name, num_runs, timeout, use_torch_fp16=True)
5262
5363def run_pytorch_cuda (model_name , num_runs , timeout , ** kwargs ):
5464 import os
0 commit comments