22#
33# SPDX-License-Identifier: MIT
44
5- from datetime import datetime , timedelta
6- from queue import Queue
5+ import time
76
87import speech_recognition as sr
98
109
1110class Listener :
12- def __init__ (self , energy_threshold = 1000 , phrase_timeout = 3.0 , record_timeout = 30 ):
11+ def __init__ (self , api_key , energy_threshold = 300 , record_timeout = 30 ):
1312 self .listener_handle = None
13+ self .microphone = sr .Microphone ()
1414 self .recognizer = sr .Recognizer ()
1515 self .recognizer .energy_threshold = energy_threshold
16- self .recognizer .dynamic_energy_threshold = False
17- self .recognizer .pause_threshold = 1
18- self .last_sample = bytes ()
19- self .phrase_time = datetime .utcnow ()
20- self .phrase_timeout = phrase_timeout
16+ with self .microphone as source :
17+ self .recognizer .adjust_for_ambient_noise (
18+ source
19+ ) # we only need to calibrate once, before we start listening
2120 self .record_timeout = record_timeout
22- self .phrase_complete = False
23- # Thread safe Queue for passing data from the threaded recording callback.
24- self .data_queue = Queue ()
25- self .mic_dev_index = None
21+ self .listener_handle = None
22+ self .audio = None
23+ self .api_key = api_key
2624
2725 def listen (self , ready_callback = None ):
28- self .phrase_complete = False
29- start = datetime .utcnow ()
30- self .start_listening ()
26+ self ._start_listening ()
3127 if ready_callback :
3228 ready_callback ()
33- while (
34- self .listener_handle
35- and not self .speech_waiting ()
36- or not self .phrase_complete
37- ):
38- if self .phrase_time and start - self .phrase_time > timedelta (
39- seconds = self .phrase_timeout
40- ):
41- self .last_sample = bytes ()
42- self .phrase_complete = True
43- self .phrase_time = start
29+ while self .listener_handle and self .audio is None :
30+ time .sleep (0.1 )
4431 self .stop_listening ()
4532
46- def start_listening (self ):
47- if not self .listener_handle :
48- with sr .Microphone () as source :
49- self .recognizer .adjust_for_ambient_noise (source )
50- self .listener_handle = self .recognizer .listen_in_background (
51- sr .Microphone (),
52- self .record_callback ,
53- phrase_time_limit = self .record_timeout ,
54- )
33+ def _save_audio_callback (self , _recognizer , audio ):
34+ self .audio = audio
35+
36+ def _start_listening (self ):
37+ self .listener_handle = self .recognizer .listen_in_background (
38+ self .microphone , self ._save_audio_callback
39+ )
5540
5641 def stop_listening (self , wait_for_stop = False ):
5742 if self .listener_handle :
@@ -61,40 +46,24 @@ def stop_listening(self, wait_for_stop=False):
6146 def is_listening (self ):
6247 return self .listener_handle is not None
6348
64- def record_callback (self , _ , audio : sr .AudioData ) -> None :
65- # Grab the raw bytes and push it into the thread safe queue.
66- data = audio .get_raw_data ()
67- self .data_queue .put (data )
68-
6949 def speech_waiting (self ):
70- return not self .data_queue .empty ()
71-
72- def get_speech (self ):
73- if self .speech_waiting ():
74- return self .data_queue .get ()
75- return None
76-
77- def get_audio_data (self ):
78- now = datetime .utcnow ()
79- if self .speech_waiting ():
80- self .phrase_complete = False
81- if self .phrase_time and now - self .phrase_time > timedelta (
82- seconds = self .phrase_timeout
83- ):
84- self .last_sample = bytes ()
85- self .phrase_complete = True
86- self .phrase_time = now
87-
88- # Concatenate our current audio data with the latest audio data.
89- while self .speech_waiting ():
90- data = self .get_speech ()
91- self .last_sample += data
50+ return self .audio is not None
9251
93- # Use AudioData to convert the raw data to wav data.
94- with sr .Microphone () as source :
95- audio_data = sr .AudioData (
96- self .last_sample , source .SAMPLE_RATE , source .SAMPLE_WIDTH
97- )
98- return audio_data
52+ def recognize (self ):
53+ if self .audio :
54+ # Transcribe the audio data to text using Whisper
55+ print ("Recognizing..." )
56+ attempts = 0
57+ while attempts < 3 :
58+ try :
59+ result = self .recognizer .recognize_whisper_api (
60+ self .audio , api_key = self .api_key
61+ )
9962
63+ return result .strip ()
64+ except sr .RequestError :
65+ time .sleep (3 )
66+ attempts += 1
67+ print ("I wasn't able to understand you. Please repeat that." )
68+ return None
10069 return None
0 commit comments