Skip to content

Commit acc3eae

Browse files
committed
Fix deadlock with AVTTSEngine
1 parent 108b2bf commit acc3eae

File tree

5 files changed

+225
-31
lines changed

5 files changed

+225
-31
lines changed

Sources/Navigator/TTS/AVTTSEngine.swift

Lines changed: 219 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import AVFoundation
88
import Foundation
99
import R2Shared
1010

11+
/// Implementation of a `TTSEngine` using Apple AVFoundation's `AVSpeechSynthesizer`.
1112
public class AVTTSEngine: NSObject, TTSEngine, AVSpeechSynthesizerDelegate, Loggable {
1213

1314
/// Range of valid values for an AVUtterance rate.
@@ -28,12 +29,17 @@ public class AVTTSEngine: NSObject, TTSEngine, AVSpeechSynthesizerDelegate, Logg
2829

2930
public let defaultConfig: TTSConfiguration
3031
public var config: TTSConfiguration
32+
private let debug: Bool
3133

3234
public weak var delegate: TTSEngineDelegate?
3335

3436
private let synthesizer = AVSpeechSynthesizer()
3537

36-
public override init() {
38+
/// Creates a new `AVTTSEngine` instance.
39+
///
40+
/// - Parameters:
41+
/// - debug: Print the state machine transitions.
42+
public init(debug: Bool = false) {
3743
let config = TTSConfiguration(
3844
defaultLanguage: Language(code: .bcp47(AVSpeechSynthesisVoice.currentLanguageCode())),
3945
rate: avRateRange.percentageForValue(Double(AVSpeechUtteranceDefaultSpeechRate)),
@@ -42,6 +48,7 @@ public class AVTTSEngine: NSObject, TTSEngine, AVSpeechSynthesizerDelegate, Logg
4248

4349
self.defaultConfig = config
4450
self.config = config
51+
self.debug = debug
4552

4653
super.init()
4754
synthesizer.delegate = self
@@ -57,43 +64,32 @@ public class AVTTSEngine: NSObject, TTSEngine, AVSpeechSynthesizerDelegate, Logg
5764
}
5865

5966
/// Requests playback of `utterance` by raising a `.play` event on the state machine.
///
/// In this diff hunk, the `-` lines are the former direct calls into the
/// synthesizer and the `+` line is their replacement.
public func speak(_ utterance: TTSUtterance) {
60-
synthesizer.stopSpeaking(at: .immediate)
61-
synthesizer.speak(avUtterance(from: utterance))
67+
on(.play(utterance))
6268
}
6369

6470
/// Requests that playback stops by raising a `.stop` event on the state machine.
///
/// In this diff hunk, the `-` line is the former direct call into the
/// synthesizer and the `+` line is its replacement.
public func stop() {
65-
synthesizer.stopSpeaking(at: .immediate)
71+
on(.stop)
6672
}
67-
68-
private func avUtterance(from utterance: TTSUtterance) -> AVSpeechUtterance {
69-
let avUtterance = AVUtterance(utterance: utterance)
70-
avUtterance.rate = Float(avRateRange.valueForPercentage(config.rate))
71-
avUtterance.pitchMultiplier = Float(avPitchRange.valueForPercentage(config.pitch))
72-
avUtterance.preUtteranceDelay = utterance.delay
73-
avUtterance.postUtteranceDelay = config.delay
74-
avUtterance.voice = voice(for: utterance)
75-
return avUtterance
76-
}
77-
78-
private func voice(for utterance: TTSUtterance) -> AVSpeechSynthesisVoice? {
79-
let language = utterance.language ?? config.defaultLanguage
80-
if let voice = config.voice, voice.language.removingRegion() == language.removingRegion() {
81-
return AVSpeechSynthesisVoice(identifier: voice.identifier)
82-
} else {
83-
return AVSpeechSynthesisVoice(language: language)
73+
74+
75+
// MARK: AVSpeechSynthesizerDelegate
76+
77+
/// Forwards the synthesizer's "did start" callback to the state machine.
///
/// Callbacks for utterances that are not `AVUtterance` instances are ignored,
/// because no `TTSUtterance` can be recovered from them.
public func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didStart utterance: AVSpeechUtterance) {
    if let started = (utterance as? AVUtterance)?.utterance {
        on(.didStart(started))
    }
}
8683

8784
/// Forwards the synthesizer's "did finish" callback to the state machine.
///
/// Callbacks for utterances that are not `AVUtterance` instances are ignored.
/// In this diff hunk, the `-` line is the former direct delegate notification
/// and the `+` line raises a `.didFinish` event instead.
///
/// NOTE(review): no `speechSynthesizer(_:didCancel:)` handler is visible in this
/// diff. If the synthesizer reports `didCancel` rather than `didFinish` after
/// `stopSpeaking(at: .immediate)`, the state machine would never leave its
/// `stopping` state — confirm against the rest of the file.
public func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
8885
guard let utterance = (utterance as? AVUtterance)?.utterance else {
8986
return
9087
}
91-
delegate?.ttsEngine(self, didFinish: utterance)
88+
on(.didFinish(utterance))
9289
}
9390

9491
public func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, willSpeakRangeOfSpeechString characterRange: NSRange, utterance avUtterance: AVSpeechUtterance) {
9592
guard
96-
let delegate = delegate,
9793
let utterance = (avUtterance as? AVUtterance)?.utterance,
9894
let highlight = utterance.locator.text.highlight,
9995
let range = Range(characterRange, in: highlight)
@@ -104,7 +100,7 @@ public class AVTTSEngine: NSObject, TTSEngine, AVSpeechSynthesizerDelegate, Logg
104100
let rangeLocator = utterance.locator.copy(
105101
text: { text in text = text[range] }
106102
)
107-
delegate.ttsEngine(self, willSpeakRangeAt: rangeLocator, of: utterance)
103+
on(.willSpeakRange(locator: rangeLocator, utterance: utterance))
108104
}
109105

110106
private class AVUtterance: AVSpeechUtterance {
@@ -119,6 +115,204 @@ public class AVTTSEngine: NSObject, TTSEngine, AVSpeechSynthesizerDelegate, Logg
119115
fatalError("Not supported")
120116
}
121117
}
118+
119+
120+
// MARK: State machine
121+
122+
// Submitting new utterances to `AVSpeechSynthesizer` when the `didStart` or
123+
// `didFinish` events for the previous utterance were not received triggers
124+
// a deadlock on iOS 15: the synthesizer then ignores all subsequent requests.
125+
//
126+
// The following state machine is used to make sure we never send commands
127+
// to the `AVSpeechSynthesizer` when it's not ready.
128+
//
129+
// To visualize it, paste the following dot graph in https://edotor.net
130+
/*
131+
digraph {
132+
{
133+
stopped [style=filled]
134+
}
135+
136+
stopped -> starting [label = "play"]
137+
138+
starting -> playing [label = "didStart"]
139+
starting -> stopping [label = "play/stop"]
140+
141+
playing -> stopped [label = "didFinish"]
142+
playing -> stopping [label = "play/stop"]
143+
playing -> playing [label = "willSpeakRange"]
144+
145+
stopping -> stopping [label = "play/stop"]
146+
stopping -> stopping [label = "didStart"]
147+
stopping -> starting [label = "didFinish w/ next"]
148+
stopping -> stopped [label = "didFinish w/o next"]
149+
}
150+
*/
151+
152+
/// Represents a state of the TTS engine.
///
/// Every state except `stopped` carries the utterance it concerns. Synthesizer
/// events reported for a different utterance match no transition and are ignored.
private enum State: Equatable {
    /// The TTS engine is waiting for the next utterance to play.
    case stopped
    /// A new utterance is being processed by the TTS engine, we wait for `didStart`.
    case starting(TTSUtterance)
    /// The utterance is currently playing and the engine is ready to process other commands.
    case playing(TTSUtterance)
    /// The engine was stopped while processing the previous utterance, we wait for `didStart`
    /// and/or `didFinish`. The queued utterance, if any, is played once the engine is
    /// successfully stopped.
    ///
    /// NOTE(review): this state is only left on a `.didFinish` event. Confirm that one is
    /// raised after `stopSpeaking(at: .immediate)`, including if the synthesizer reports a
    /// cancellation instead of a completion.
    case stopping(TTSUtterance, queued: TTSUtterance?)

    /// Applies `event` to the current state, possibly moving to a new state.
    ///
    /// - Parameter event: The command or synthesizer event to process.
    /// - Returns: The side effect the caller must perform, or `nil` when there is
    ///   nothing to do (including when the event is not valid in the current state).
    mutating func on(_ event: Event) -> Effect? {
        switch (self, event) {

        // MARK: stopped

        case let (.stopped, .play(utterance)):
            self = .starting(utterance)
            return .play(utterance)

        // MARK: starting

        case let (.starting(current), .didStart(started)) where current == started:
            self = .playing(current)
            return nil

        // The synthesizer must not receive any command before `didStart`, so the
        // stop is deferred until `didStart` is received in the `stopping` state.
        case let (.starting(current), .play(next)):
            self = .stopping(current, queued: next)
            return nil

        case let (.starting(current), .stop):
            self = .stopping(current, queued: nil)
            return nil

        // MARK: playing

        case let (.playing(current), .didFinish(finished)) where current == finished:
            self = .stopped
            return .notifyDidStopAfterLastUtterance(current)

        case let (.playing(current), .play(next)):
            self = .stopping(current, queued: next)
            return .stop

        case let (.playing(current), .stop):
            self = .stopping(current, queued: nil)
            return .stop

        // The binding is named `locator` (not `Locator`) so it does not shadow the type.
        case let (.playing(current), .willSpeakRange(locator: locator, utterance: speaking)) where current == speaking:
            return .notifyWillSpeakRange(locator: locator, utterance: current)

        // MARK: stopping

        // The deferred stop can now be sent; the state itself does not change.
        case let (.stopping(current, queued: _), .didStart(started)) where current == started:
            return .stop

        case let (.stopping(current, queued: next), .didFinish(finished)) where current == finished:
            if let next = next {
                self = .starting(next)
                return .play(next)
            } else {
                self = .stopped
                return .notifyDidStopAfterLastUtterance(current)
            }

        // A newer request replaces whatever was queued.
        case let (.stopping(current, queued: _), .play(next)):
            self = .stopping(current, queued: next)
            return nil

        case let (.stopping(current, queued: _), .stop):
            self = .stopping(current, queued: nil)
            return nil

        // Any other (state, event) pair is ignored.
        default:
            return nil
        }
    }
}
233+
234+
/// Inputs of the state machine: commands issued by the client of `AVTTSEngine`
/// and callbacks received from the `AVSpeechSynthesizer`.
private enum Event: Equatable {

    // MARK: AVTTSEngine commands

    /// The client asked to play the given utterance.
    case play(TTSUtterance)
    /// The client asked to stop the playback.
    case stop

    // MARK: AVSpeechSynthesizer delegate events

    /// The synthesizer started speaking the utterance.
    case didStart(TTSUtterance)
    /// The synthesizer is about to speak the range at `locator` in `utterance`.
    case willSpeakRange(locator: Locator, utterance: TTSUtterance)
    /// The synthesizer finished speaking the utterance.
    case didFinish(TTSUtterance)
}
246+
247+
/// Outputs of the state machine: the side effect to perform after a state
/// transition triggered by an event.
private enum Effect: Equatable {

    // MARK: Commands sent to the `AVSpeechSynthesizer`

    /// Ask the synthesizer to play the utterance.
    case play(TTSUtterance)
    /// Ask the synthesizer to stop the playback.
    case stop

    // MARK: Notifications sent to our delegate

    /// Tell the delegate that the range at `locator` in `utterance` will be spoken.
    case notifyWillSpeakRange(locator: Locator, utterance: TTSUtterance)
    /// Tell the delegate that the engine stopped after playing the utterance.
    case notifyDidStopAfterLastUtterance(TTSUtterance)
}
258+
259+
/// Current state of the engine, initially `stopped`.
///
/// When `debug` is enabled, the state is logged from the property observer.
private var state: State = .stopped {
    didSet {
        guard debug else {
            return
        }
        log(.debug, "* \(state)")
    }
}
266+
267+
/// Feeds `event` to the state machine, then performs the side effect it returns, if any.
///
/// Must be called from the main thread (checked with an `assert`).
private func on(_ event: Event) {
    assert(Thread.isMainThread, "Raising AVTTSEngine events must be done from the main thread")

    if debug {
        log(.debug, "-> on \(event)")
    }

    guard let effect = state.on(event) else {
        return
    }
    handle(effect)
}
279+
280+
/// Performs a side effect requested by the state machine.
///
/// - Parameter effect: Either a command for the `AVSpeechSynthesizer` (`play`, `stop`)
///   or a notification to forward to `delegate`.
private func handle(_ effect: Effect) {
    switch effect {

    case let .play(utterance):
        synthesizer.speak(avUtterance(from: utterance))

    case .stop:
        synthesizer.stopSpeaking(at: .immediate)

    // The binding is named `locator` (not `Locator`) so it does not shadow the type.
    case let .notifyWillSpeakRange(locator: locator, utterance: utterance):
        delegate?.ttsEngine(self, willSpeakRangeAt: locator, of: utterance)

    case let .notifyDidStopAfterLastUtterance(utterance):
        delegate?.ttsEngine(self, didStopAfterLastUtterance: utterance)
    }
}
297+
298+
/// Builds the `AVSpeechUtterance` submitted to the synthesizer for `utterance`.
///
/// Rate, pitch and post-utterance delay come from the engine's `config`; the
/// pre-utterance delay comes from the utterance itself.
private func avUtterance(from utterance: TTSUtterance) -> AVSpeechUtterance {
    let speech = AVUtterance(utterance: utterance)

    let rate = avRateRange.valueForPercentage(config.rate)
    speech.rate = Float(rate)

    let pitch = avPitchRange.valueForPercentage(config.pitch)
    speech.pitchMultiplier = Float(pitch)

    speech.preUtteranceDelay = utterance.delay
    speech.postUtteranceDelay = config.delay
    speech.voice = voice(for: utterance)
    return speech
}
307+
308+
/// Picks the voice used to speak `utterance`.
///
/// The configured voice is used only when its language matches the utterance's
/// language (or the default language when the utterance has none), regions
/// ignored. Otherwise a voice is looked up from that language.
private func voice(for utterance: TTSUtterance) -> AVSpeechSynthesisVoice? {
    let language = utterance.language ?? config.defaultLanguage

    guard
        let preferred = config.voice,
        preferred.language.removingRegion() == language.removingRegion()
    else {
        return AVSpeechSynthesisVoice(language: language)
    }
    return AVSpeechSynthesisVoice(identifier: preferred.identifier)
}
122316
}
123317

124318
private extension TTSVoice {
@@ -169,4 +363,4 @@ private extension AVSpeechSynthesisVoice {
169363
/// Creates a voice from the BCP 47 code of the given `language`.
///
/// Delegates to the initializer taking a language string; it is failable, so
/// the result may be `nil`.
convenience init?(language: Language) {
170364
self.init(language: language.code.bcp47)
171365
}
172-
}
366+
}

Sources/Navigator/TTS/TTSController.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,8 +280,8 @@ public class TTSController: Loggable, TTSEngineDelegate {
280280

281281
// MARK: - TTSEngineDelegate
282282

283-
// Engine callback: moves on to the next utterance while the controller is playing.
// In this diff hunk, the `-` lines are the former `didFinish` variant, which also
// required `currentUtterance == utterance`; the `+` lines are its replacement.
public func ttsEngine(_ engine: TTSEngine, didFinish utterance: TTSUtterance) {
284-
if isPlaying && currentUtterance == utterance {
283+
public func ttsEngine(_ engine: TTSEngine, didStopAfterLastUtterance utterance: TTSUtterance) {
284+
if isPlaying {
285285
next()
286286
}
287287
}

Sources/Navigator/TTS/TTSEngine.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ public protocol TTSEngine: AnyObject {
2020

2121
/// Receives playback events from a `TTSEngine`.
public protocol TTSEngineDelegate: AnyObject {
2222
/// Called when the engine is about to speak the range at `locator` within `utterance`.
func ttsEngine(_ engine: TTSEngine, willSpeakRangeAt locator: Locator, of utterance: TTSUtterance)
23-
// Removed by this commit (`-` line), replaced by the `+` line below.
func ttsEngine(_ engine: TTSEngine, didFinish utterance: TTSUtterance)
23+
/// Called when the engine has stopped after `utterance` and has no other utterance to play.
func ttsEngine(_ engine: TTSEngine, didStopAfterLastUtterance utterance: TTSUtterance)
2424
}
2525

2626
public struct TTSConfiguration {

TestApp/Sources/Reader/Common/ReaderViewController.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ class ReaderViewController: UIViewController, Loggable {
124124
controls.didMove(toParent: self)
125125

126126
state
127-
.sink { [unowned self] state in
127+
.sink { state in
128128
controls.view.isHidden = (state == .stopped)
129129
}
130130
.store(in: &subscriptions)

TestApp/Sources/Reader/Common/TTS/TTSViewModel.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ final class TTSViewModel: ObservableObject, Loggable {
4545
var isMoving = false
4646
playingRangeLocatorSubject
4747
.throttle(for: 1, scheduler: RunLoop.main, latest: true)
48-
.sink { [unowned self] locator in
48+
.sink { locator in
4949
guard !isMoving else {
5050
return
5151
}
@@ -131,4 +131,4 @@ extension TTSViewModel: TTSControllerDelegate {
131131
/// Publishes the locator of the range about to be spoken on `playingRangeLocatorSubject`.
public func ttsController(_ ttsController: TTSController, willSpeakRangeAt locator: Locator, of utterance: TTSUtterance) {
132132
playingRangeLocatorSubject.send(locator)
133133
}
134-
}
134+
}

0 commit comments

Comments
 (0)