Skip to content

Commit 438f29a

Browse files
committed
Extract AgentSession
1 parent 2bd4e8f commit 438f29a

23 files changed

+482
-653
lines changed
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import Foundation
2+
import LiveKit
3+
4+
/// Alias for `Participant`; `AgentSession` publishes the room's agent participant under this name.
typealias Agent = Participant
5+
6+
extension AgentSession {
7+
/// Failures that `AgentSession` reports through its published `error` property.
enum Error: LocalizedError {
    /// The room was connected but no agent participant was present when the wait timeout fired.
    case agentNotConnected
    /// Connecting to the room (including fetching credentials) failed; wraps the underlying error.
    case failedToConnect(Swift.Error)
    /// A message sender threw while sending; wraps the underlying error.
    case failedToSend(Swift.Error)
    /// Toggling the microphone, camera or screen share threw; wraps the underlying error.
    case mediaDevice(Swift.Error)

    /// Message shown to the user via `localizedDescription`.
    ///
    /// Without this, `LocalizedError` falls back to a generic system message,
    /// so every case gets an explicit description here.
    var errorDescription: String? {
        switch self {
        case .agentNotConnected:
            return "Agent did not connect to the Room"
        case let .failedToConnect(underlying):
            return "Failed to connect: \(underlying.localizedDescription)"
        case let .failedToSend(underlying):
            return "Failed to send: \(underlying.localizedDescription)"
        case let .mediaDevice(underlying):
            return "Media device error: \(underlying.localizedDescription)"
        }
    }
}
20+
21+
/// Option set of the features (voice, text, video) a session is configured with.
struct Features: OptionSet {
    /// Bit mask backing the option set.
    let rawValue: Int

    /// Bit 0.
    static let voice = Features(rawValue: 0b001)
    /// Bit 1.
    static let text = Features(rawValue: 0b010)
    /// Bit 2.
    static let video = Features(rawValue: 0b100)

    /// Union of `voice`, `text` and `video`.
    static let all: Features = [.voice, .text, .video]
}
30+
31+
/// Pairs the `Room` a session operates on with the `Features` it supports.
struct Context {
    /// Room handed to the session.
    let room: Room
    /// Features handed to the session.
    let features: Features

    /// Creates a context.
    /// - Parameters:
    ///   - room: Defaults to a newly created `Room`.
    ///   - features: Defaults to `Features.all`.
    init(room: Room = Room(), features: Features = Features.all) {
        self.features = features
        self.room = room
    }
}
40+
41+
/// Source of the server URL and token used when connecting.
enum Environment {
    // .envfile?

    /// Sandbox identified by `id`. When omitted, `room` and `participant`
    /// default to a fixed prefix followed by a random number in 1000...9999.
    case sandbox(
        id: String,
        room: String = "room-\(Int.random(in: 1000 ... 9999))",
        participant: String = "participant-\(Int.random(in: 1000 ... 9999))"
    )

    /// Explicit server URL and access token, used as given.
    case cloud(server: String, token: String)
}
46+
}
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
import Collections
2+
import Foundation
3+
import LiveKit
4+
5+
@MainActor
6+
final class AgentSession: ObservableObject {
7+
// MARK: - State
8+
9+
@Published private(set) var error: Error?
10+
11+
@Published private(set) var agent: Agent?
12+
13+
@Published private(set) var connectionState: ConnectionState = .disconnected
14+
@Published private(set) var isListening = false
15+
/// Whether the session should be presented as available.
///
/// `true` while connected or reconnecting; while disconnected or connecting
/// it is `true` only if `isListening` is set. Any other state yields `false`.
var isAvailable: Bool {
    switch connectionState {
    case .connected, .reconnecting:
        return true
    case .disconnected, .connecting:
        return isListening
    default:
        return false
    }
}
26+
27+
@Published private(set) var localAudioTrack: (any AudioTrack)?
28+
@Published private(set) var localCameraTrack: (any VideoTrack)?
29+
@Published private(set) var localScreenShareTrack: (any VideoTrack)?
30+
31+
// TODO: Move camera switching here (vs Devices)?
32+
33+
var isMicrophoneEnabled: Bool { localAudioTrack != nil }
34+
var isCameraEnabled: Bool { localCameraTrack != nil }
35+
var isScreenShareEnabled: Bool { localScreenShareTrack != nil }
36+
37+
@Published private(set) var agentAudioTrack: (any AudioTrack)?
38+
@Published private(set) var avatarCameraTrack: (any VideoTrack)?
39+
40+
@Published private(set) var messages: OrderedDictionary<ReceivedMessage.ID, ReceivedMessage> = [:]
41+
42+
var supportedFeatures: Features { features }
43+
44+
// MARK: - Dependencies
45+
46+
private let environment: Environment
47+
private let room: Room
48+
private let features: Features
49+
private let senders: [any MessageSender]
50+
private let receivers: [any MessageReceiver]
51+
52+
// MARK: - Internal state
53+
54+
private var waitForAgentTask: Task<Void, Swift.Error>?
55+
56+
// MARK: - Init
57+
58+
/// Creates a session and immediately starts observing the room and the message receivers.
/// - Parameters:
///   - environment: Where connection credentials come from.
///   - context: Supplies the `Room` and `Features`; defaults to `Context()`.
///   - senders: Message senders; when `nil`, a single `TextMessageSender` is used.
///   - receivers: Message receivers; when `nil`, the same `TextMessageSender`
///     plus a `TranscriptionStreamReceiver` are used.
init(environment: Environment, context: Context = .init(), senders: [any MessageSender]? = nil, receivers: [any MessageReceiver]? = nil) {
    self.environment = environment
    self.room = context.room
    self.features = context.features

    // One shared instance serves as both the default sender and a default receiver.
    let defaultTextChannel = TextMessageSender(room: room)

    if let senders {
        self.senders = senders
    } else {
        self.senders = [defaultTextChannel]
    }

    if let receivers {
        self.receivers = receivers
    } else {
        self.receivers = [defaultTextChannel, TranscriptionStreamReceiver(room: room)]
    }

    observeRoom()
    observeReceivers()
}
70+
71+
/// Mirrors room state into the published properties every time `room.changes` emits.
private func observeRoom() {
    Task { [weak self] in
        // Grab the stream up front; bail out if the session is already gone.
        guard let roomChanges = self?.room.changes else { return }

        for await _ in roomChanges {
            // Re-check on every emission so the task ends once the session is deallocated.
            guard let self else { return }

            self.connectionState = self.room.connectionState
            self.agent = self.room.agentParticipant

            self.localAudioTrack = self.room.localParticipant.firstAudioTrack
            self.localCameraTrack = self.room.localParticipant.firstCameraVideoTrack
            self.localScreenShareTrack = self.room.localParticipant.firstScreenShareVideoTrack

            // Only the microphone-sourced publication counts as the agent's voice (skips bg audio tracks).
            self.agentAudioTrack = self.room.agentParticipant?
                .audioTracks
                .first { $0.source == .microphone }?
                .track as? AudioTrack
            self.avatarCameraTrack = self.room.agentParticipant?.avatarWorker?.firstCameraVideoTrack
        }
    }
}
89+
90+
/// Starts one task per receiver that stores every incoming message in `messages`, keyed by its ID.
///
/// An error thrown while obtaining a receiver's stream ends that task; it is not surfaced anywhere.
private func observeReceivers() {
    receivers.forEach { receiver in
        Task { [weak self] in
            let incoming = try await receiver.messages()
            for await message in incoming {
                // Stop consuming once the session has been deallocated.
                guard let self else { return }
                self.messages.updateValue(message, forKey: message.id)
            }
        }
    }
}
100+
101+
// MARK: - Public
102+
103+
/// Connects the room using credentials resolved from the session's environment.
///
/// Clears any previous error and cancels a pending agent-wait timer first.
/// Failures, including a failed credential fetch, are reported as
/// `.failedToConnect` through `error` rather than thrown.
/// - Parameters:
///   - options: Forwarded to `Room.connect` as `connectOptions`.
///   - roomOptions: Forwarded to `Room.connect`.
///   - preConnectAudio: When `true`, the connection runs inside `room.withPreConnectAudio`
///     (presumably so audio is captured before the connection completes; confirm against LiveKit docs).
///   - waitForAgent: Seconds to wait before checking that an agent has joined.
func connect(options: ConnectOptions? = nil, roomOptions: RoomOptions? = nil, preConnectAudio: Bool = true, waitForAgent: TimeInterval = 20) async {
    error = nil
    waitForAgentTask?.cancel()

    // Runs once the connection attempt below has finished, whatever its outcome.
    defer {
        waitForAgentTask = Task {
            try await Task.sleep(for: .seconds(waitForAgent))
            try Task.checkCancellation()
            // Still connected but no agent: give up and report it.
            if connectionState == .connected, agent == nil {
                await disconnect()
                self.error = .agentNotConnected
            }
        }
    }

    let connection = { @Sendable in
        // `try` (not `try!`): a failed credential fetch must reach the `catch` below instead of crashing.
        let (server, token) = try await self.credentials()
        try await self.room.connect(url: server, token: token, connectOptions: options, roomOptions: roomOptions)
    }

    do {
        if preConnectAudio {
            try await room.withPreConnectAudio { try await connection() }
        } else {
            try await connection()
        }
    } catch {
        self.error = .failedToConnect(error)
    }
}
133+
134+
/// Disconnects the underlying room and waits for the call to return.
func disconnect() async {
    await self.room.disconnect()
}
137+
138+
/// Clears the currently published error, if any.
func resetError() {
    self.error = nil
}
141+
142+
/// Sends `message` through each configured sender in order.
///
/// The first sender that throws stops the sequence; its error is published as `.failedToSend`.
func send(message: SentMessage) async {
    var remaining = senders.makeIterator()
    do {
        while let sender = remaining.next() {
            try await sender.send(message)
        }
    } catch let failure {
        self.error = .failedToSend(failure)
    }
}
151+
152+
/// Returns the stored messages as an array, in the dictionary's current order.
func getMessageHistory() -> [ReceivedMessage] {
    let history = Array(messages.values)
    return history
}
155+
156+
/// Replaces the stored messages with `messages`, ordered by ascending timestamp.
///
/// Duplicate IDs are tolerated: the entry keeps the position of its earliest
/// occurrence and the value of its latest one, matching how incoming messages
/// are merged. `init(uniqueKeysWithValues:)` would trap on such input.
/// - Parameter messages: History to restore; may be empty or unsorted.
func restoreMessageHistory(_ messages: [ReceivedMessage]) {
    var restored: OrderedDictionary<ReceivedMessage.ID, ReceivedMessage> = [:]
    for message in messages.sorted(by: { $0.timestamp < $1.timestamp }) {
        restored.updateValue(message, forKey: message.id)
    }
    // Assign once so observers see a single change.
    self.messages = restored
}
159+
160+
/// Flips the microphone on or off based on `isMicrophoneEnabled`; failures are published as `.mediaDevice`.
func toggleMicrophone() async {
    let shouldEnable = !isMicrophoneEnabled
    do {
        try await room.localParticipant.setMicrophone(enabled: shouldEnable)
    } catch let failure {
        self.error = .mediaDevice(failure)
    }
}
167+
168+
/// Flips the camera on or off based on `isCameraEnabled`.
///
/// When turning the camera on while screen share is active, screen share is
/// disabled first. Failures are published as `.mediaDevice`.
func toggleCamera() async {
    let shouldEnable = !isCameraEnabled
    let participant = room.localParticipant
    do {
        // One video track at a time
        if shouldEnable && isScreenShareEnabled {
            try await participant.setScreenShare(enabled: false)
        }

        // Hm???
        // let device = try await CameraCapturer.captureDevices().first(where: { $0.uniqueID == selectedVideoDeviceID })
        try await participant.setCamera(enabled: shouldEnable) // captureOptions: CameraCaptureOptions(device: device))
    } catch let failure {
        self.error = .mediaDevice(failure)
    }
}
183+
184+
/// Flips screen share on or off based on `isScreenShareEnabled`.
///
/// When turning screen share on while the camera is active, the camera is
/// disabled first. Failures are published as `.mediaDevice`.
func toggleScreenShare() async {
    let shouldEnable = !isScreenShareEnabled
    let participant = room.localParticipant
    do {
        // One video track at a time
        if shouldEnable && isCameraEnabled {
            try await participant.setCamera(enabled: false)
        }
        try await participant.setScreenShare(enabled: shouldEnable)
    } catch let failure {
        self.error = .mediaDevice(failure)
    }
}
196+
197+
// MARK: - Private
198+
199+
/// Resolves the server URL and token for the configured environment.
/// - Returns: `.cloud` values as given; for `.sandbox`, the values returned by `Sandbox.getConnection`.
/// - Throws: Whatever `Sandbox.getConnection` throws.
private func credentials() async throws -> (server: String, token: String) {
    switch environment {
    case .cloud(let server, let token):
        return (server: server, token: token)
    case .sandbox(let id, let roomName, let participantName):
        let details = try await Sandbox.getConnection(id: id, roomName: roomName, participantName: participantName)
        return (server: details.serverUrl, token: details.participantToken)
    }
}
208+
}

VoiceAgent/App/AppView.swift

Lines changed: 26 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,15 @@
11
import SwiftUI
22

33
struct AppView: View {
4-
@Environment(AppViewModel.self) private var viewModel
5-
@State private var chatViewModel = ChatViewModel()
4+
@EnvironmentObject private var session: AgentSession
5+
@State private var chat: Bool = false
66

7-
@State private var error: Error?
87
@FocusState private var keyboardFocus: Bool
9-
108
@Namespace private var namespace
119

1210
var body: some View {
1311
ZStack(alignment: .top) {
14-
if viewModel.isInteractive {
12+
if session.isAvailable {
1513
interactions()
1614
} else {
1715
start()
@@ -22,34 +20,31 @@ struct AppView: View {
2220
.environment(\.namespace, namespace)
2321
#if os(visionOS)
2422
.ornament(attachmentAnchor: .scene(.bottom)) {
25-
if viewModel.isInteractive {
26-
ControlBar()
23+
if session.isAvailable {
24+
ControlBar(chat: $chat)
2725
.glassBackgroundEffect()
2826
}
2927
}
30-
.alert("warning.reconnecting", isPresented: .constant(viewModel.connectionState == .reconnecting)) {}
31-
.alert(error?.localizedDescription ?? "error.title", isPresented: .constant(error != nil)) {
32-
Button("error.ok") { error = nil }
28+
.alert("warning.reconnecting", isPresented: .constant(session.connectionState == .reconnecting)) {}
29+
.alert(session.error?.localizedDescription ?? "error.title", isPresented: .constant(session.error != nil)) {
30+
Button("error.ok") { session.resetError() }
3331
}
3432
#else
3533
.safeAreaInset(edge: .bottom) {
36-
if viewModel.isInteractive, !keyboardFocus {
37-
ControlBar()
34+
if session.isAvailable, !keyboardFocus {
35+
ControlBar(chat: $chat)
3836
.transition(.asymmetric(insertion: .move(edge: .bottom).combined(with: .opacity), removal: .opacity))
3937
}
4038
}
4139
#endif
4240
.background(.bg1)
43-
.animation(.default, value: viewModel.isInteractive)
44-
.animation(.default, value: viewModel.interactionMode)
45-
.animation(.default, value: viewModel.isCameraEnabled)
46-
.animation(.default, value: viewModel.isScreenShareEnabled)
47-
.animation(.default, value: error?.localizedDescription)
48-
.onAppear {
49-
Dependencies.shared.errorHandler = { error = $0 }
50-
}
41+
.animation(.default, value: chat)
42+
.animation(.default, value: session.isAvailable)
43+
.animation(.default, value: session.isCameraEnabled)
44+
.animation(.default, value: session.isScreenShareEnabled)
45+
.animation(.default, value: session.error?.localizedDescription)
5146
#if os(iOS)
52-
.sensoryFeedback(.impact, trigger: viewModel.isListening)
47+
.sensoryFeedback(.impact, trigger: session.isListening)
5348
#endif
5449
}
5550

@@ -61,18 +56,15 @@ struct AppView: View {
6156
@ViewBuilder
6257
private func interactions() -> some View {
6358
#if os(visionOS)
64-
VisionInteractionView(keyboardFocus: $keyboardFocus)
65-
.environment(chatViewModel)
59+
VisionInteractionView(chat: chat, keyboardFocus: $keyboardFocus)
6660
.overlay(alignment: .bottom) {
6761
agentListening()
6862
.padding(16 * .grid)
6963
}
7064
#else
71-
switch viewModel.interactionMode {
72-
case .text:
65+
if chat {
7366
TextInteractionView(keyboardFocus: $keyboardFocus)
74-
.environment(chatViewModel)
75-
case .voice:
67+
} else {
7668
VoiceInteractionView()
7769
.overlay(alignment: .bottom) {
7870
agentListening()
@@ -85,27 +77,27 @@ struct AppView: View {
8577
@ViewBuilder
8678
private func errors() -> some View {
8779
#if !os(visionOS)
88-
if case .reconnecting = viewModel.connectionState {
80+
if case .reconnecting = session.connectionState {
8981
WarningView(warning: "warning.reconnecting")
9082
}
9183

92-
if let error {
93-
ErrorView(error: error) { self.error = nil }
84+
if let error = session.error {
85+
ErrorView(error: error) { session.resetError() }
9486
}
9587
#endif
9688
}
9789

9890
@ViewBuilder
9991
private func agentListening() -> some View {
10092
ZStack {
101-
if chatViewModel.messages.isEmpty,
102-
!viewModel.isCameraEnabled,
103-
!viewModel.isScreenShareEnabled
93+
if session.messages.isEmpty,
94+
!session.isCameraEnabled,
95+
!session.isScreenShareEnabled
10496
{
10597
AgentListeningView()
10698
}
10799
}
108-
.animation(.default, value: chatViewModel.messages.isEmpty)
100+
.animation(.default, value: session.messages.isEmpty)
109101
}
110102
}
111103

0 commit comments

Comments
 (0)