Skip to content

Commit b488eae

Browse files
committed
Add POST realtime/session request
1 parent 769dcf9 commit b488eae

File tree

8 files changed

+333
-5
lines changed

8 files changed

+333
-5
lines changed

Sources/OpenAI/AIProxy/AIProxyService.swift

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,16 @@ struct AIProxyService: OpenAIService {
648648
return try await fetch(debugEnabled: debugEnabled, type: OpenAIResponse<BatchObject>.self, with: request)
649649
}
650650

651+
// MARK: Session

/// Creates an ephemeral Realtime session.
///
/// POST `/v1/realtime/sessions`
///
/// - Parameter parameters: The configuration for the realtime session
///   (model, modalities, voice, audio formats, turn detection, etc.).
/// - Returns: A `SessionObject` containing the session configuration and the
///   ephemeral `client_secret` used to open the realtime connection.
/// - Throws: An error if the request cannot be built or the network call fails.
func createSession(
   parameters: CreateSessionParameters)
   async throws -> SessionObject
{
   // FIX: the realtime sessions endpoint is gated by the realtime beta header
   // ("OpenAI-Beta: realtime=v1"), not the Assistants v2 header that was
   // copy-pasted here from the batch/vector-store methods.
   let request = try await OpenAIAPI.session(.create).request(aiproxyPartialKey: partialKey, clientID: clientID, organizationID: organizationID, openAIEnvironment: openAIEnvironment, method: .post, params: parameters, betaHeaderField: "realtime=v1")
   return try await fetch(debugEnabled: debugEnabled, type: SessionObject.self, with: request)
}
660+
651661
// MARK: Vector Store
652662

653663
func createVectorStore(

Sources/OpenAI/Azure/DefaultOpenAIAzureService.swift

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -593,7 +593,13 @@ final public class DefaultOpenAIAzureService: OpenAIService {
593593
{
594594
fatalError("Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.")
595595
}
596-
596+
597+
// MARK: Session

/// Realtime session creation is not implemented for the Azure OpenAI wrapper;
/// calling this method traps with a contribution-welcome message.
public func createSession(
   parameters: CreateSessionParameters)
   async throws -> SessionObject
{
   fatalError("Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.")
}
602+
597603
// MARK: Vector Store
598604

599605
public func createVectorStore(

Sources/OpenAI/LocalModelService/LocalModelService.swift

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,11 @@ struct LocalModelService: OpenAIService {
262262
func listBatch(after: String?, limit: Int?) async throws -> OpenAIResponse<BatchObject> {
263263
fatalError("Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.")
264264
}
265-
265+
266+
/// Realtime session creation is not implemented for local models;
/// calling this method traps with a contribution-welcome message.
func createSession(
   parameters: CreateSessionParameters)
   async throws -> SessionObject
{
   fatalError("Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.")
}
269+
266270
func createVectorStore(parameters: VectorStoreParameter) async throws -> VectorStoreObject {
267271
fatalError("Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.")
268272
}

Sources/OpenAI/Private/Networking/OpenAIAPI.swift

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ enum OpenAIAPI {
2525
case runStep(RunStepCategory) // https://platform.openai.com/docs/api-reference/runs/step-object
2626
case thread(ThreadCategory) // https://platform.openai.com/docs/api-reference/threads
2727
case batch(BatchCategory) // https://platform.openai.com/docs/api-reference/batch
28+
case session(SessionCategory) // https://platform.openai.com/docs/api-reference/sessions
2829
case vectorStore(VectorStoreCategory) // https://platform.openai.com/docs/api-reference/vector-stores
2930
case vectorStoreFile(VectorStoreFileCategory) // https://platform.openai.com/docs/api-reference/vector-stores-files
3031
case vectorStoreFileBatch(VectorStoreFileBatch) // https://platform.openai.com/docs/api-reference/vector-stores-file-batches
@@ -110,7 +111,11 @@ enum OpenAIAPI {
110111
case cancel(batchID: String)
111112
case list
112113
}
113-
114+
115+
/// Request categories for the realtime sessions endpoint.
/// https://platform.openai.com/docs/api-reference/realtime-sessions
enum SessionCategory {
   /// POST /v1/realtime/sessions — creates an ephemeral realtime session.
   case create
}
118+
114119
enum VectorStoreCategory {
115120
case create
116121
case list
@@ -228,6 +233,10 @@ extension OpenAIAPI: Endpoint {
228233
case .create: return "\(version)/threads"
229234
case .retrieve(let threadID), .modify(let threadID), .delete(let threadID): return "\(version)/threads/\(threadID)"
230235
}
236+
case .session(let category):
237+
switch category {
238+
case .create: return "\(version)/realtime/sessions"
239+
}
231240
case .vectorStore(let category):
232241
switch category {
233242
case .create, .list: return "\(version)/vector_stores"
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import Foundation
2+
3+
/// Request body for creating a Realtime session
/// (POST /v1/realtime/sessions).
///
/// All properties are optional; omitted fields fall back to the server-side
/// defaults. Snake_case JSON keys are mapped explicitly via `CodingKeys`.
/// https://platform.openai.com/docs/api-reference/realtime-sessions/create
public struct CreateSessionParameters: Codable {
   /// The format of input audio. Options are pcm16, g711_ulaw, or g711_alaw.
   /// For pcm16, input audio must be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian byte order.
   public let inputAudioFormat: SessionObject.AudioFormat?

   /// Configuration for input audio noise reduction.
   public let inputAudioNoiseReduction: SessionObject.InputAudioNoiseReduction?

   /// Configuration for input audio transcription.
   public let inputAudioTranscription: SessionObject.InputAudioTranscription?

   /// The default system instructions (i.e. system message) prepended to model calls.
   public let instructions: String?

   /// Maximum number of output tokens for a single assistant response.
   /// NOTE(review): typed `Int?`, so the API's "inf" sentinel cannot be sent,
   /// although `SessionObject.MaxOutputTokens` models it on the response side —
   /// confirm whether the create endpoint should accept "inf" too.
   public let maxResponseOutputTokens: Int?

   /// The set of modalities the model can respond with.
   public let modalities: [SessionObject.Modality]?

   /// The Realtime model used for this session.
   public let model: String?

   /// The format of output audio.
   public let outputAudioFormat: SessionObject.AudioFormat?

   /// Sampling temperature for the model, limited to [0.6, 1.2].
   public let temperature: Double?

   /// How the model chooses tools.
   public let toolChoice: ToolChoice?

   /// Tools (functions) available to the model.
   public let tools: [Tool]?

   /// Configuration for turn detection.
   public let turnDetection: SessionObject.TurnDetection?

   /// The voice the model uses to respond.
   public let voice: String?

   // Maps Swift camelCase property names to the API's snake_case JSON keys.
   enum CodingKeys: String, CodingKey {
      case inputAudioFormat = "input_audio_format"
      case inputAudioNoiseReduction = "input_audio_noise_reduction"
      case inputAudioTranscription = "input_audio_transcription"
      case instructions
      case maxResponseOutputTokens = "max_response_output_tokens"
      case modalities
      case model
      case outputAudioFormat = "output_audio_format"
      case temperature
      case toolChoice = "tool_choice"
      case tools
      case turnDetection = "turn_detection"
      case voice
   }

   /// Creates session parameters; every argument defaults to `nil`, so callers
   /// specify only the settings they want to override.
   public init(
      inputAudioFormat: SessionObject.AudioFormat? = nil,
      inputAudioNoiseReduction: SessionObject.InputAudioNoiseReduction? = nil,
      inputAudioTranscription: SessionObject.InputAudioTranscription? = nil,
      instructions: String? = nil,
      maxResponseOutputTokens: Int? = nil,
      modalities: [SessionObject.Modality]? = nil,
      model: String? = nil,
      outputAudioFormat: SessionObject.AudioFormat? = nil,
      temperature: Double? = nil,
      toolChoice: ToolChoice? = nil,
      tools: [Tool]? = nil,
      turnDetection: SessionObject.TurnDetection? = nil,
      voice: String? = nil
   ) {
      self.inputAudioFormat = inputAudioFormat
      self.inputAudioNoiseReduction = inputAudioNoiseReduction
      self.inputAudioTranscription = inputAudioTranscription
      self.instructions = instructions
      self.maxResponseOutputTokens = maxResponseOutputTokens
      self.modalities = modalities
      self.model = model
      self.outputAudioFormat = outputAudioFormat
      self.temperature = temperature
      self.toolChoice = toolChoice
      self.tools = tools
      self.turnDetection = turnDetection
      self.voice = voice
   }
}
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
import Foundation
2+
3+
/// A Realtime session object, as returned by
/// POST /v1/realtime/sessions.
///
/// Decode-only from the caller's perspective: there is no public initializer,
/// and properties are populated by `Codable` using the snake_case mapping in
/// the private `CodingKeys` at the bottom of the type.
/// https://platform.openai.com/docs/api-reference/realtime-sessions/session_object
public struct SessionObject: Codable {
   /// The unique identifier for the session.
   public let id: String

   /// The object type, which is always "realtime.session".
   public let object: String

   /// The model used for this session.
   public let model: String

   /// The set of modalities the model can respond with.
   public let modalities: [Modality]

   /// The default system instructions (i.e. system message) prepended to model calls.
   public let instructions: String?

   /// The voice the model uses to respond.
   public let voice: String?

   /// The format of input audio.
   public let inputAudioFormat: AudioFormat?

   /// The format of output audio.
   public let outputAudioFormat: AudioFormat?

   /// Configuration for input audio transcription.
   public let inputAudioTranscription: InputAudioTranscription?

   /// Configuration for turn detection.
   public let turnDetection: TurnDetection?

   /// Tools (functions) available to the model.
   /// NOTE(review): non-optional — decoding fails if the server omits this key; confirm the API always returns it.
   public let tools: [Tool]

   /// How the model chooses tools.
   /// NOTE(review): non-optional — decoding fails if the server omits this key; confirm the API always returns it.
   public let toolChoice: ToolChoice

   /// Sampling temperature for the model.
   public let temperature: Double?

   /// Maximum number of output tokens for a single assistant response.
   /// Either a concrete integer or the "inf" sentinel (see `MaxOutputTokens`).
   public let maxResponseOutputTokens: MaxOutputTokens?

   /// The client secret containing the ephemeral API token.
   public let clientSecret: ClientSecret

   /// Supported audio encodings; raw values match the API's wire strings.
   public enum AudioFormat: String, Codable {
      case pcm16 = "pcm16"
      case g711Ulaw = "g711_ulaw"
      case g711Alaw = "g711_alaw"
   }

   /// Configuration for input audio noise reduction.
   /// Not a stored property of `SessionObject` itself; nested here for use by
   /// `CreateSessionParameters.inputAudioNoiseReduction`.
   public struct InputAudioNoiseReduction: Codable {
      // Noise-reduction mode; the API accepts a free-form string here.
      public let type: String?

      public init(type: String? = nil) {
         self.type = type
      }
   }

   /// Configuration for transcribing input audio.
   public struct InputAudioTranscription: Codable {
      // Transcription model identifier (e.g. a Whisper model) — see API docs.
      public let model: String?
      // Input language hint for the transcriber.
      public let language: String?
      // Optional prompt to guide the transcription.
      public let prompt: String?

      public init(model: String? = nil, language: String? = nil, prompt: String? = nil) {
         self.model = model
         self.language = language
         self.prompt = prompt
      }
   }

   /// Response modalities the model may produce.
   public enum Modality: String, Codable {
      case text = "text"
      case audio = "audio"
   }

   /// Configuration for detecting when the user has finished speaking.
   public struct TurnDetection: Codable {
      // Whether the server auto-creates a response when a turn ends.
      public let createResponse: Bool?
      // How eagerly semantic VAD ends a turn (semantic_vad only).
      public let eagerness: TurnDetectionEagerness?
      // Whether an in-progress response is interrupted when speech starts.
      public let interruptResponse: Bool?
      // Audio (ms) included before detected speech (server_vad only).
      public let prefixPaddingMs: Int?
      // Silence duration (ms) that ends a turn (server_vad only).
      public let silenceDurationMs: Int?
      // Activation threshold for server VAD.
      public let threshold: Double?
      // Detection strategy; required, unlike the tuning knobs above.
      public let type: TurnDetectionType

      /// Turn-detection strategies; raw values match the API's wire strings.
      public enum TurnDetectionType: String, Codable, Sendable {
         case serverVad = "server_vad"
         case semanticVad = "semantic_vad"
         case none
      }

      /// Eagerness levels for semantic VAD.
      public enum TurnDetectionEagerness: String, Codable, Sendable {
         case low
         case high
         case auto
         case medium
      }

      // Maps Swift camelCase property names to the API's snake_case JSON keys.
      enum CodingKeys: String, CodingKey {
         case createResponse = "create_response"
         case eagerness
         case interruptResponse = "interrupt_response"
         case prefixPaddingMs = "prefix_padding_ms"
         case silenceDurationMs = "silence_duration_ms"
         case threshold
         case type
      }

      /// Creates a turn-detection configuration; only `type` is required.
      public init(
         type: TurnDetectionType,
         createResponse: Bool? = nil,
         eagerness: TurnDetectionEagerness? = nil,
         interruptResponse: Bool? = nil,
         prefixPaddingMs: Int? = nil,
         silenceDurationMs: Int? = nil,
         threshold: Double? = nil
      ) {
         self.type = type
         self.createResponse = createResponse
         self.eagerness = eagerness
         self.interruptResponse = interruptResponse
         self.prefixPaddingMs = prefixPaddingMs
         self.silenceDurationMs = silenceDurationMs
         self.threshold = threshold
      }
   }

   /// The ephemeral credential clients use to open the realtime connection.
   /// Decode-only: no public initializer, by design.
   public struct ClientSecret: Codable {
      /// The ephemeral API token value.
      public let value: String

      /// The Unix timestamp (in seconds) when the token expires.
      public let expiresAt: Int

      private enum CodingKeys: String, CodingKey {
         case value
         case expiresAt = "expires_at"
      }
   }

   /// Token limit that is either a concrete integer or the string "inf",
   /// mirroring the API's union type for `max_response_output_tokens`.
   public enum MaxOutputTokens: Codable {
      case inf
      case int(Int)

      /// Decodes from a single JSON value: an Int, or the exact string "inf".
      public init(from decoder: Decoder) throws {
         let container = try decoder.singleValueContainer()
         if let intValue = try? container.decode(Int.self) {
            self = .int(intValue)
         } else if let stringValue = try? container.decode(String.self), stringValue == "inf" {
            self = .inf
         } else {
            throw DecodingError.typeMismatch(MaxOutputTokens.self, DecodingError.Context(codingPath: decoder.codingPath, debugDescription: "Wrong type for MaxOutputTokens"))
         }
      }

      /// Encodes back to the same single-value wire form it decodes from.
      public func encode(to encoder: Encoder) throws {
         var container = encoder.singleValueContainer()
         switch self {
         case .inf:
            try container.encode("inf")
         case .int(let value):
            try container.encode(value)
         }
      }
   }

   // Maps Swift camelCase property names to the API's snake_case JSON keys.
   private enum CodingKeys: String, CodingKey {
      case id
      case object
      case model
      case modalities
      case instructions
      case voice
      case inputAudioFormat = "input_audio_format"
      case outputAudioFormat = "output_audio_format"
      case inputAudioTranscription = "input_audio_transcription"
      case turnDetection = "turn_detection"
      case tools
      case toolChoice = "tool_choice"
      case temperature
      case maxResponseOutputTokens = "max_response_output_tokens"
      case clientSecret = "client_secret"
   }
}

Sources/OpenAI/Public/Service/DefaultOpenAIService.swift

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -634,7 +634,17 @@ struct DefaultOpenAIService: OpenAIService {
634634
let request = try OpenAIAPI.batch(.list).request(apiKey: apiKey, openAIEnvironment: openAIEnvironment, organizationID: organizationID, method: .get, queryItems: queryItems, extraHeaders: extraHeaders)
635635
return try await fetch(debugEnabled: debugEnabled, type: OpenAIResponse<BatchObject>.self, with: request)
636636
}
637-
637+
638+
// MARK: Session

/// Creates an ephemeral Realtime session.
///
/// POST `/v1/realtime/sessions`
///
/// - Parameter parameters: The configuration for the realtime session
///   (model, modalities, voice, audio formats, turn detection, etc.).
/// - Returns: A `SessionObject` containing the session configuration and the
///   ephemeral `client_secret` used to open the realtime connection.
/// - Throws: An error if the request cannot be built or the network call fails.
func createSession(
   parameters: CreateSessionParameters)
   async throws -> SessionObject
{
   // FIX: the realtime sessions endpoint is gated by the realtime beta header
   // ("OpenAI-Beta: realtime=v1"), not the Assistants v2 header that was
   // copy-pasted here from the batch/vector-store methods.
   let request = try OpenAIAPI.session(.create).request(apiKey: apiKey, openAIEnvironment: openAIEnvironment, organizationID: organizationID, method: .post, params: parameters, betaHeaderField: "realtime=v1", extraHeaders: extraHeaders)
   return try await fetch(debugEnabled: debugEnabled, type: SessionObject.self, with: request)
}
647+
638648
// MARK: Vector Store
639649

640650
func createVectorStore(

Sources/OpenAI/Public/Service/OpenAIService.swift

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -757,7 +757,20 @@ public protocol OpenAIService {
757757
after: String?,
758758
limit: Int?)
759759
async throws -> OpenAIResponse<BatchObject>
760-
760+
761+
// MARK: Session

/// Creates an ephemeral Realtime session.
///
/// - Parameter parameters: The parameters needed to create a session.
/// - Returns: A [Session](https://platform.openai.com/docs/api-reference/realtime-sessions/session_object) object
///   containing the session configuration and the ephemeral client secret.
/// - Throws: An error if the request fails.
///
/// For more information, refer to [OpenAI's Session API documentation](https://platform.openai.com/docs/api-reference/realtime-sessions/create).
func createSession(
   parameters: CreateSessionParameters)
   async throws -> SessionObject
773+
761774
// MARK: Vector Store
762775

763776
/// Create a vector store.

0 commit comments

Comments
 (0)