Add POST realtime/session request

glyuck · glyuck · commit b488eae260f3 · 2025-04-13T22:37:35.000+04:00
diff --git a/Sources/OpenAI/AIProxy/AIProxyService.swift b/Sources/OpenAI/AIProxy/AIProxyService.swift
@@ -648,6 +648,16 @@ struct AIProxyService: OpenAIService {
       return try await fetch(debugEnabled: debugEnabled, type: OpenAIResponse<BatchObject>.self, with: request)
    }
 
+   // MARK: Session
+
+   /// Creates a Realtime session by sending `parameters` to the `realtime/sessions` endpoint via AIProxy.
+   ///
+   /// - Parameter parameters: The session configuration passed as the request `params`.
+   /// - Returns: The `SessionObject` decoded from the response.
+   /// - Throws: Any error raised while building the request or by `fetch`.
+   func createSession(
+      parameters: CreateSessionParameters)
+      async throws -> SessionObject
+   {
+      // No `betaHeaderField` here: this is not an Assistants endpoint, so the Assistants v2 beta header does not apply.
+      let request = try await OpenAIAPI.session(.create).request(aiproxyPartialKey: partialKey, clientID: clientID, organizationID: organizationID, openAIEnvironment: openAIEnvironment, method: .post, params: parameters)
+      return try await fetch(debugEnabled: debugEnabled, type: SessionObject.self, with: request)
+   }
+
    // MARK: Vector Store
 
    func createVectorStore(
diff --git a/Sources/OpenAI/Azure/DefaultOpenAIAzureService.swift b/Sources/OpenAI/Azure/DefaultOpenAIAzureService.swift
@@ -593,7 +593,13 @@ final public class DefaultOpenAIAzureService: OpenAIService {
    {
       fatalError("Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.")
    }
-   
+
+   // MARK: Session
+
+   /// Session creation is not implemented for this service; calling it always traps with `fatalError`.
+   public func createSession(
+      parameters: CreateSessionParameters)
+      async throws -> SessionObject
+   {
+      fatalError("Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.")
+   }
+
    // MARK: Vector Store
 
    public func createVectorStore(
diff --git a/Sources/OpenAI/LocalModelService/LocalModelService.swift b/Sources/OpenAI/LocalModelService/LocalModelService.swift
@@ -262,7 +262,11 @@ struct LocalModelService: OpenAIService {
    func listBatch(after: String?, limit: Int?) async throws -> OpenAIResponse<BatchObject> {
       fatalError("Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.")
    }
-   
+
+   /// Session creation is not implemented for this service; calling it always traps with `fatalError`.
+   func createSession(
+      parameters: CreateSessionParameters)
+      async throws -> SessionObject
+   {
+      fatalError("Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.")
+   }
+
    func createVectorStore(parameters: VectorStoreParameter) async throws -> VectorStoreObject {
       fatalError("Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.")
    }
diff --git a/Sources/OpenAI/Private/Networking/OpenAIAPI.swift b/Sources/OpenAI/Private/Networking/OpenAIAPI.swift
@@ -25,6 +25,7 @@ enum OpenAIAPI {
    case runStep(RunStepCategory) // https://platform.openai.com/docs/api-reference/runs/step-object
    case thread(ThreadCategory) // https://platform.openai.com/docs/api-reference/threads
    case batch(BatchCategory) // https://platform.openai.com/docs/api-reference/batch
+   case session(SessionCategory) // https://platform.openai.com/docs/api-reference/realtime-sessions
    case vectorStore(VectorStoreCategory) // https://platform.openai.com/docs/api-reference/vector-stores
    case vectorStoreFile(VectorStoreFileCategory) // https://platform.openai.com/docs/api-reference/vector-stores-files
    case vectorStoreFileBatch(VectorStoreFileBatch) // https://platform.openai.com/docs/api-reference/vector-stores-file-batches
@@ -110,7 +111,11 @@ enum OpenAIAPI {
       case cancel(batchID: String)
       case list
    }
-   
+
+   /// Operations available for Realtime sessions; the associated value of `OpenAIAPI.session`.
+   enum SessionCategory {
+      /// Creates a new session.
+      case create
+   }
+
    enum VectorStoreCategory {
       case create
       case list
@@ -228,6 +233,10 @@ extension OpenAIAPI: Endpoint {
          case .create: return "\(version)/threads"
          case .retrieve(let threadID), .modify(let threadID), .delete(let threadID): return "\(version)/threads/\(threadID)"
          }
+      case .session(let category):
+         switch category {
+         case .create: return "\(version)/realtime/sessions"
+         }
       case .vectorStore(let category):
          switch category {
          case .create, .list: return "\(version)/vector_stores"
diff --git a/Sources/OpenAI/Public/Parameters/Sessions/CreateSessionParameters.swift b/Sources/OpenAI/Public/Parameters/Sessions/CreateSessionParameters.swift
@@ -0,0 +1,89 @@
+import Foundation
+
+/// Request payload used to create a Realtime session.
+///
+/// Every property is optional and the initializer defaults each one to `nil`.
+/// JSON keys are snake_case and are mapped explicitly through `CodingKeys`.
+public struct CreateSessionParameters: Codable {
+
+    // MARK: Properties
+
+    /// Input audio format: `pcm16`, `g711_ulaw`, or `g711_alaw`.
+    /// With `pcm16` the input must be 16-bit PCM, 24kHz sample rate, mono, little-endian.
+    public let inputAudioFormat: SessionObject.AudioFormat?
+
+    /// Noise-reduction settings applied to input audio.
+    public let inputAudioNoiseReduction: SessionObject.InputAudioNoiseReduction?
+
+    /// Transcription settings for input audio.
+    public let inputAudioTranscription: SessionObject.InputAudioTranscription?
+
+    /// Default system instructions (the system message) prepended to model calls.
+    public let instructions: String?
+
+    /// Upper bound on output tokens for one assistant response.
+    public let maxResponseOutputTokens: Int?
+
+    /// Modalities the model is allowed to respond with.
+    public let modalities: [SessionObject.Modality]?
+
+    /// Realtime model to use for the session.
+    public let model: String?
+
+    /// Output audio format.
+    public let outputAudioFormat: SessionObject.AudioFormat?
+
+    /// Sampling temperature; limited to the range [0.6, 1.2].
+    public let temperature: Double?
+
+    /// Strategy the model uses to pick tools.
+    public let toolChoice: ToolChoice?
+
+    /// Tools (functions) the model may call.
+    public let tools: [Tool]?
+
+    /// Turn-detection settings.
+    public let turnDetection: SessionObject.TurnDetection?
+
+    /// Voice used for the model's responses.
+    public let voice: String?
+
+    // MARK: Coding keys
+
+    enum CodingKeys: String, CodingKey {
+        // Keys whose JSON name differs from the Swift property name.
+        case inputAudioFormat = "input_audio_format"
+        case inputAudioNoiseReduction = "input_audio_noise_reduction"
+        case inputAudioTranscription = "input_audio_transcription"
+        case maxResponseOutputTokens = "max_response_output_tokens"
+        case outputAudioFormat = "output_audio_format"
+        case toolChoice = "tool_choice"
+        case turnDetection = "turn_detection"
+        // Keys that match the property name verbatim.
+        case instructions, modalities, model, temperature, tools, voice
+    }
+
+    // MARK: Initialization
+
+    /// Creates a parameter set; any argument left out stays `nil`.
+    public init(
+        inputAudioFormat: SessionObject.AudioFormat? = nil,
+        inputAudioNoiseReduction: SessionObject.InputAudioNoiseReduction? = nil,
+        inputAudioTranscription: SessionObject.InputAudioTranscription? = nil,
+        instructions: String? = nil,
+        maxResponseOutputTokens: Int? = nil,
+        modalities: [SessionObject.Modality]? = nil,
+        model: String? = nil,
+        outputAudioFormat: SessionObject.AudioFormat? = nil,
+        temperature: Double? = nil,
+        toolChoice: ToolChoice? = nil,
+        tools: [Tool]? = nil,
+        turnDetection: SessionObject.TurnDetection? = nil,
+        voice: String? = nil
+    ) {
+        // Model and behaviour.
+        self.model = model
+        self.instructions = instructions
+        self.modalities = modalities
+        self.temperature = temperature
+        self.maxResponseOutputTokens = maxResponseOutputTokens
+        self.voice = voice
+        // Audio configuration.
+        self.inputAudioFormat = inputAudioFormat
+        self.outputAudioFormat = outputAudioFormat
+        self.inputAudioNoiseReduction = inputAudioNoiseReduction
+        self.inputAudioTranscription = inputAudioTranscription
+        self.turnDetection = turnDetection
+        // Tooling.
+        self.tools = tools
+        self.toolChoice = toolChoice
+    }
+}
diff --git a/Sources/OpenAI/Public/ResponseModels/Sessions/SessionObject.swift b/Sources/OpenAI/Public/ResponseModels/Sessions/SessionObject.swift
@@ -0,0 +1,187 @@
+import Foundation
+
+/// A Realtime session, decoded from the response to a create-session request.
+///
+/// JSON keys are snake_case and mapped through `CodingKeys`. `id`, `object`, `model`,
+/// `modalities`, `tools`, `toolChoice` and `clientSecret` are non-optional, so decoding
+/// throws when the payload omits any of them.
+public struct SessionObject: Codable {
+    /// The unique identifier for the session.
+    public let id: String
+
+    /// The object type, which is always "realtime.session".
+    public let object: String
+
+    /// The model used for this session.
+    public let model: String
+
+    /// The set of modalities the model can respond with.
+    public let modalities: [Modality]
+
+    /// The default system instructions (i.e. system message) prepended to model calls.
+    public let instructions: String?
+
+    /// The voice the model uses to respond.
+    public let voice: String?
+
+    /// The format of input audio.
+    public let inputAudioFormat: AudioFormat?
+
+    /// The format of output audio.
+    public let outputAudioFormat: AudioFormat?
+
+    /// Configuration for input audio transcription.
+    public let inputAudioTranscription: InputAudioTranscription?
+
+    /// Configuration for turn detection.
+    public let turnDetection: TurnDetection?
+
+    /// Tools (functions) available to the model.
+    public let tools: [Tool]
+
+    /// How the model chooses tools.
+    public let toolChoice: ToolChoice
+
+    /// Sampling temperature for the model.
+    public let temperature: Double?
+
+    /// Maximum number of output tokens for a single assistant response.
+    public let maxResponseOutputTokens: MaxOutputTokens?
+
+    /// The client secret containing the ephemeral API token.
+    public let clientSecret: ClientSecret
+
+    /// Audio encodings accepted for session input and output.
+    public enum AudioFormat: String, Codable {
+        // The raw value defaults to the case name, so no explicit string is needed here.
+        case pcm16
+        case g711Ulaw = "g711_ulaw"
+        case g711Alaw = "g711_alaw"
+    }
+
+    /// Noise-reduction settings for input audio.
+    public struct InputAudioNoiseReduction: Codable {
+        /// The noise-reduction type, as a free-form string.
+        public let type: String?
+
+        public init(type: String? = nil) {
+            self.type = type
+        }
+    }
+
+    /// Transcription settings for input audio; every field is optional.
+    public struct InputAudioTranscription: Codable {
+        /// The transcription model name.
+        public let model: String?
+        /// The language of the input audio.
+        public let language: String?
+        /// A prompt passed to the transcription model.
+        public let prompt: String?
+
+        public init(model: String? = nil, language: String? = nil, prompt: String? = nil) {
+            self.model = model
+            self.language = language
+            self.prompt = prompt
+        }
+    }
+
+    /// Output modalities a session can produce.
+    public enum Modality: String, Codable {
+        // Raw values default to the case names ("text", "audio").
+        case text
+        case audio
+    }
+
+    /// Turn-detection settings for a session.
+    ///
+    /// Only `type` is required; the other fields are optional tuning values.
+    /// NOTE(review): the exact semantics of each field are defined by the Realtime API
+    /// reference and are not visible in this file — confirm them there.
+    public struct TurnDetection: Codable {
+        /// Encoded as `create_response`.
+        public let createResponse: Bool?
+        /// Eagerness level (`low`, `medium`, `high` or `auto`).
+        public let eagerness: TurnDetectionEagerness?
+        /// Encoded as `interrupt_response`.
+        public let interruptResponse: Bool?
+        /// Encoded as `prefix_padding_ms`; presumably milliseconds, per the name.
+        public let prefixPaddingMs: Int?
+        /// Encoded as `silence_duration_ms`; presumably milliseconds, per the name.
+        public let silenceDurationMs: Int?
+        /// Detection threshold.
+        public let threshold: Double?
+        /// The detection mode.
+        public let type: TurnDetectionType
+
+        /// Available detection modes.
+        ///
+        /// NOTE(review): `.none` is easy to confuse with `Optional.none` when the value is
+        /// reached through an optional (e.g. `turnDetection?.type`); spell out the type
+        /// name when comparing.
+        public enum TurnDetectionType: String, Codable, Sendable {
+            case serverVad = "server_vad"
+            case semanticVad = "semantic_vad"
+            case none
+        }
+
+        /// Eagerness levels for turn detection.
+        public enum TurnDetectionEagerness: String, Codable, Sendable {
+            case low
+            case high
+            case auto
+            case medium
+        }
+
+        enum CodingKeys: String, CodingKey {
+            case createResponse = "create_response"
+            case eagerness
+            case interruptResponse = "interrupt_response"
+            case prefixPaddingMs = "prefix_padding_ms"
+            case silenceDurationMs = "silence_duration_ms"
+            case threshold
+            case type
+        }
+
+        public init(
+            type: TurnDetectionType,
+            createResponse: Bool? = nil,
+            eagerness: TurnDetectionEagerness? = nil,
+            interruptResponse: Bool? = nil,
+            prefixPaddingMs: Int? = nil,
+            silenceDurationMs: Int? = nil,
+            threshold: Double? = nil
+        ) {
+            self.type = type
+            self.createResponse = createResponse
+            self.eagerness = eagerness
+            self.interruptResponse = interruptResponse
+            self.prefixPaddingMs = prefixPaddingMs
+            self.silenceDurationMs = silenceDurationMs
+            self.threshold = threshold
+        }
+    }
+
+    /// The ephemeral credential issued with a session.
+    public struct ClientSecret: Codable {
+        /// The ephemeral API token value.
+        public let value: String
+
+        /// The Unix timestamp (in seconds) when the token expires.
+        public let expiresAt: Int
+
+        private enum CodingKeys: String, CodingKey {
+            case value
+            case expiresAt = "expires_at"
+        }
+    }
+
+    /// An output-token limit that is either an integer count or the literal string `"inf"`.
+    public enum MaxOutputTokens: Codable {
+        /// The literal `"inf"` value.
+        case inf
+        /// A concrete token count, encoded as a JSON integer.
+        case int(Int)
+
+        /// Wire representation of the `.inf` case.
+        private static let infinityToken = "inf"
+
+        /// Decodes a single JSON value that is either an integer or the string `"inf"`.
+        ///
+        /// - Throws: `DecodingError.typeMismatch` when the value is neither an integer nor a
+        ///   string, and `DecodingError.dataCorrupted` when it is a string other than `"inf"`.
+        public init(from decoder: Decoder) throws {
+            let container = try decoder.singleValueContainer()
+            if let count = try? container.decode(Int.self) {
+                self = .int(count)
+                return
+            }
+            guard let text = try? container.decode(String.self) else {
+                // Neither an integer nor a string: the JSON type itself is wrong.
+                throw DecodingError.typeMismatch(MaxOutputTokens.self, DecodingError.Context(codingPath: decoder.codingPath, debugDescription: "Wrong type for MaxOutputTokens"))
+            }
+            guard text == Self.infinityToken else {
+                // The type is right (a string) but the value is not one we recognise.
+                throw DecodingError.dataCorruptedError(in: container, debugDescription: "Unexpected MaxOutputTokens string \"\(text)\"; expected \"inf\"")
+            }
+            self = .inf
+        }
+
+        /// Encodes `.inf` as the string `"inf"` and `.int` as a bare integer.
+        public func encode(to encoder: Encoder) throws {
+            var container = encoder.singleValueContainer()
+            switch self {
+            case .inf:
+                try container.encode(Self.infinityToken)
+            case .int(let value):
+                try container.encode(value)
+            }
+        }
+    }
+
+    private enum CodingKeys: String, CodingKey {
+        case id
+        case object
+        case model
+        case modalities
+        case instructions
+        case voice
+        case inputAudioFormat = "input_audio_format"
+        case outputAudioFormat = "output_audio_format"
+        case inputAudioTranscription = "input_audio_transcription"
+        case turnDetection = "turn_detection"
+        case tools
+        case toolChoice = "tool_choice"
+        case temperature
+        case maxResponseOutputTokens = "max_response_output_tokens"
+        case clientSecret = "client_secret"
+    }
+}
diff --git a/Sources/OpenAI/Public/Service/DefaultOpenAIService.swift b/Sources/OpenAI/Public/Service/DefaultOpenAIService.swift
@@ -634,7 +634,17 @@ struct DefaultOpenAIService: OpenAIService {
       let request = try OpenAIAPI.batch(.list).request(apiKey: apiKey, openAIEnvironment: openAIEnvironment, organizationID: organizationID, method: .get, queryItems: queryItems, extraHeaders: extraHeaders)
       return try await fetch(debugEnabled: debugEnabled, type: OpenAIResponse<BatchObject>.self, with: request)
    }
-   
+
+   // MARK: Session
+
+   /// Creates a Realtime session by sending `parameters` to the `realtime/sessions` endpoint.
+   ///
+   /// - Parameter parameters: The session configuration passed as the request `params`.
+   /// - Returns: The `SessionObject` decoded from the response.
+   /// - Throws: Any error raised while building the request or by `fetch`.
+   func createSession(
+      parameters: CreateSessionParameters)
+      async throws -> SessionObject
+   {
+      // No `betaHeaderField` here: this is not an Assistants endpoint, so the Assistants v2 beta header does not apply.
+      let request = try OpenAIAPI.session(.create).request(apiKey: apiKey, openAIEnvironment: openAIEnvironment, organizationID: organizationID, method: .post, params: parameters, extraHeaders: extraHeaders)
+      return try await fetch(debugEnabled: debugEnabled, type: SessionObject.self, with: request)
+   }
+
    // MARK: Vector Store
 
    func createVectorStore(
diff --git a/Sources/OpenAI/Public/Service/OpenAIService.swift b/Sources/OpenAI/Public/Service/OpenAIService.swift
@@ -757,7 +757,20 @@ public protocol OpenAIService {
       after: String?,
       limit: Int?)
       async throws -> OpenAIResponse<BatchObject>
-   
+
+   // MARK: Session
+
+   /// Creates a Realtime session.
+   ///
+   /// - Parameter parameters: The configuration for the new session.
+   /// - Returns: A [Session](https://platform.openai.com/docs/api-reference/realtime-sessions) object, including its ephemeral client secret.
+   /// - Throws: An error if the request fails.
+   ///
+   /// For more information, refer to [OpenAI's Session API documentation](https://platform.openai.com/docs/api-reference/realtime-sessions/create).
+   func createSession(
+      parameters: CreateSessionParameters)
+      async throws -> SessionObject
+
    // MARK: Vector Store
    
    /// Create a vector store.

Original file line number	Diff line number	Diff line change
`@@ -593,7 +593,13 @@ final public class DefaultOpenAIAzureService: OpenAIService {`
`593`	`593`	`{`
`594`	`594`	`fatalError("Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.")`
`595`	`595`	`}`
`596`		`-`
	`596`	`+`
	`597`	`+ // MARK: Session`
	`598`	`+`
	`599`	`+ public func createSession(parameters: CreateSessionParameters) async throws -> SessionObject {`
	`600`	`+ fatalError("Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.")`
	`601`	`+ }`
	`602`	`+`
`597`	`603`	`// MARK: Vector Store`
`598`	`604`
`599`	`605`	`public func createVectorStore(`
Original file line number	Diff line number	Diff line change
`@@ -262,7 +262,11 @@ struct LocalModelService: OpenAIService {`
`262`	`262`	`func listBatch(after: String?, limit: Int?) async throws -> OpenAIResponse<BatchObject> {`
`263`	`263`	`fatalError("Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.")`
`264`	`264`	`}`
`265`		`-`
	`265`	`+`
	`266`	`+ func createSession(parameters: CreateSessionParameters) async throws -> SessionObject {`
	`267`	`+ fatalError("Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.")`
	`268`	`+ }`
	`269`	`+`
`266`	`270`	`func createVectorStore(parameters: VectorStoreParameter) async throws -> VectorStoreObject {`
`267`	`271`	`fatalError("Currently, this API is not supported. We welcome and encourage contributions to our open-source project. Please consider opening an issue or submitting a pull request to add support for this feature.")`
`268`	`272`	`}`