11// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
22
33import { APIResource } from '../../core/resource' ;
4+ import * as ClientSecretsAPI from './client-secrets' ;
45import * as RealtimeAPI from './realtime' ;
56import * as ResponsesAPI from '../responses/responses' ;
67import { APIPromise } from '../../core/api-promise' ;
@@ -39,14 +40,19 @@ export interface RealtimeSessionClientSecret {
3940 */
4041export interface RealtimeSessionCreateResponse {
4142 /**
42- * Configuration for input and output audio .
43+ * Ephemeral key returned by the API .
4344 */
44- audio ?: RealtimeSessionCreateResponse . Audio ;
45+ client_secret : RealtimeSessionClientSecret ;
4546
4647 /**
47- * Ephemeral key returned by the API.
48+ * The type of session to create. Always `realtime` for the Realtime API.
49+ */
50+ type : 'realtime' ;
51+
52+ /**
53+ * Configuration for input and output audio.
4854 */
49- client_secret ?: RealtimeSessionClientSecret ;
55+ audio ?: RealtimeSessionCreateResponse . Audio ;
5056
5157 /**
5258 * Additional fields to include in server outputs.
@@ -115,7 +121,7 @@ export interface RealtimeSessionCreateResponse {
115121 /**
116122 * Tools available to the model.
117123 */
118- tools ?: Array < RealtimeAPI . Models | RealtimeSessionCreateResponse . McpTool > ;
124+ tools ?: Array < RealtimeAPI . RealtimeFunctionTool | RealtimeSessionCreateResponse . McpTool > ;
119125
120126 /**
121127 * Realtime API can write session traces to the
@@ -132,11 +138,6 @@ export interface RealtimeSessionCreateResponse {
132138 * The default is `auto`.
133139 */
134140 truncation ?: RealtimeAPI . RealtimeTruncation ;
135-
136- /**
137- * The type of session to create. Always `realtime` for the Realtime API.
138- */
139- type ?: 'realtime' ;
140141}
141142
142143export namespace RealtimeSessionCreateResponse {
@@ -238,7 +239,7 @@ export namespace RealtimeSessionCreateResponse {
238239
239240 /**
240241 * Optional idle timeout after which turn detection will auto-timeout when no
241- * additional audio is received.
242+ * additional audio is received and emits a `timeout_triggered` event .
242243 */
243244 idle_timeout_ms ?: number | null ;
244245
@@ -491,87 +492,90 @@ export namespace RealtimeSessionCreateResponse {
491492}
492493
493494/**
494- * Ephemeral key returned by the API. Only present when the session is created on
495- * the server via REST API.
495+ * A Realtime transcription session configuration object.
496496 */
497- export interface RealtimeTranscriptionSessionClientSecret {
498- /**
499- * Timestamp for when the token expires. Currently, all tokens expire after one
500- * minute.
501- */
502- expires_at : number ;
503-
497+ export interface RealtimeTranscriptionSessionCreateResponse {
504498 /**
505- * Ephemeral key usable in client environments to authenticate connections to the
506- * Realtime API. Use this in client-side environments rather than a standard API
507- * token, which should only be used server-side.
499+ * Unique identifier for the session that looks like `sess_1234567890abcdef`.
508500 */
509- value : string ;
510- }
501+ id : string ;
511502
512- /**
513- * A new Realtime transcription session configuration.
514- *
515- * When a session is created on the server via REST API, the session object also
516- * contains an ephemeral key. Default TTL for keys is 10 minutes. This property is
517- * not present when a session is updated via the WebSocket API.
518- */
519- export interface RealtimeTranscriptionSessionCreateResponse {
520503 /**
521- * Ephemeral key returned by the API. Only present when the session is created on
522- * the server via REST API.
504+ * The object type. Always `realtime.transcription_session`.
523505 */
524- client_secret : RealtimeTranscriptionSessionClientSecret ;
506+ object : string ;
525507
526508 /**
527- * The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw` .
509+ * The type of session. Always `transcription` for transcription sessions .
528510 */
529- input_audio_format ?: string ;
511+ type : 'transcription' ;
530512
531513 /**
532- * Configuration of the transcription model .
514+ * Configuration for input audio for the session .
533515 */
534- input_audio_transcription ?: RealtimeTranscriptionSessionInputAudioTranscription ;
516+ audio ?: RealtimeTranscriptionSessionCreateResponse . Audio ;
535517
536518 /**
537- * The set of modalities the model can respond with. To disable audio, set this to
538- * ["text"].
519+ * Expiration timestamp for the session, in seconds since epoch.
539520 */
540- modalities ?: Array < 'text' | 'audio' > ;
521+ expires_at ?: number ;
541522
542523 /**
543- * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
544- * means that the model will detect the start and end of speech based on audio
545- * volume and respond at the end of user speech.
524+ * Additional fields to include in server outputs.
525+ *
526+ * - `item.input_audio_transcription.logprobs`: Include logprobs for input audio
527+ * transcription.
546528 */
547- turn_detection ?: RealtimeTranscriptionSessionTurnDetection ;
529+ include ?: Array < 'item.input_audio_transcription.logprobs' > ;
548530}
549531
550- /**
551- * Configuration of the transcription model.
552- */
553- export interface RealtimeTranscriptionSessionInputAudioTranscription {
532+ export namespace RealtimeTranscriptionSessionCreateResponse {
554533 /**
555- * The language of the input audio. Supplying the input language in
556- * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
557- * format will improve accuracy and latency.
534+ * Configuration for input audio for the session.
558535 */
559- language ?: string ;
536+ export interface Audio {
537+ input ?: Audio . Input ;
538+ }
560539
561- /**
562- * The model to use for transcription. Current options are `whisper-1`,
563- * `gpt-4o-transcribe-latest`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`.
564- */
565- model ?: 'whisper-1' | 'gpt-4o-transcribe-latest' | 'gpt-4o-mini-transcribe' | 'gpt-4o-transcribe' ;
540+ export namespace Audio {
541+ export interface Input {
542+ /**
543+ * The PCM audio format. Only a 24kHz sample rate is supported.
544+ */
545+ format ?: RealtimeAPI . RealtimeAudioFormats ;
566546
567- /**
568- * An optional text to guide the model's style or continue a previous audio
569- * segment. For `whisper-1`, the
570- * [prompt is a list of keywords](https://platform.openai.com/docs/guides/speech-to-text#prompting).
571- * For `gpt-4o-transcribe` models, the prompt is a free text string, for example
572- * "expect words related to technology".
573- */
574- prompt ?: string ;
547+ /**
548+ * Configuration for input audio noise reduction.
549+ */
550+ noise_reduction ?: Input . NoiseReduction ;
551+
552+ /**
553+ * Configuration of the transcription model.
554+ */
555+ transcription ?: RealtimeAPI . AudioTranscription ;
556+
557+ /**
558+ * Configuration for turn detection. Can be set to `null` to turn off. Server VAD
559+ * means that the model will detect the start and end of speech based on audio
560+ * volume and respond at the end of user speech.
561+ */
562+ turn_detection ?: ClientSecretsAPI . RealtimeTranscriptionSessionTurnDetection ;
563+ }
564+
565+ export namespace Input {
566+ /**
567+ * Configuration for input audio noise reduction.
568+ */
569+ export interface NoiseReduction {
570+ /**
571+ * Type of noise reduction. `near_field` is for close-talking microphones such as
572+ * headphones, `far_field` is for far-field microphones such as laptop or
573+ * conference room microphones.
574+ */
575+ type ?: RealtimeAPI . NoiseReductionType ;
576+ }
577+ }
578+ }
575579}
576580
577581/**
@@ -670,9 +674,7 @@ export declare namespace ClientSecrets {
670674 export {
671675 type RealtimeSessionClientSecret as RealtimeSessionClientSecret ,
672676 type RealtimeSessionCreateResponse as RealtimeSessionCreateResponse ,
673- type RealtimeTranscriptionSessionClientSecret as RealtimeTranscriptionSessionClientSecret ,
674677 type RealtimeTranscriptionSessionCreateResponse as RealtimeTranscriptionSessionCreateResponse ,
675- type RealtimeTranscriptionSessionInputAudioTranscription as RealtimeTranscriptionSessionInputAudioTranscription ,
676678 type RealtimeTranscriptionSessionTurnDetection as RealtimeTranscriptionSessionTurnDetection ,
677679 type ClientSecretCreateResponse as ClientSecretCreateResponse ,
678680 type ClientSecretCreateParams as ClientSecretCreateParams ,
0 commit comments