@@ -57,6 +57,7 @@ def create(
         *,
         file: FileTypes,
         model: Union[str, AudioModel],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
@@ -118,6 +119,7 @@ def create(
         file: FileTypes,
         model: Union[str, AudioModel],
         stream: Literal[True],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -152,6 +154,11 @@ def create(
 
               Note: Streaming is not supported for the `whisper-1` model and will be ignored.
 
+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. A `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
           include: Additional information to include in the transcription response. `logprobs` will
               return the log probabilities of the tokens in the response to understand the
               model's confidence in the transcription. `logprobs` only works with
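
(Annotation, not part of the commit: a minimal sketch of the new `chunking_strategy` parameter in use, assuming an `OPENAI_API_KEY` in the environment and an illustrative local file name.)

```python
from openai import OpenAI

client = OpenAI()

# "auto" lets the server normalize loudness and pick chunk
# boundaries with voice activity detection (VAD).
with open("meeting.mp3", "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        file=audio_file,
        model="gpt-4o-transcribe",
        chunking_strategy="auto",
    )
print(transcription.text)
```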
@@ -200,6 +207,7 @@ def create(
         file: FileTypes,
         model: Union[str, AudioModel],
         stream: bool,
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -234,6 +242,11 @@ def create(
 
               Note: Streaming is not supported for the `whisper-1` model and will be ignored.
 
+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. A `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
           include: Additional information to include in the transcription response. `logprobs` will
               return the log probabilities of the tokens in the response to understand the
               model's confidence in the transcription. `logprobs` only works with
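
(Annotation: a sketch of the `include=["logprobs"]` option documented above; per the docstring it requires a `gpt-4o-*-transcribe` model with `response_format="json"`. The `transcription.logprobs` attribute is assumed from the SDK's `Transcription` model.)

```python
from openai import OpenAI

client = OpenAI()

with open("meeting.mp3", "rb") as audio_file:  # illustrative file name
    transcription = client.audio.transcriptions.create(
        file=audio_file,
        model="gpt-4o-mini-transcribe",
        response_format="json",
        include=["logprobs"],  # token log probabilities alongside the text
    )
print(transcription.logprobs)
```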
@@ -281,6 +294,7 @@ def create(
         *,
         file: FileTypes,
         model: Union[str, AudioModel],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -299,6 +313,7 @@ def create(
                 {
                     "file": file,
                     "model": model,
+                    "chunking_strategy": chunking_strategy,
                     "include": include,
                     "language": language,
                     "prompt": prompt,
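
(Annotation: besides `"auto"`, the `chunking_strategy` passed through the body above also accepts a `server_vad` object for manual tuning. The field names follow the API reference's `server_vad` object; the numeric values here are illustrative assumptions.)

```python
from openai import OpenAI

client = OpenAI()

# Sketch: tuning VAD chunking manually instead of "auto".
with open("meeting.mp3", "rb") as audio_file:  # illustrative file name
    transcription = client.audio.transcriptions.create(
        file=audio_file,
        model="gpt-4o-transcribe",
        chunking_strategy={
            "type": "server_vad",
            "threshold": 0.5,            # speech-detection sensitivity (0-1)
            "prefix_padding_ms": 300,    # audio kept before detected speech
            "silence_duration_ms": 500,  # trailing silence that closes a chunk
        },
    )
```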
@@ -357,6 +372,8 @@ async def create(
         *,
         file: FileTypes,
         model: Union[str, AudioModel],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
+        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -369,7 +386,73 @@ async def create(
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Transcription: ...
+    ) -> TranscriptionCreateResponse:
+        """
+        Transcribes audio into the input language.
+
+        Args:
+          file:
+              The audio file object (not file name) to transcribe, in one of these formats:
+              flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+
+          model: ID of the model to use. The options are `gpt-4o-transcribe`,
+              `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
+              Whisper V2 model).
+
+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. A `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
+          include: Additional information to include in the transcription response. `logprobs` will
+              return the log probabilities of the tokens in the response to understand the
+              model's confidence in the transcription. `logprobs` only works with
+              response_format set to `json` and only with the models `gpt-4o-transcribe` and
+              `gpt-4o-mini-transcribe`.
+
+          language: The language of the input audio. Supplying the input language in
+              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
+              format will improve accuracy and latency.
+
+          prompt: An optional text to guide the model's style or continue a previous audio
+              segment. The
+              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
+              should match the audio language.
+
+          response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
+              `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
+              the only supported format is `json`.
+
+          stream: If set to true, the model response data will be streamed to the client as it is
+              generated using
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+              See the
+              [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions)
+              for more information.
+
+              Note: Streaming is not supported for the `whisper-1` model and will be ignored.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          timestamp_granularities: The timestamp granularities to populate for this transcription.
+              `response_format` must be set to `verbose_json` to use timestamp granularities.
+              Either or both of these options are supported: `word` or `segment`. Note: There
+              is no additional latency for segment timestamps, but generating word timestamps
+              incurs additional latency.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        ...
 
     @overload
     async def create(
@@ -418,6 +501,7 @@ async def create(
         file: FileTypes,
         model: Union[str, AudioModel],
         stream: Literal[True],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -452,6 +536,11 @@ async def create(
 
               Note: Streaming is not supported for the `whisper-1` model and will be ignored.
 
+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. A `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
           include: Additional information to include in the transcription response. `logprobs` will
               return the log probabilities of the tokens in the response to understand the
               model's confidence in the transcription. `logprobs` only works with
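
(Annotation: a sketch of the async streaming path this overload covers. The `transcript.text.delta` event type and `event.delta` attribute are assumed from the SDK's transcription streaming events; streaming is ignored for `whisper-1`.)

```python
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    with open("meeting.mp3", "rb") as audio_file:  # illustrative file name
        stream = await client.audio.transcriptions.create(
            file=audio_file,
            model="gpt-4o-mini-transcribe",
            stream=True,
            chunking_strategy="auto",  # VAD-chosen boundaries while streaming
        )
        async for event in stream:
            if event.type == "transcript.text.delta":
                print(event.delta, end="", flush=True)


asyncio.run(main())
```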
@@ -500,6 +589,7 @@ async def create(
         file: FileTypes,
         model: Union[str, AudioModel],
         stream: bool,
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -534,6 +624,11 @@ async def create(
 
               Note: Streaming is not supported for the `whisper-1` model and will be ignored.
 
+          chunking_strategy: Controls how the audio is cut into chunks. When set to `"auto"`, the server
+              first normalizes loudness and then uses voice activity detection (VAD) to choose
+              boundaries. A `server_vad` object can be provided to tweak VAD detection
+              parameters manually. If unset, the audio is transcribed as a single block.
+
           include: Additional information to include in the transcription response. `logprobs` will
               return the log probabilities of the tokens in the response to understand the
               model's confidence in the transcription. `logprobs` only works with
@@ -581,6 +676,7 @@ async def create(
         *,
         file: FileTypes,
         model: Union[str, AudioModel],
+        chunking_strategy: Optional[transcription_create_params.ChunkingStrategy] | NotGiven = NOT_GIVEN,
         include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
         language: str | NotGiven = NOT_GIVEN,
         prompt: str | NotGiven = NOT_GIVEN,
@@ -599,6 +695,7 @@ async def create(
                 {
                     "file": file,
                     "model": model,
+                    "chunking_strategy": chunking_strategy,
                     "include": include,
                     "language": language,
                     "prompt": prompt,
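
(Annotation: a closing sketch for the `timestamp_granularities` option documented above; per the docstring it requires `response_format="verbose_json"`, and word-level timestamps are a `whisper-1` feature. The `transcription.words` attribute is assumed from the SDK's `TranscriptionVerbose` model.)

```python
from openai import OpenAI

client = OpenAI()

# Sketch: word-level timestamps ("segment" adds no extra latency,
# "word" does).
with open("meeting.mp3", "rb") as audio_file:  # illustrative file name
    transcription = client.audio.transcriptions.create(
        file=audio_file,
        model="whisper-1",
        response_format="verbose_json",
        timestamp_granularities=["word"],
    )
for word in transcription.words:
    print(word.word, word.start, word.end)
```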