
Commit ed9e2dd

feat(api): Realtime API token_limits, Hybrid searching ranking options
1 parent: 0393d90

19 files changed: +273 −59 lines changed

.stats.yml

Lines changed: 3 additions & 3 deletions
@@ -1,4 +1,4 @@
 configured_endpoints: 136
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-f68f718cd45ac3f9336603601bccc38a718af44d0b26601031de3d0a71b7ce2f.yml
-openapi_spec_hash: 1560717860bba4105936647dde8f618d
-config_hash: 50ee3382a63c021a9f821a935950e926
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-3c5d1593d7c6f2b38a7d78d7906041465ee9d6e9022f0651e1da194654488108.yml
+openapi_spec_hash: 0a4d8ad2469823ce24a3fd94f23f1c2b
+config_hash: 032995825500a503a76da119f5354905

src/openai/resources/images.py

Lines changed: 24 additions & 6 deletions
@@ -168,7 +168,10 @@ def edit(
           If `transparent`, the output format needs to support transparency, so it should
           be set to either `png` (default value) or `webp`.

-          input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+          input_fidelity: Control how much effort the model will exert to match the style and features,
+              especially facial features, of input images. This parameter is only supported
+              for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+              `low`. Defaults to `low`.

           mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
               indicate where `image` should be edited. If there are multiple images provided,
@@ -282,7 +285,10 @@ def edit(
           If `transparent`, the output format needs to support transparency, so it should
           be set to either `png` (default value) or `webp`.

-          input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+          input_fidelity: Control how much effort the model will exert to match the style and features,
+              especially facial features, of input images. This parameter is only supported
+              for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+              `low`. Defaults to `low`.

           mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
               indicate where `image` should be edited. If there are multiple images provided,
@@ -392,7 +398,10 @@ def edit(
           If `transparent`, the output format needs to support transparency, so it should
           be set to either `png` (default value) or `webp`.

-          input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+          input_fidelity: Control how much effort the model will exert to match the style and features,
+              especially facial features, of input images. This parameter is only supported
+              for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+              `low`. Defaults to `low`.

           mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
               indicate where `image` should be edited. If there are multiple images provided,
@@ -1046,7 +1055,10 @@ async def edit(
           If `transparent`, the output format needs to support transparency, so it should
           be set to either `png` (default value) or `webp`.

-          input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+          input_fidelity: Control how much effort the model will exert to match the style and features,
+              especially facial features, of input images. This parameter is only supported
+              for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+              `low`. Defaults to `low`.

           mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
               indicate where `image` should be edited. If there are multiple images provided,
@@ -1160,7 +1172,10 @@ async def edit(
           If `transparent`, the output format needs to support transparency, so it should
           be set to either `png` (default value) or `webp`.

-          input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+          input_fidelity: Control how much effort the model will exert to match the style and features,
+              especially facial features, of input images. This parameter is only supported
+              for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+              `low`. Defaults to `low`.

           mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
               indicate where `image` should be edited. If there are multiple images provided,
@@ -1270,7 +1285,10 @@ async def edit(
           If `transparent`, the output format needs to support transparency, so it should
           be set to either `png` (default value) or `webp`.

-          input_fidelity: Control how much effort the model will exert to match the style and features, especially facial features, of input images. This parameter is only supported for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+          input_fidelity: Control how much effort the model will exert to match the style and features,
+              especially facial features, of input images. This parameter is only supported
+              for `gpt-image-1`. Unsupported for `gpt-image-1-mini`. Supports `high` and
+              `low`. Defaults to `low`.

           mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
               indicate where `image` should be edited. If there are multiple images provided,
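For reference, a minimal usage sketch of the parameter whose docstring was rewrapped above. The call follows the `images.edit` signature in this file; the file name and prompt are illustrative.

```python
from openai import OpenAI

client = OpenAI()

# Ask the model to stay close to the source image's features, especially
# facial features. Only supported for `gpt-image-1`; defaults to "low".
result = client.images.edit(
    model="gpt-image-1",
    image=open("portrait.png", "rb"),  # illustrative input file
    prompt="Add a red scarf to the subject",
    input_fidelity="high",
)
```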

src/openai/resources/realtime/calls.py

Lines changed: 26 additions & 4 deletions
@@ -195,8 +195,19 @@ def accept(
           `auto` will create a trace for the session with default values for the workflow
           name, group id, and metadata.

-          truncation: Controls how the realtime conversation is truncated prior to model inference.
-              The default is `auto`.
+          truncation: When the number of tokens in a conversation exceeds the model's input token
+              limit, the conversation will be truncated, meaning messages (starting from the
+              oldest) will not be included in the model's context. A 32k context model with
+              4,096 max output tokens can only include 28,224 tokens in the context before
+              truncation occurs. Clients can configure truncation behavior to truncate with a
+              lower max token limit, which is an effective way to control token usage and
+              cost. Truncation will reduce the number of cached tokens on the next turn
+              (busting the cache), since messages are dropped from the beginning of the
+              context. However, clients can also configure truncation to retain messages up to
+              a fraction of the maximum context size, which will reduce the need for future
+              truncations and thus improve the cache rate. Truncation can be disabled
+              entirely, which means the server will never truncate but would instead return an
+              error if the conversation exceeds the model's input token limit.

           extra_headers: Send extra headers
@@ -504,8 +515,19 @@ async def accept(
           `auto` will create a trace for the session with default values for the workflow
           name, group id, and metadata.

-          truncation: Controls how the realtime conversation is truncated prior to model inference.
-              The default is `auto`.
+          truncation: When the number of tokens in a conversation exceeds the model's input token
+              limit, the conversation will be truncated, meaning messages (starting from the
+              oldest) will not be included in the model's context. A 32k context model with
+              4,096 max output tokens can only include 28,224 tokens in the context before
+              truncation occurs. Clients can configure truncation behavior to truncate with a
+              lower max token limit, which is an effective way to control token usage and
+              cost. Truncation will reduce the number of cached tokens on the next turn
+              (busting the cache), since messages are dropped from the beginning of the
+              context. However, clients can also configure truncation to retain messages up to
+              a fraction of the maximum context size, which will reduce the need for future
+              truncations and thus improve the cache rate. Truncation can be disabled
+              entirely, which means the server will never truncate but would instead return an
+              error if the conversation exceeds the model's input token limit.

           extra_headers: Send extra headers
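A hedged sketch of the behavior the new docstring describes: the retention-ratio form of `RealtimeTruncationParam` retains messages up to a fraction of the maximum context size, while the strings `auto` and `disabled` select the default and no-truncation modes. The call ID, model name, and 0.5 ratio below are illustrative.

```python
from openai import OpenAI

client = OpenAI()

# Retain roughly half the context window after each truncation pass. This
# reduces how often truncation recurs and improves the prefix-cache hit
# rate, at the cost of a smaller usable context.
client.realtime.calls.accept(
    "rtc_abc123",  # illustrative call ID
    type="realtime",
    model="gpt-realtime",
    truncation={"type": "retention_ratio", "retention_ratio": 0.5},
)
```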

src/openai/resources/vector_stores/file_batches.py

Lines changed: 28 additions & 12 deletions
@@ -52,9 +52,10 @@ def create(
         self,
         vector_store_id: str,
         *,
-        file_ids: SequenceNotStr[str],
         attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit,
         chunking_strategy: FileChunkingStrategyParam | Omit = omit,
+        file_ids: SequenceNotStr[str] | Omit = omit,
+        files: Iterable[file_batch_create_params.File] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -66,10 +67,6 @@ def create(
         Create a vector store file batch.

         Args:
-          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
-              the vector store should use. Useful for tools like `file_search` that can access
-              files.
-
           attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful
               for storing additional information about the object in a structured format, and
               querying for objects via API or the dashboard. Keys are strings with a maximum
@@ -79,6 +76,16 @@ def create(
           chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
               strategy. Only applicable if `file_ids` is non-empty.

+          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
+              the vector store should use. Useful for tools like `file_search` that can access
+              files. If `attributes` or `chunking_strategy` are provided, they will be applied
+              to all files in the batch. Mutually exclusive with `files`.
+
+          files: A list of objects that each include a `file_id` plus optional `attributes` or
+              `chunking_strategy`. Use this when you need to override metadata for specific
+              files. The global `attributes` or `chunking_strategy` will be ignored and must
+              be specified for each file. Mutually exclusive with `file_ids`.
+
           extra_headers: Send extra headers

           extra_query: Add additional query parameters to the request
@@ -94,9 +101,10 @@ def create(
             f"/vector_stores/{vector_store_id}/file_batches",
             body=maybe_transform(
                 {
-                    "file_ids": file_ids,
                     "attributes": attributes,
                     "chunking_strategy": chunking_strategy,
+                    "file_ids": file_ids,
+                    "files": files,
                 },
                 file_batch_create_params.FileBatchCreateParams,
             ),
@@ -389,9 +397,10 @@ async def create(
         self,
         vector_store_id: str,
         *,
-        file_ids: SequenceNotStr[str],
         attributes: Optional[Dict[str, Union[str, float, bool]]] | Omit = omit,
         chunking_strategy: FileChunkingStrategyParam | Omit = omit,
+        file_ids: SequenceNotStr[str] | Omit = omit,
+        files: Iterable[file_batch_create_params.File] | Omit = omit,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
         # The extra values given here take precedence over values defined on the client or passed to this method.
         extra_headers: Headers | None = None,
@@ -403,10 +412,6 @@ async def create(
         Create a vector store file batch.

         Args:
-          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
-              the vector store should use. Useful for tools like `file_search` that can access
-              files.
-
           attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful
               for storing additional information about the object in a structured format, and
               querying for objects via API or the dashboard. Keys are strings with a maximum
@@ -416,6 +421,16 @@ async def create(
           chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
               strategy. Only applicable if `file_ids` is non-empty.

+          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
+              the vector store should use. Useful for tools like `file_search` that can access
+              files. If `attributes` or `chunking_strategy` are provided, they will be applied
+              to all files in the batch. Mutually exclusive with `files`.
+
+          files: A list of objects that each include a `file_id` plus optional `attributes` or
+              `chunking_strategy`. Use this when you need to override metadata for specific
+              files. The global `attributes` or `chunking_strategy` will be ignored and must
+              be specified for each file. Mutually exclusive with `file_ids`.
+
           extra_headers: Send extra headers

           extra_query: Add additional query parameters to the request
@@ -431,9 +446,10 @@ async def create(
             f"/vector_stores/{vector_store_id}/file_batches",
             body=await async_maybe_transform(
                 {
-                    "file_ids": file_ids,
                     "attributes": attributes,
                     "chunking_strategy": chunking_strategy,
+                    "file_ids": file_ids,
+                    "files": files,
                 },
                 file_batch_create_params.FileBatchCreateParams,
             ),
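A sketch of the two now mutually exclusive request forms, with illustrative IDs and values: `file_ids` for a uniform batch, `files` when individual entries need their own `attributes` or `chunking_strategy`.

```python
from openai import OpenAI

client = OpenAI()

# Uniform batch: one chunking strategy applied to every file in the batch.
client.vector_stores.file_batches.create(
    vector_store_id="vs_abc123",  # illustrative ID
    file_ids=["file-111", "file-222"],
    chunking_strategy={"type": "auto"},
)

# Per-file batch: each entry carries its own metadata. In this form the
# global `attributes`/`chunking_strategy` arguments are ignored and must be
# given per file.
client.vector_stores.file_batches.create(
    vector_store_id="vs_abc123",
    files=[
        {"file_id": "file-111", "attributes": {"region": "emea"}},
        {
            "file_id": "file-222",
            "chunking_strategy": {
                "type": "static",
                "static": {"max_chunk_size_tokens": 800, "chunk_overlap_tokens": 400},
            },
        },
    ],
)
```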

src/openai/types/realtime/call_accept_params.py

Lines changed: 13 additions & 2 deletions
@@ -106,6 +106,17 @@ class CallAcceptParams(TypedDict, total=False):

     truncation: RealtimeTruncationParam
     """
-    Controls how the realtime conversation is truncated prior to model inference.
-    The default is `auto`.
+    When the number of tokens in a conversation exceeds the model's input token
+    limit, the conversation will be truncated, meaning messages (starting from the
+    oldest) will not be included in the model's context. A 32k context model with
+    4,096 max output tokens can only include 28,224 tokens in the context before
+    truncation occurs. Clients can configure truncation behavior to truncate with a
+    lower max token limit, which is an effective way to control token usage and
+    cost. Truncation will reduce the number of cached tokens on the next turn
+    (busting the cache), since messages are dropped from the beginning of the
+    context. However, clients can also configure truncation to retain messages up to
+    a fraction of the maximum context size, which will reduce the need for future
+    truncations and thus improve the cache rate. Truncation can be disabled
+    entirely, which means the server will never truncate but would instead return an
+    error if the conversation exceeds the model's input token limit.
     """

src/openai/types/realtime/realtime_session_create_request.py

Lines changed: 13 additions & 2 deletions
@@ -106,6 +106,17 @@ class RealtimeSessionCreateRequest(BaseModel):

     truncation: Optional[RealtimeTruncation] = None
     """
-    Controls how the realtime conversation is truncated prior to model inference.
-    The default is `auto`.
+    When the number of tokens in a conversation exceeds the model's input token
+    limit, the conversation will be truncated, meaning messages (starting from the
+    oldest) will not be included in the model's context. A 32k context model with
+    4,096 max output tokens can only include 28,224 tokens in the context before
+    truncation occurs. Clients can configure truncation behavior to truncate with a
+    lower max token limit, which is an effective way to control token usage and
+    cost. Truncation will reduce the number of cached tokens on the next turn
+    (busting the cache), since messages are dropped from the beginning of the
+    context. However, clients can also configure truncation to retain messages up to
+    a fraction of the maximum context size, which will reduce the need for future
+    truncations and thus improve the cache rate. Truncation can be disabled
+    entirely, which means the server will never truncate but would instead return an
+    error if the conversation exceeds the model's input token limit.
     """

src/openai/types/realtime/realtime_session_create_request_param.py

Lines changed: 13 additions & 2 deletions
@@ -106,6 +106,17 @@ class RealtimeSessionCreateRequestParam(TypedDict, total=False):

     truncation: RealtimeTruncationParam
     """
-    Controls how the realtime conversation is truncated prior to model inference.
-    The default is `auto`.
+    When the number of tokens in a conversation exceeds the model's input token
+    limit, the conversation will be truncated, meaning messages (starting from the
+    oldest) will not be included in the model's context. A 32k context model with
+    4,096 max output tokens can only include 28,224 tokens in the context before
+    truncation occurs. Clients can configure truncation behavior to truncate with a
+    lower max token limit, which is an effective way to control token usage and
+    cost. Truncation will reduce the number of cached tokens on the next turn
+    (busting the cache), since messages are dropped from the beginning of the
+    context. However, clients can also configure truncation to retain messages up to
+    a fraction of the maximum context size, which will reduce the need for future
+    truncations and thus improve the cache rate. Truncation can be disabled
+    entirely, which means the server will never truncate but would instead return an
+    error if the conversation exceeds the model's input token limit.
     """
