diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 108509ed29..427b8ec423 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "2.8.1"
+ ".": "2.9.0"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index fe1a09be6b..7adb61ca2e 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 136
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-a7e92d12ebe89ca019a7ac5b29759064eefa2c38fe08d03516f2620e66abb32b.yml
-openapi_spec_hash: acbc703b2739447abc6312b2d753631c
-config_hash: b876221dfb213df9f0a999e75d38a65e
+configured_endpoints: 137
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-fe8a79e6fd407e6c9afec60971f03076b65f711ccd6ea16457933b0e24fb1f6d.yml
+openapi_spec_hash: 38c0a73f4e08843732c5f8002a809104
+config_hash: 2c350086d87a4b4532077363087840e7
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1bfa59348f..6de78290fc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,28 @@
# Changelog
+## 2.9.0 (2025-12-04)
+
+Full Changelog: [v2.8.1...v2.9.0](https://github.com/openai/openai-python/compare/v2.8.1...v2.9.0)
+
+### Features
+
+* **api:** gpt-5.1-codex-max and responses/compact ([22f646e](https://github.com/openai/openai-python/commit/22f646e985b7c93782cf695edbe643844cae7017))
+
+
+### Bug Fixes
+
+* **client:** avoid mutating user-provided response config object ([#2700](https://github.com/openai/openai-python/issues/2700)) ([e040d22](https://github.com/openai/openai-python/commit/e040d22c2df068e908f69dc6b892e7f8b3fe6e99))
+* ensure streams are always closed ([0b1a27f](https://github.com/openai/openai-python/commit/0b1a27f08639d14dfe40bf80b48e2b8a1a51593c))
+* **streaming:** correct indentation ([575bbac](https://github.com/openai/openai-python/commit/575bbac13b3a57731a4e07b67636ae94463d43fa))
+
+
+### Chores
+
+* **deps:** mypy 1.18.1 has a regression, pin to 1.17 ([22cd586](https://github.com/openai/openai-python/commit/22cd586dbd5484b47f625da55db697691116b22b))
+* **docs:** use environment variables for authentication in code snippets ([c2a3cd5](https://github.com/openai/openai-python/commit/c2a3cd502bfb03f68f62f50aed15a40458c0996e))
+* **internal:** codegen related update ([307a066](https://github.com/openai/openai-python/commit/307a0664383b9d1d4151bc1a05a78c4fdcdcc9b0))
+* update lockfile ([b4109c5](https://github.com/openai/openai-python/commit/b4109c5fcf971ccfb25b4bdaef0bf36999f9eca5))
+
## 2.8.1 (2025-11-17)
Full Changelog: [v2.8.0...v2.8.1](https://github.com/openai/openai-python/compare/v2.8.0...v2.8.1)
diff --git a/README.md b/README.md
index 470707e1f3..b8050a4cd6 100644
--- a/README.md
+++ b/README.md
@@ -160,6 +160,7 @@ pip install openai[aiohttp]
Then you can enable it by instantiating the client with `http_client=DefaultAioHttpClient()`:
```python
+import os
import asyncio
from openai import DefaultAioHttpClient
from openai import AsyncOpenAI
@@ -167,7 +168,7 @@ from openai import AsyncOpenAI
async def main() -> None:
async with AsyncOpenAI(
- api_key="My API Key",
+ api_key=os.environ.get("OPENAI_API_KEY"), # This is the default and can be omitted
http_client=DefaultAioHttpClient(),
) as client:
chat_completion = await client.chat.completions.create(
diff --git a/api.md b/api.md
index 28ee551af3..3807603206 100644
--- a/api.md
+++ b/api.md
@@ -733,6 +733,7 @@ Types:
```python
from openai.types.responses import (
ApplyPatchTool,
+ CompactedResponse,
ComputerTool,
CustomTool,
EasyInputMessage,
@@ -752,6 +753,8 @@ from openai.types.responses import (
ResponseCodeInterpreterCallInProgressEvent,
ResponseCodeInterpreterCallInterpretingEvent,
ResponseCodeInterpreterToolCall,
+ ResponseCompactionItem,
+ ResponseCompactionItemParam,
ResponseCompletedEvent,
ResponseComputerToolCall,
ResponseComputerToolCallOutputItem,
@@ -861,6 +864,7 @@ Methods:
- client.responses.retrieve(response_id, \*\*params) -> Response
- client.responses.delete(response_id) -> None
- client.responses.cancel(response_id) -> Response
+- client.responses.compact(\*\*params) -> CompactedResponse
## InputItems
@@ -914,6 +918,7 @@ from openai.types.realtime import (
InputAudioBufferClearedEvent,
InputAudioBufferCommitEvent,
InputAudioBufferCommittedEvent,
+ InputAudioBufferDtmfEventReceivedEvent,
InputAudioBufferSpeechStartedEvent,
InputAudioBufferSpeechStoppedEvent,
InputAudioBufferTimeoutTriggered,
diff --git a/pyproject.toml b/pyproject.toml
index 75118d46be..4735412341 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,22 +1,24 @@
[project]
name = "openai"
-version = "2.8.1"
+version = "2.9.0"
description = "The official Python library for the openai API"
dynamic = ["readme"]
license = "Apache-2.0"
authors = [
{ name = "OpenAI", email = "support@openai.com" },
]
+
dependencies = [
- "httpx>=0.23.0, <1",
- "pydantic>=1.9.0, <3",
+ "httpx>=0.23.0, <1",
+ "pydantic>=1.9.0, <3",
"typing-extensions>=4.11, <5",
- "anyio>=3.5.0, <5",
- "distro>=1.7.0, <2",
- "sniffio",
+ "anyio>=3.5.0, <5",
+ "distro>=1.7.0, <2",
+ "sniffio",
"tqdm > 4",
"jiter>=0.10.0, <1",
]
+
requires-python = ">= 3.9"
classifiers = [
"Typing :: Typed",
@@ -26,6 +28,7 @@ classifiers = [
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
+ "Programming Language :: Python :: 3.14",
"Operating System :: OS Independent",
"Operating System :: POSIX",
"Operating System :: MacOS",
@@ -53,7 +56,7 @@ managed = true
# version pins are in requirements-dev.lock
dev-dependencies = [
"pyright==1.1.399",
- "mypy",
+ "mypy==1.17",
"respx",
"pytest",
"pytest-asyncio",
diff --git a/requirements-dev.lock b/requirements-dev.lock
index b454537b96..a7201a127b 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -12,65 +12,70 @@
-e file:.
aiohappyeyeballs==2.6.1
# via aiohttp
-aiohttp==3.12.13
+aiohttp==3.13.2
# via httpx-aiohttp
# via openai
-aiosignal==1.3.2
+aiosignal==1.4.0
# via aiohttp
-annotated-types==0.6.0
+annotated-types==0.7.0
# via pydantic
-anyio==4.1.0
+anyio==4.12.0
# via httpx
# via openai
-argcomplete==3.1.2
+argcomplete==3.6.3
# via nox
-asttokens==2.4.1
+asttokens==3.0.1
# via inline-snapshot
async-timeout==5.0.1
# via aiohttp
-attrs==24.2.0
+attrs==25.4.0
# via aiohttp
+ # via nox
# via outcome
# via trio
-azure-core==1.31.0
+azure-core==1.36.0
# via azure-identity
-azure-identity==1.19.0
-certifi==2023.7.22
+azure-identity==1.25.1
+backports-asyncio-runner==1.2.0
+ # via pytest-asyncio
+certifi==2025.11.12
# via httpcore
# via httpx
# via requests
-cffi==1.16.0
+cffi==2.0.0
# via cryptography
# via sounddevice
-charset-normalizer==3.3.2
+charset-normalizer==3.4.4
# via requests
colorama==0.4.6
# via griffe
-colorlog==6.7.0
+colorlog==6.10.1
# via nox
-cryptography==42.0.7
+cryptography==46.0.3
# via azure-identity
# via msal
# via pyjwt
-dirty-equals==0.6.0
-distlib==0.3.7
+dependency-groups==1.3.1
+ # via nox
+dirty-equals==0.11
+distlib==0.4.0
# via virtualenv
-distro==1.8.0
+distro==1.9.0
# via openai
-exceptiongroup==1.2.2
+exceptiongroup==1.3.1
# via anyio
# via pytest
# via trio
-execnet==2.1.1
+execnet==2.1.2
# via pytest-xdist
-executing==2.2.0
+executing==2.2.1
# via inline-snapshot
-filelock==3.12.4
+filelock==3.19.1
# via virtualenv
-frozenlist==1.7.0
+frozenlist==1.8.0
# via aiohttp
# via aiosignal
-griffe==1.13.0
+griffe==1.14.0
h11==0.16.0
# via httpcore
httpcore==1.0.9
@@ -81,137 +86,145 @@ httpx==0.28.1
# via respx
httpx-aiohttp==0.1.9
# via openai
-idna==3.4
+humanize==4.13.0
+ # via nox
+idna==3.11
# via anyio
# via httpx
# via requests
# via trio
# via yarl
-importlib-metadata==7.0.0
-iniconfig==2.0.0
+importlib-metadata==8.7.0
+iniconfig==2.1.0
# via pytest
-inline-snapshot==0.28.0
-jiter==0.11.0
+inline-snapshot==0.31.1
+jiter==0.12.0
# via openai
markdown-it-py==3.0.0
# via rich
mdurl==0.1.2
# via markdown-it-py
-msal==1.31.0
+msal==1.34.0
# via azure-identity
# via msal-extensions
-msal-extensions==1.2.0
+msal-extensions==1.3.1
# via azure-identity
-multidict==6.5.0
+multidict==6.7.0
# via aiohttp
# via yarl
-mypy==1.14.1
-mypy-extensions==1.0.0
+mypy==1.17.0
+mypy-extensions==1.1.0
# via mypy
nest-asyncio==1.6.0
-nodeenv==1.8.0
+nodeenv==1.9.1
# via pyright
-nox==2023.4.22
+nox==2025.11.12
numpy==2.0.2
# via openai
# via pandas
# via pandas-stubs
outcome==1.3.0.post0
# via trio
-packaging==23.2
+packaging==25.0
+ # via dependency-groups
# via nox
# via pytest
-pandas==2.2.3
+pandas==2.3.3
# via openai
-pandas-stubs==2.1.4.231227
+pandas-stubs==2.2.2.240807
# via openai
-platformdirs==3.11.0
+pathspec==0.12.1
+ # via mypy
+platformdirs==4.4.0
# via virtualenv
-pluggy==1.5.0
+pluggy==1.6.0
# via pytest
-portalocker==2.10.1
- # via msal-extensions
-propcache==0.3.2
+propcache==0.4.1
# via aiohttp
# via yarl
pycparser==2.23
# via cffi
-pydantic==2.11.9
+pydantic==2.12.5
# via openai
-pydantic-core==2.33.2
+pydantic-core==2.41.5
# via pydantic
-pygments==2.18.0
+pygments==2.19.2
# via pytest
# via rich
-pyjwt==2.8.0
+pyjwt==2.10.1
# via msal
pyright==1.1.399
-pytest==8.4.1
+pytest==8.4.2
# via inline-snapshot
# via pytest-asyncio
# via pytest-xdist
-pytest-asyncio==0.24.0
-pytest-xdist==3.7.0
-python-dateutil==2.8.2
+pytest-asyncio==1.2.0
+pytest-xdist==3.8.0
+python-dateutil==2.9.0.post0
# via pandas
# via time-machine
-pytz==2023.3.post1
- # via dirty-equals
+pytz==2025.2
# via pandas
-requests==2.31.0
+requests==2.32.5
# via azure-core
# via msal
respx==0.22.0
-rich==13.7.1
+rich==14.2.0
# via inline-snapshot
-ruff==0.9.4
-setuptools==68.2.2
- # via nodeenv
-six==1.16.0
- # via asttokens
- # via azure-core
+ruff==0.14.7
+six==1.17.0
# via python-dateutil
-sniffio==1.3.0
- # via anyio
+sniffio==1.3.1
# via openai
# via trio
sortedcontainers==2.4.0
# via trio
-sounddevice==0.5.1
+sounddevice==0.5.3
# via openai
-time-machine==2.9.0
-tomli==2.0.2
+time-machine==2.19.0
+tomli==2.3.0
+ # via dependency-groups
# via inline-snapshot
# via mypy
+ # via nox
# via pytest
-tqdm==4.66.5
+tqdm==4.67.1
# via openai
-trio==0.27.0
-types-pyaudio==0.2.16.20240516
-types-pytz==2024.2.0.20241003
+trio==0.31.0
+types-pyaudio==0.2.16.20250801
+types-pytz==2025.2.0.20251108
# via pandas-stubs
-types-tqdm==4.66.0.20240417
-typing-extensions==4.12.2
+types-requests==2.32.4.20250913
+ # via types-tqdm
+types-tqdm==4.67.0.20250809
+typing-extensions==4.15.0
+ # via aiosignal
+ # via anyio
# via azure-core
# via azure-identity
+ # via cryptography
+ # via exceptiongroup
# via multidict
# via mypy
# via openai
# via pydantic
# via pydantic-core
# via pyright
+ # via pytest-asyncio
# via typing-inspection
-typing-inspection==0.4.1
+ # via virtualenv
+typing-inspection==0.4.2
# via pydantic
-tzdata==2024.1
+tzdata==2025.2
# via pandas
-urllib3==2.2.1
+urllib3==2.5.0
# via requests
-virtualenv==20.24.5
+ # via types-requests
+virtualenv==20.35.4
# via nox
websockets==15.0.1
# via openai
-yarl==1.20.1
+yarl==1.22.0
# via aiohttp
-zipp==3.17.0
+zipp==3.23.0
# via importlib-metadata
diff --git a/requirements.lock b/requirements.lock
index b047cb3f88..8e021bd69b 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -12,30 +12,30 @@
-e file:.
aiohappyeyeballs==2.6.1
# via aiohttp
-aiohttp==3.12.13
+aiohttp==3.13.2
# via httpx-aiohttp
# via openai
-aiosignal==1.3.2
+aiosignal==1.4.0
# via aiohttp
-annotated-types==0.6.0
+annotated-types==0.7.0
# via pydantic
-anyio==4.1.0
+anyio==4.12.0
# via httpx
# via openai
async-timeout==5.0.1
# via aiohttp
-attrs==25.3.0
+attrs==25.4.0
# via aiohttp
-certifi==2023.7.22
+certifi==2025.11.12
# via httpcore
# via httpx
-cffi==1.17.1
+cffi==2.0.0
# via sounddevice
-distro==1.8.0
+distro==1.9.0
# via openai
-exceptiongroup==1.2.2
+exceptiongroup==1.3.1
# via anyio
-frozenlist==1.7.0
+frozenlist==1.8.0
# via aiohttp
# via aiosignal
h11==0.16.0
@@ -47,58 +47,60 @@ httpx==0.28.1
# via openai
httpx-aiohttp==0.1.9
# via openai
-idna==3.4
+idna==3.11
# via anyio
# via httpx
# via yarl
-jiter==0.11.0
+jiter==0.12.0
# via openai
-multidict==6.5.0
+multidict==6.7.0
# via aiohttp
# via yarl
numpy==2.0.2
# via openai
# via pandas
# via pandas-stubs
-pandas==2.2.3
+pandas==2.3.3
# via openai
pandas-stubs==2.2.2.240807
# via openai
-propcache==0.3.2
+propcache==0.4.1
# via aiohttp
# via yarl
pycparser==2.23
# via cffi
-pydantic==2.11.9
+pydantic==2.12.5
# via openai
-pydantic-core==2.33.2
+pydantic-core==2.41.5
# via pydantic
python-dateutil==2.9.0.post0
# via pandas
-pytz==2024.1
+pytz==2025.2
# via pandas
-six==1.16.0
+six==1.17.0
# via python-dateutil
-sniffio==1.3.0
- # via anyio
+sniffio==1.3.1
# via openai
-sounddevice==0.5.1
+sounddevice==0.5.3
# via openai
-tqdm==4.66.5
+tqdm==4.67.1
# via openai
-types-pytz==2024.2.0.20241003
+types-pytz==2025.2.0.20251108
# via pandas-stubs
-typing-extensions==4.12.2
+typing-extensions==4.15.0
+ # via aiosignal
+ # via anyio
+ # via exceptiongroup
# via multidict
# via openai
# via pydantic
# via pydantic-core
# via typing-inspection
-typing-inspection==0.4.1
+typing-inspection==0.4.2
# via pydantic
tzdata==2025.2
# via pandas
websockets==15.0.1
# via openai
-yarl==1.20.1
+yarl==1.22.0
# via aiohttp
diff --git a/src/openai/_streaming.py b/src/openai/_streaming.py
index 05c284a2be..61a742668a 100644
--- a/src/openai/_streaming.py
+++ b/src/openai/_streaming.py
@@ -55,49 +55,51 @@ def __stream__(self) -> Iterator[_T]:
process_data = self._client._process_response_data
iterator = self._iter_events()
- for sse in iterator:
- if sse.data.startswith("[DONE]"):
- break
-
- # we have to special case the Assistants `thread.` events since we won't have an "event" key in the data
- if sse.event and sse.event.startswith("thread."):
- data = sse.json()
-
- if sse.event == "error" and is_mapping(data) and data.get("error"):
- message = None
- error = data.get("error")
- if is_mapping(error):
- message = error.get("message")
- if not message or not isinstance(message, str):
- message = "An error occurred during streaming"
-
- raise APIError(
- message=message,
- request=self.response.request,
- body=data["error"],
- )
-
- yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response)
- else:
- data = sse.json()
- if is_mapping(data) and data.get("error"):
- message = None
- error = data.get("error")
- if is_mapping(error):
- message = error.get("message")
- if not message or not isinstance(message, str):
- message = "An error occurred during streaming"
-
- raise APIError(
- message=message,
- request=self.response.request,
- body=data["error"],
- )
-
- yield process_data(data=data, cast_to=cast_to, response=response)
-
- # As we might not fully consume the response stream, we need to close it explicitly
- response.close()
+ try:
+ for sse in iterator:
+ if sse.data.startswith("[DONE]"):
+ break
+
+ # we have to special case the Assistants `thread.` events since we won't have an "event" key in the data
+ if sse.event and sse.event.startswith("thread."):
+ data = sse.json()
+
+ if sse.event == "error" and is_mapping(data) and data.get("error"):
+ message = None
+ error = data.get("error")
+ if is_mapping(error):
+ message = error.get("message")
+ if not message or not isinstance(message, str):
+ message = "An error occurred during streaming"
+
+ raise APIError(
+ message=message,
+ request=self.response.request,
+ body=data["error"],
+ )
+
+ yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response)
+ else:
+ data = sse.json()
+ if is_mapping(data) and data.get("error"):
+ message = None
+ error = data.get("error")
+ if is_mapping(error):
+ message = error.get("message")
+ if not message or not isinstance(message, str):
+ message = "An error occurred during streaming"
+
+ raise APIError(
+ message=message,
+ request=self.response.request,
+ body=data["error"],
+ )
+
+ yield process_data(data=data, cast_to=cast_to, response=response)
+
+ finally:
+ # Ensure the response is closed even if the consumer doesn't read all data
+ response.close()
def __enter__(self) -> Self:
return self
@@ -156,49 +158,51 @@ async def __stream__(self) -> AsyncIterator[_T]:
process_data = self._client._process_response_data
iterator = self._iter_events()
- async for sse in iterator:
- if sse.data.startswith("[DONE]"):
- break
-
- # we have to special case the Assistants `thread.` events since we won't have an "event" key in the data
- if sse.event and sse.event.startswith("thread."):
- data = sse.json()
-
- if sse.event == "error" and is_mapping(data) and data.get("error"):
- message = None
- error = data.get("error")
- if is_mapping(error):
- message = error.get("message")
- if not message or not isinstance(message, str):
- message = "An error occurred during streaming"
-
- raise APIError(
- message=message,
- request=self.response.request,
- body=data["error"],
- )
-
- yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response)
- else:
- data = sse.json()
- if is_mapping(data) and data.get("error"):
- message = None
- error = data.get("error")
- if is_mapping(error):
- message = error.get("message")
- if not message or not isinstance(message, str):
- message = "An error occurred during streaming"
-
- raise APIError(
- message=message,
- request=self.response.request,
- body=data["error"],
- )
-
- yield process_data(data=data, cast_to=cast_to, response=response)
-
- # As we might not fully consume the response stream, we need to close it explicitly
- await response.aclose()
+ try:
+ async for sse in iterator:
+ if sse.data.startswith("[DONE]"):
+ break
+
+ # we have to special case the Assistants `thread.` events since we won't have an "event" key in the data
+ if sse.event and sse.event.startswith("thread."):
+ data = sse.json()
+
+ if sse.event == "error" and is_mapping(data) and data.get("error"):
+ message = None
+ error = data.get("error")
+ if is_mapping(error):
+ message = error.get("message")
+ if not message or not isinstance(message, str):
+ message = "An error occurred during streaming"
+
+ raise APIError(
+ message=message,
+ request=self.response.request,
+ body=data["error"],
+ )
+
+ yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response)
+ else:
+ data = sse.json()
+ if is_mapping(data) and data.get("error"):
+ message = None
+ error = data.get("error")
+ if is_mapping(error):
+ message = error.get("message")
+ if not message or not isinstance(message, str):
+ message = "An error occurred during streaming"
+
+ raise APIError(
+ message=message,
+ request=self.response.request,
+ body=data["error"],
+ )
+
+ yield process_data(data=data, cast_to=cast_to, response=response)
+
+ finally:
+ # Ensure the response is closed even if the consumer doesn't read all data
+ await response.aclose()
async def __aenter__(self) -> Self:
return self
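
The `try`/`finally` added above ensures `response.close()` / `await response.aclose()` runs whenever the generator is exhausted, closed, or unwound by an exception, rather than only on the happy path. A minimal consumer-side sketch, assuming a chat completions stream:

```python
from openai import OpenAI

client = OpenAI()

# Iterating drives __stream__ above; its finally block closes the HTTP
# response even if the consumer stops before [DONE]. Using the stream as a
# context manager (Stream.__enter__ / __exit__) remains the explicit option.
with client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Say hi"}],
    stream=True,
) as stream:
    for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")
```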
diff --git a/src/openai/_version.py b/src/openai/_version.py
index 6109cebf91..e5ddb8f4eb 100644
--- a/src/openai/_version.py
+++ b/src/openai/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "openai"
-__version__ = "2.8.1" # x-release-please-version
+__version__ = "2.9.0" # x-release-please-version
diff --git a/src/openai/lib/_parsing/_responses.py b/src/openai/lib/_parsing/_responses.py
index 4d7b0b6224..4bed171df7 100644
--- a/src/openai/lib/_parsing/_responses.py
+++ b/src/openai/lib/_parsing/_responses.py
@@ -103,6 +103,7 @@ def parse_response(
or output.type == "file_search_call"
or output.type == "web_search_call"
or output.type == "reasoning"
+ or output.type == "compaction"
or output.type == "mcp_call"
or output.type == "mcp_approval_request"
or output.type == "image_generation_call"
diff --git a/src/openai/resources/beta/assistants.py b/src/openai/resources/beta/assistants.py
index e4ec1dca11..aa1f9f9b48 100644
--- a/src/openai/resources/beta/assistants.py
+++ b/src/openai/resources/beta/assistants.py
@@ -98,9 +98,9 @@ def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -108,6 +108,7 @@ def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -312,9 +313,9 @@ def update(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -322,6 +323,7 @@ def update(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -565,9 +567,9 @@ async def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -575,6 +577,7 @@ async def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -779,9 +782,9 @@ async def update(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -789,6 +792,7 @@ async def update(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py
index d7445d52b5..9b6cb3f752 100644
--- a/src/openai/resources/beta/threads/runs/runs.py
+++ b/src/openai/resources/beta/threads/runs/runs.py
@@ -169,9 +169,9 @@ def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -179,6 +179,7 @@ def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -330,9 +331,9 @@ def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -340,6 +341,7 @@ def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -487,9 +489,9 @@ def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -497,6 +499,7 @@ def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -1620,9 +1623,9 @@ async def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1630,6 +1633,7 @@ async def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -1781,9 +1785,9 @@ async def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1791,6 +1795,7 @@ async def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -1938,9 +1943,9 @@ async def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1948,6 +1953,7 @@ async def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
diff --git a/src/openai/resources/chat/completions/completions.py b/src/openai/resources/chat/completions/completions.py
index c205011d10..3f2732a608 100644
--- a/src/openai/resources/chat/completions/completions.py
+++ b/src/openai/resources/chat/completions/completions.py
@@ -411,9 +411,9 @@ def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -421,6 +421,7 @@ def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
@@ -721,9 +722,9 @@ def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -731,6 +732,7 @@ def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
@@ -1022,9 +1024,9 @@ def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1032,6 +1034,7 @@ def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
@@ -1894,9 +1897,9 @@ async def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1904,6 +1907,7 @@ async def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
@@ -2204,9 +2208,9 @@ async def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -2214,6 +2218,7 @@ async def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
@@ -2505,9 +2510,9 @@ async def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -2515,6 +2520,7 @@ async def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
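
A hedged sketch of the new `xhigh` reasoning effort in a chat completion request; per the updated docstrings above, `xhigh` is currently only supported for `gpt-5.1-codex-max`:

```python
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-5.1-codex-max",
    reasoning_effort="xhigh",  # new value; other models keep none..high
    messages=[{"role": "user", "content": "Refactor this function to be iterative."}],
)
print(completion.choices[0].message.content)
```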
diff --git a/src/openai/resources/containers/containers.py b/src/openai/resources/containers/containers.py
index dcdc3e1a3e..0cbb400d4a 100644
--- a/src/openai/resources/containers/containers.py
+++ b/src/openai/resources/containers/containers.py
@@ -60,6 +60,7 @@ def create(
name: str,
expires_after: container_create_params.ExpiresAfter | Omit = omit,
file_ids: SequenceNotStr[str] | Omit = omit,
+ memory_limit: Literal["1g", "4g", "16g", "64g"] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -77,6 +78,8 @@ def create(
file_ids: IDs of files to copy to the container.
+ memory_limit: Optional memory limit for the container. Defaults to "1g".
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -92,6 +95,7 @@ def create(
"name": name,
"expires_after": expires_after,
"file_ids": file_ids,
+ "memory_limit": memory_limit,
},
container_create_params.ContainerCreateParams,
),
@@ -256,6 +260,7 @@ async def create(
name: str,
expires_after: container_create_params.ExpiresAfter | Omit = omit,
file_ids: SequenceNotStr[str] | Omit = omit,
+ memory_limit: Literal["1g", "4g", "16g", "64g"] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -273,6 +278,8 @@ async def create(
file_ids: IDs of files to copy to the container.
+ memory_limit: Optional memory limit for the container. Defaults to "1g".
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -288,6 +295,7 @@ async def create(
"name": name,
"expires_after": expires_after,
"file_ids": file_ids,
+ "memory_limit": memory_limit,
},
container_create_params.ContainerCreateParams,
),
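
A hedged sketch of the new `memory_limit` option on container creation; the allowed values mirror the `Literal` added above, and `"1g"` is the documented default:

```python
from openai import OpenAI

client = OpenAI()

container = client.containers.create(
    name="data-analysis",
    memory_limit="4g",  # one of "1g", "4g", "16g", "64g"; defaults to "1g"
)
print(container.id)
```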
diff --git a/src/openai/resources/realtime/calls.py b/src/openai/resources/realtime/calls.py
index 7d2c92fe86..cdea492d95 100644
--- a/src/openai/resources/realtime/calls.py
+++ b/src/openai/resources/realtime/calls.py
@@ -199,15 +199,20 @@ def accept(
limit, the conversation will be truncated, meaning messages (starting from the
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
extra_headers: Send extra headers
@@ -519,15 +524,20 @@ async def accept(
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
extra_headers: Send extra headers
diff --git a/src/openai/resources/realtime/realtime.py b/src/openai/resources/realtime/realtime.py
index 6e69258616..33caba1871 100644
--- a/src/openai/resources/realtime/realtime.py
+++ b/src/openai/resources/realtime/realtime.py
@@ -829,7 +829,7 @@ def retrieve(self, *, item_id: str, event_id: str | Omit = omit) -> None:
class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource):
def clear(self, *, event_id: str | Omit = omit) -> None:
- """**WebRTC Only:** Emit to cut off the current audio response.
+ """**WebRTC/SIP Only:** Emit to cut off the current audio response.
This will trigger the server to
stop generating audio and emit a `output_audio_buffer.cleared` event. This
@@ -1066,7 +1066,7 @@ async def retrieve(self, *, item_id: str, event_id: str | Omit = omit) -> None:
class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
async def clear(self, *, event_id: str | Omit = omit) -> None:
- """**WebRTC Only:** Emit to cut off the current audio response.
+ """**WebRTC/SIP Only:** Emit to cut off the current audio response.
This will trigger the server to
stop generating audio and emit a `output_audio_buffer.cleared` event. This
diff --git a/src/openai/resources/responses/responses.py b/src/openai/resources/responses/responses.py
index dcf87ba07c..c532fc0bb0 100644
--- a/src/openai/resources/responses/responses.py
+++ b/src/openai/resources/responses/responses.py
@@ -2,6 +2,7 @@
from __future__ import annotations
+from copy import copy
from typing import Any, List, Type, Union, Iterable, Optional, cast
from functools import partial
from typing_extensions import Literal, overload
@@ -33,7 +34,11 @@
AsyncInputTokensWithStreamingResponse,
)
from ..._base_client import make_request_options
-from ...types.responses import response_create_params, response_retrieve_params
+from ...types.responses import (
+ response_create_params,
+ response_compact_params,
+ response_retrieve_params,
+)
from ...lib._parsing._responses import (
TextFormatT,
parse_response,
@@ -45,11 +50,13 @@
from ...types.shared_params.reasoning import Reasoning
from ...types.responses.parsed_response import ParsedResponse
from ...lib.streaming.responses._responses import ResponseStreamManager, AsyncResponseStreamManager
+from ...types.responses.compacted_response import CompactedResponse
from ...types.responses.response_includable import ResponseIncludable
from ...types.shared_params.responses_model import ResponsesModel
from ...types.responses.response_input_param import ResponseInputParam
from ...types.responses.response_prompt_param import ResponsePromptParam
from ...types.responses.response_stream_event import ResponseStreamEvent
+from ...types.responses.response_input_item_param import ResponseInputItemParam
from ...types.responses.response_text_config_param import ResponseTextConfigParam
__all__ = ["Responses", "AsyncResponses"]
@@ -1046,6 +1053,7 @@ def stream(
if "format" in text:
raise TypeError("Cannot mix and match text.format with text_format")
+ text = copy(text)
text["format"] = _type_to_text_format_param(text_format)
api_request: partial[Stream[ResponseStreamEvent]] = partial(
@@ -1151,7 +1159,7 @@ def parse(
if "format" in text:
raise TypeError("Cannot mix and match text.format with text_format")
-
+ text = copy(text)
text["format"] = _type_to_text_format_param(text_format)
tools = _make_tools(tools)
@@ -1515,6 +1523,154 @@ def cancel(
cast_to=Response,
)
+ def compact(
+ self,
+ *,
+ input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit,
+ instructions: Optional[str] | Omit = omit,
+ model: Union[
+ Literal[
+ "gpt-5.1",
+ "gpt-5.1-2025-11-13",
+ "gpt-5.1-codex",
+ "gpt-5.1-mini",
+ "gpt-5.1-chat-latest",
+ "gpt-5",
+ "gpt-5-mini",
+ "gpt-5-nano",
+ "gpt-5-2025-08-07",
+ "gpt-5-mini-2025-08-07",
+ "gpt-5-nano-2025-08-07",
+ "gpt-5-chat-latest",
+ "gpt-4.1",
+ "gpt-4.1-mini",
+ "gpt-4.1-nano",
+ "gpt-4.1-2025-04-14",
+ "gpt-4.1-mini-2025-04-14",
+ "gpt-4.1-nano-2025-04-14",
+ "o4-mini",
+ "o4-mini-2025-04-16",
+ "o3",
+ "o3-2025-04-16",
+ "o3-mini",
+ "o3-mini-2025-01-31",
+ "o1",
+ "o1-2024-12-17",
+ "o1-preview",
+ "o1-preview-2024-09-12",
+ "o1-mini",
+ "o1-mini-2024-09-12",
+ "gpt-4o",
+ "gpt-4o-2024-11-20",
+ "gpt-4o-2024-08-06",
+ "gpt-4o-2024-05-13",
+ "gpt-4o-audio-preview",
+ "gpt-4o-audio-preview-2024-10-01",
+ "gpt-4o-audio-preview-2024-12-17",
+ "gpt-4o-audio-preview-2025-06-03",
+ "gpt-4o-mini-audio-preview",
+ "gpt-4o-mini-audio-preview-2024-12-17",
+ "gpt-4o-search-preview",
+ "gpt-4o-mini-search-preview",
+ "gpt-4o-search-preview-2025-03-11",
+ "gpt-4o-mini-search-preview-2025-03-11",
+ "chatgpt-4o-latest",
+ "codex-mini-latest",
+ "gpt-4o-mini",
+ "gpt-4o-mini-2024-07-18",
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0301",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ "o1-pro",
+ "o1-pro-2025-03-19",
+ "o3-pro",
+ "o3-pro-2025-06-10",
+ "o3-deep-research",
+ "o3-deep-research-2025-06-26",
+ "o4-mini-deep-research",
+ "o4-mini-deep-research-2025-06-26",
+ "computer-use-preview",
+ "computer-use-preview-2025-03-11",
+ "gpt-5-codex",
+ "gpt-5-pro",
+ "gpt-5-pro-2025-10-06",
+ "gpt-5.1-codex-max",
+ ],
+ str,
+ None,
+ ]
+ | Omit = omit,
+ previous_response_id: Optional[str] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> CompactedResponse:
+ """
+ Compact conversation
+
+ Args:
+ input: Text, image, or file inputs to the model, used to generate a response
+
+ instructions: A system (or developer) message inserted into the model's context. When used
+ along with `previous_response_id`, the instructions from a previous response
+ will not be carried over to the next response. This makes it simple to swap out
+ system (or developer) messages in new responses.
+
+ model: Model ID used to generate the response, like `gpt-5` or `o3`. OpenAI offers a
+ wide range of models with different capabilities, performance characteristics,
+ and price points. Refer to the
+ [model guide](https://platform.openai.com/docs/models) to browse and compare
+ available models.
+
+ previous_response_id: The unique ID of the previous response to the model. Use this to create
+ multi-turn conversations. Learn more about
+ [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+ Cannot be used in conjunction with `conversation`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/responses/compact",
+ body=maybe_transform(
+ {
+ "input": input,
+ "instructions": instructions,
+ "model": model,
+ "previous_response_id": previous_response_id,
+ },
+ response_compact_params.ResponseCompactParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=CompactedResponse,
+ )
+
class AsyncResponses(AsyncAPIResource):
@cached_property
@@ -2507,7 +2663,7 @@ def stream(
if "format" in text:
raise TypeError("Cannot mix and match text.format with text_format")
-
+ text = copy(text)
text["format"] = _type_to_text_format_param(text_format)
api_request = self.create(
@@ -2617,7 +2773,7 @@ async def parse(
if "format" in text:
raise TypeError("Cannot mix and match text.format with text_format")
-
+ text = copy(text)
text["format"] = _type_to_text_format_param(text_format)
tools = _make_tools(tools)
@@ -2981,6 +3137,154 @@ async def cancel(
cast_to=Response,
)
+ async def compact(
+ self,
+ *,
+ input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit,
+ instructions: Optional[str] | Omit = omit,
+ model: Union[
+ Literal[
+ "gpt-5.1",
+ "gpt-5.1-2025-11-13",
+ "gpt-5.1-codex",
+ "gpt-5.1-mini",
+ "gpt-5.1-chat-latest",
+ "gpt-5",
+ "gpt-5-mini",
+ "gpt-5-nano",
+ "gpt-5-2025-08-07",
+ "gpt-5-mini-2025-08-07",
+ "gpt-5-nano-2025-08-07",
+ "gpt-5-chat-latest",
+ "gpt-4.1",
+ "gpt-4.1-mini",
+ "gpt-4.1-nano",
+ "gpt-4.1-2025-04-14",
+ "gpt-4.1-mini-2025-04-14",
+ "gpt-4.1-nano-2025-04-14",
+ "o4-mini",
+ "o4-mini-2025-04-16",
+ "o3",
+ "o3-2025-04-16",
+ "o3-mini",
+ "o3-mini-2025-01-31",
+ "o1",
+ "o1-2024-12-17",
+ "o1-preview",
+ "o1-preview-2024-09-12",
+ "o1-mini",
+ "o1-mini-2024-09-12",
+ "gpt-4o",
+ "gpt-4o-2024-11-20",
+ "gpt-4o-2024-08-06",
+ "gpt-4o-2024-05-13",
+ "gpt-4o-audio-preview",
+ "gpt-4o-audio-preview-2024-10-01",
+ "gpt-4o-audio-preview-2024-12-17",
+ "gpt-4o-audio-preview-2025-06-03",
+ "gpt-4o-mini-audio-preview",
+ "gpt-4o-mini-audio-preview-2024-12-17",
+ "gpt-4o-search-preview",
+ "gpt-4o-mini-search-preview",
+ "gpt-4o-search-preview-2025-03-11",
+ "gpt-4o-mini-search-preview-2025-03-11",
+ "chatgpt-4o-latest",
+ "codex-mini-latest",
+ "gpt-4o-mini",
+ "gpt-4o-mini-2024-07-18",
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0301",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ "o1-pro",
+ "o1-pro-2025-03-19",
+ "o3-pro",
+ "o3-pro-2025-06-10",
+ "o3-deep-research",
+ "o3-deep-research-2025-06-26",
+ "o4-mini-deep-research",
+ "o4-mini-deep-research-2025-06-26",
+ "computer-use-preview",
+ "computer-use-preview-2025-03-11",
+ "gpt-5-codex",
+ "gpt-5-pro",
+ "gpt-5-pro-2025-10-06",
+ "gpt-5.1-codex-max",
+ ],
+ str,
+ None,
+ ]
+ | Omit = omit,
+ previous_response_id: Optional[str] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> CompactedResponse:
+ """
+ Compact conversation
+
+ Args:
+ input: Text, image, or file inputs to the model, used to generate a response
+
+ instructions: A system (or developer) message inserted into the model's context. When used
+ along with `previous_response_id`, the instructions from a previous response
+ will not be carried over to the next response. This makes it simple to swap out
+ system (or developer) messages in new responses.
+
+ model: Model ID used to generate the response, like `gpt-5` or `o3`. OpenAI offers a
+ wide range of models with different capabilities, performance characteristics,
+ and price points. Refer to the
+ [model guide](https://platform.openai.com/docs/models) to browse and compare
+ available models.
+
+ previous_response_id: The unique ID of the previous response to the model. Use this to create
+ multi-turn conversations. Learn more about
+ [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+ Cannot be used in conjunction with `conversation`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/responses/compact",
+ body=await async_maybe_transform(
+ {
+ "input": input,
+ "instructions": instructions,
+ "model": model,
+ "previous_response_id": previous_response_id,
+ },
+ response_compact_params.ResponseCompactParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=CompactedResponse,
+ )
+
class ResponsesWithRawResponse:
def __init__(self, responses: Responses) -> None:
@@ -2998,6 +3302,9 @@ def __init__(self, responses: Responses) -> None:
self.cancel = _legacy_response.to_raw_response_wrapper(
responses.cancel,
)
+ self.compact = _legacy_response.to_raw_response_wrapper(
+ responses.compact,
+ )
self.parse = _legacy_response.to_raw_response_wrapper(
responses.parse,
)
@@ -3027,6 +3334,9 @@ def __init__(self, responses: AsyncResponses) -> None:
self.cancel = _legacy_response.async_to_raw_response_wrapper(
responses.cancel,
)
+ self.compact = _legacy_response.async_to_raw_response_wrapper(
+ responses.compact,
+ )
self.parse = _legacy_response.async_to_raw_response_wrapper(
responses.parse,
)
@@ -3056,6 +3366,9 @@ def __init__(self, responses: Responses) -> None:
self.cancel = to_streamed_response_wrapper(
responses.cancel,
)
+ self.compact = to_streamed_response_wrapper(
+ responses.compact,
+ )
@cached_property
def input_items(self) -> InputItemsWithStreamingResponse:
@@ -3082,6 +3395,9 @@ def __init__(self, responses: AsyncResponses) -> None:
self.cancel = async_to_streamed_response_wrapper(
responses.cancel,
)
+ self.compact = async_to_streamed_response_wrapper(
+ responses.compact,
+ )
@cached_property
def input_items(self) -> AsyncInputItemsWithStreamingResponse:
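
As with the existing Responses methods, the wrapper registrations above expose `compact` through the raw- and streaming-response accessors as well. A short sketch of the raw-response form (the response ID is illustrative):

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

raw = client.responses.with_raw_response.compact(previous_response_id="resp_123")
print(raw.headers.get("x-request-id"))  # headers are available alongside the body
compacted = raw.parse()  # -> CompactedResponse
```
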
diff --git a/src/openai/resources/videos.py b/src/openai/resources/videos.py
index 4df5f02004..727091c607 100644
--- a/src/openai/resources/videos.py
+++ b/src/openai/resources/videos.py
@@ -84,11 +84,13 @@ def create(
input_reference: Optional image reference that guides generation.
- model: The video generation model to use. Defaults to `sora-2`.
+ model: The video generation model to use (allowed values: sora-2, sora-2-pro). Defaults
+ to `sora-2`.
- seconds: Clip duration in seconds. Defaults to 4 seconds.
+ seconds: Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds.
- size: Output resolution formatted as width x height. Defaults to 720x1280.
+ size: Output resolution formatted as width x height (allowed values: 720x1280,
+ 1280x720, 1024x1792, 1792x1024). Defaults to 720x1280.
extra_headers: Send extra headers
@@ -437,11 +439,13 @@ async def create(
input_reference: Optional image reference that guides generation.
- model: The video generation model to use. Defaults to `sora-2`.
+ model: The video generation model to use (allowed values: sora-2, sora-2-pro). Defaults
+ to `sora-2`.
- seconds: Clip duration in seconds. Defaults to 4 seconds.
+ seconds: Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds.
- size: Output resolution formatted as width x height. Defaults to 720x1280.
+ size: Output resolution formatted as width x height (allowed values: 720x1280,
+ 1280x720, 1024x1792, 1792x1024). Defaults to 720x1280.
extra_headers: Send extra headers
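
The tightened docstrings above enumerate the accepted values. A hedged sketch that uses them (the prompt is illustrative, and `seconds`/`size` are passed as the string forms listed above):

```python
from openai import OpenAI

client = OpenAI()

video = client.videos.create(
    prompt="A timelapse of clouds rolling over a mountain range",  # illustrative
    model="sora-2",   # allowed values: sora-2, sora-2-pro
    seconds="8",      # allowed values: 4, 8, 12
    size="1280x720",  # allowed values: 720x1280, 1280x720, 1024x1792, 1792x1024
)
print(video.id, video.status)
```
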
diff --git a/src/openai/types/beta/assistant_create_params.py b/src/openai/types/beta/assistant_create_params.py
index 009b0f49e3..38b30f212f 100644
--- a/src/openai/types/beta/assistant_create_params.py
+++ b/src/openai/types/beta/assistant_create_params.py
@@ -62,9 +62,9 @@ class AssistantCreateParams(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -72,6 +72,7 @@ class AssistantCreateParams(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: Optional[AssistantResponseFormatOptionParam]
diff --git a/src/openai/types/beta/assistant_update_params.py b/src/openai/types/beta/assistant_update_params.py
index 432116ad52..8f774c4e6c 100644
--- a/src/openai/types/beta/assistant_update_params.py
+++ b/src/openai/types/beta/assistant_update_params.py
@@ -97,9 +97,9 @@ class AssistantUpdateParams(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -107,6 +107,7 @@ class AssistantUpdateParams(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: Optional[AssistantResponseFormatOptionParam]
diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py
index 74786d7d5c..df789decbc 100644
--- a/src/openai/types/beta/threads/run_create_params.py
+++ b/src/openai/types/beta/threads/run_create_params.py
@@ -111,9 +111,9 @@ class RunCreateParamsBase(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -121,6 +121,7 @@ class RunCreateParamsBase(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: Optional[AssistantResponseFormatOptionParam]
diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py
index e02c06cbb0..f2d55f7ec4 100644
--- a/src/openai/types/chat/completion_create_params.py
+++ b/src/openai/types/chat/completion_create_params.py
@@ -197,9 +197,9 @@ class CompletionCreateParamsBase(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -207,6 +207,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: ResponseFormat
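
Because `xhigh` is only supported for `gpt-5.1-codex-max`, a request using the new level pairs the two explicitly. A minimal sketch against the Chat Completions parameters documented above (model availability on this endpoint is an assumption):

```python
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-5.1-codex-max",  # assumption: the model is enabled for your account
    reasoning_effort="xhigh",   # the newly documented effort level
    messages=[{"role": "user", "content": "Refactor this function for clarity."}],
)
print(completion.choices[0].message.content)
```
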
diff --git a/src/openai/types/container_create_params.py b/src/openai/types/container_create_params.py
index 01a48ac410..d629c24d38 100644
--- a/src/openai/types/container_create_params.py
+++ b/src/openai/types/container_create_params.py
@@ -19,6 +19,9 @@ class ContainerCreateParams(TypedDict, total=False):
file_ids: SequenceNotStr[str]
"""IDs of files to copy to the container."""
+ memory_limit: Literal["1g", "4g", "16g", "64g"]
+ """Optional memory limit for the container. Defaults to "1g"."""
+
class ExpiresAfter(TypedDict, total=False):
anchor: Required[Literal["last_active_at"]]
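
A short sketch of the new parameter, together with the fields now echoed back on the create/list/retrieve response models further below (the container name is illustrative):

```python
from openai import OpenAI

client = OpenAI()

container = client.containers.create(
    name="scratch-space",  # illustrative name
    memory_limit="4g",     # allowed: "1g", "4g", "16g", "64g"; defaults to "1g"
)
print(container.memory_limit, container.last_active_at)
```
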
diff --git a/src/openai/types/container_create_response.py b/src/openai/types/container_create_response.py
index c0ccc45a1c..cbad914283 100644
--- a/src/openai/types/container_create_response.py
+++ b/src/openai/types/container_create_response.py
@@ -38,3 +38,9 @@ class ContainerCreateResponse(BaseModel):
point for the expiration. The minutes is the number of minutes after the anchor
before the container expires.
"""
+
+ last_active_at: Optional[int] = None
+ """Unix timestamp (in seconds) when the container was last active."""
+
+ memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None
+ """The memory limit configured for the container."""
diff --git a/src/openai/types/container_list_response.py b/src/openai/types/container_list_response.py
index 2d9c11d8a4..29416f0941 100644
--- a/src/openai/types/container_list_response.py
+++ b/src/openai/types/container_list_response.py
@@ -38,3 +38,9 @@ class ContainerListResponse(BaseModel):
point for the expiration. The minutes is the number of minutes after the anchor
before the container expires.
"""
+
+ last_active_at: Optional[int] = None
+ """Unix timestamp (in seconds) when the container was last active."""
+
+ memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None
+ """The memory limit configured for the container."""
diff --git a/src/openai/types/container_retrieve_response.py b/src/openai/types/container_retrieve_response.py
index eab291b34f..31fedeac64 100644
--- a/src/openai/types/container_retrieve_response.py
+++ b/src/openai/types/container_retrieve_response.py
@@ -38,3 +38,9 @@ class ContainerRetrieveResponse(BaseModel):
point for the expiration. The minutes is the number of minutes after the anchor
before the container expires.
"""
+
+ last_active_at: Optional[int] = None
+ """Unix timestamp (in seconds) when the container was last active."""
+
+ memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None
+ """The memory limit configured for the container."""
diff --git a/src/openai/types/evals/create_eval_completions_run_data_source.py b/src/openai/types/evals/create_eval_completions_run_data_source.py
index 742c27a775..4236746a17 100644
--- a/src/openai/types/evals/create_eval_completions_run_data_source.py
+++ b/src/openai/types/evals/create_eval_completions_run_data_source.py
@@ -172,9 +172,9 @@ class SamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -182,6 +182,7 @@ class SamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: Optional[SamplingParamsResponseFormat] = None
diff --git a/src/openai/types/evals/create_eval_completions_run_data_source_param.py b/src/openai/types/evals/create_eval_completions_run_data_source_param.py
index 18cd5018b1..751a1432b8 100644
--- a/src/openai/types/evals/create_eval_completions_run_data_source_param.py
+++ b/src/openai/types/evals/create_eval_completions_run_data_source_param.py
@@ -168,9 +168,9 @@ class SamplingParams(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -178,6 +178,7 @@ class SamplingParams(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: SamplingParamsResponseFormat
diff --git a/src/openai/types/evals/run_cancel_response.py b/src/openai/types/evals/run_cancel_response.py
index b18598b20e..f7fb0ec4ad 100644
--- a/src/openai/types/evals/run_cancel_response.py
+++ b/src/openai/types/evals/run_cancel_response.py
@@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
temperature: Optional[float] = None
@@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int] = None
diff --git a/src/openai/types/evals/run_create_params.py b/src/openai/types/evals/run_create_params.py
index a50433f06d..a70d1923e5 100644
--- a/src/openai/types/evals/run_create_params.py
+++ b/src/openai/types/evals/run_create_params.py
@@ -116,9 +116,9 @@ class DataSourceCreateEvalResponsesRunDataSourceSourceResponses(TypedDict, total
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -126,6 +126,7 @@ class DataSourceCreateEvalResponsesRunDataSourceSourceResponses(TypedDict, total
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
temperature: Optional[float]
@@ -263,9 +264,9 @@ class DataSourceCreateEvalResponsesRunDataSourceSamplingParams(TypedDict, total=
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -273,6 +274,7 @@ class DataSourceCreateEvalResponsesRunDataSourceSamplingParams(TypedDict, total=
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: int
diff --git a/src/openai/types/evals/run_create_response.py b/src/openai/types/evals/run_create_response.py
index 41dac615c7..fb2220b3a1 100644
--- a/src/openai/types/evals/run_create_response.py
+++ b/src/openai/types/evals/run_create_response.py
@@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
temperature: Optional[float] = None
@@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int] = None
diff --git a/src/openai/types/evals/run_list_response.py b/src/openai/types/evals/run_list_response.py
index 61bff95447..adac4ffdc8 100644
--- a/src/openai/types/evals/run_list_response.py
+++ b/src/openai/types/evals/run_list_response.py
@@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
temperature: Optional[float] = None
@@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int] = None
diff --git a/src/openai/types/evals/run_retrieve_response.py b/src/openai/types/evals/run_retrieve_response.py
index 651d7423a9..abdc5ebae5 100644
--- a/src/openai/types/evals/run_retrieve_response.py
+++ b/src/openai/types/evals/run_retrieve_response.py
@@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
temperature: Optional[float] = None
@@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int] = None
diff --git a/src/openai/types/graders/score_model_grader.py b/src/openai/types/graders/score_model_grader.py
index 84686a9642..b3ba6758bb 100644
--- a/src/openai/types/graders/score_model_grader.py
+++ b/src/openai/types/graders/score_model_grader.py
@@ -67,9 +67,9 @@ class SamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -77,6 +77,7 @@ class SamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int] = None
diff --git a/src/openai/types/graders/score_model_grader_param.py b/src/openai/types/graders/score_model_grader_param.py
index aec7a95ad4..eb1f6e03ac 100644
--- a/src/openai/types/graders/score_model_grader_param.py
+++ b/src/openai/types/graders/score_model_grader_param.py
@@ -73,9 +73,9 @@ class SamplingParams(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -83,6 +83,7 @@ class SamplingParams(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int]
diff --git a/src/openai/types/realtime/__init__.py b/src/openai/types/realtime/__init__.py
index 83e81a034a..c2a141d727 100644
--- a/src/openai/types/realtime/__init__.py
+++ b/src/openai/types/realtime/__init__.py
@@ -175,6 +175,9 @@
from .response_function_call_arguments_done_event import (
ResponseFunctionCallArgumentsDoneEvent as ResponseFunctionCallArgumentsDoneEvent,
)
+from .input_audio_buffer_dtmf_event_received_event import (
+ InputAudioBufferDtmfEventReceivedEvent as InputAudioBufferDtmfEventReceivedEvent,
+)
from .realtime_conversation_item_assistant_message import (
RealtimeConversationItemAssistantMessage as RealtimeConversationItemAssistantMessage,
)
diff --git a/src/openai/types/realtime/call_accept_params.py b/src/openai/types/realtime/call_accept_params.py
index d6fc92b8e5..917b71cb0d 100644
--- a/src/openai/types/realtime/call_accept_params.py
+++ b/src/openai/types/realtime/call_accept_params.py
@@ -110,13 +110,18 @@ class CallAcceptParams(TypedDict, total=False):
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
"""
diff --git a/src/openai/types/realtime/input_audio_buffer_dtmf_event_received_event.py b/src/openai/types/realtime/input_audio_buffer_dtmf_event_received_event.py
new file mode 100644
index 0000000000..d61ed4bda7
--- /dev/null
+++ b/src/openai/types/realtime/input_audio_buffer_dtmf_event_received_event.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InputAudioBufferDtmfEventReceivedEvent"]
+
+
+class InputAudioBufferDtmfEventReceivedEvent(BaseModel):
+ event: str
+ """The telephone keypad that was pressed by the user."""
+
+ received_at: int
+ """UTC Unix Timestamp when DTMF Event was received by server."""
+
+ type: Literal["input_audio_buffer.dtmf_event_received"]
+ """The event type, must be `input_audio_buffer.dtmf_event_received`."""
diff --git a/src/openai/types/realtime/realtime_audio_input_turn_detection.py b/src/openai/types/realtime/realtime_audio_input_turn_detection.py
index d3f4e00316..9b55353884 100644
--- a/src/openai/types/realtime/realtime_audio_input_turn_detection.py
+++ b/src/openai/types/realtime/realtime_audio_input_turn_detection.py
@@ -14,9 +14,14 @@ class ServerVad(BaseModel):
"""Type of turn detection, `server_vad` to turn on simple Server VAD."""
create_response: Optional[bool] = None
- """
- Whether or not to automatically generate a response when a VAD stop event
+ """Whether or not to automatically generate a response when a VAD stop event
occurs.
+
+ If `interrupt_response` is set to `false`, this may fail to create a response if
+ the model is already responding.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
idle_timeout_ms: Optional[int] = None
@@ -37,9 +42,13 @@ class ServerVad(BaseModel):
interrupt_response: Optional[bool] = None
"""
- Whether or not to automatically interrupt any ongoing response with output to
- the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- occurs.
+ Whether or not to automatically interrupt (cancel) any ongoing response with
+ output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ start event occurs. If `true` then the response will be cancelled, otherwise it
+ will continue until complete.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
prefix_padding_ms: Optional[int] = None
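
The expanded docstrings spell out how the two flags interact. A minimal sketch of a `server_vad` turn-detection config in plain-dict form:

```python
# With both flags False, VAD events are still emitted, but the model neither
# auto-creates a response on VAD stop nor cancels one on VAD start.
turn_detection = {
    "type": "server_vad",
    "create_response": False,
    "interrupt_response": False,
}
```
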
diff --git a/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py b/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py
index 09b8cfd159..4ce7640727 100644
--- a/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py
+++ b/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py
@@ -13,9 +13,14 @@ class ServerVad(TypedDict, total=False):
"""Type of turn detection, `server_vad` to turn on simple Server VAD."""
create_response: bool
- """
- Whether or not to automatically generate a response when a VAD stop event
+ """Whether or not to automatically generate a response when a VAD stop event
occurs.
+
+ If `interrupt_response` is set to `false`, this may fail to create a response if
+ the model is already responding.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
idle_timeout_ms: Optional[int]
@@ -36,9 +41,13 @@ class ServerVad(TypedDict, total=False):
interrupt_response: bool
"""
- Whether or not to automatically interrupt any ongoing response with output to
- the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- occurs.
+ Whether or not to automatically interrupt (cancel) any ongoing response with
+ output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ start event occurs. If `true` then the response will be cancelled, otherwise it
+ will continue until complete.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
prefix_padding_ms: int
diff --git a/src/openai/types/realtime/realtime_server_event.py b/src/openai/types/realtime/realtime_server_event.py
index 1605b81a97..ead98f1a54 100644
--- a/src/openai/types/realtime/realtime_server_event.py
+++ b/src/openai/types/realtime/realtime_server_event.py
@@ -42,6 +42,7 @@
from .input_audio_buffer_speech_started_event import InputAudioBufferSpeechStartedEvent
from .input_audio_buffer_speech_stopped_event import InputAudioBufferSpeechStoppedEvent
from .response_function_call_arguments_done_event import ResponseFunctionCallArgumentsDoneEvent
+from .input_audio_buffer_dtmf_event_received_event import InputAudioBufferDtmfEventReceivedEvent
from .response_function_call_arguments_delta_event import ResponseFunctionCallArgumentsDeltaEvent
from .conversation_item_input_audio_transcription_segment import ConversationItemInputAudioTranscriptionSegment
from .conversation_item_input_audio_transcription_delta_event import ConversationItemInputAudioTranscriptionDeltaEvent
@@ -116,6 +117,7 @@ class OutputAudioBufferCleared(BaseModel):
RealtimeErrorEvent,
InputAudioBufferClearedEvent,
InputAudioBufferCommittedEvent,
+ InputAudioBufferDtmfEventReceivedEvent,
InputAudioBufferSpeechStartedEvent,
InputAudioBufferSpeechStoppedEvent,
RateLimitsUpdatedEvent,
diff --git a/src/openai/types/realtime/realtime_session_create_request.py b/src/openai/types/realtime/realtime_session_create_request.py
index 016ae45b67..80cf468dc8 100644
--- a/src/openai/types/realtime/realtime_session_create_request.py
+++ b/src/openai/types/realtime/realtime_session_create_request.py
@@ -110,13 +110,18 @@ class RealtimeSessionCreateRequest(BaseModel):
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
"""
diff --git a/src/openai/types/realtime/realtime_session_create_request_param.py b/src/openai/types/realtime/realtime_session_create_request_param.py
index 8c3998c1ca..578d5a502d 100644
--- a/src/openai/types/realtime/realtime_session_create_request_param.py
+++ b/src/openai/types/realtime/realtime_session_create_request_param.py
@@ -110,13 +110,18 @@ class RealtimeSessionCreateRequestParam(TypedDict, total=False):
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
"""
diff --git a/src/openai/types/realtime/realtime_session_create_response.py b/src/openai/types/realtime/realtime_session_create_response.py
index c1336cd6e4..df69dd7bdb 100644
--- a/src/openai/types/realtime/realtime_session_create_response.py
+++ b/src/openai/types/realtime/realtime_session_create_response.py
@@ -53,9 +53,14 @@ class AudioInputTurnDetectionServerVad(BaseModel):
"""Type of turn detection, `server_vad` to turn on simple Server VAD."""
create_response: Optional[bool] = None
- """
- Whether or not to automatically generate a response when a VAD stop event
+ """Whether or not to automatically generate a response when a VAD stop event
occurs.
+
+ If `interrupt_response` is set to `false`, this may fail to create a response if
+ the model is already responding.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
idle_timeout_ms: Optional[int] = None
@@ -76,9 +81,13 @@ class AudioInputTurnDetectionServerVad(BaseModel):
interrupt_response: Optional[bool] = None
"""
- Whether or not to automatically interrupt any ongoing response with output to
- the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- occurs.
+ Whether or not to automatically interrupt (cancel) any ongoing response with
+ output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ start event occurs. If `true` then the response will be cancelled, otherwise it
+ will continue until complete.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
prefix_padding_ms: Optional[int] = None
@@ -463,13 +472,18 @@ class RealtimeSessionCreateResponse(BaseModel):
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
"""
diff --git a/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py b/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py
index 7dc7a8f302..e21844f48f 100644
--- a/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py
+++ b/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py
@@ -14,9 +14,14 @@ class ServerVad(BaseModel):
"""Type of turn detection, `server_vad` to turn on simple Server VAD."""
create_response: Optional[bool] = None
- """
- Whether or not to automatically generate a response when a VAD stop event
+ """Whether or not to automatically generate a response when a VAD stop event
occurs.
+
+ If `interrupt_response` is set to `false`, this may fail to create a response if
+ the model is already responding.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
idle_timeout_ms: Optional[int] = None
@@ -37,9 +42,13 @@ class ServerVad(BaseModel):
interrupt_response: Optional[bool] = None
"""
- Whether or not to automatically interrupt any ongoing response with output to
- the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- occurs.
+ Whether or not to automatically interrupt (cancel) any ongoing response with
+ output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ start event occurs. If `true` then the response will be cancelled, otherwise it
+ will continue until complete.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
prefix_padding_ms: Optional[int] = None
diff --git a/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py b/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py
index d899b8c5c1..507c43141e 100644
--- a/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py
+++ b/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py
@@ -13,9 +13,14 @@ class ServerVad(TypedDict, total=False):
"""Type of turn detection, `server_vad` to turn on simple Server VAD."""
create_response: bool
- """
- Whether or not to automatically generate a response when a VAD stop event
+ """Whether or not to automatically generate a response when a VAD stop event
occurs.
+
+ If `interrupt_response` is set to `false`, this may fail to create a response if
+ the model is already responding.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
idle_timeout_ms: Optional[int]
@@ -36,9 +41,13 @@ class ServerVad(TypedDict, total=False):
interrupt_response: bool
"""
- Whether or not to automatically interrupt any ongoing response with output to
- the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- occurs.
+ Whether or not to automatically interrupt (cancel) any ongoing response with
+ output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ start event occurs. If `true` then the response will be cancelled, otherwise it
+ will continue until complete.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
prefix_padding_ms: int
diff --git a/src/openai/types/responses/__init__.py b/src/openai/types/responses/__init__.py
index e707141d9a..a4d939d9ff 100644
--- a/src/openai/types/responses/__init__.py
+++ b/src/openai/types/responses/__init__.py
@@ -28,6 +28,7 @@
from .custom_tool_param import CustomToolParam as CustomToolParam
from .tool_choice_shell import ToolChoiceShell as ToolChoiceShell
from .tool_choice_types import ToolChoiceTypes as ToolChoiceTypes
+from .compacted_response import CompactedResponse as CompactedResponse
from .easy_input_message import EasyInputMessage as EasyInputMessage
from .response_item_list import ResponseItemList as ResponseItemList
from .tool_choice_custom import ToolChoiceCustom as ToolChoiceCustom
@@ -60,6 +61,7 @@
from .response_create_params import ResponseCreateParams as ResponseCreateParams
from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent
from .response_input_content import ResponseInputContent as ResponseInputContent
+from .response_compact_params import ResponseCompactParams as ResponseCompactParams
from .response_output_message import ResponseOutputMessage as ResponseOutputMessage
from .response_output_refusal import ResponseOutputRefusal as ResponseOutputRefusal
from .response_reasoning_item import ResponseReasoningItem as ResponseReasoningItem
@@ -69,6 +71,7 @@
from .web_search_preview_tool import WebSearchPreviewTool as WebSearchPreviewTool
from .easy_input_message_param import EasyInputMessageParam as EasyInputMessageParam
from .input_token_count_params import InputTokenCountParams as InputTokenCountParams
+from .response_compaction_item import ResponseCompactionItem as ResponseCompactionItem
from .response_completed_event import ResponseCompletedEvent as ResponseCompletedEvent
from .response_retrieve_params import ResponseRetrieveParams as ResponseRetrieveParams
from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent
@@ -108,6 +111,7 @@
from .tool_choice_apply_patch_param import ToolChoiceApplyPatchParam as ToolChoiceApplyPatchParam
from .web_search_preview_tool_param import WebSearchPreviewToolParam as WebSearchPreviewToolParam
from .response_apply_patch_tool_call import ResponseApplyPatchToolCall as ResponseApplyPatchToolCall
+from .response_compaction_item_param import ResponseCompactionItemParam as ResponseCompactionItemParam
from .response_file_search_tool_call import ResponseFileSearchToolCall as ResponseFileSearchToolCall
from .response_mcp_call_failed_event import ResponseMcpCallFailedEvent as ResponseMcpCallFailedEvent
from .response_custom_tool_call_param import ResponseCustomToolCallParam as ResponseCustomToolCallParam
@@ -133,6 +137,7 @@
from .response_mcp_call_in_progress_event import ResponseMcpCallInProgressEvent as ResponseMcpCallInProgressEvent
from .response_reasoning_text_delta_event import ResponseReasoningTextDeltaEvent as ResponseReasoningTextDeltaEvent
from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent as ResponseAudioTranscriptDoneEvent
+from .response_compaction_item_param_param import ResponseCompactionItemParamParam as ResponseCompactionItemParamParam
from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam as ResponseFileSearchToolCallParam
from .response_mcp_list_tools_failed_event import ResponseMcpListToolsFailedEvent as ResponseMcpListToolsFailedEvent
from .response_apply_patch_tool_call_output import ResponseApplyPatchToolCallOutput as ResponseApplyPatchToolCallOutput
diff --git a/src/openai/types/responses/compacted_response.py b/src/openai/types/responses/compacted_response.py
new file mode 100644
index 0000000000..5b333b83c0
--- /dev/null
+++ b/src/openai/types/responses/compacted_response.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .response_usage import ResponseUsage
+from .response_output_item import ResponseOutputItem
+
+__all__ = ["CompactedResponse"]
+
+
+class CompactedResponse(BaseModel):
+ id: str
+ """The unique identifier for the compacted response."""
+
+ created_at: int
+ """Unix timestamp (in seconds) when the compacted conversation was created."""
+
+ object: Literal["response.compaction"]
+ """The object type. Always `response.compaction`."""
+
+ output: List[ResponseOutputItem]
+ """The compacted list of output items.
+
+ This is a list of all user messages, followed by a single compaction item.
+ """
+
+ usage: ResponseUsage
+ """
+ Token accounting for the compaction pass, including cached, reasoning, and total
+ tokens.
+ """
diff --git a/src/openai/types/responses/parsed_response.py b/src/openai/types/responses/parsed_response.py
index c120f4641d..a859710590 100644
--- a/src/openai/types/responses/parsed_response.py
+++ b/src/openai/types/responses/parsed_response.py
@@ -6,7 +6,6 @@
from ..._utils import PropertyInfo
from .response import Response
from ..._models import GenericModel
-from ..._utils._transform import PropertyInfo
from .response_output_item import (
McpCall,
McpListTools,
@@ -19,6 +18,7 @@
from .response_output_message import ResponseOutputMessage
from .response_output_refusal import ResponseOutputRefusal
from .response_reasoning_item import ResponseReasoningItem
+from .response_compaction_item import ResponseCompactionItem
from .response_custom_tool_call import ResponseCustomToolCall
from .response_computer_tool_call import ResponseComputerToolCall
from .response_function_tool_call import ResponseFunctionToolCall
@@ -79,6 +79,7 @@ class ParsedResponseFunctionToolCall(ResponseFunctionToolCall):
McpListTools,
ResponseCodeInterpreterToolCall,
ResponseCustomToolCall,
+ ResponseCompactionItem,
ResponseFunctionShellToolCall,
ResponseFunctionShellToolCallOutput,
ResponseApplyPatchToolCall,
diff --git a/src/openai/types/responses/response_compact_params.py b/src/openai/types/responses/response_compact_params.py
new file mode 100644
index 0000000000..fe38b15a9d
--- /dev/null
+++ b/src/openai/types/responses/response_compact_params.py
@@ -0,0 +1,126 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from typing_extensions import Literal, TypedDict
+
+from .response_input_item_param import ResponseInputItemParam
+
+__all__ = ["ResponseCompactParams"]
+
+
+class ResponseCompactParams(TypedDict, total=False):
+ input: Union[str, Iterable[ResponseInputItemParam], None]
+ """Text, image, or file inputs to the model, used to generate a response"""
+
+ instructions: Optional[str]
+ """
+ A system (or developer) message inserted into the model's context. When used
+ along with `previous_response_id`, the instructions from a previous response
+ will not be carried over to the next response. This makes it simple to swap out
+ system (or developer) messages in new responses.
+ """
+
+ model: Union[
+ Literal[
+ "gpt-5.1",
+ "gpt-5.1-2025-11-13",
+ "gpt-5.1-codex",
+ "gpt-5.1-mini",
+ "gpt-5.1-chat-latest",
+ "gpt-5",
+ "gpt-5-mini",
+ "gpt-5-nano",
+ "gpt-5-2025-08-07",
+ "gpt-5-mini-2025-08-07",
+ "gpt-5-nano-2025-08-07",
+ "gpt-5-chat-latest",
+ "gpt-4.1",
+ "gpt-4.1-mini",
+ "gpt-4.1-nano",
+ "gpt-4.1-2025-04-14",
+ "gpt-4.1-mini-2025-04-14",
+ "gpt-4.1-nano-2025-04-14",
+ "o4-mini",
+ "o4-mini-2025-04-16",
+ "o3",
+ "o3-2025-04-16",
+ "o3-mini",
+ "o3-mini-2025-01-31",
+ "o1",
+ "o1-2024-12-17",
+ "o1-preview",
+ "o1-preview-2024-09-12",
+ "o1-mini",
+ "o1-mini-2024-09-12",
+ "gpt-4o",
+ "gpt-4o-2024-11-20",
+ "gpt-4o-2024-08-06",
+ "gpt-4o-2024-05-13",
+ "gpt-4o-audio-preview",
+ "gpt-4o-audio-preview-2024-10-01",
+ "gpt-4o-audio-preview-2024-12-17",
+ "gpt-4o-audio-preview-2025-06-03",
+ "gpt-4o-mini-audio-preview",
+ "gpt-4o-mini-audio-preview-2024-12-17",
+ "gpt-4o-search-preview",
+ "gpt-4o-mini-search-preview",
+ "gpt-4o-search-preview-2025-03-11",
+ "gpt-4o-mini-search-preview-2025-03-11",
+ "chatgpt-4o-latest",
+ "codex-mini-latest",
+ "gpt-4o-mini",
+ "gpt-4o-mini-2024-07-18",
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0301",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ "o1-pro",
+ "o1-pro-2025-03-19",
+ "o3-pro",
+ "o3-pro-2025-06-10",
+ "o3-deep-research",
+ "o3-deep-research-2025-06-26",
+ "o4-mini-deep-research",
+ "o4-mini-deep-research-2025-06-26",
+ "computer-use-preview",
+ "computer-use-preview-2025-03-11",
+ "gpt-5-codex",
+ "gpt-5-pro",
+ "gpt-5-pro-2025-10-06",
+ "gpt-5.1-codex-max",
+ ],
+ str,
+ None,
+ ]
+ """Model ID used to generate the response, like `gpt-5` or `o3`.
+
+ OpenAI offers a wide range of models with different capabilities, performance
+ characteristics, and price points. Refer to the
+ [model guide](https://platform.openai.com/docs/models) to browse and compare
+ available models.
+ """
+
+ previous_response_id: Optional[str]
+ """The unique ID of the previous response to the model.
+
+ Use this to create multi-turn conversations. Learn more about
+ [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+ Cannot be used in conjunction with `conversation`.
+ """
diff --git a/src/openai/types/responses/response_compaction_item.py b/src/openai/types/responses/response_compaction_item.py
new file mode 100644
index 0000000000..dc5f839bb8
--- /dev/null
+++ b/src/openai/types/responses/response_compaction_item.py
@@ -0,0 +1,20 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseCompactionItem"]
+
+
+class ResponseCompactionItem(BaseModel):
+ id: str
+ """The unique ID of the compaction item."""
+
+ encrypted_content: str
+
+ type: Literal["compaction"]
+ """The type of the item. Always `compaction`."""
+
+ created_by: Optional[str] = None
diff --git a/src/openai/types/responses/response_compaction_item_param.py b/src/openai/types/responses/response_compaction_item_param.py
new file mode 100644
index 0000000000..8fdc2a561a
--- /dev/null
+++ b/src/openai/types/responses/response_compaction_item_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseCompactionItemParam"]
+
+
+class ResponseCompactionItemParam(BaseModel):
+ encrypted_content: str
+
+ type: Literal["compaction"]
+ """The type of the item. Always `compaction`."""
+
+ id: Optional[str] = None
+ """The ID of the compaction item."""
diff --git a/src/openai/types/responses/response_compaction_item_param_param.py b/src/openai/types/responses/response_compaction_item_param_param.py
new file mode 100644
index 0000000000..0d12296589
--- /dev/null
+++ b/src/openai/types/responses/response_compaction_item_param_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ResponseCompactionItemParamParam"]
+
+
+class ResponseCompactionItemParamParam(TypedDict, total=False):
+ encrypted_content: Required[str]
+
+ type: Required[Literal["compaction"]]
+ """The type of the item. Always `compaction`."""
+
+ id: Optional[str]
+ """The ID of the compaction item."""
diff --git a/src/openai/types/responses/response_function_shell_call_output_content.py b/src/openai/types/responses/response_function_shell_call_output_content.py
index 1429ce9724..e0e2c09ad1 100644
--- a/src/openai/types/responses/response_function_shell_call_output_content.py
+++ b/src/openai/types/responses/response_function_shell_call_output_content.py
@@ -27,10 +27,10 @@ class OutcomeExit(BaseModel):
class ResponseFunctionShellCallOutputContent(BaseModel):
outcome: Outcome
- """The exit or timeout outcome associated with this chunk."""
+ """The exit or timeout outcome associated with this shell call."""
stderr: str
- """Captured stderr output for this chunk of the shell call."""
+ """Captured stderr output for the shell call."""
stdout: str
- """Captured stdout output for this chunk of the shell call."""
+ """Captured stdout output for the shell call."""
diff --git a/src/openai/types/responses/response_function_shell_call_output_content_param.py b/src/openai/types/responses/response_function_shell_call_output_content_param.py
index 6395541cf5..fa065bd4b5 100644
--- a/src/openai/types/responses/response_function_shell_call_output_content_param.py
+++ b/src/openai/types/responses/response_function_shell_call_output_content_param.py
@@ -26,10 +26,10 @@ class OutcomeExit(TypedDict, total=False):
class ResponseFunctionShellCallOutputContentParam(TypedDict, total=False):
outcome: Required[Outcome]
- """The exit or timeout outcome associated with this chunk."""
+ """The exit or timeout outcome associated with this shell call."""
stderr: Required[str]
- """Captured stderr output for this chunk of the shell call."""
+ """Captured stderr output for the shell call."""
stdout: Required[str]
- """Captured stdout output for this chunk of the shell call."""
+ """Captured stdout output for the shell call."""
diff --git a/src/openai/types/responses/response_function_shell_tool_call.py b/src/openai/types/responses/response_function_shell_tool_call.py
index be0a5bcff8..de42cb0640 100644
--- a/src/openai/types/responses/response_function_shell_tool_call.py
+++ b/src/openai/types/responses/response_function_shell_tool_call.py
@@ -20,7 +20,7 @@ class Action(BaseModel):
class ResponseFunctionShellToolCall(BaseModel):
id: str
- """The unique ID of the function shell tool call.
+ """The unique ID of the shell tool call.
Populated when this item is returned via API.
"""
@@ -29,7 +29,7 @@ class ResponseFunctionShellToolCall(BaseModel):
"""The shell commands and limits that describe how to run the tool call."""
call_id: str
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
status: Literal["in_progress", "completed", "incomplete"]
"""The status of the shell call.
diff --git a/src/openai/types/responses/response_input_item.py b/src/openai/types/responses/response_input_item.py
index eaf5396087..103c8634ce 100644
--- a/src/openai/types/responses/response_input_item.py
+++ b/src/openai/types/responses/response_input_item.py
@@ -12,6 +12,7 @@
from .response_computer_tool_call import ResponseComputerToolCall
from .response_function_tool_call import ResponseFunctionToolCall
from .response_function_web_search import ResponseFunctionWebSearch
+from .response_compaction_item_param import ResponseCompactionItemParam
from .response_file_search_tool_call import ResponseFileSearchToolCall
from .response_custom_tool_call_output import ResponseCustomToolCallOutput
from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall
@@ -215,13 +216,13 @@ class ShellCall(BaseModel):
"""The shell commands and limits that describe how to run the tool call."""
call_id: str
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
type: Literal["shell_call"]
- """The type of the item. Always `function_shell_call`."""
+ """The type of the item. Always `shell_call`."""
id: Optional[str] = None
- """The unique ID of the function shell tool call.
+ """The unique ID of the shell tool call.
Populated when this item is returned via API.
"""
@@ -235,7 +236,7 @@ class ShellCall(BaseModel):
class ShellCallOutput(BaseModel):
call_id: str
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
output: List[ResponseFunctionShellCallOutputContent]
"""
@@ -244,10 +245,10 @@ class ShellCallOutput(BaseModel):
"""
type: Literal["shell_call_output"]
- """The type of the item. Always `function_shell_call_output`."""
+ """The type of the item. Always `shell_call_output`."""
id: Optional[str] = None
- """The unique ID of the function shell tool call output.
+ """The unique ID of the shell tool call output.
Populated when this item is returned via API.
"""
@@ -462,6 +463,7 @@ class ItemReference(BaseModel):
ResponseFunctionToolCall,
FunctionCallOutput,
ResponseReasoningItem,
+ ResponseCompactionItemParam,
ImageGenerationCall,
ResponseCodeInterpreterToolCall,
LocalShellCall,
diff --git a/src/openai/types/responses/response_input_item_param.py b/src/openai/types/responses/response_input_item_param.py
index 5c2e81c4de..85d9f92b23 100644
--- a/src/openai/types/responses/response_input_item_param.py
+++ b/src/openai/types/responses/response_input_item_param.py
@@ -13,6 +13,7 @@
from .response_computer_tool_call_param import ResponseComputerToolCallParam
from .response_function_tool_call_param import ResponseFunctionToolCallParam
from .response_function_web_search_param import ResponseFunctionWebSearchParam
+from .response_compaction_item_param_param import ResponseCompactionItemParamParam
from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam
from .response_custom_tool_call_output_param import ResponseCustomToolCallOutputParam
from .response_code_interpreter_tool_call_param import ResponseCodeInterpreterToolCallParam
@@ -216,13 +217,13 @@ class ShellCall(TypedDict, total=False):
"""The shell commands and limits that describe how to run the tool call."""
call_id: Required[str]
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
type: Required[Literal["shell_call"]]
- """The type of the item. Always `function_shell_call`."""
+ """The type of the item. Always `shell_call`."""
id: Optional[str]
- """The unique ID of the function shell tool call.
+ """The unique ID of the shell tool call.
Populated when this item is returned via API.
"""
@@ -236,7 +237,7 @@ class ShellCall(TypedDict, total=False):
class ShellCallOutput(TypedDict, total=False):
call_id: Required[str]
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
output: Required[Iterable[ResponseFunctionShellCallOutputContentParam]]
"""
@@ -245,10 +246,10 @@ class ShellCallOutput(TypedDict, total=False):
"""
type: Required[Literal["shell_call_output"]]
- """The type of the item. Always `function_shell_call_output`."""
+ """The type of the item. Always `shell_call_output`."""
id: Optional[str]
- """The unique ID of the function shell tool call output.
+ """The unique ID of the shell tool call output.
Populated when this item is returned via API.
"""
@@ -461,6 +462,7 @@ class ItemReference(TypedDict, total=False):
ResponseFunctionToolCallParam,
FunctionCallOutput,
ResponseReasoningItemParam,
+ ResponseCompactionItemParamParam,
ImageGenerationCall,
ResponseCodeInterpreterToolCallParam,
LocalShellCall,
diff --git a/src/openai/types/responses/response_input_param.py b/src/openai/types/responses/response_input_param.py
index 365c6b3d7b..bbd8e6af79 100644
--- a/src/openai/types/responses/response_input_param.py
+++ b/src/openai/types/responses/response_input_param.py
@@ -13,6 +13,7 @@
from .response_computer_tool_call_param import ResponseComputerToolCallParam
from .response_function_tool_call_param import ResponseFunctionToolCallParam
from .response_function_web_search_param import ResponseFunctionWebSearchParam
+from .response_compaction_item_param_param import ResponseCompactionItemParamParam
from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam
from .response_custom_tool_call_output_param import ResponseCustomToolCallOutputParam
from .response_code_interpreter_tool_call_param import ResponseCodeInterpreterToolCallParam
@@ -217,13 +218,13 @@ class ShellCall(TypedDict, total=False):
"""The shell commands and limits that describe how to run the tool call."""
call_id: Required[str]
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
type: Required[Literal["shell_call"]]
- """The type of the item. Always `function_shell_call`."""
+ """The type of the item. Always `shell_call`."""
id: Optional[str]
- """The unique ID of the function shell tool call.
+ """The unique ID of the shell tool call.
Populated when this item is returned via API.
"""
@@ -237,7 +238,7 @@ class ShellCall(TypedDict, total=False):
class ShellCallOutput(TypedDict, total=False):
call_id: Required[str]
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
output: Required[Iterable[ResponseFunctionShellCallOutputContentParam]]
"""
@@ -246,10 +247,10 @@ class ShellCallOutput(TypedDict, total=False):
"""
type: Required[Literal["shell_call_output"]]
- """The type of the item. Always `function_shell_call_output`."""
+ """The type of the item. Always `shell_call_output`."""
id: Optional[str]
- """The unique ID of the function shell tool call output.
+ """The unique ID of the shell tool call output.
Populated when this item is returned via API.
"""
@@ -462,6 +463,7 @@ class ItemReference(TypedDict, total=False):
ResponseFunctionToolCallParam,
FunctionCallOutput,
ResponseReasoningItemParam,
+ ResponseCompactionItemParamParam,
ImageGenerationCall,
ResponseCodeInterpreterToolCallParam,
LocalShellCall,
diff --git a/src/openai/types/responses/response_output_item.py b/src/openai/types/responses/response_output_item.py
index 906ddbb25e..f0a66e1836 100644
--- a/src/openai/types/responses/response_output_item.py
+++ b/src/openai/types/responses/response_output_item.py
@@ -7,6 +7,7 @@
from ..._models import BaseModel
from .response_output_message import ResponseOutputMessage
from .response_reasoning_item import ResponseReasoningItem
+from .response_compaction_item import ResponseCompactionItem
from .response_custom_tool_call import ResponseCustomToolCall
from .response_computer_tool_call import ResponseComputerToolCall
from .response_function_tool_call import ResponseFunctionToolCall
@@ -173,6 +174,7 @@ class McpApprovalRequest(BaseModel):
ResponseFunctionWebSearch,
ResponseComputerToolCall,
ResponseReasoningItem,
+ ResponseCompactionItem,
ImageGenerationCall,
ResponseCodeInterpreterToolCall,
LocalShellCall,
diff --git a/src/openai/types/responses/tool.py b/src/openai/types/responses/tool.py
index ae8b34b1f4..bb32d4e1ec 100644
--- a/src/openai/types/responses/tool.py
+++ b/src/openai/types/responses/tool.py
@@ -174,7 +174,7 @@ class CodeInterpreter(BaseModel):
"""The code interpreter container.
Can be a container ID or an object that specifies uploaded file IDs to make
- available to your code.
+ available to your code, along with an optional `memory_limit` setting.
"""
type: Literal["code_interpreter"]
diff --git a/src/openai/types/responses/tool_param.py b/src/openai/types/responses/tool_param.py
index 18b044ab8c..779acf0a53 100644
--- a/src/openai/types/responses/tool_param.py
+++ b/src/openai/types/responses/tool_param.py
@@ -174,7 +174,7 @@ class CodeInterpreter(TypedDict, total=False):
"""The code interpreter container.
Can be a container ID or an object that specifies uploaded file IDs to make
- available to your code.
+ available to your code, along with an optional `memory_limit` setting.
"""
type: Required[Literal["code_interpreter"]]
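
A hedged sketch of passing the new `memory_limit` setting on a code interpreter container; the `"auto"` container shape is standard, while the `memory_limit` key and the `"1g"` value are assumptions based on the docstring above and the containers test later in this diff:

```python
from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="gpt-5.1",
    input="Plot a histogram of 1000 random values.",
    tools=[
        {
            "type": "code_interpreter",
            # Auto container with an optional memory limit; "1g" mirrors the
            # value used in tests/api_resources/test_containers.py.
            "container": {
                "type": "auto",
                "file_ids": [],
                "memory_limit": "1g",
            },
        }
    ],
)
print(response.output_text)
```
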
diff --git a/src/openai/types/shared/all_models.py b/src/openai/types/shared/all_models.py
index 3e0b09e2d1..ba8e1d82cf 100644
--- a/src/openai/types/shared/all_models.py
+++ b/src/openai/types/shared/all_models.py
@@ -24,5 +24,6 @@
"gpt-5-codex",
"gpt-5-pro",
"gpt-5-pro-2025-10-06",
+ "gpt-5.1-codex-max",
],
]
diff --git a/src/openai/types/shared/reasoning.py b/src/openai/types/shared/reasoning.py
index cf470ca057..b19476bcb5 100644
--- a/src/openai/types/shared/reasoning.py
+++ b/src/openai/types/shared/reasoning.py
@@ -14,9 +14,9 @@ class Reasoning(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -24,6 +24,7 @@ class Reasoning(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
generate_summary: Optional[Literal["auto", "concise", "detailed"]] = None
diff --git a/src/openai/types/shared/reasoning_effort.py b/src/openai/types/shared/reasoning_effort.py
index c890a133cc..24d8516424 100644
--- a/src/openai/types/shared/reasoning_effort.py
+++ b/src/openai/types/shared/reasoning_effort.py
@@ -5,4 +5,4 @@
__all__ = ["ReasoningEffort"]
-ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high"]]
+ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]]
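
Per the updated docstrings, `xhigh` is currently only supported by `gpt-5.1-codex-max`. A minimal sketch of requesting it on the Responses API:

```python
from openai import OpenAI

client = OpenAI()

# `xhigh` reasoning effort paired with the model that supports it.
response = client.responses.create(
    model="gpt-5.1-codex-max",
    reasoning={"effort": "xhigh"},
    input="Refactor this function to remove the global state.",
)
print(response.output_text)
```
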
diff --git a/src/openai/types/shared/responses_model.py b/src/openai/types/shared/responses_model.py
index 432cb82afd..38cdea9a94 100644
--- a/src/openai/types/shared/responses_model.py
+++ b/src/openai/types/shared/responses_model.py
@@ -24,5 +24,6 @@
"gpt-5-codex",
"gpt-5-pro",
"gpt-5-pro-2025-10-06",
+ "gpt-5.1-codex-max",
],
]
diff --git a/src/openai/types/shared_params/reasoning.py b/src/openai/types/shared_params/reasoning.py
index ad58f70b71..71cb37c65e 100644
--- a/src/openai/types/shared_params/reasoning.py
+++ b/src/openai/types/shared_params/reasoning.py
@@ -15,9 +15,9 @@ class Reasoning(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -25,6 +25,7 @@ class Reasoning(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
generate_summary: Optional[Literal["auto", "concise", "detailed"]]
diff --git a/src/openai/types/shared_params/reasoning_effort.py b/src/openai/types/shared_params/reasoning_effort.py
index e388eebff1..8518c2b141 100644
--- a/src/openai/types/shared_params/reasoning_effort.py
+++ b/src/openai/types/shared_params/reasoning_effort.py
@@ -7,4 +7,4 @@
__all__ = ["ReasoningEffort"]
-ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high"]]
+ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]]
diff --git a/src/openai/types/shared_params/responses_model.py b/src/openai/types/shared_params/responses_model.py
index fe34eb0f62..ad44dd6bf7 100644
--- a/src/openai/types/shared_params/responses_model.py
+++ b/src/openai/types/shared_params/responses_model.py
@@ -26,5 +26,6 @@
"gpt-5-codex",
"gpt-5-pro",
"gpt-5-pro-2025-10-06",
+ "gpt-5.1-codex-max",
],
]
diff --git a/src/openai/types/video_create_params.py b/src/openai/types/video_create_params.py
index 527d62d193..c4d3e0851f 100644
--- a/src/openai/types/video_create_params.py
+++ b/src/openai/types/video_create_params.py
@@ -20,10 +20,16 @@ class VideoCreateParams(TypedDict, total=False):
"""Optional image reference that guides generation."""
model: VideoModel
- """The video generation model to use. Defaults to `sora-2`."""
+ """The video generation model to use (allowed values: sora-2, sora-2-pro).
+
+ Defaults to `sora-2`.
+ """
seconds: VideoSeconds
- """Clip duration in seconds. Defaults to 4 seconds."""
+ """Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds."""
size: VideoSize
- """Output resolution formatted as width x height. Defaults to 720x1280."""
+ """
+ Output resolution formatted as width x height (allowed values: 720x1280,
+ 1280x720, 1024x1792, 1792x1024). Defaults to 720x1280.
+ """
diff --git a/tests/api_resources/test_containers.py b/tests/api_resources/test_containers.py
index c972f6539d..cf173c7fd5 100644
--- a/tests/api_resources/test_containers.py
+++ b/tests/api_resources/test_containers.py
@@ -38,6 +38,7 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None:
"minutes": 0,
},
file_ids=["string"],
+ memory_limit="1g",
)
assert_matches_type(ContainerCreateResponse, container, path=["response"])
@@ -197,6 +198,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) ->
"minutes": 0,
},
file_ids=["string"],
+ memory_limit="1g",
)
assert_matches_type(ContainerCreateResponse, container, path=["response"])
diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py
index b57e6099c4..14e2d911ef 100644
--- a/tests/api_resources/test_responses.py
+++ b/tests/api_resources/test_responses.py
@@ -12,6 +12,7 @@
from openai._utils import assert_signatures_in_sync
from openai.types.responses import (
Response,
+ CompactedResponse,
)
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -36,7 +37,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
max_output_tokens=0,
max_tool_calls=0,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="gpt-5.1",
parallel_tool_calls=True,
previous_response_id="previous_response_id",
prompt={
@@ -117,7 +118,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
max_output_tokens=0,
max_tool_calls=0,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="gpt-5.1",
parallel_tool_calls=True,
previous_response_id="previous_response_id",
prompt={
@@ -358,6 +359,41 @@ def test_path_params_cancel(self, client: OpenAI) -> None:
"",
)
+ @parametrize
+ def test_method_compact(self, client: OpenAI) -> None:
+ response = client.responses.compact()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ def test_method_compact_with_all_params(self, client: OpenAI) -> None:
+ response = client.responses.compact(
+ input="string",
+ instructions="instructions",
+ model="gpt-5.1",
+ previous_response_id="resp_123",
+ )
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ def test_raw_response_compact(self, client: OpenAI) -> None:
+ http_response = client.responses.with_raw_response.compact()
+
+ assert http_response.is_closed is True
+ assert http_response.http_request.headers.get("X-Stainless-Lang") == "python"
+ response = http_response.parse()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ def test_streaming_response_compact(self, client: OpenAI) -> None:
+ with client.responses.with_streaming_response.compact() as http_response:
+ assert not http_response.is_closed
+ assert http_response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ response = http_response.parse()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ assert cast(Any, http_response.is_closed) is True
+
@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"])
def test_parse_method_in_sync(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None:
@@ -391,7 +427,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
max_output_tokens=0,
max_tool_calls=0,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="gpt-5.1",
parallel_tool_calls=True,
previous_response_id="previous_response_id",
prompt={
@@ -472,7 +508,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
max_output_tokens=0,
max_tool_calls=0,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="gpt-5.1",
parallel_tool_calls=True,
previous_response_id="previous_response_id",
prompt={
@@ -712,3 +748,38 @@ async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None:
await async_client.responses.with_raw_response.cancel(
"",
)
+
+ @parametrize
+ async def test_method_compact(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.responses.compact()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ async def test_method_compact_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.responses.compact(
+ input="string",
+ instructions="instructions",
+ model="gpt-5.1",
+ previous_response_id="resp_123",
+ )
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ async def test_raw_response_compact(self, async_client: AsyncOpenAI) -> None:
+ http_response = await async_client.responses.with_raw_response.compact()
+
+ assert http_response.is_closed is True
+ assert http_response.http_request.headers.get("X-Stainless-Lang") == "python"
+ response = http_response.parse()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_compact(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.responses.with_streaming_response.compact() as http_response:
+ assert not http_response.is_closed
+ assert http_response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ response = await http_response.parse()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ assert cast(Any, http_response.is_closed) is True