diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 108509ed29..427b8ec423 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "2.8.1" + ".": "2.9.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index fe1a09be6b..7adb61ca2e 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ -configured_endpoints: 136 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-a7e92d12ebe89ca019a7ac5b29759064eefa2c38fe08d03516f2620e66abb32b.yml -openapi_spec_hash: acbc703b2739447abc6312b2d753631c -config_hash: b876221dfb213df9f0a999e75d38a65e +configured_endpoints: 137 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-fe8a79e6fd407e6c9afec60971f03076b65f711ccd6ea16457933b0e24fb1f6d.yml +openapi_spec_hash: 38c0a73f4e08843732c5f8002a809104 +config_hash: 2c350086d87a4b4532077363087840e7 diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bfa59348f..6de78290fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,28 @@ # Changelog +## 2.9.0 (2025-12-04) + +Full Changelog: [v2.8.1...v2.9.0](https://github.com/openai/openai-python/compare/v2.8.1...v2.9.0) + +### Features + +* **api:** gpt-5.1-codex-max and responses/compact ([22f646e](https://github.com/openai/openai-python/commit/22f646e985b7c93782cf695edbe643844cae7017)) + + +### Bug Fixes + +* **client:** avoid mutating user-provided response config object ([#2700](https://github.com/openai/openai-python/issues/2700)) ([e040d22](https://github.com/openai/openai-python/commit/e040d22c2df068e908f69dc6b892e7f8b3fe6e99)) +* ensure streams are always closed ([0b1a27f](https://github.com/openai/openai-python/commit/0b1a27f08639d14dfe40bf80b48e2b8a1a51593c)) +* **streaming:** correct indentation ([575bbac](https://github.com/openai/openai-python/commit/575bbac13b3a57731a4e07b67636ae94463d43fa)) + + +### Chores + +* **deps:** mypy 1.18.1 has a regression, pin to 1.17 ([22cd586](https://github.com/openai/openai-python/commit/22cd586dbd5484b47f625da55db697691116b22b)) +* **docs:** use environment variables for authentication in code snippets ([c2a3cd5](https://github.com/openai/openai-python/commit/c2a3cd502bfb03f68f62f50aed15a40458c0996e)) +* **internal:** codegen related update ([307a066](https://github.com/openai/openai-python/commit/307a0664383b9d1d4151bc1a05a78c4fdcdcc9b0)) +* update lockfile ([b4109c5](https://github.com/openai/openai-python/commit/b4109c5fcf971ccfb25b4bdaef0bf36999f9eca5)) + ## 2.8.1 (2025-11-17) Full Changelog: [v2.8.0...v2.8.1](https://github.com/openai/openai-python/compare/v2.8.0...v2.8.1) diff --git a/README.md b/README.md index 470707e1f3..b8050a4cd6 100644 --- a/README.md +++ b/README.md @@ -160,6 +160,7 @@ pip install openai[aiohttp] Then you can enable it by instantiating the client with `http_client=DefaultAioHttpClient()`: ```python +import os import asyncio from openai import DefaultAioHttpClient from openai import AsyncOpenAI @@ -167,7 +168,7 @@ from openai import AsyncOpenAI async def main() -> None: async with AsyncOpenAI( - api_key="My API Key", + api_key=os.environ.get("OPENAI_API_KEY"), # This is the default and can be omitted http_client=DefaultAioHttpClient(), ) as client: chat_completion = await client.chat.completions.create( diff --git a/api.md b/api.md index 28ee551af3..3807603206 100644 --- a/api.md +++ b/api.md @@ -733,6 +733,7 @@ Types: ```python from openai.types.responses import ( ApplyPatchTool, + CompactedResponse, ComputerTool, 
CustomTool, EasyInputMessage, @@ -752,6 +753,8 @@ from openai.types.responses import ( ResponseCodeInterpreterCallInProgressEvent, ResponseCodeInterpreterCallInterpretingEvent, ResponseCodeInterpreterToolCall, + ResponseCompactionItem, + ResponseCompactionItemParam, ResponseCompletedEvent, ResponseComputerToolCall, ResponseComputerToolCallOutputItem, @@ -861,6 +864,7 @@ Methods: - client.responses.retrieve(response_id, \*\*params) -> Response - client.responses.delete(response_id) -> None - client.responses.cancel(response_id) -> Response +- client.responses.compact(\*\*params) -> CompactedResponse ## InputItems @@ -914,6 +918,7 @@ from openai.types.realtime import ( InputAudioBufferClearedEvent, InputAudioBufferCommitEvent, InputAudioBufferCommittedEvent, + InputAudioBufferDtmfEventReceivedEvent, InputAudioBufferSpeechStartedEvent, InputAudioBufferSpeechStoppedEvent, InputAudioBufferTimeoutTriggered, diff --git a/pyproject.toml b/pyproject.toml index 75118d46be..4735412341 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,22 +1,24 @@ [project] name = "openai" -version = "2.8.1" +version = "2.9.0" description = "The official Python library for the openai API" dynamic = ["readme"] license = "Apache-2.0" authors = [ { name = "OpenAI", email = "support@openai.com" }, ] + dependencies = [ - "httpx>=0.23.0, <1", - "pydantic>=1.9.0, <3", + "httpx>=0.23.0, <1", + "pydantic>=1.9.0, <3", "typing-extensions>=4.11, <5", - "anyio>=3.5.0, <5", - "distro>=1.7.0, <2", - "sniffio", + "anyio>=3.5.0, <5", + "distro>=1.7.0, <2", + "sniffio", "tqdm > 4", "jiter>=0.10.0, <1", ] + requires-python = ">= 3.9" classifiers = [ "Typing :: Typed", @@ -26,6 +28,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Operating System :: OS Independent", "Operating System :: POSIX", "Operating System :: MacOS", @@ -53,7 +56,7 @@ managed = true # version pins are in requirements-dev.lock dev-dependencies = [ "pyright==1.1.399", - "mypy", + "mypy==1.17", "respx", "pytest", "pytest-asyncio", diff --git a/requirements-dev.lock b/requirements-dev.lock index b454537b96..a7201a127b 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -12,65 +12,70 @@ -e file:. 
aiohappyeyeballs==2.6.1 # via aiohttp -aiohttp==3.12.13 +aiohttp==3.13.2 # via httpx-aiohttp # via openai -aiosignal==1.3.2 +aiosignal==1.4.0 # via aiohttp -annotated-types==0.6.0 +annotated-types==0.7.0 # via pydantic -anyio==4.1.0 +anyio==4.12.0 # via httpx # via openai -argcomplete==3.1.2 +argcomplete==3.6.3 # via nox -asttokens==2.4.1 +asttokens==3.0.1 # via inline-snapshot async-timeout==5.0.1 # via aiohttp -attrs==24.2.0 +attrs==25.4.0 # via aiohttp + # via nox # via outcome # via trio -azure-core==1.31.0 +azure-core==1.36.0 # via azure-identity -azure-identity==1.19.0 -certifi==2023.7.22 +azure-identity==1.25.1 +backports-asyncio-runner==1.2.0 + # via pytest-asyncio +certifi==2025.11.12 # via httpcore # via httpx # via requests -cffi==1.16.0 +cffi==2.0.0 # via cryptography # via sounddevice -charset-normalizer==3.3.2 +charset-normalizer==3.4.4 # via requests colorama==0.4.6 # via griffe -colorlog==6.7.0 +colorlog==6.10.1 # via nox -cryptography==42.0.7 +cryptography==46.0.3 # via azure-identity # via msal # via pyjwt -dirty-equals==0.6.0 -distlib==0.3.7 +dependency-groups==1.3.1 + # via nox +dirty-equals==0.11 +distlib==0.4.0 # via virtualenv -distro==1.8.0 +distro==1.9.0 # via openai -exceptiongroup==1.2.2 +exceptiongroup==1.3.1 # via anyio # via pytest # via trio -execnet==2.1.1 +execnet==2.1.2 # via pytest-xdist -executing==2.2.0 +executing==2.2.1 # via inline-snapshot -filelock==3.12.4 +filelock==3.19.1 # via virtualenv -frozenlist==1.7.0 +frozenlist==1.8.0 # via aiohttp # via aiosignal -griffe==1.13.0 +griffe==1.14.0 h11==0.16.0 # via httpcore httpcore==1.0.9 @@ -81,137 +86,145 @@ httpx==0.28.1 # via respx httpx-aiohttp==0.1.9 # via openai -idna==3.4 +humanize==4.13.0 + # via nox +idna==3.11 # via anyio # via httpx # via requests # via trio # via yarl -importlib-metadata==7.0.0 -iniconfig==2.0.0 +importlib-metadata==8.7.0 +iniconfig==2.1.0 # via pytest -inline-snapshot==0.28.0 -jiter==0.11.0 +inline-snapshot==0.31.1 +jiter==0.12.0 # via openai markdown-it-py==3.0.0 # via rich mdurl==0.1.2 # via markdown-it-py -msal==1.31.0 +msal==1.34.0 # via azure-identity # via msal-extensions -msal-extensions==1.2.0 +msal-extensions==1.3.1 # via azure-identity -multidict==6.5.0 +multidict==6.7.0 # via aiohttp # via yarl -mypy==1.14.1 -mypy-extensions==1.0.0 +mypy==1.17.0 +mypy-extensions==1.1.0 # via mypy nest-asyncio==1.6.0 -nodeenv==1.8.0 +nodeenv==1.9.1 # via pyright -nox==2023.4.22 +nox==2025.11.12 numpy==2.0.2 # via openai # via pandas # via pandas-stubs outcome==1.3.0.post0 # via trio -packaging==23.2 +packaging==25.0 + # via dependency-groups # via nox # via pytest -pandas==2.2.3 +pandas==2.3.3 # via openai -pandas-stubs==2.1.4.231227 +pandas-stubs==2.2.2.240807 # via openai -platformdirs==3.11.0 +pathspec==0.12.1 + # via mypy +platformdirs==4.4.0 # via virtualenv -pluggy==1.5.0 +pluggy==1.6.0 # via pytest -portalocker==2.10.1 - # via msal-extensions -propcache==0.3.2 +propcache==0.4.1 # via aiohttp # via yarl pycparser==2.23 # via cffi -pydantic==2.11.9 +pydantic==2.12.5 # via openai -pydantic-core==2.33.2 +pydantic-core==2.41.5 # via pydantic -pygments==2.18.0 +pygments==2.19.2 # via pytest # via rich -pyjwt==2.8.0 +pyjwt==2.10.1 # via msal pyright==1.1.399 -pytest==8.4.1 +pytest==8.4.2 # via inline-snapshot # via pytest-asyncio # via pytest-xdist -pytest-asyncio==0.24.0 -pytest-xdist==3.7.0 -python-dateutil==2.8.2 +pytest-asyncio==1.2.0 +pytest-xdist==3.8.0 +python-dateutil==2.9.0.post0 # via pandas # via time-machine -pytz==2023.3.post1 - # via dirty-equals +pytz==2025.2 # via 
pandas -requests==2.31.0 +requests==2.32.5 # via azure-core # via msal respx==0.22.0 -rich==13.7.1 +rich==14.2.0 # via inline-snapshot -ruff==0.9.4 -setuptools==68.2.2 - # via nodeenv -six==1.16.0 - # via asttokens - # via azure-core +ruff==0.14.7 +six==1.17.0 # via python-dateutil -sniffio==1.3.0 - # via anyio +sniffio==1.3.1 # via openai # via trio sortedcontainers==2.4.0 # via trio -sounddevice==0.5.1 +sounddevice==0.5.3 # via openai -time-machine==2.9.0 -tomli==2.0.2 +time-machine==2.19.0 +tomli==2.3.0 + # via dependency-groups # via inline-snapshot # via mypy + # via nox # via pytest -tqdm==4.66.5 +tqdm==4.67.1 # via openai -trio==0.27.0 -types-pyaudio==0.2.16.20240516 -types-pytz==2024.2.0.20241003 +trio==0.31.0 +types-pyaudio==0.2.16.20250801 +types-pytz==2025.2.0.20251108 # via pandas-stubs -types-tqdm==4.66.0.20240417 -typing-extensions==4.12.2 +types-requests==2.32.4.20250913 + # via types-tqdm +types-tqdm==4.67.0.20250809 +typing-extensions==4.15.0 + # via aiosignal + # via anyio # via azure-core # via azure-identity + # via cryptography + # via exceptiongroup # via multidict # via mypy # via openai # via pydantic # via pydantic-core # via pyright + # via pytest-asyncio # via typing-inspection -typing-inspection==0.4.1 + # via virtualenv +typing-inspection==0.4.2 # via pydantic -tzdata==2024.1 +tzdata==2025.2 # via pandas -urllib3==2.2.1 +urllib3==2.5.0 # via requests -virtualenv==20.24.5 + # via types-requests +virtualenv==20.35.4 # via nox websockets==15.0.1 # via openai -yarl==1.20.1 +yarl==1.22.0 # via aiohttp -zipp==3.17.0 +zipp==3.23.0 # via importlib-metadata diff --git a/requirements.lock b/requirements.lock index b047cb3f88..8e021bd69b 100644 --- a/requirements.lock +++ b/requirements.lock @@ -12,30 +12,30 @@ -e file:. aiohappyeyeballs==2.6.1 # via aiohttp -aiohttp==3.12.13 +aiohttp==3.13.2 # via httpx-aiohttp # via openai -aiosignal==1.3.2 +aiosignal==1.4.0 # via aiohttp -annotated-types==0.6.0 +annotated-types==0.7.0 # via pydantic -anyio==4.1.0 +anyio==4.12.0 # via httpx # via openai async-timeout==5.0.1 # via aiohttp -attrs==25.3.0 +attrs==25.4.0 # via aiohttp -certifi==2023.7.22 +certifi==2025.11.12 # via httpcore # via httpx -cffi==1.17.1 +cffi==2.0.0 # via sounddevice -distro==1.8.0 +distro==1.9.0 # via openai -exceptiongroup==1.2.2 +exceptiongroup==1.3.1 # via anyio -frozenlist==1.7.0 +frozenlist==1.8.0 # via aiohttp # via aiosignal h11==0.16.0 @@ -47,58 +47,60 @@ httpx==0.28.1 # via openai httpx-aiohttp==0.1.9 # via openai -idna==3.4 +idna==3.11 # via anyio # via httpx # via yarl -jiter==0.11.0 +jiter==0.12.0 # via openai -multidict==6.5.0 +multidict==6.7.0 # via aiohttp # via yarl numpy==2.0.2 # via openai # via pandas # via pandas-stubs -pandas==2.2.3 +pandas==2.3.3 # via openai pandas-stubs==2.2.2.240807 # via openai -propcache==0.3.2 +propcache==0.4.1 # via aiohttp # via yarl pycparser==2.23 # via cffi -pydantic==2.11.9 +pydantic==2.12.5 # via openai -pydantic-core==2.33.2 +pydantic-core==2.41.5 # via pydantic python-dateutil==2.9.0.post0 # via pandas -pytz==2024.1 +pytz==2025.2 # via pandas -six==1.16.0 +six==1.17.0 # via python-dateutil -sniffio==1.3.0 - # via anyio +sniffio==1.3.1 # via openai -sounddevice==0.5.1 +sounddevice==0.5.3 # via openai -tqdm==4.66.5 +tqdm==4.67.1 # via openai -types-pytz==2024.2.0.20241003 +types-pytz==2025.2.0.20251108 # via pandas-stubs -typing-extensions==4.12.2 +typing-extensions==4.15.0 + # via aiosignal + # via anyio + # via exceptiongroup # via multidict # via openai # via pydantic # via pydantic-core # via 
typing-inspection -typing-inspection==0.4.1 +typing-inspection==0.4.2 # via pydantic tzdata==2025.2 # via pandas websockets==15.0.1 # via openai -yarl==1.20.1 +yarl==1.22.0 # via aiohttp diff --git a/src/openai/_streaming.py b/src/openai/_streaming.py index 05c284a2be..61a742668a 100644 --- a/src/openai/_streaming.py +++ b/src/openai/_streaming.py @@ -55,49 +55,51 @@ def __stream__(self) -> Iterator[_T]: process_data = self._client._process_response_data iterator = self._iter_events() - for sse in iterator: - if sse.data.startswith("[DONE]"): - break - - # we have to special case the Assistants `thread.` events since we won't have an "event" key in the data - if sse.event and sse.event.startswith("thread."): - data = sse.json() - - if sse.event == "error" and is_mapping(data) and data.get("error"): - message = None - error = data.get("error") - if is_mapping(error): - message = error.get("message") - if not message or not isinstance(message, str): - message = "An error occurred during streaming" - - raise APIError( - message=message, - request=self.response.request, - body=data["error"], - ) - - yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response) - else: - data = sse.json() - if is_mapping(data) and data.get("error"): - message = None - error = data.get("error") - if is_mapping(error): - message = error.get("message") - if not message or not isinstance(message, str): - message = "An error occurred during streaming" - - raise APIError( - message=message, - request=self.response.request, - body=data["error"], - ) - - yield process_data(data=data, cast_to=cast_to, response=response) - - # As we might not fully consume the response stream, we need to close it explicitly - response.close() + try: + for sse in iterator: + if sse.data.startswith("[DONE]"): + break + + # we have to special case the Assistants `thread.` events since we won't have an "event" key in the data + if sse.event and sse.event.startswith("thread."): + data = sse.json() + + if sse.event == "error" and is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + + raise APIError( + message=message, + request=self.response.request, + body=data["error"], + ) + + yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response) + else: + data = sse.json() + if is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + + raise APIError( + message=message, + request=self.response.request, + body=data["error"], + ) + + yield process_data(data=data, cast_to=cast_to, response=response) + + finally: + # Ensure the response is closed even if the consumer doesn't read all data + response.close() def __enter__(self) -> Self: return self @@ -156,49 +158,51 @@ async def __stream__(self) -> AsyncIterator[_T]: process_data = self._client._process_response_data iterator = self._iter_events() - async for sse in iterator: - if sse.data.startswith("[DONE]"): - break - - # we have to special case the Assistants `thread.` events since we won't have an "event" key in the data - if sse.event and sse.event.startswith("thread."): - data = sse.json() - - if sse.event == "error" and is_mapping(data) and data.get("error"): 
- message = None - error = data.get("error") - if is_mapping(error): - message = error.get("message") - if not message or not isinstance(message, str): - message = "An error occurred during streaming" - - raise APIError( - message=message, - request=self.response.request, - body=data["error"], - ) - - yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response) - else: - data = sse.json() - if is_mapping(data) and data.get("error"): - message = None - error = data.get("error") - if is_mapping(error): - message = error.get("message") - if not message or not isinstance(message, str): - message = "An error occurred during streaming" - - raise APIError( - message=message, - request=self.response.request, - body=data["error"], - ) - - yield process_data(data=data, cast_to=cast_to, response=response) - - # As we might not fully consume the response stream, we need to close it explicitly - await response.aclose() + try: + async for sse in iterator: + if sse.data.startswith("[DONE]"): + break + + # we have to special case the Assistants `thread.` events since we won't have an "event" key in the data + if sse.event and sse.event.startswith("thread."): + data = sse.json() + + if sse.event == "error" and is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + + raise APIError( + message=message, + request=self.response.request, + body=data["error"], + ) + + yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response) + else: + data = sse.json() + if is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + + raise APIError( + message=message, + request=self.response.request, + body=data["error"], + ) + + yield process_data(data=data, cast_to=cast_to, response=response) + + finally: + # Ensure the response is closed even if the consumer doesn't read all data + await response.aclose() async def __aenter__(self) -> Self: return self diff --git a/src/openai/_version.py b/src/openai/_version.py index 6109cebf91..e5ddb8f4eb 100644 --- a/src/openai/_version.py +++ b/src/openai/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "openai" -__version__ = "2.8.1" # x-release-please-version +__version__ = "2.9.0" # x-release-please-version diff --git a/src/openai/lib/_parsing/_responses.py b/src/openai/lib/_parsing/_responses.py index 4d7b0b6224..4bed171df7 100644 --- a/src/openai/lib/_parsing/_responses.py +++ b/src/openai/lib/_parsing/_responses.py @@ -103,6 +103,7 @@ def parse_response( or output.type == "file_search_call" or output.type == "web_search_call" or output.type == "reasoning" + or output.type == "compaction" or output.type == "mcp_call" or output.type == "mcp_approval_request" or output.type == "image_generation_call" diff --git a/src/openai/resources/beta/assistants.py b/src/openai/resources/beta/assistants.py index e4ec1dca11..aa1f9f9b48 100644 --- a/src/openai/resources/beta/assistants.py +++ b/src/openai/resources/beta/assistants.py @@ -98,9 +98,9 @@ def create( reasoning_effort: Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). 
Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -108,6 +108,7 @@ def create( - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. response_format: Specifies the format that the model must output. Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), @@ -312,9 +313,9 @@ def update( reasoning_effort: Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -322,6 +323,7 @@ def update( - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. response_format: Specifies the format that the model must output. Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), @@ -565,9 +567,9 @@ async def create( reasoning_effort: Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -575,6 +577,7 @@ async def create( - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. response_format: Specifies the format that the model must output. Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), @@ -779,9 +782,9 @@ async def update( reasoning_effort: Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. 
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -789,6 +792,7 @@ async def update( - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. response_format: Specifies the format that the model must output. Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py index d7445d52b5..9b6cb3f752 100644 --- a/src/openai/resources/beta/threads/runs/runs.py +++ b/src/openai/resources/beta/threads/runs/runs.py @@ -169,9 +169,9 @@ def create( reasoning_effort: Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -179,6 +179,7 @@ def create( - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. response_format: Specifies the format that the model must output. Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), @@ -330,9 +331,9 @@ def create( reasoning_effort: Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -340,6 +341,7 @@ def create( - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. response_format: Specifies the format that the model must output. Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), @@ -487,9 +489,9 @@ def create( reasoning_effort: Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. 
Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -497,6 +499,7 @@ def create( - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. response_format: Specifies the format that the model must output. Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), @@ -1620,9 +1623,9 @@ async def create( reasoning_effort: Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -1630,6 +1633,7 @@ async def create( - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. response_format: Specifies the format that the model must output. Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), @@ -1781,9 +1785,9 @@ async def create( reasoning_effort: Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -1791,6 +1795,7 @@ async def create( - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. response_format: Specifies the format that the model must output. Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), @@ -1938,9 +1943,9 @@ async def create( reasoning_effort: Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. 
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -1948,6 +1953,7 @@ async def create( - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. response_format: Specifies the format that the model must output. Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), diff --git a/src/openai/resources/chat/completions/completions.py b/src/openai/resources/chat/completions/completions.py index c205011d10..3f2732a608 100644 --- a/src/openai/resources/chat/completions/completions.py +++ b/src/openai/resources/chat/completions/completions.py @@ -411,9 +411,9 @@ def create( reasoning_effort: Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -421,6 +421,7 @@ def create( - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. response_format: An object specifying the format that the model must output. @@ -721,9 +722,9 @@ def create( reasoning_effort: Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -731,6 +732,7 @@ def create( - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. response_format: An object specifying the format that the model must output. @@ -1022,9 +1024,9 @@ def create( reasoning_effort: Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. 
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -1032,6 +1034,7 @@ def create( - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. response_format: An object specifying the format that the model must output. @@ -1894,9 +1897,9 @@ async def create( reasoning_effort: Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -1904,6 +1907,7 @@ async def create( - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. response_format: An object specifying the format that the model must output. @@ -2204,9 +2208,9 @@ async def create( reasoning_effort: Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -2214,6 +2218,7 @@ async def create( - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. response_format: An object specifying the format that the model must output. @@ -2505,9 +2510,9 @@ async def create( reasoning_effort: Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. 
Tool @@ -2515,6 +2520,7 @@ async def create( - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. response_format: An object specifying the format that the model must output. diff --git a/src/openai/resources/containers/containers.py b/src/openai/resources/containers/containers.py index dcdc3e1a3e..0cbb400d4a 100644 --- a/src/openai/resources/containers/containers.py +++ b/src/openai/resources/containers/containers.py @@ -60,6 +60,7 @@ def create( name: str, expires_after: container_create_params.ExpiresAfter | Omit = omit, file_ids: SequenceNotStr[str] | Omit = omit, + memory_limit: Literal["1g", "4g", "16g", "64g"] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -77,6 +78,8 @@ def create( file_ids: IDs of files to copy to the container. + memory_limit: Optional memory limit for the container. Defaults to "1g". + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -92,6 +95,7 @@ def create( "name": name, "expires_after": expires_after, "file_ids": file_ids, + "memory_limit": memory_limit, }, container_create_params.ContainerCreateParams, ), @@ -256,6 +260,7 @@ async def create( name: str, expires_after: container_create_params.ExpiresAfter | Omit = omit, file_ids: SequenceNotStr[str] | Omit = omit, + memory_limit: Literal["1g", "4g", "16g", "64g"] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -273,6 +278,8 @@ async def create( file_ids: IDs of files to copy to the container. + memory_limit: Optional memory limit for the container. Defaults to "1g". + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -288,6 +295,7 @@ async def create( "name": name, "expires_after": expires_after, "file_ids": file_ids, + "memory_limit": memory_limit, }, container_create_params.ContainerCreateParams, ), diff --git a/src/openai/resources/realtime/calls.py b/src/openai/resources/realtime/calls.py index 7d2c92fe86..cdea492d95 100644 --- a/src/openai/resources/realtime/calls.py +++ b/src/openai/resources/realtime/calls.py @@ -199,15 +199,20 @@ def accept( limit, the conversation be truncated, meaning messages (starting from the oldest) will not be included in the model's context. A 32k context model with 4,096 max output tokens can only include 28,224 tokens in the context before - truncation occurs. Clients can configure truncation behavior to truncate with a - lower max token limit, which is an effective way to control token usage and - cost. Truncation will reduce the number of cached tokens on the next turn - (busting the cache), since messages are dropped from the beginning of the - context. However, clients can also configure truncation to retain messages up to - a fraction of the maximum context size, which will reduce the need for future - truncations and thus improve the cache rate. 
Truncation can be disabled - entirely, which means the server will never truncate but would instead return an - error if the conversation exceeds the model's input token limit. + truncation occurs. + + Clients can configure truncation behavior to truncate with a lower max token + limit, which is an effective way to control token usage and cost. + + Truncation will reduce the number of cached tokens on the next turn (busting the + cache), since messages are dropped from the beginning of the context. However, + clients can also configure truncation to retain messages up to a fraction of the + maximum context size, which will reduce the need for future truncations and thus + improve the cache rate. + + Truncation can be disabled entirely, which means the server will never truncate + but would instead return an error if the conversation exceeds the model's input + token limit. extra_headers: Send extra headers @@ -519,15 +524,20 @@ async def accept( limit, the conversation be truncated, meaning messages (starting from the oldest) will not be included in the model's context. A 32k context model with 4,096 max output tokens can only include 28,224 tokens in the context before - truncation occurs. Clients can configure truncation behavior to truncate with a - lower max token limit, which is an effective way to control token usage and - cost. Truncation will reduce the number of cached tokens on the next turn - (busting the cache), since messages are dropped from the beginning of the - context. However, clients can also configure truncation to retain messages up to - a fraction of the maximum context size, which will reduce the need for future - truncations and thus improve the cache rate. Truncation can be disabled - entirely, which means the server will never truncate but would instead return an - error if the conversation exceeds the model's input token limit. + truncation occurs. + + Clients can configure truncation behavior to truncate with a lower max token + limit, which is an effective way to control token usage and cost. + + Truncation will reduce the number of cached tokens on the next turn (busting the + cache), since messages are dropped from the beginning of the context. However, + clients can also configure truncation to retain messages up to a fraction of the + maximum context size, which will reduce the need for future truncations and thus + improve the cache rate. + + Truncation can be disabled entirely, which means the server will never truncate + but would instead return an error if the conversation exceeds the model's input + token limit. extra_headers: Send extra headers diff --git a/src/openai/resources/realtime/realtime.py b/src/openai/resources/realtime/realtime.py index 6e69258616..33caba1871 100644 --- a/src/openai/resources/realtime/realtime.py +++ b/src/openai/resources/realtime/realtime.py @@ -829,7 +829,7 @@ def retrieve(self, *, item_id: str, event_id: str | Omit = omit) -> None: class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource): def clear(self, *, event_id: str | Omit = omit) -> None: - """**WebRTC Only:** Emit to cut off the current audio response. + """**WebRTC/SIP Only:** Emit to cut off the current audio response. This will trigger the server to stop generating audio and emit a `output_audio_buffer.cleared` event. 
This @@ -1066,7 +1066,7 @@ async def retrieve(self, *, item_id: str, event_id: str | Omit = omit) -> None: class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource): async def clear(self, *, event_id: str | Omit = omit) -> None: - """**WebRTC Only:** Emit to cut off the current audio response. + """**WebRTC/SIP Only:** Emit to cut off the current audio response. This will trigger the server to stop generating audio and emit a `output_audio_buffer.cleared` event. This diff --git a/src/openai/resources/responses/responses.py b/src/openai/resources/responses/responses.py index dcf87ba07c..c532fc0bb0 100644 --- a/src/openai/resources/responses/responses.py +++ b/src/openai/resources/responses/responses.py @@ -2,6 +2,7 @@ from __future__ import annotations +from copy import copy from typing import Any, List, Type, Union, Iterable, Optional, cast from functools import partial from typing_extensions import Literal, overload @@ -33,7 +34,11 @@ AsyncInputTokensWithStreamingResponse, ) from ..._base_client import make_request_options -from ...types.responses import response_create_params, response_retrieve_params +from ...types.responses import ( + response_create_params, + response_compact_params, + response_retrieve_params, +) from ...lib._parsing._responses import ( TextFormatT, parse_response, @@ -45,11 +50,13 @@ from ...types.shared_params.reasoning import Reasoning from ...types.responses.parsed_response import ParsedResponse from ...lib.streaming.responses._responses import ResponseStreamManager, AsyncResponseStreamManager +from ...types.responses.compacted_response import CompactedResponse from ...types.responses.response_includable import ResponseIncludable from ...types.shared_params.responses_model import ResponsesModel from ...types.responses.response_input_param import ResponseInputParam from ...types.responses.response_prompt_param import ResponsePromptParam from ...types.responses.response_stream_event import ResponseStreamEvent +from ...types.responses.response_input_item_param import ResponseInputItemParam from ...types.responses.response_text_config_param import ResponseTextConfigParam __all__ = ["Responses", "AsyncResponses"] @@ -1046,6 +1053,7 @@ def stream( if "format" in text: raise TypeError("Cannot mix and match text.format with text_format") + text = copy(text) text["format"] = _type_to_text_format_param(text_format) api_request: partial[Stream[ResponseStreamEvent]] = partial( @@ -1151,7 +1159,7 @@ def parse( if "format" in text: raise TypeError("Cannot mix and match text.format with text_format") - + text = copy(text) text["format"] = _type_to_text_format_param(text_format) tools = _make_tools(tools) @@ -1515,6 +1523,154 @@ def cancel( cast_to=Response, ) + def compact( + self, + *, + input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit, + instructions: Optional[str] | Omit = omit, + model: Union[ + Literal[ + "gpt-5.1", + "gpt-5.1-2025-11-13", + "gpt-5.1-codex", + "gpt-5.1-mini", + "gpt-5.1-chat-latest", + "gpt-5", + "gpt-5-mini", + "gpt-5-nano", + "gpt-5-2025-08-07", + "gpt-5-mini-2025-08-07", + "gpt-5-nano-2025-08-07", + "gpt-5-chat-latest", + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano-2025-04-14", + "o4-mini", + "o4-mini-2025-04-16", + "o3", + "o3-2025-04-16", + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "o1-preview", + "o1-preview-2024-09-12", + "o1-mini", + "o1-mini-2024-09-12", + "gpt-4o", + "gpt-4o-2024-11-20", + 
"gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-audio-preview", + "gpt-4o-audio-preview-2024-10-01", + "gpt-4o-audio-preview-2024-12-17", + "gpt-4o-audio-preview-2025-06-03", + "gpt-4o-mini-audio-preview", + "gpt-4o-mini-audio-preview-2024-12-17", + "gpt-4o-search-preview", + "gpt-4o-mini-search-preview", + "gpt-4o-search-preview-2025-03-11", + "gpt-4o-mini-search-preview-2025-03-11", + "chatgpt-4o-latest", + "codex-mini-latest", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + "o1-pro", + "o1-pro-2025-03-19", + "o3-pro", + "o3-pro-2025-06-10", + "o3-deep-research", + "o3-deep-research-2025-06-26", + "o4-mini-deep-research", + "o4-mini-deep-research-2025-06-26", + "computer-use-preview", + "computer-use-preview-2025-03-11", + "gpt-5-codex", + "gpt-5-pro", + "gpt-5-pro-2025-10-06", + "gpt-5.1-codex-max", + ], + str, + None, + ] + | Omit = omit, + previous_response_id: Optional[str] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> CompactedResponse: + """ + Compact conversation + + Args: + input: Text, image, or file inputs to the model, used to generate a response + + instructions: A system (or developer) message inserted into the model's context. When used + along with `previous_response_id`, the instructions from a previous response + will not be carried over to the next response. This makes it simple to swap out + system (or developer) messages in new responses. + + model: Model ID used to generate the response, like `gpt-5` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + Cannot be used in conjunction with `conversation`. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/responses/compact", + body=maybe_transform( + { + "input": input, + "instructions": instructions, + "model": model, + "previous_response_id": previous_response_id, + }, + response_compact_params.ResponseCompactParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=CompactedResponse, + ) + class AsyncResponses(AsyncAPIResource): @cached_property @@ -2507,7 +2663,7 @@ def stream( if "format" in text: raise TypeError("Cannot mix and match text.format with text_format") - + text = copy(text) text["format"] = _type_to_text_format_param(text_format) api_request = self.create( @@ -2617,7 +2773,7 @@ async def parse( if "format" in text: raise TypeError("Cannot mix and match text.format with text_format") - + text = copy(text) text["format"] = _type_to_text_format_param(text_format) tools = _make_tools(tools) @@ -2981,6 +3137,154 @@ async def cancel( cast_to=Response, ) + async def compact( + self, + *, + input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit, + instructions: Optional[str] | Omit = omit, + model: Union[ + Literal[ + "gpt-5.1", + "gpt-5.1-2025-11-13", + "gpt-5.1-codex", + "gpt-5.1-mini", + "gpt-5.1-chat-latest", + "gpt-5", + "gpt-5-mini", + "gpt-5-nano", + "gpt-5-2025-08-07", + "gpt-5-mini-2025-08-07", + "gpt-5-nano-2025-08-07", + "gpt-5-chat-latest", + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano-2025-04-14", + "o4-mini", + "o4-mini-2025-04-16", + "o3", + "o3-2025-04-16", + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "o1-preview", + "o1-preview-2024-09-12", + "o1-mini", + "o1-mini-2024-09-12", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-audio-preview", + "gpt-4o-audio-preview-2024-10-01", + "gpt-4o-audio-preview-2024-12-17", + "gpt-4o-audio-preview-2025-06-03", + "gpt-4o-mini-audio-preview", + "gpt-4o-mini-audio-preview-2024-12-17", + "gpt-4o-search-preview", + "gpt-4o-mini-search-preview", + "gpt-4o-search-preview-2025-03-11", + "gpt-4o-mini-search-preview-2025-03-11", + "chatgpt-4o-latest", + "codex-mini-latest", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + "o1-pro", + "o1-pro-2025-03-19", + "o3-pro", + "o3-pro-2025-06-10", + "o3-deep-research", + "o3-deep-research-2025-06-26", + "o4-mini-deep-research", + "o4-mini-deep-research-2025-06-26", + "computer-use-preview", + "computer-use-preview-2025-03-11", + "gpt-5-codex", + "gpt-5-pro", + "gpt-5-pro-2025-10-06", + "gpt-5.1-codex-max", + ], + str, + None, + ] + | Omit = omit, + previous_response_id: Optional[str] | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> CompactedResponse: + """ + Compact conversation + + Args: + input: Text, image, or file inputs to the model, used to generate a response + + instructions: A system (or developer) message inserted into the model's context. When used + along with `previous_response_id`, the instructions from a previous response + will not be carried over to the next response. This makes it simple to swap out + system (or developer) messages in new responses. + + model: Model ID used to generate the response, like `gpt-5` or `o3`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + Cannot be used in conjunction with `conversation`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/responses/compact", + body=await async_maybe_transform( + { + "input": input, + "instructions": instructions, + "model": model, + "previous_response_id": previous_response_id, + }, + response_compact_params.ResponseCompactParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=CompactedResponse, + ) + class ResponsesWithRawResponse: def __init__(self, responses: Responses) -> None: @@ -2998,6 +3302,9 @@ def __init__(self, responses: Responses) -> None: self.cancel = _legacy_response.to_raw_response_wrapper( responses.cancel, ) + self.compact = _legacy_response.to_raw_response_wrapper( + responses.compact, + ) self.parse = _legacy_response.to_raw_response_wrapper( responses.parse, ) @@ -3027,6 +3334,9 @@ def __init__(self, responses: AsyncResponses) -> None: self.cancel = _legacy_response.async_to_raw_response_wrapper( responses.cancel, ) + self.compact = _legacy_response.async_to_raw_response_wrapper( + responses.compact, + ) self.parse = _legacy_response.async_to_raw_response_wrapper( responses.parse, ) @@ -3056,6 +3366,9 @@ def __init__(self, responses: Responses) -> None: self.cancel = to_streamed_response_wrapper( responses.cancel, ) + self.compact = to_streamed_response_wrapper( + responses.compact, + ) @cached_property def input_items(self) -> InputItemsWithStreamingResponse: @@ -3082,6 +3395,9 @@ def __init__(self, responses: AsyncResponses) -> None: self.cancel = async_to_streamed_response_wrapper( responses.cancel, ) + self.compact = async_to_streamed_response_wrapper( + responses.compact, + ) @cached_property def input_items(self) -> AsyncInputItemsWithStreamingResponse: diff --git a/src/openai/resources/videos.py b/src/openai/resources/videos.py index 4df5f02004..727091c607 100644 --- a/src/openai/resources/videos.py +++ b/src/openai/resources/videos.py @@ -84,11 +84,13 @@ def create( input_reference: Optional 
image reference that guides generation. - model: The video generation model to use. Defaults to `sora-2`. + model: The video generation model to use (allowed values: sora-2, sora-2-pro). Defaults + to `sora-2`. - seconds: Clip duration in seconds. Defaults to 4 seconds. + seconds: Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds. - size: Output resolution formatted as width x height. Defaults to 720x1280. + size: Output resolution formatted as width x height (allowed values: 720x1280, + 1280x720, 1024x1792, 1792x1024). Defaults to 720x1280. extra_headers: Send extra headers @@ -437,11 +439,13 @@ async def create( input_reference: Optional image reference that guides generation. - model: The video generation model to use. Defaults to `sora-2`. + model: The video generation model to use (allowed values: sora-2, sora-2-pro). Defaults + to `sora-2`. - seconds: Clip duration in seconds. Defaults to 4 seconds. + seconds: Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds. - size: Output resolution formatted as width x height. Defaults to 720x1280. + size: Output resolution formatted as width x height (allowed values: 720x1280, + 1280x720, 1024x1792, 1792x1024). Defaults to 720x1280. extra_headers: Send extra headers diff --git a/src/openai/types/beta/assistant_create_params.py b/src/openai/types/beta/assistant_create_params.py index 009b0f49e3..38b30f212f 100644 --- a/src/openai/types/beta/assistant_create_params.py +++ b/src/openai/types/beta/assistant_create_params.py @@ -62,9 +62,9 @@ class AssistantCreateParams(TypedDict, total=False): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -72,6 +72,7 @@ class AssistantCreateParams(TypedDict, total=False): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. """ response_format: Optional[AssistantResponseFormatOptionParam] diff --git a/src/openai/types/beta/assistant_update_params.py b/src/openai/types/beta/assistant_update_params.py index 432116ad52..8f774c4e6c 100644 --- a/src/openai/types/beta/assistant_update_params.py +++ b/src/openai/types/beta/assistant_update_params.py @@ -97,9 +97,9 @@ class AssistantUpdateParams(TypedDict, total=False): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. 
The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -107,6 +107,7 @@ class AssistantUpdateParams(TypedDict, total=False): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. """ response_format: Optional[AssistantResponseFormatOptionParam] diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py index 74786d7d5c..df789decbc 100644 --- a/src/openai/types/beta/threads/run_create_params.py +++ b/src/openai/types/beta/threads/run_create_params.py @@ -111,9 +111,9 @@ class RunCreateParamsBase(TypedDict, total=False): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -121,6 +121,7 @@ class RunCreateParamsBase(TypedDict, total=False): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. """ response_format: Optional[AssistantResponseFormatOptionParam] diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py index e02c06cbb0..f2d55f7ec4 100644 --- a/src/openai/types/chat/completion_create_params.py +++ b/src/openai/types/chat/completion_create_params.py @@ -197,9 +197,9 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -207,6 +207,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. 
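
For reference, the new `xhigh` effort level documented in the docstring updates above is passed through the same `reasoning_effort` parameter that already exists on chat completions. A minimal sketch, assuming access to `gpt-5.1-codex-max` (the only model the updated docstrings say supports `xhigh`); this is illustrative and not part of the diff itself:

```python
import os

from openai import OpenAI

client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Per the docstrings above, `xhigh` is currently only supported for gpt-5.1-codex-max.
completion = client.chat.completions.create(
    model="gpt-5.1-codex-max",
    reasoning_effort="xhigh",
    messages=[{"role": "user", "content": "Refactor this function to be tail-recursive."}],
)
print(completion.choices[0].message.content)
```
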
""" response_format: ResponseFormat diff --git a/src/openai/types/container_create_params.py b/src/openai/types/container_create_params.py index 01a48ac410..d629c24d38 100644 --- a/src/openai/types/container_create_params.py +++ b/src/openai/types/container_create_params.py @@ -19,6 +19,9 @@ class ContainerCreateParams(TypedDict, total=False): file_ids: SequenceNotStr[str] """IDs of files to copy to the container.""" + memory_limit: Literal["1g", "4g", "16g", "64g"] + """Optional memory limit for the container. Defaults to "1g".""" + class ExpiresAfter(TypedDict, total=False): anchor: Required[Literal["last_active_at"]] diff --git a/src/openai/types/container_create_response.py b/src/openai/types/container_create_response.py index c0ccc45a1c..cbad914283 100644 --- a/src/openai/types/container_create_response.py +++ b/src/openai/types/container_create_response.py @@ -38,3 +38,9 @@ class ContainerCreateResponse(BaseModel): point for the expiration. The minutes is the number of minutes after the anchor before the container expires. """ + + last_active_at: Optional[int] = None + """Unix timestamp (in seconds) when the container was last active.""" + + memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None + """The memory limit configured for the container.""" diff --git a/src/openai/types/container_list_response.py b/src/openai/types/container_list_response.py index 2d9c11d8a4..29416f0941 100644 --- a/src/openai/types/container_list_response.py +++ b/src/openai/types/container_list_response.py @@ -38,3 +38,9 @@ class ContainerListResponse(BaseModel): point for the expiration. The minutes is the number of minutes after the anchor before the container expires. """ + + last_active_at: Optional[int] = None + """Unix timestamp (in seconds) when the container was last active.""" + + memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None + """The memory limit configured for the container.""" diff --git a/src/openai/types/container_retrieve_response.py b/src/openai/types/container_retrieve_response.py index eab291b34f..31fedeac64 100644 --- a/src/openai/types/container_retrieve_response.py +++ b/src/openai/types/container_retrieve_response.py @@ -38,3 +38,9 @@ class ContainerRetrieveResponse(BaseModel): point for the expiration. The minutes is the number of minutes after the anchor before the container expires. """ + + last_active_at: Optional[int] = None + """Unix timestamp (in seconds) when the container was last active.""" + + memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None + """The memory limit configured for the container.""" diff --git a/src/openai/types/evals/create_eval_completions_run_data_source.py b/src/openai/types/evals/create_eval_completions_run_data_source.py index 742c27a775..4236746a17 100644 --- a/src/openai/types/evals/create_eval_completions_run_data_source.py +++ b/src/openai/types/evals/create_eval_completions_run_data_source.py @@ -172,9 +172,9 @@ class SamplingParams(BaseModel): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. 
The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -182,6 +182,7 @@ class SamplingParams(BaseModel): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. """ response_format: Optional[SamplingParamsResponseFormat] = None diff --git a/src/openai/types/evals/create_eval_completions_run_data_source_param.py b/src/openai/types/evals/create_eval_completions_run_data_source_param.py index 18cd5018b1..751a1432b8 100644 --- a/src/openai/types/evals/create_eval_completions_run_data_source_param.py +++ b/src/openai/types/evals/create_eval_completions_run_data_source_param.py @@ -168,9 +168,9 @@ class SamplingParams(TypedDict, total=False): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -178,6 +178,7 @@ class SamplingParams(TypedDict, total=False): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. """ response_format: SamplingParamsResponseFormat diff --git a/src/openai/types/evals/run_cancel_response.py b/src/openai/types/evals/run_cancel_response.py index b18598b20e..f7fb0ec4ad 100644 --- a/src/openai/types/evals/run_cancel_response.py +++ b/src/openai/types/evals/run_cancel_response.py @@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. """ temperature: Optional[float] = None @@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. 
Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. """ seed: Optional[int] = None diff --git a/src/openai/types/evals/run_create_params.py b/src/openai/types/evals/run_create_params.py index a50433f06d..a70d1923e5 100644 --- a/src/openai/types/evals/run_create_params.py +++ b/src/openai/types/evals/run_create_params.py @@ -116,9 +116,9 @@ class DataSourceCreateEvalResponsesRunDataSourceSourceResponses(TypedDict, total """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -126,6 +126,7 @@ class DataSourceCreateEvalResponsesRunDataSourceSourceResponses(TypedDict, total - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. """ temperature: Optional[float] @@ -263,9 +264,9 @@ class DataSourceCreateEvalResponsesRunDataSourceSamplingParams(TypedDict, total= """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -273,6 +274,7 @@ class DataSourceCreateEvalResponsesRunDataSourceSamplingParams(TypedDict, total= - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. 
""" seed: int diff --git a/src/openai/types/evals/run_create_response.py b/src/openai/types/evals/run_create_response.py index 41dac615c7..fb2220b3a1 100644 --- a/src/openai/types/evals/run_create_response.py +++ b/src/openai/types/evals/run_create_response.py @@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. """ temperature: Optional[float] = None @@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. """ seed: Optional[int] = None diff --git a/src/openai/types/evals/run_list_response.py b/src/openai/types/evals/run_list_response.py index 61bff95447..adac4ffdc8 100644 --- a/src/openai/types/evals/run_list_response.py +++ b/src/openai/types/evals/run_list_response.py @@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. 
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. """ temperature: Optional[float] = None @@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. """ seed: Optional[int] = None diff --git a/src/openai/types/evals/run_retrieve_response.py b/src/openai/types/evals/run_retrieve_response.py index 651d7423a9..abdc5ebae5 100644 --- a/src/openai/types/evals/run_retrieve_response.py +++ b/src/openai/types/evals/run_retrieve_response.py @@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. """ temperature: Optional[float] = None @@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. 
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`. """ seed: Optional[int] = None diff --git a/src/openai/types/graders/score_model_grader.py b/src/openai/types/graders/score_model_grader.py index 84686a9642..b3ba6758bb 100644 --- a/src/openai/types/graders/score_model_grader.py +++ b/src/openai/types/graders/score_model_grader.py @@ -67,9 +67,9 @@ class SamplingParams(BaseModel): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -77,6 +77,7 @@ class SamplingParams(BaseModel): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. """ seed: Optional[int] = None diff --git a/src/openai/types/graders/score_model_grader_param.py b/src/openai/types/graders/score_model_grader_param.py index aec7a95ad4..eb1f6e03ac 100644 --- a/src/openai/types/graders/score_model_grader_param.py +++ b/src/openai/types/graders/score_model_grader_param.py @@ -73,9 +73,9 @@ class SamplingParams(TypedDict, total=False): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -83,6 +83,7 @@ class SamplingParams(TypedDict, total=False): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. 
""" seed: Optional[int] diff --git a/src/openai/types/realtime/__init__.py b/src/openai/types/realtime/__init__.py index 83e81a034a..c2a141d727 100644 --- a/src/openai/types/realtime/__init__.py +++ b/src/openai/types/realtime/__init__.py @@ -175,6 +175,9 @@ from .response_function_call_arguments_done_event import ( ResponseFunctionCallArgumentsDoneEvent as ResponseFunctionCallArgumentsDoneEvent, ) +from .input_audio_buffer_dtmf_event_received_event import ( + InputAudioBufferDtmfEventReceivedEvent as InputAudioBufferDtmfEventReceivedEvent, +) from .realtime_conversation_item_assistant_message import ( RealtimeConversationItemAssistantMessage as RealtimeConversationItemAssistantMessage, ) diff --git a/src/openai/types/realtime/call_accept_params.py b/src/openai/types/realtime/call_accept_params.py index d6fc92b8e5..917b71cb0d 100644 --- a/src/openai/types/realtime/call_accept_params.py +++ b/src/openai/types/realtime/call_accept_params.py @@ -110,13 +110,18 @@ class CallAcceptParams(TypedDict, total=False): limit, the conversation be truncated, meaning messages (starting from the oldest) will not be included in the model's context. A 32k context model with 4,096 max output tokens can only include 28,224 tokens in the context before - truncation occurs. Clients can configure truncation behavior to truncate with a - lower max token limit, which is an effective way to control token usage and - cost. Truncation will reduce the number of cached tokens on the next turn - (busting the cache), since messages are dropped from the beginning of the - context. However, clients can also configure truncation to retain messages up to - a fraction of the maximum context size, which will reduce the need for future - truncations and thus improve the cache rate. Truncation can be disabled - entirely, which means the server will never truncate but would instead return an - error if the conversation exceeds the model's input token limit. + truncation occurs. + + Clients can configure truncation behavior to truncate with a lower max token + limit, which is an effective way to control token usage and cost. + + Truncation will reduce the number of cached tokens on the next turn (busting the + cache), since messages are dropped from the beginning of the context. However, + clients can also configure truncation to retain messages up to a fraction of the + maximum context size, which will reduce the need for future truncations and thus + improve the cache rate. + + Truncation can be disabled entirely, which means the server will never truncate + but would instead return an error if the conversation exceeds the model's input + token limit. """ diff --git a/src/openai/types/realtime/input_audio_buffer_dtmf_event_received_event.py b/src/openai/types/realtime/input_audio_buffer_dtmf_event_received_event.py new file mode 100644 index 0000000000..d61ed4bda7 --- /dev/null +++ b/src/openai/types/realtime/input_audio_buffer_dtmf_event_received_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["InputAudioBufferDtmfEventReceivedEvent"] + + +class InputAudioBufferDtmfEventReceivedEvent(BaseModel): + event: str + """The telephone keypad that was pressed by the user.""" + + received_at: int + """UTC Unix Timestamp when DTMF Event was received by server.""" + + type: Literal["input_audio_buffer.dtmf_event_received"] + """The event type, must be `input_audio_buffer.dtmf_event_received`.""" diff --git a/src/openai/types/realtime/realtime_audio_input_turn_detection.py b/src/openai/types/realtime/realtime_audio_input_turn_detection.py index d3f4e00316..9b55353884 100644 --- a/src/openai/types/realtime/realtime_audio_input_turn_detection.py +++ b/src/openai/types/realtime/realtime_audio_input_turn_detection.py @@ -14,9 +14,14 @@ class ServerVad(BaseModel): """Type of turn detection, `server_vad` to turn on simple Server VAD.""" create_response: Optional[bool] = None - """ - Whether or not to automatically generate a response when a VAD stop event + """Whether or not to automatically generate a response when a VAD stop event occurs. + + If `interrupt_response` is set to `false` this may fail to create a response if + the model is already responding. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. """ idle_timeout_ms: Optional[int] = None @@ -37,9 +42,13 @@ class ServerVad(BaseModel): interrupt_response: Optional[bool] = None """ - Whether or not to automatically interrupt any ongoing response with output to - the default conversation (i.e. `conversation` of `auto`) when a VAD start event - occurs. + Whether or not to automatically interrupt (cancel) any ongoing response with + output to the default conversation (i.e. `conversation` of `auto`) when a VAD + start event occurs. If `true` then the response will be cancelled, otherwise it + will continue until complete. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. """ prefix_padding_ms: Optional[int] = None diff --git a/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py b/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py index 09b8cfd159..4ce7640727 100644 --- a/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py +++ b/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py @@ -13,9 +13,14 @@ class ServerVad(TypedDict, total=False): """Type of turn detection, `server_vad` to turn on simple Server VAD.""" create_response: bool - """ - Whether or not to automatically generate a response when a VAD stop event + """Whether or not to automatically generate a response when a VAD stop event occurs. + + If `interrupt_response` is set to `false` this may fail to create a response if + the model is already responding. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. """ idle_timeout_ms: Optional[int] @@ -36,9 +41,13 @@ class ServerVad(TypedDict, total=False): interrupt_response: bool """ - Whether or not to automatically interrupt any ongoing response with output to - the default conversation (i.e. `conversation` of `auto`) when a VAD start event - occurs. 
+ Whether or not to automatically interrupt (cancel) any ongoing response with + output to the default conversation (i.e. `conversation` of `auto`) when a VAD + start event occurs. If `true` then the response will be cancelled, otherwise it + will continue until complete. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. """ prefix_padding_ms: int diff --git a/src/openai/types/realtime/realtime_server_event.py b/src/openai/types/realtime/realtime_server_event.py index 1605b81a97..ead98f1a54 100644 --- a/src/openai/types/realtime/realtime_server_event.py +++ b/src/openai/types/realtime/realtime_server_event.py @@ -42,6 +42,7 @@ from .input_audio_buffer_speech_started_event import InputAudioBufferSpeechStartedEvent from .input_audio_buffer_speech_stopped_event import InputAudioBufferSpeechStoppedEvent from .response_function_call_arguments_done_event import ResponseFunctionCallArgumentsDoneEvent +from .input_audio_buffer_dtmf_event_received_event import InputAudioBufferDtmfEventReceivedEvent from .response_function_call_arguments_delta_event import ResponseFunctionCallArgumentsDeltaEvent from .conversation_item_input_audio_transcription_segment import ConversationItemInputAudioTranscriptionSegment from .conversation_item_input_audio_transcription_delta_event import ConversationItemInputAudioTranscriptionDeltaEvent @@ -116,6 +117,7 @@ class OutputAudioBufferCleared(BaseModel): RealtimeErrorEvent, InputAudioBufferClearedEvent, InputAudioBufferCommittedEvent, + InputAudioBufferDtmfEventReceivedEvent, InputAudioBufferSpeechStartedEvent, InputAudioBufferSpeechStoppedEvent, RateLimitsUpdatedEvent, diff --git a/src/openai/types/realtime/realtime_session_create_request.py b/src/openai/types/realtime/realtime_session_create_request.py index 016ae45b67..80cf468dc8 100644 --- a/src/openai/types/realtime/realtime_session_create_request.py +++ b/src/openai/types/realtime/realtime_session_create_request.py @@ -110,13 +110,18 @@ class RealtimeSessionCreateRequest(BaseModel): limit, the conversation be truncated, meaning messages (starting from the oldest) will not be included in the model's context. A 32k context model with 4,096 max output tokens can only include 28,224 tokens in the context before - truncation occurs. Clients can configure truncation behavior to truncate with a - lower max token limit, which is an effective way to control token usage and - cost. Truncation will reduce the number of cached tokens on the next turn - (busting the cache), since messages are dropped from the beginning of the - context. However, clients can also configure truncation to retain messages up to - a fraction of the maximum context size, which will reduce the need for future - truncations and thus improve the cache rate. Truncation can be disabled - entirely, which means the server will never truncate but would instead return an - error if the conversation exceeds the model's input token limit. + truncation occurs. + + Clients can configure truncation behavior to truncate with a lower max token + limit, which is an effective way to control token usage and cost. + + Truncation will reduce the number of cached tokens on the next turn (busting the + cache), since messages are dropped from the beginning of the context. 
However, + clients can also configure truncation to retain messages up to a fraction of the + maximum context size, which will reduce the need for future truncations and thus + improve the cache rate. + + Truncation can be disabled entirely, which means the server will never truncate + but would instead return an error if the conversation exceeds the model's input + token limit. """ diff --git a/src/openai/types/realtime/realtime_session_create_request_param.py b/src/openai/types/realtime/realtime_session_create_request_param.py index 8c3998c1ca..578d5a502d 100644 --- a/src/openai/types/realtime/realtime_session_create_request_param.py +++ b/src/openai/types/realtime/realtime_session_create_request_param.py @@ -110,13 +110,18 @@ class RealtimeSessionCreateRequestParam(TypedDict, total=False): limit, the conversation be truncated, meaning messages (starting from the oldest) will not be included in the model's context. A 32k context model with 4,096 max output tokens can only include 28,224 tokens in the context before - truncation occurs. Clients can configure truncation behavior to truncate with a - lower max token limit, which is an effective way to control token usage and - cost. Truncation will reduce the number of cached tokens on the next turn - (busting the cache), since messages are dropped from the beginning of the - context. However, clients can also configure truncation to retain messages up to - a fraction of the maximum context size, which will reduce the need for future - truncations and thus improve the cache rate. Truncation can be disabled - entirely, which means the server will never truncate but would instead return an - error if the conversation exceeds the model's input token limit. + truncation occurs. + + Clients can configure truncation behavior to truncate with a lower max token + limit, which is an effective way to control token usage and cost. + + Truncation will reduce the number of cached tokens on the next turn (busting the + cache), since messages are dropped from the beginning of the context. However, + clients can also configure truncation to retain messages up to a fraction of the + maximum context size, which will reduce the need for future truncations and thus + improve the cache rate. + + Truncation can be disabled entirely, which means the server will never truncate + but would instead return an error if the conversation exceeds the model's input + token limit. """ diff --git a/src/openai/types/realtime/realtime_session_create_response.py b/src/openai/types/realtime/realtime_session_create_response.py index c1336cd6e4..df69dd7bdb 100644 --- a/src/openai/types/realtime/realtime_session_create_response.py +++ b/src/openai/types/realtime/realtime_session_create_response.py @@ -53,9 +53,14 @@ class AudioInputTurnDetectionServerVad(BaseModel): """Type of turn detection, `server_vad` to turn on simple Server VAD.""" create_response: Optional[bool] = None - """ - Whether or not to automatically generate a response when a VAD stop event + """Whether or not to automatically generate a response when a VAD stop event occurs. + + If `interrupt_response` is set to `false` this may fail to create a response if + the model is already responding. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. 
""" idle_timeout_ms: Optional[int] = None @@ -76,9 +81,13 @@ class AudioInputTurnDetectionServerVad(BaseModel): interrupt_response: Optional[bool] = None """ - Whether or not to automatically interrupt any ongoing response with output to - the default conversation (i.e. `conversation` of `auto`) when a VAD start event - occurs. + Whether or not to automatically interrupt (cancel) any ongoing response with + output to the default conversation (i.e. `conversation` of `auto`) when a VAD + start event occurs. If `true` then the response will be cancelled, otherwise it + will continue until complete. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. """ prefix_padding_ms: Optional[int] = None @@ -463,13 +472,18 @@ class RealtimeSessionCreateResponse(BaseModel): limit, the conversation be truncated, meaning messages (starting from the oldest) will not be included in the model's context. A 32k context model with 4,096 max output tokens can only include 28,224 tokens in the context before - truncation occurs. Clients can configure truncation behavior to truncate with a - lower max token limit, which is an effective way to control token usage and - cost. Truncation will reduce the number of cached tokens on the next turn - (busting the cache), since messages are dropped from the beginning of the - context. However, clients can also configure truncation to retain messages up to - a fraction of the maximum context size, which will reduce the need for future - truncations and thus improve the cache rate. Truncation can be disabled - entirely, which means the server will never truncate but would instead return an - error if the conversation exceeds the model's input token limit. + truncation occurs. + + Clients can configure truncation behavior to truncate with a lower max token + limit, which is an effective way to control token usage and cost. + + Truncation will reduce the number of cached tokens on the next turn (busting the + cache), since messages are dropped from the beginning of the context. However, + clients can also configure truncation to retain messages up to a fraction of the + maximum context size, which will reduce the need for future truncations and thus + improve the cache rate. + + Truncation can be disabled entirely, which means the server will never truncate + but would instead return an error if the conversation exceeds the model's input + token limit. """ diff --git a/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py b/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py index 7dc7a8f302..e21844f48f 100644 --- a/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py +++ b/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py @@ -14,9 +14,14 @@ class ServerVad(BaseModel): """Type of turn detection, `server_vad` to turn on simple Server VAD.""" create_response: Optional[bool] = None - """ - Whether or not to automatically generate a response when a VAD stop event + """Whether or not to automatically generate a response when a VAD stop event occurs. + + If `interrupt_response` is set to `false` this may fail to create a response if + the model is already responding. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. 
""" idle_timeout_ms: Optional[int] = None @@ -37,9 +42,13 @@ class ServerVad(BaseModel): interrupt_response: Optional[bool] = None """ - Whether or not to automatically interrupt any ongoing response with output to - the default conversation (i.e. `conversation` of `auto`) when a VAD start event - occurs. + Whether or not to automatically interrupt (cancel) any ongoing response with + output to the default conversation (i.e. `conversation` of `auto`) when a VAD + start event occurs. If `true` then the response will be cancelled, otherwise it + will continue until complete. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. """ prefix_padding_ms: Optional[int] = None diff --git a/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py b/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py index d899b8c5c1..507c43141e 100644 --- a/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py +++ b/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py @@ -13,9 +13,14 @@ class ServerVad(TypedDict, total=False): """Type of turn detection, `server_vad` to turn on simple Server VAD.""" create_response: bool - """ - Whether or not to automatically generate a response when a VAD stop event + """Whether or not to automatically generate a response when a VAD stop event occurs. + + If `interrupt_response` is set to `false` this may fail to create a response if + the model is already responding. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. """ idle_timeout_ms: Optional[int] @@ -36,9 +41,13 @@ class ServerVad(TypedDict, total=False): interrupt_response: bool """ - Whether or not to automatically interrupt any ongoing response with output to - the default conversation (i.e. `conversation` of `auto`) when a VAD start event - occurs. + Whether or not to automatically interrupt (cancel) any ongoing response with + output to the default conversation (i.e. `conversation` of `auto`) when a VAD + start event occurs. If `true` then the response will be cancelled, otherwise it + will continue until complete. + + If both `create_response` and `interrupt_response` are set to `false`, the model + will never respond automatically but VAD events will still be emitted. 
""" prefix_padding_ms: int diff --git a/src/openai/types/responses/__init__.py b/src/openai/types/responses/__init__.py index e707141d9a..a4d939d9ff 100644 --- a/src/openai/types/responses/__init__.py +++ b/src/openai/types/responses/__init__.py @@ -28,6 +28,7 @@ from .custom_tool_param import CustomToolParam as CustomToolParam from .tool_choice_shell import ToolChoiceShell as ToolChoiceShell from .tool_choice_types import ToolChoiceTypes as ToolChoiceTypes +from .compacted_response import CompactedResponse as CompactedResponse from .easy_input_message import EasyInputMessage as EasyInputMessage from .response_item_list import ResponseItemList as ResponseItemList from .tool_choice_custom import ToolChoiceCustom as ToolChoiceCustom @@ -60,6 +61,7 @@ from .response_create_params import ResponseCreateParams as ResponseCreateParams from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent from .response_input_content import ResponseInputContent as ResponseInputContent +from .response_compact_params import ResponseCompactParams as ResponseCompactParams from .response_output_message import ResponseOutputMessage as ResponseOutputMessage from .response_output_refusal import ResponseOutputRefusal as ResponseOutputRefusal from .response_reasoning_item import ResponseReasoningItem as ResponseReasoningItem @@ -69,6 +71,7 @@ from .web_search_preview_tool import WebSearchPreviewTool as WebSearchPreviewTool from .easy_input_message_param import EasyInputMessageParam as EasyInputMessageParam from .input_token_count_params import InputTokenCountParams as InputTokenCountParams +from .response_compaction_item import ResponseCompactionItem as ResponseCompactionItem from .response_completed_event import ResponseCompletedEvent as ResponseCompletedEvent from .response_retrieve_params import ResponseRetrieveParams as ResponseRetrieveParams from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent @@ -108,6 +111,7 @@ from .tool_choice_apply_patch_param import ToolChoiceApplyPatchParam as ToolChoiceApplyPatchParam from .web_search_preview_tool_param import WebSearchPreviewToolParam as WebSearchPreviewToolParam from .response_apply_patch_tool_call import ResponseApplyPatchToolCall as ResponseApplyPatchToolCall +from .response_compaction_item_param import ResponseCompactionItemParam as ResponseCompactionItemParam from .response_file_search_tool_call import ResponseFileSearchToolCall as ResponseFileSearchToolCall from .response_mcp_call_failed_event import ResponseMcpCallFailedEvent as ResponseMcpCallFailedEvent from .response_custom_tool_call_param import ResponseCustomToolCallParam as ResponseCustomToolCallParam @@ -133,6 +137,7 @@ from .response_mcp_call_in_progress_event import ResponseMcpCallInProgressEvent as ResponseMcpCallInProgressEvent from .response_reasoning_text_delta_event import ResponseReasoningTextDeltaEvent as ResponseReasoningTextDeltaEvent from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent as ResponseAudioTranscriptDoneEvent +from .response_compaction_item_param_param import ResponseCompactionItemParamParam as ResponseCompactionItemParamParam from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam as ResponseFileSearchToolCallParam from .response_mcp_list_tools_failed_event import ResponseMcpListToolsFailedEvent as ResponseMcpListToolsFailedEvent from .response_apply_patch_tool_call_output import ResponseApplyPatchToolCallOutput as ResponseApplyPatchToolCallOutput diff --git 
a/src/openai/types/responses/compacted_response.py b/src/openai/types/responses/compacted_response.py new file mode 100644 index 0000000000..5b333b83c0 --- /dev/null +++ b/src/openai/types/responses/compacted_response.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_usage import ResponseUsage +from .response_output_item import ResponseOutputItem + +__all__ = ["CompactedResponse"] + + +class CompactedResponse(BaseModel): + id: str + """The unique identifier for the compacted response.""" + + created_at: int + """Unix timestamp (in seconds) when the compacted conversation was created.""" + + object: Literal["response.compaction"] + """The object type. Always `response.compaction`.""" + + output: List[ResponseOutputItem] + """The compacted list of output items. + + This is a list of all user messages, followed by a single compaction item. + """ + + usage: ResponseUsage + """ + Token accounting for the compaction pass, including cached, reasoning, and total + tokens. + """ diff --git a/src/openai/types/responses/parsed_response.py b/src/openai/types/responses/parsed_response.py index c120f4641d..a859710590 100644 --- a/src/openai/types/responses/parsed_response.py +++ b/src/openai/types/responses/parsed_response.py @@ -6,7 +6,6 @@ from ..._utils import PropertyInfo from .response import Response from ..._models import GenericModel -from ..._utils._transform import PropertyInfo from .response_output_item import ( McpCall, McpListTools, @@ -19,6 +18,7 @@ from .response_output_message import ResponseOutputMessage from .response_output_refusal import ResponseOutputRefusal from .response_reasoning_item import ResponseReasoningItem +from .response_compaction_item import ResponseCompactionItem from .response_custom_tool_call import ResponseCustomToolCall from .response_computer_tool_call import ResponseComputerToolCall from .response_function_tool_call import ResponseFunctionToolCall @@ -79,6 +79,7 @@ class ParsedResponseFunctionToolCall(ResponseFunctionToolCall): McpListTools, ResponseCodeInterpreterToolCall, ResponseCustomToolCall, + ResponseCompactionItem, ResponseFunctionShellToolCall, ResponseFunctionShellToolCallOutput, ResponseApplyPatchToolCall, diff --git a/src/openai/types/responses/response_compact_params.py b/src/openai/types/responses/response_compact_params.py new file mode 100644 index 0000000000..fe38b15a9d --- /dev/null +++ b/src/openai/types/responses/response_compact_params.py @@ -0,0 +1,126 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable, Optional +from typing_extensions import Literal, TypedDict + +from .response_input_item_param import ResponseInputItemParam + +__all__ = ["ResponseCompactParams"] + + +class ResponseCompactParams(TypedDict, total=False): + input: Union[str, Iterable[ResponseInputItemParam], None] + """Text, image, or file inputs to the model, used to generate a response""" + + instructions: Optional[str] + """ + A system (or developer) message inserted into the model's context. When used + along with `previous_response_id`, the instructions from a previous response + will not be carried over to the next response. This makes it simple to swap out + system (or developer) messages in new responses. 
+ """ + + model: Union[ + Literal[ + "gpt-5.1", + "gpt-5.1-2025-11-13", + "gpt-5.1-codex", + "gpt-5.1-mini", + "gpt-5.1-chat-latest", + "gpt-5", + "gpt-5-mini", + "gpt-5-nano", + "gpt-5-2025-08-07", + "gpt-5-mini-2025-08-07", + "gpt-5-nano-2025-08-07", + "gpt-5-chat-latest", + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4.1-nano", + "gpt-4.1-2025-04-14", + "gpt-4.1-mini-2025-04-14", + "gpt-4.1-nano-2025-04-14", + "o4-mini", + "o4-mini-2025-04-16", + "o3", + "o3-2025-04-16", + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "o1-preview", + "o1-preview-2024-09-12", + "o1-mini", + "o1-mini-2024-09-12", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-audio-preview", + "gpt-4o-audio-preview-2024-10-01", + "gpt-4o-audio-preview-2024-12-17", + "gpt-4o-audio-preview-2025-06-03", + "gpt-4o-mini-audio-preview", + "gpt-4o-mini-audio-preview-2024-12-17", + "gpt-4o-search-preview", + "gpt-4o-mini-search-preview", + "gpt-4o-search-preview-2025-03-11", + "gpt-4o-mini-search-preview-2025-03-11", + "chatgpt-4o-latest", + "codex-mini-latest", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + "o1-pro", + "o1-pro-2025-03-19", + "o3-pro", + "o3-pro-2025-06-10", + "o3-deep-research", + "o3-deep-research-2025-06-26", + "o4-mini-deep-research", + "o4-mini-deep-research-2025-06-26", + "computer-use-preview", + "computer-use-preview-2025-03-11", + "gpt-5-codex", + "gpt-5-pro", + "gpt-5-pro-2025-10-06", + "gpt-5.1-codex-max", + ], + str, + None, + ] + """Model ID used to generate the response, like `gpt-5` or `o3`. + + OpenAI offers a wide range of models with different capabilities, performance + characteristics, and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + """ + + previous_response_id: Optional[str] + """The unique ID of the previous response to the model. + + Use this to create multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + Cannot be used in conjunction with `conversation`. + """ diff --git a/src/openai/types/responses/response_compaction_item.py b/src/openai/types/responses/response_compaction_item.py new file mode 100644 index 0000000000..dc5f839bb8 --- /dev/null +++ b/src/openai/types/responses/response_compaction_item.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseCompactionItem"] + + +class ResponseCompactionItem(BaseModel): + id: str + """The unique ID of the compaction item.""" + + encrypted_content: str + + type: Literal["compaction"] + """The type of the item. 
Always `compaction`.""" + + created_by: Optional[str] = None diff --git a/src/openai/types/responses/response_compaction_item_param.py b/src/openai/types/responses/response_compaction_item_param.py new file mode 100644 index 0000000000..8fdc2a561a --- /dev/null +++ b/src/openai/types/responses/response_compaction_item_param.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseCompactionItemParam"] + + +class ResponseCompactionItemParam(BaseModel): + encrypted_content: str + + type: Literal["compaction"] + """The type of the item. Always `compaction`.""" + + id: Optional[str] = None + """The ID of the compaction item.""" diff --git a/src/openai/types/responses/response_compaction_item_param_param.py b/src/openai/types/responses/response_compaction_item_param_param.py new file mode 100644 index 0000000000..0d12296589 --- /dev/null +++ b/src/openai/types/responses/response_compaction_item_param_param.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseCompactionItemParamParam"] + + +class ResponseCompactionItemParamParam(TypedDict, total=False): + encrypted_content: Required[str] + + type: Required[Literal["compaction"]] + """The type of the item. Always `compaction`.""" + + id: Optional[str] + """The ID of the compaction item.""" diff --git a/src/openai/types/responses/response_function_shell_call_output_content.py b/src/openai/types/responses/response_function_shell_call_output_content.py index 1429ce9724..e0e2c09ad1 100644 --- a/src/openai/types/responses/response_function_shell_call_output_content.py +++ b/src/openai/types/responses/response_function_shell_call_output_content.py @@ -27,10 +27,10 @@ class OutcomeExit(BaseModel): class ResponseFunctionShellCallOutputContent(BaseModel): outcome: Outcome - """The exit or timeout outcome associated with this chunk.""" + """The exit or timeout outcome associated with this shell call.""" stderr: str - """Captured stderr output for this chunk of the shell call.""" + """Captured stderr output for the shell call.""" stdout: str - """Captured stdout output for this chunk of the shell call.""" + """Captured stdout output for the shell call.""" diff --git a/src/openai/types/responses/response_function_shell_call_output_content_param.py b/src/openai/types/responses/response_function_shell_call_output_content_param.py index 6395541cf5..fa065bd4b5 100644 --- a/src/openai/types/responses/response_function_shell_call_output_content_param.py +++ b/src/openai/types/responses/response_function_shell_call_output_content_param.py @@ -26,10 +26,10 @@ class OutcomeExit(TypedDict, total=False): class ResponseFunctionShellCallOutputContentParam(TypedDict, total=False): outcome: Required[Outcome] - """The exit or timeout outcome associated with this chunk.""" + """The exit or timeout outcome associated with this shell call.""" stderr: Required[str] - """Captured stderr output for this chunk of the shell call.""" + """Captured stderr output for the shell call.""" stdout: Required[str] - """Captured stdout output for this chunk of the shell call.""" + """Captured stdout output for the shell call.""" diff --git a/src/openai/types/responses/response_function_shell_tool_call.py 
b/src/openai/types/responses/response_function_shell_tool_call.py index be0a5bcff8..de42cb0640 100644 --- a/src/openai/types/responses/response_function_shell_tool_call.py +++ b/src/openai/types/responses/response_function_shell_tool_call.py @@ -20,7 +20,7 @@ class Action(BaseModel): class ResponseFunctionShellToolCall(BaseModel): id: str - """The unique ID of the function shell tool call. + """The unique ID of the shell tool call. Populated when this item is returned via API. """ @@ -29,7 +29,7 @@ class ResponseFunctionShellToolCall(BaseModel): """The shell commands and limits that describe how to run the tool call.""" call_id: str - """The unique ID of the function shell tool call generated by the model.""" + """The unique ID of the shell tool call generated by the model.""" status: Literal["in_progress", "completed", "incomplete"] """The status of the shell call. diff --git a/src/openai/types/responses/response_input_item.py b/src/openai/types/responses/response_input_item.py index eaf5396087..103c8634ce 100644 --- a/src/openai/types/responses/response_input_item.py +++ b/src/openai/types/responses/response_input_item.py @@ -12,6 +12,7 @@ from .response_computer_tool_call import ResponseComputerToolCall from .response_function_tool_call import ResponseFunctionToolCall from .response_function_web_search import ResponseFunctionWebSearch +from .response_compaction_item_param import ResponseCompactionItemParam from .response_file_search_tool_call import ResponseFileSearchToolCall from .response_custom_tool_call_output import ResponseCustomToolCallOutput from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall @@ -215,13 +216,13 @@ class ShellCall(BaseModel): """The shell commands and limits that describe how to run the tool call.""" call_id: str - """The unique ID of the function shell tool call generated by the model.""" + """The unique ID of the shell tool call generated by the model.""" type: Literal["shell_call"] - """The type of the item. Always `function_shell_call`.""" + """The type of the item. Always `shell_call`.""" id: Optional[str] = None - """The unique ID of the function shell tool call. + """The unique ID of the shell tool call. Populated when this item is returned via API. """ @@ -235,7 +236,7 @@ class ShellCall(BaseModel): class ShellCallOutput(BaseModel): call_id: str - """The unique ID of the function shell tool call generated by the model.""" + """The unique ID of the shell tool call generated by the model.""" output: List[ResponseFunctionShellCallOutputContent] """ @@ -244,10 +245,10 @@ class ShellCallOutput(BaseModel): """ type: Literal["shell_call_output"] - """The type of the item. Always `function_shell_call_output`.""" + """The type of the item. Always `shell_call_output`.""" id: Optional[str] = None - """The unique ID of the function shell tool call output. + """The unique ID of the shell tool call output. Populated when this item is returned via API. 
""" @@ -462,6 +463,7 @@ class ItemReference(BaseModel): ResponseFunctionToolCall, FunctionCallOutput, ResponseReasoningItem, + ResponseCompactionItemParam, ImageGenerationCall, ResponseCodeInterpreterToolCall, LocalShellCall, diff --git a/src/openai/types/responses/response_input_item_param.py b/src/openai/types/responses/response_input_item_param.py index 5c2e81c4de..85d9f92b23 100644 --- a/src/openai/types/responses/response_input_item_param.py +++ b/src/openai/types/responses/response_input_item_param.py @@ -13,6 +13,7 @@ from .response_computer_tool_call_param import ResponseComputerToolCallParam from .response_function_tool_call_param import ResponseFunctionToolCallParam from .response_function_web_search_param import ResponseFunctionWebSearchParam +from .response_compaction_item_param_param import ResponseCompactionItemParamParam from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam from .response_custom_tool_call_output_param import ResponseCustomToolCallOutputParam from .response_code_interpreter_tool_call_param import ResponseCodeInterpreterToolCallParam @@ -216,13 +217,13 @@ class ShellCall(TypedDict, total=False): """The shell commands and limits that describe how to run the tool call.""" call_id: Required[str] - """The unique ID of the function shell tool call generated by the model.""" + """The unique ID of the shell tool call generated by the model.""" type: Required[Literal["shell_call"]] - """The type of the item. Always `function_shell_call`.""" + """The type of the item. Always `shell_call`.""" id: Optional[str] - """The unique ID of the function shell tool call. + """The unique ID of the shell tool call. Populated when this item is returned via API. """ @@ -236,7 +237,7 @@ class ShellCall(TypedDict, total=False): class ShellCallOutput(TypedDict, total=False): call_id: Required[str] - """The unique ID of the function shell tool call generated by the model.""" + """The unique ID of the shell tool call generated by the model.""" output: Required[Iterable[ResponseFunctionShellCallOutputContentParam]] """ @@ -245,10 +246,10 @@ class ShellCallOutput(TypedDict, total=False): """ type: Required[Literal["shell_call_output"]] - """The type of the item. Always `function_shell_call_output`.""" + """The type of the item. Always `shell_call_output`.""" id: Optional[str] - """The unique ID of the function shell tool call output. + """The unique ID of the shell tool call output. Populated when this item is returned via API. 
""" @@ -461,6 +462,7 @@ class ItemReference(TypedDict, total=False): ResponseFunctionToolCallParam, FunctionCallOutput, ResponseReasoningItemParam, + ResponseCompactionItemParamParam, ImageGenerationCall, ResponseCodeInterpreterToolCallParam, LocalShellCall, diff --git a/src/openai/types/responses/response_input_param.py b/src/openai/types/responses/response_input_param.py index 365c6b3d7b..bbd8e6af79 100644 --- a/src/openai/types/responses/response_input_param.py +++ b/src/openai/types/responses/response_input_param.py @@ -13,6 +13,7 @@ from .response_computer_tool_call_param import ResponseComputerToolCallParam from .response_function_tool_call_param import ResponseFunctionToolCallParam from .response_function_web_search_param import ResponseFunctionWebSearchParam +from .response_compaction_item_param_param import ResponseCompactionItemParamParam from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam from .response_custom_tool_call_output_param import ResponseCustomToolCallOutputParam from .response_code_interpreter_tool_call_param import ResponseCodeInterpreterToolCallParam @@ -217,13 +218,13 @@ class ShellCall(TypedDict, total=False): """The shell commands and limits that describe how to run the tool call.""" call_id: Required[str] - """The unique ID of the function shell tool call generated by the model.""" + """The unique ID of the shell tool call generated by the model.""" type: Required[Literal["shell_call"]] - """The type of the item. Always `function_shell_call`.""" + """The type of the item. Always `shell_call`.""" id: Optional[str] - """The unique ID of the function shell tool call. + """The unique ID of the shell tool call. Populated when this item is returned via API. """ @@ -237,7 +238,7 @@ class ShellCall(TypedDict, total=False): class ShellCallOutput(TypedDict, total=False): call_id: Required[str] - """The unique ID of the function shell tool call generated by the model.""" + """The unique ID of the shell tool call generated by the model.""" output: Required[Iterable[ResponseFunctionShellCallOutputContentParam]] """ @@ -246,10 +247,10 @@ class ShellCallOutput(TypedDict, total=False): """ type: Required[Literal["shell_call_output"]] - """The type of the item. Always `function_shell_call_output`.""" + """The type of the item. Always `shell_call_output`.""" id: Optional[str] - """The unique ID of the function shell tool call output. + """The unique ID of the shell tool call output. Populated when this item is returned via API. 
""" @@ -462,6 +463,7 @@ class ItemReference(TypedDict, total=False): ResponseFunctionToolCallParam, FunctionCallOutput, ResponseReasoningItemParam, + ResponseCompactionItemParamParam, ImageGenerationCall, ResponseCodeInterpreterToolCallParam, LocalShellCall, diff --git a/src/openai/types/responses/response_output_item.py b/src/openai/types/responses/response_output_item.py index 906ddbb25e..f0a66e1836 100644 --- a/src/openai/types/responses/response_output_item.py +++ b/src/openai/types/responses/response_output_item.py @@ -7,6 +7,7 @@ from ..._models import BaseModel from .response_output_message import ResponseOutputMessage from .response_reasoning_item import ResponseReasoningItem +from .response_compaction_item import ResponseCompactionItem from .response_custom_tool_call import ResponseCustomToolCall from .response_computer_tool_call import ResponseComputerToolCall from .response_function_tool_call import ResponseFunctionToolCall @@ -173,6 +174,7 @@ class McpApprovalRequest(BaseModel): ResponseFunctionWebSearch, ResponseComputerToolCall, ResponseReasoningItem, + ResponseCompactionItem, ImageGenerationCall, ResponseCodeInterpreterToolCall, LocalShellCall, diff --git a/src/openai/types/responses/tool.py b/src/openai/types/responses/tool.py index ae8b34b1f4..bb32d4e1ec 100644 --- a/src/openai/types/responses/tool.py +++ b/src/openai/types/responses/tool.py @@ -174,7 +174,7 @@ class CodeInterpreter(BaseModel): """The code interpreter container. Can be a container ID or an object that specifies uploaded file IDs to make - available to your code. + available to your code, along with an optional `memory_limit` setting. """ type: Literal["code_interpreter"] diff --git a/src/openai/types/responses/tool_param.py b/src/openai/types/responses/tool_param.py index 18b044ab8c..779acf0a53 100644 --- a/src/openai/types/responses/tool_param.py +++ b/src/openai/types/responses/tool_param.py @@ -174,7 +174,7 @@ class CodeInterpreter(TypedDict, total=False): """The code interpreter container. Can be a container ID or an object that specifies uploaded file IDs to make - available to your code. + available to your code, along with an optional `memory_limit` setting. """ type: Required[Literal["code_interpreter"]] diff --git a/src/openai/types/shared/all_models.py b/src/openai/types/shared/all_models.py index 3e0b09e2d1..ba8e1d82cf 100644 --- a/src/openai/types/shared/all_models.py +++ b/src/openai/types/shared/all_models.py @@ -24,5 +24,6 @@ "gpt-5-codex", "gpt-5-pro", "gpt-5-pro-2025-10-06", + "gpt-5.1-codex-max", ], ] diff --git a/src/openai/types/shared/reasoning.py b/src/openai/types/shared/reasoning.py index cf470ca057..b19476bcb5 100644 --- a/src/openai/types/shared/reasoning.py +++ b/src/openai/types/shared/reasoning.py @@ -14,9 +14,9 @@ class Reasoning(BaseModel): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. 
Tool @@ -24,6 +24,7 @@ class Reasoning(BaseModel): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. """ generate_summary: Optional[Literal["auto", "concise", "detailed"]] = None diff --git a/src/openai/types/shared/reasoning_effort.py b/src/openai/types/shared/reasoning_effort.py index c890a133cc..24d8516424 100644 --- a/src/openai/types/shared/reasoning_effort.py +++ b/src/openai/types/shared/reasoning_effort.py @@ -5,4 +5,4 @@ __all__ = ["ReasoningEffort"] -ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high"]] +ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] diff --git a/src/openai/types/shared/responses_model.py b/src/openai/types/shared/responses_model.py index 432cb82afd..38cdea9a94 100644 --- a/src/openai/types/shared/responses_model.py +++ b/src/openai/types/shared/responses_model.py @@ -24,5 +24,6 @@ "gpt-5-codex", "gpt-5-pro", "gpt-5-pro-2025-10-06", + "gpt-5.1-codex-max", ], ] diff --git a/src/openai/types/shared_params/reasoning.py b/src/openai/types/shared_params/reasoning.py index ad58f70b71..71cb37c65e 100644 --- a/src/openai/types/shared_params/reasoning.py +++ b/src/openai/types/shared_params/reasoning.py @@ -15,9 +15,9 @@ class Reasoning(TypedDict, total=False): """ Constrains effort on reasoning for [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently - supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing - reasoning effort can result in faster responses and fewer tokens used on - reasoning in a response. + supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. + Reducing reasoning effort can result in faster responses and fewer tokens used + on reasoning in a response. - `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool @@ -25,6 +25,7 @@ class Reasoning(TypedDict, total=False): - All models before `gpt-5.1` default to `medium` reasoning effort, and do not support `none`. - The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort. + - `xhigh` is currently only supported for `gpt-5.1-codex-max`. 
""" generate_summary: Optional[Literal["auto", "concise", "detailed"]] diff --git a/src/openai/types/shared_params/reasoning_effort.py b/src/openai/types/shared_params/reasoning_effort.py index e388eebff1..8518c2b141 100644 --- a/src/openai/types/shared_params/reasoning_effort.py +++ b/src/openai/types/shared_params/reasoning_effort.py @@ -7,4 +7,4 @@ __all__ = ["ReasoningEffort"] -ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high"]] +ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] diff --git a/src/openai/types/shared_params/responses_model.py b/src/openai/types/shared_params/responses_model.py index fe34eb0f62..ad44dd6bf7 100644 --- a/src/openai/types/shared_params/responses_model.py +++ b/src/openai/types/shared_params/responses_model.py @@ -26,5 +26,6 @@ "gpt-5-codex", "gpt-5-pro", "gpt-5-pro-2025-10-06", + "gpt-5.1-codex-max", ], ] diff --git a/src/openai/types/video_create_params.py b/src/openai/types/video_create_params.py index 527d62d193..c4d3e0851f 100644 --- a/src/openai/types/video_create_params.py +++ b/src/openai/types/video_create_params.py @@ -20,10 +20,16 @@ class VideoCreateParams(TypedDict, total=False): """Optional image reference that guides generation.""" model: VideoModel - """The video generation model to use. Defaults to `sora-2`.""" + """The video generation model to use (allowed values: sora-2, sora-2-pro). + + Defaults to `sora-2`. + """ seconds: VideoSeconds - """Clip duration in seconds. Defaults to 4 seconds.""" + """Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds.""" size: VideoSize - """Output resolution formatted as width x height. Defaults to 720x1280.""" + """ + Output resolution formatted as width x height (allowed values: 720x1280, + 1280x720, 1024x1792, 1792x1024). Defaults to 720x1280. 
+ """ diff --git a/tests/api_resources/test_containers.py b/tests/api_resources/test_containers.py index c972f6539d..cf173c7fd5 100644 --- a/tests/api_resources/test_containers.py +++ b/tests/api_resources/test_containers.py @@ -38,6 +38,7 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: "minutes": 0, }, file_ids=["string"], + memory_limit="1g", ) assert_matches_type(ContainerCreateResponse, container, path=["response"]) @@ -197,6 +198,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> "minutes": 0, }, file_ids=["string"], + memory_limit="1g", ) assert_matches_type(ContainerCreateResponse, container, path=["response"]) diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py index b57e6099c4..14e2d911ef 100644 --- a/tests/api_resources/test_responses.py +++ b/tests/api_resources/test_responses.py @@ -12,6 +12,7 @@ from openai._utils import assert_signatures_in_sync from openai.types.responses import ( Response, + CompactedResponse, ) base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -36,7 +37,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: max_output_tokens=0, max_tool_calls=0, metadata={"foo": "string"}, - model="gpt-4o", + model="gpt-5.1", parallel_tool_calls=True, previous_response_id="previous_response_id", prompt={ @@ -117,7 +118,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: max_output_tokens=0, max_tool_calls=0, metadata={"foo": "string"}, - model="gpt-4o", + model="gpt-5.1", parallel_tool_calls=True, previous_response_id="previous_response_id", prompt={ @@ -358,6 +359,41 @@ def test_path_params_cancel(self, client: OpenAI) -> None: "", ) + @parametrize + def test_method_compact(self, client: OpenAI) -> None: + response = client.responses.compact() + assert_matches_type(CompactedResponse, response, path=["response"]) + + @parametrize + def test_method_compact_with_all_params(self, client: OpenAI) -> None: + response = client.responses.compact( + input="string", + instructions="instructions", + model="gpt-5.1", + previous_response_id="resp_123", + ) + assert_matches_type(CompactedResponse, response, path=["response"]) + + @parametrize + def test_raw_response_compact(self, client: OpenAI) -> None: + http_response = client.responses.with_raw_response.compact() + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(CompactedResponse, response, path=["response"]) + + @parametrize + def test_streaming_response_compact(self, client: OpenAI) -> None: + with client.responses.with_streaming_response.compact() as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = http_response.parse() + assert_matches_type(CompactedResponse, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + @pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) def test_parse_method_in_sync(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None: @@ -391,7 +427,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn max_output_tokens=0, max_tool_calls=0, metadata={"foo": "string"}, - model="gpt-4o", + model="gpt-5.1", parallel_tool_calls=True, previous_response_id="previous_response_id", prompt={ @@ -472,7 +508,7 @@ async def 
test_method_create_with_all_params_overload_2(self, async_client: Asyn max_output_tokens=0, max_tool_calls=0, metadata={"foo": "string"}, - model="gpt-4o", + model="gpt-5.1", parallel_tool_calls=True, previous_response_id="previous_response_id", prompt={ @@ -712,3 +748,38 @@ async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: await async_client.responses.with_raw_response.cancel( "", ) + + @parametrize + async def test_method_compact(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.compact() + assert_matches_type(CompactedResponse, response, path=["response"]) + + @parametrize + async def test_method_compact_with_all_params(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.compact( + input="string", + instructions="instructions", + model="gpt-5.1", + previous_response_id="resp_123", + ) + assert_matches_type(CompactedResponse, response, path=["response"]) + + @parametrize + async def test_raw_response_compact(self, async_client: AsyncOpenAI) -> None: + http_response = await async_client.responses.with_raw_response.compact() + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(CompactedResponse, response, path=["response"]) + + @parametrize + async def test_streaming_response_compact(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.with_streaming_response.compact() as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = await http_response.parse() + assert_matches_type(CompactedResponse, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True
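
Below the diff, a few illustrative usage sketches for the additions above; they are not part of the changeset. First, a minimal sketch of the new `client.responses.compact()` method, using only parameters exercised in `tests/api_resources/test_responses.py`; `resp_123` is a placeholder ID.

```python
import os

from openai import OpenAI

client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),  # This is the default and can be omitted
)

# Compact the context carried by an earlier response so a long multi-turn
# conversation can keep fitting within the model's context window.
compacted = client.responses.compact(
    model="gpt-5.1",
    previous_response_id="resp_123",  # placeholder ID of a prior response
    instructions="Keep the key decisions and open questions.",
)
print(compacted)  # a CompactedResponse object
```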
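
A sketch of requesting the new `xhigh` reasoning effort together with `gpt-5.1-codex-max`. The `xhigh` literal and the model ID come from this diff; the `effort` field name on the `reasoning` parameter is assumed from the existing SDK surface and is not shown in the hunks above.

```python
import os

from openai import OpenAI

client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Per the updated Reasoning docstring, `xhigh` is currently only supported
# for `gpt-5.1-codex-max`.
response = client.responses.create(
    model="gpt-5.1-codex-max",
    reasoning={"effort": "xhigh"},  # field name assumed; the `xhigh` value comes from ReasoningEffort
    input="Refactor this function to avoid quadratic behavior: ...",
)
print(response.output_text)
```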
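
Finally, a sketch of the new `memory_limit` option on code interpreter containers, mirroring the updated `test_method_create_with_all_params` in `tests/api_resources/test_containers.py`; the container name is a placeholder and `"1g"` is the value used in those tests.

```python
import os

from openai import OpenAI

client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Create a code interpreter container with the newly added memory limit.
container = client.containers.create(
    name="example-container",  # placeholder name
    memory_limit="1g",  # optional; value taken from the updated container tests
)
print(container.id)
```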