diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 108509ed29..427b8ec423 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "2.8.1"
+ ".": "2.9.0"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index fe1a09be6b..7adb61ca2e 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 136
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-a7e92d12ebe89ca019a7ac5b29759064eefa2c38fe08d03516f2620e66abb32b.yml
-openapi_spec_hash: acbc703b2739447abc6312b2d753631c
-config_hash: b876221dfb213df9f0a999e75d38a65e
+configured_endpoints: 137
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-fe8a79e6fd407e6c9afec60971f03076b65f711ccd6ea16457933b0e24fb1f6d.yml
+openapi_spec_hash: 38c0a73f4e08843732c5f8002a809104
+config_hash: 2c350086d87a4b4532077363087840e7
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1bfa59348f..6de78290fc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,28 @@
# Changelog
+## 2.9.0 (2025-12-04)
+
+Full Changelog: [v2.8.1...v2.9.0](https://github.com/openai/openai-python/compare/v2.8.1...v2.9.0)
+
+### Features
+
+* **api:** gpt-5.1-codex-max and responses/compact ([22f646e](https://github.com/openai/openai-python/commit/22f646e985b7c93782cf695edbe643844cae7017))
+
+
+### Bug Fixes
+
+* **client:** avoid mutating user-provided response config object ([#2700](https://github.com/openai/openai-python/issues/2700)) ([e040d22](https://github.com/openai/openai-python/commit/e040d22c2df068e908f69dc6b892e7f8b3fe6e99))
+* ensure streams are always closed ([0b1a27f](https://github.com/openai/openai-python/commit/0b1a27f08639d14dfe40bf80b48e2b8a1a51593c))
+* **streaming:** correct indentation ([575bbac](https://github.com/openai/openai-python/commit/575bbac13b3a57731a4e07b67636ae94463d43fa))
+
+
+### Chores
+
+* **deps:** mypy 1.18.1 has a regression, pin to 1.17 ([22cd586](https://github.com/openai/openai-python/commit/22cd586dbd5484b47f625da55db697691116b22b))
+* **docs:** use environment variables for authentication in code snippets ([c2a3cd5](https://github.com/openai/openai-python/commit/c2a3cd502bfb03f68f62f50aed15a40458c0996e))
+* **internal:** codegen related update ([307a066](https://github.com/openai/openai-python/commit/307a0664383b9d1d4151bc1a05a78c4fdcdcc9b0))
+* update lockfile ([b4109c5](https://github.com/openai/openai-python/commit/b4109c5fcf971ccfb25b4bdaef0bf36999f9eca5))
+
## 2.8.1 (2025-11-17)
Full Changelog: [v2.8.0...v2.8.1](https://github.com/openai/openai-python/compare/v2.8.0...v2.8.1)
diff --git a/README.md b/README.md
index 470707e1f3..b8050a4cd6 100644
--- a/README.md
+++ b/README.md
@@ -160,6 +160,7 @@ pip install openai[aiohttp]
Then you can enable it by instantiating the client with `http_client=DefaultAioHttpClient()`:
```python
+import os
import asyncio
from openai import DefaultAioHttpClient
from openai import AsyncOpenAI
@@ -167,7 +168,7 @@ from openai import AsyncOpenAI
async def main() -> None:
async with AsyncOpenAI(
- api_key="My API Key",
+ api_key=os.environ.get("OPENAI_API_KEY"), # This is the default and can be omitted
http_client=DefaultAioHttpClient(),
) as client:
chat_completion = await client.chat.completions.create(
diff --git a/api.md b/api.md
index 28ee551af3..3807603206 100644
--- a/api.md
+++ b/api.md
@@ -733,6 +733,7 @@ Types:
```python
from openai.types.responses import (
ApplyPatchTool,
+ CompactedResponse,
ComputerTool,
CustomTool,
EasyInputMessage,
@@ -752,6 +753,8 @@ from openai.types.responses import (
ResponseCodeInterpreterCallInProgressEvent,
ResponseCodeInterpreterCallInterpretingEvent,
ResponseCodeInterpreterToolCall,
+ ResponseCompactionItem,
+ ResponseCompactionItemParam,
ResponseCompletedEvent,
ResponseComputerToolCall,
ResponseComputerToolCallOutputItem,
@@ -861,6 +864,7 @@ Methods:
- client.responses.retrieve(response_id, \*\*params) -> Response
- client.responses.delete(response_id) -> None
- client.responses.cancel(response_id) -> Response
+- client.responses.compact(\*\*params) -> CompactedResponse
## InputItems
@@ -914,6 +918,7 @@ from openai.types.realtime import (
InputAudioBufferClearedEvent,
InputAudioBufferCommitEvent,
InputAudioBufferCommittedEvent,
+ InputAudioBufferDtmfEventReceivedEvent,
InputAudioBufferSpeechStartedEvent,
InputAudioBufferSpeechStoppedEvent,
InputAudioBufferTimeoutTriggered,
diff --git a/pyproject.toml b/pyproject.toml
index 75118d46be..4735412341 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,22 +1,24 @@
[project]
name = "openai"
-version = "2.8.1"
+version = "2.9.0"
description = "The official Python library for the openai API"
dynamic = ["readme"]
license = "Apache-2.0"
authors = [
{ name = "OpenAI", email = "support@openai.com" },
]
+
dependencies = [
- "httpx>=0.23.0, <1",
- "pydantic>=1.9.0, <3",
+ "httpx>=0.23.0, <1",
+ "pydantic>=1.9.0, <3",
"typing-extensions>=4.11, <5",
- "anyio>=3.5.0, <5",
- "distro>=1.7.0, <2",
- "sniffio",
+ "anyio>=3.5.0, <5",
+ "distro>=1.7.0, <2",
+ "sniffio",
"tqdm > 4",
"jiter>=0.10.0, <1",
]
+
requires-python = ">= 3.9"
classifiers = [
"Typing :: Typed",
@@ -26,6 +28,7 @@ classifiers = [
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
+ "Programming Language :: Python :: 3.14",
"Operating System :: OS Independent",
"Operating System :: POSIX",
"Operating System :: MacOS",
@@ -53,7 +56,7 @@ managed = true
# version pins are in requirements-dev.lock
dev-dependencies = [
"pyright==1.1.399",
- "mypy",
+ "mypy==1.17",
"respx",
"pytest",
"pytest-asyncio",
diff --git a/requirements-dev.lock b/requirements-dev.lock
index b454537b96..a7201a127b 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -12,65 +12,70 @@
-e file:.
aiohappyeyeballs==2.6.1
# via aiohttp
-aiohttp==3.12.13
+aiohttp==3.13.2
# via httpx-aiohttp
# via openai
-aiosignal==1.3.2
+aiosignal==1.4.0
# via aiohttp
-annotated-types==0.6.0
+annotated-types==0.7.0
# via pydantic
-anyio==4.1.0
+anyio==4.12.0
# via httpx
# via openai
-argcomplete==3.1.2
+argcomplete==3.6.3
# via nox
-asttokens==2.4.1
+asttokens==3.0.1
# via inline-snapshot
async-timeout==5.0.1
# via aiohttp
-attrs==24.2.0
+attrs==25.4.0
# via aiohttp
+ # via nox
# via outcome
# via trio
-azure-core==1.31.0
+azure-core==1.36.0
# via azure-identity
-azure-identity==1.19.0
-certifi==2023.7.22
+azure-identity==1.25.1
+backports-asyncio-runner==1.2.0
+ # via pytest-asyncio
+certifi==2025.11.12
# via httpcore
# via httpx
# via requests
-cffi==1.16.0
+cffi==2.0.0
# via cryptography
# via sounddevice
-charset-normalizer==3.3.2
+charset-normalizer==3.4.4
# via requests
colorama==0.4.6
# via griffe
-colorlog==6.7.0
+colorlog==6.10.1
# via nox
-cryptography==42.0.7
+cryptography==46.0.3
# via azure-identity
# via msal
# via pyjwt
-dirty-equals==0.6.0
-distlib==0.3.7
+dependency-groups==1.3.1
+ # via nox
+dirty-equals==0.11
+distlib==0.4.0
# via virtualenv
-distro==1.8.0
+distro==1.9.0
# via openai
-exceptiongroup==1.2.2
+exceptiongroup==1.3.1
# via anyio
# via pytest
# via trio
-execnet==2.1.1
+execnet==2.1.2
# via pytest-xdist
-executing==2.2.0
+executing==2.2.1
# via inline-snapshot
-filelock==3.12.4
+filelock==3.19.1
# via virtualenv
-frozenlist==1.7.0
+frozenlist==1.8.0
# via aiohttp
# via aiosignal
-griffe==1.13.0
+griffe==1.14.0
h11==0.16.0
# via httpcore
httpcore==1.0.9
@@ -81,137 +86,145 @@ httpx==0.28.1
# via respx
httpx-aiohttp==0.1.9
# via openai
-idna==3.4
+humanize==4.13.0
+ # via nox
+idna==3.11
# via anyio
# via httpx
# via requests
# via trio
# via yarl
-importlib-metadata==7.0.0
-iniconfig==2.0.0
+importlib-metadata==8.7.0
+iniconfig==2.1.0
# via pytest
-inline-snapshot==0.28.0
-jiter==0.11.0
+inline-snapshot==0.31.1
+jiter==0.12.0
# via openai
markdown-it-py==3.0.0
# via rich
mdurl==0.1.2
# via markdown-it-py
-msal==1.31.0
+msal==1.34.0
# via azure-identity
# via msal-extensions
-msal-extensions==1.2.0
+msal-extensions==1.3.1
# via azure-identity
-multidict==6.5.0
+multidict==6.7.0
# via aiohttp
# via yarl
-mypy==1.14.1
-mypy-extensions==1.0.0
+mypy==1.17.0
+mypy-extensions==1.1.0
# via mypy
nest-asyncio==1.6.0
-nodeenv==1.8.0
+nodeenv==1.9.1
# via pyright
-nox==2023.4.22
+nox==2025.11.12
numpy==2.0.2
# via openai
# via pandas
# via pandas-stubs
outcome==1.3.0.post0
# via trio
-packaging==23.2
+packaging==25.0
+ # via dependency-groups
# via nox
# via pytest
-pandas==2.2.3
+pandas==2.3.3
# via openai
-pandas-stubs==2.1.4.231227
+pandas-stubs==2.2.2.240807
# via openai
-platformdirs==3.11.0
+pathspec==0.12.1
+ # via mypy
+platformdirs==4.4.0
# via virtualenv
-pluggy==1.5.0
+pluggy==1.6.0
# via pytest
-portalocker==2.10.1
- # via msal-extensions
-propcache==0.3.2
+propcache==0.4.1
# via aiohttp
# via yarl
pycparser==2.23
# via cffi
-pydantic==2.11.9
+pydantic==2.12.5
# via openai
-pydantic-core==2.33.2
+pydantic-core==2.41.5
# via pydantic
-pygments==2.18.0
+pygments==2.19.2
# via pytest
# via rich
-pyjwt==2.8.0
+pyjwt==2.10.1
# via msal
pyright==1.1.399
-pytest==8.4.1
+pytest==8.4.2
# via inline-snapshot
# via pytest-asyncio
# via pytest-xdist
-pytest-asyncio==0.24.0
-pytest-xdist==3.7.0
-python-dateutil==2.8.2
+pytest-asyncio==1.2.0
+pytest-xdist==3.8.0
+python-dateutil==2.9.0.post0
# via pandas
# via time-machine
-pytz==2023.3.post1
- # via dirty-equals
+pytz==2025.2
# via pandas
-requests==2.31.0
+requests==2.32.5
# via azure-core
# via msal
respx==0.22.0
-rich==13.7.1
+rich==14.2.0
# via inline-snapshot
-ruff==0.9.4
-setuptools==68.2.2
- # via nodeenv
-six==1.16.0
- # via asttokens
- # via azure-core
+ruff==0.14.7
+six==1.17.0
# via python-dateutil
-sniffio==1.3.0
- # via anyio
+sniffio==1.3.1
# via openai
# via trio
sortedcontainers==2.4.0
# via trio
-sounddevice==0.5.1
+sounddevice==0.5.3
# via openai
-time-machine==2.9.0
-tomli==2.0.2
+time-machine==2.19.0
+tomli==2.3.0
+ # via dependency-groups
# via inline-snapshot
# via mypy
+ # via nox
# via pytest
-tqdm==4.66.5
+tqdm==4.67.1
# via openai
-trio==0.27.0
-types-pyaudio==0.2.16.20240516
-types-pytz==2024.2.0.20241003
+trio==0.31.0
+types-pyaudio==0.2.16.20250801
+types-pytz==2025.2.0.20251108
# via pandas-stubs
-types-tqdm==4.66.0.20240417
-typing-extensions==4.12.2
+types-requests==2.32.4.20250913
+ # via types-tqdm
+types-tqdm==4.67.0.20250809
+typing-extensions==4.15.0
+ # via aiosignal
+ # via anyio
# via azure-core
# via azure-identity
+ # via cryptography
+ # via exceptiongroup
# via multidict
# via mypy
# via openai
# via pydantic
# via pydantic-core
# via pyright
+ # via pytest-asyncio
# via typing-inspection
-typing-inspection==0.4.1
+ # via virtualenv
+typing-inspection==0.4.2
# via pydantic
-tzdata==2024.1
+tzdata==2025.2
# via pandas
-urllib3==2.2.1
+urllib3==2.5.0
# via requests
-virtualenv==20.24.5
+ # via types-requests
+virtualenv==20.35.4
# via nox
websockets==15.0.1
# via openai
-yarl==1.20.1
+yarl==1.22.0
# via aiohttp
-zipp==3.17.0
+zipp==3.23.0
# via importlib-metadata
diff --git a/requirements.lock b/requirements.lock
index b047cb3f88..8e021bd69b 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -12,30 +12,30 @@
-e file:.
aiohappyeyeballs==2.6.1
# via aiohttp
-aiohttp==3.12.13
+aiohttp==3.13.2
# via httpx-aiohttp
# via openai
-aiosignal==1.3.2
+aiosignal==1.4.0
# via aiohttp
-annotated-types==0.6.0
+annotated-types==0.7.0
# via pydantic
-anyio==4.1.0
+anyio==4.12.0
# via httpx
# via openai
async-timeout==5.0.1
# via aiohttp
-attrs==25.3.0
+attrs==25.4.0
# via aiohttp
-certifi==2023.7.22
+certifi==2025.11.12
# via httpcore
# via httpx
-cffi==1.17.1
+cffi==2.0.0
# via sounddevice
-distro==1.8.0
+distro==1.9.0
# via openai
-exceptiongroup==1.2.2
+exceptiongroup==1.3.1
# via anyio
-frozenlist==1.7.0
+frozenlist==1.8.0
# via aiohttp
# via aiosignal
h11==0.16.0
@@ -47,58 +47,60 @@ httpx==0.28.1
# via openai
httpx-aiohttp==0.1.9
# via openai
-idna==3.4
+idna==3.11
# via anyio
# via httpx
# via yarl
-jiter==0.11.0
+jiter==0.12.0
# via openai
-multidict==6.5.0
+multidict==6.7.0
# via aiohttp
# via yarl
numpy==2.0.2
# via openai
# via pandas
# via pandas-stubs
-pandas==2.2.3
+pandas==2.3.3
# via openai
pandas-stubs==2.2.2.240807
# via openai
-propcache==0.3.2
+propcache==0.4.1
# via aiohttp
# via yarl
pycparser==2.23
# via cffi
-pydantic==2.11.9
+pydantic==2.12.5
# via openai
-pydantic-core==2.33.2
+pydantic-core==2.41.5
# via pydantic
python-dateutil==2.9.0.post0
# via pandas
-pytz==2024.1
+pytz==2025.2
# via pandas
-six==1.16.0
+six==1.17.0
# via python-dateutil
-sniffio==1.3.0
- # via anyio
+sniffio==1.3.1
# via openai
-sounddevice==0.5.1
+sounddevice==0.5.3
# via openai
-tqdm==4.66.5
+tqdm==4.67.1
# via openai
-types-pytz==2024.2.0.20241003
+types-pytz==2025.2.0.20251108
# via pandas-stubs
-typing-extensions==4.12.2
+typing-extensions==4.15.0
+ # via aiosignal
+ # via anyio
+ # via exceptiongroup
# via multidict
# via openai
# via pydantic
# via pydantic-core
# via typing-inspection
-typing-inspection==0.4.1
+typing-inspection==0.4.2
# via pydantic
tzdata==2025.2
# via pandas
websockets==15.0.1
# via openai
-yarl==1.20.1
+yarl==1.22.0
# via aiohttp
diff --git a/src/openai/_streaming.py b/src/openai/_streaming.py
index 05c284a2be..61a742668a 100644
--- a/src/openai/_streaming.py
+++ b/src/openai/_streaming.py
@@ -55,49 +55,51 @@ def __stream__(self) -> Iterator[_T]:
process_data = self._client._process_response_data
iterator = self._iter_events()
- for sse in iterator:
- if sse.data.startswith("[DONE]"):
- break
-
- # we have to special case the Assistants `thread.` events since we won't have an "event" key in the data
- if sse.event and sse.event.startswith("thread."):
- data = sse.json()
-
- if sse.event == "error" and is_mapping(data) and data.get("error"):
- message = None
- error = data.get("error")
- if is_mapping(error):
- message = error.get("message")
- if not message or not isinstance(message, str):
- message = "An error occurred during streaming"
-
- raise APIError(
- message=message,
- request=self.response.request,
- body=data["error"],
- )
-
- yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response)
- else:
- data = sse.json()
- if is_mapping(data) and data.get("error"):
- message = None
- error = data.get("error")
- if is_mapping(error):
- message = error.get("message")
- if not message or not isinstance(message, str):
- message = "An error occurred during streaming"
-
- raise APIError(
- message=message,
- request=self.response.request,
- body=data["error"],
- )
-
- yield process_data(data=data, cast_to=cast_to, response=response)
-
- # As we might not fully consume the response stream, we need to close it explicitly
- response.close()
+ try:
+ for sse in iterator:
+ if sse.data.startswith("[DONE]"):
+ break
+
+ # we have to special case the Assistants `thread.` events since we won't have an "event" key in the data
+ if sse.event and sse.event.startswith("thread."):
+ data = sse.json()
+
+ if sse.event == "error" and is_mapping(data) and data.get("error"):
+ message = None
+ error = data.get("error")
+ if is_mapping(error):
+ message = error.get("message")
+ if not message or not isinstance(message, str):
+ message = "An error occurred during streaming"
+
+ raise APIError(
+ message=message,
+ request=self.response.request,
+ body=data["error"],
+ )
+
+ yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response)
+ else:
+ data = sse.json()
+ if is_mapping(data) and data.get("error"):
+ message = None
+ error = data.get("error")
+ if is_mapping(error):
+ message = error.get("message")
+ if not message or not isinstance(message, str):
+ message = "An error occurred during streaming"
+
+ raise APIError(
+ message=message,
+ request=self.response.request,
+ body=data["error"],
+ )
+
+ yield process_data(data=data, cast_to=cast_to, response=response)
+
+ finally:
+ # Ensure the response is closed even if the consumer doesn't read all data
+ response.close()
def __enter__(self) -> Self:
return self
@@ -156,49 +158,51 @@ async def __stream__(self) -> AsyncIterator[_T]:
process_data = self._client._process_response_data
iterator = self._iter_events()
- async for sse in iterator:
- if sse.data.startswith("[DONE]"):
- break
-
- # we have to special case the Assistants `thread.` events since we won't have an "event" key in the data
- if sse.event and sse.event.startswith("thread."):
- data = sse.json()
-
- if sse.event == "error" and is_mapping(data) and data.get("error"):
- message = None
- error = data.get("error")
- if is_mapping(error):
- message = error.get("message")
- if not message or not isinstance(message, str):
- message = "An error occurred during streaming"
-
- raise APIError(
- message=message,
- request=self.response.request,
- body=data["error"],
- )
-
- yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response)
- else:
- data = sse.json()
- if is_mapping(data) and data.get("error"):
- message = None
- error = data.get("error")
- if is_mapping(error):
- message = error.get("message")
- if not message or not isinstance(message, str):
- message = "An error occurred during streaming"
-
- raise APIError(
- message=message,
- request=self.response.request,
- body=data["error"],
- )
-
- yield process_data(data=data, cast_to=cast_to, response=response)
-
- # As we might not fully consume the response stream, we need to close it explicitly
- await response.aclose()
+ try:
+ async for sse in iterator:
+ if sse.data.startswith("[DONE]"):
+ break
+
+ # we have to special case the Assistants `thread.` events since we won't have an "event" key in the data
+ if sse.event and sse.event.startswith("thread."):
+ data = sse.json()
+
+ if sse.event == "error" and is_mapping(data) and data.get("error"):
+ message = None
+ error = data.get("error")
+ if is_mapping(error):
+ message = error.get("message")
+ if not message or not isinstance(message, str):
+ message = "An error occurred during streaming"
+
+ raise APIError(
+ message=message,
+ request=self.response.request,
+ body=data["error"],
+ )
+
+ yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response)
+ else:
+ data = sse.json()
+ if is_mapping(data) and data.get("error"):
+ message = None
+ error = data.get("error")
+ if is_mapping(error):
+ message = error.get("message")
+ if not message or not isinstance(message, str):
+ message = "An error occurred during streaming"
+
+ raise APIError(
+ message=message,
+ request=self.response.request,
+ body=data["error"],
+ )
+
+ yield process_data(data=data, cast_to=cast_to, response=response)
+
+ finally:
+ # Ensure the response is closed even if the consumer doesn't read all data
+ await response.aclose()
async def __aenter__(self) -> Self:
return self
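
The `try`/`finally` added above ensures `response.close()` / `await response.aclose()` runs whenever the generator is exhausted, closed, or unwound by an exception, rather than only on the happy path. A minimal consumer-side sketch, assuming a chat completions stream:

```python
from openai import OpenAI

client = OpenAI()

# Iterating drives __stream__ above; its finally block closes the HTTP
# response even if the consumer stops before [DONE]. Using the stream as a
# context manager (Stream.__enter__ / __exit__) remains the explicit option.
with client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Say hi"}],
    stream=True,
) as stream:
    for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")
```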
diff --git a/src/openai/_version.py b/src/openai/_version.py
index 6109cebf91..e5ddb8f4eb 100644
--- a/src/openai/_version.py
+++ b/src/openai/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "openai"
-__version__ = "2.8.1" # x-release-please-version
+__version__ = "2.9.0" # x-release-please-version
diff --git a/src/openai/lib/_parsing/_responses.py b/src/openai/lib/_parsing/_responses.py
index 4d7b0b6224..4bed171df7 100644
--- a/src/openai/lib/_parsing/_responses.py
+++ b/src/openai/lib/_parsing/_responses.py
@@ -103,6 +103,7 @@ def parse_response(
or output.type == "file_search_call"
or output.type == "web_search_call"
or output.type == "reasoning"
+ or output.type == "compaction"
or output.type == "mcp_call"
or output.type == "mcp_approval_request"
or output.type == "image_generation_call"
diff --git a/src/openai/resources/beta/assistants.py b/src/openai/resources/beta/assistants.py
index e4ec1dca11..aa1f9f9b48 100644
--- a/src/openai/resources/beta/assistants.py
+++ b/src/openai/resources/beta/assistants.py
@@ -98,9 +98,9 @@ def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -108,6 +108,7 @@ def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -312,9 +313,9 @@ def update(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -322,6 +323,7 @@ def update(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -565,9 +567,9 @@ async def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -575,6 +577,7 @@ async def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -779,9 +782,9 @@ async def update(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -789,6 +792,7 @@ async def update(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py
index d7445d52b5..9b6cb3f752 100644
--- a/src/openai/resources/beta/threads/runs/runs.py
+++ b/src/openai/resources/beta/threads/runs/runs.py
@@ -169,9 +169,9 @@ def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -179,6 +179,7 @@ def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -330,9 +331,9 @@ def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -340,6 +341,7 @@ def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -487,9 +489,9 @@ def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -497,6 +499,7 @@ def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -1620,9 +1623,9 @@ async def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1630,6 +1633,7 @@ async def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -1781,9 +1785,9 @@ async def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1791,6 +1795,7 @@ async def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
@@ -1938,9 +1943,9 @@ async def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1948,6 +1953,7 @@ async def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: Specifies the format that the model must output. Compatible with
[GPT-4o](https://platform.openai.com/docs/models#gpt-4o),
diff --git a/src/openai/resources/chat/completions/completions.py b/src/openai/resources/chat/completions/completions.py
index c205011d10..3f2732a608 100644
--- a/src/openai/resources/chat/completions/completions.py
+++ b/src/openai/resources/chat/completions/completions.py
@@ -411,9 +411,9 @@ def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -421,6 +421,7 @@ def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
@@ -721,9 +722,9 @@ def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -731,6 +732,7 @@ def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
@@ -1022,9 +1024,9 @@ def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1032,6 +1034,7 @@ def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
@@ -1894,9 +1897,9 @@ async def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -1904,6 +1907,7 @@ async def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
@@ -2204,9 +2208,9 @@ async def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -2214,6 +2218,7 @@ async def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
@@ -2505,9 +2510,9 @@ async def create(
reasoning_effort: Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -2515,6 +2520,7 @@ async def create(
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
response_format: An object specifying the format that the model must output.
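
A hedged sketch of the new `xhigh` reasoning effort in a chat completion request; per the updated docstrings above, `xhigh` is currently only supported for `gpt-5.1-codex-max`:

```python
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-5.1-codex-max",
    reasoning_effort="xhigh",  # new value; other models keep none..high
    messages=[{"role": "user", "content": "Refactor this function to be iterative."}],
)
print(completion.choices[0].message.content)
```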
diff --git a/src/openai/resources/containers/containers.py b/src/openai/resources/containers/containers.py
index dcdc3e1a3e..0cbb400d4a 100644
--- a/src/openai/resources/containers/containers.py
+++ b/src/openai/resources/containers/containers.py
@@ -60,6 +60,7 @@ def create(
name: str,
expires_after: container_create_params.ExpiresAfter | Omit = omit,
file_ids: SequenceNotStr[str] | Omit = omit,
+ memory_limit: Literal["1g", "4g", "16g", "64g"] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -77,6 +78,8 @@ def create(
file_ids: IDs of files to copy to the container.
+ memory_limit: Optional memory limit for the container. Defaults to "1g".
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -92,6 +95,7 @@ def create(
"name": name,
"expires_after": expires_after,
"file_ids": file_ids,
+ "memory_limit": memory_limit,
},
container_create_params.ContainerCreateParams,
),
@@ -256,6 +260,7 @@ async def create(
name: str,
expires_after: container_create_params.ExpiresAfter | Omit = omit,
file_ids: SequenceNotStr[str] | Omit = omit,
+ memory_limit: Literal["1g", "4g", "16g", "64g"] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -273,6 +278,8 @@ async def create(
file_ids: IDs of files to copy to the container.
+ memory_limit: Optional memory limit for the container. Defaults to "1g".
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -288,6 +295,7 @@ async def create(
"name": name,
"expires_after": expires_after,
"file_ids": file_ids,
+ "memory_limit": memory_limit,
},
container_create_params.ContainerCreateParams,
),
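
A hedged sketch of the new `memory_limit` option on container creation; the allowed values mirror the `Literal` added above, and `"1g"` is the documented default:

```python
from openai import OpenAI

client = OpenAI()

container = client.containers.create(
    name="data-analysis",
    memory_limit="4g",  # one of "1g", "4g", "16g", "64g"; defaults to "1g"
)
print(container.id)
```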
diff --git a/src/openai/resources/realtime/calls.py b/src/openai/resources/realtime/calls.py
index 7d2c92fe86..cdea492d95 100644
--- a/src/openai/resources/realtime/calls.py
+++ b/src/openai/resources/realtime/calls.py
@@ -199,15 +199,20 @@ def accept(
limit, the conversation will be truncated, meaning messages (starting from the
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
extra_headers: Send extra headers
@@ -519,15 +524,20 @@ async def accept(
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
extra_headers: Send extra headers
diff --git a/src/openai/resources/realtime/realtime.py b/src/openai/resources/realtime/realtime.py
index 6e69258616..33caba1871 100644
--- a/src/openai/resources/realtime/realtime.py
+++ b/src/openai/resources/realtime/realtime.py
@@ -829,7 +829,7 @@ def retrieve(self, *, item_id: str, event_id: str | Omit = omit) -> None:
class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource):
def clear(self, *, event_id: str | Omit = omit) -> None:
- """**WebRTC Only:** Emit to cut off the current audio response.
+ """**WebRTC/SIP Only:** Emit to cut off the current audio response.
This will trigger the server to
stop generating audio and emit a `output_audio_buffer.cleared` event. This
@@ -1066,7 +1066,7 @@ async def retrieve(self, *, item_id: str, event_id: str | Omit = omit) -> None:
class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
async def clear(self, *, event_id: str | Omit = omit) -> None:
- """**WebRTC Only:** Emit to cut off the current audio response.
+ """**WebRTC/SIP Only:** Emit to cut off the current audio response.
This will trigger the server to
stop generating audio and emit a `output_audio_buffer.cleared` event. This
diff --git a/src/openai/resources/responses/responses.py b/src/openai/resources/responses/responses.py
index dcf87ba07c..c532fc0bb0 100644
--- a/src/openai/resources/responses/responses.py
+++ b/src/openai/resources/responses/responses.py
@@ -2,6 +2,7 @@
from __future__ import annotations
+from copy import copy
from typing import Any, List, Type, Union, Iterable, Optional, cast
from functools import partial
from typing_extensions import Literal, overload
@@ -33,7 +34,11 @@
AsyncInputTokensWithStreamingResponse,
)
from ..._base_client import make_request_options
-from ...types.responses import response_create_params, response_retrieve_params
+from ...types.responses import (
+ response_create_params,
+ response_compact_params,
+ response_retrieve_params,
+)
from ...lib._parsing._responses import (
TextFormatT,
parse_response,
@@ -45,11 +50,13 @@
from ...types.shared_params.reasoning import Reasoning
from ...types.responses.parsed_response import ParsedResponse
from ...lib.streaming.responses._responses import ResponseStreamManager, AsyncResponseStreamManager
+from ...types.responses.compacted_response import CompactedResponse
from ...types.responses.response_includable import ResponseIncludable
from ...types.shared_params.responses_model import ResponsesModel
from ...types.responses.response_input_param import ResponseInputParam
from ...types.responses.response_prompt_param import ResponsePromptParam
from ...types.responses.response_stream_event import ResponseStreamEvent
+from ...types.responses.response_input_item_param import ResponseInputItemParam
from ...types.responses.response_text_config_param import ResponseTextConfigParam
__all__ = ["Responses", "AsyncResponses"]
@@ -1046,6 +1053,7 @@ def stream(
if "format" in text:
raise TypeError("Cannot mix and match text.format with text_format")
+ text = copy(text)
text["format"] = _type_to_text_format_param(text_format)
api_request: partial[Stream[ResponseStreamEvent]] = partial(
@@ -1151,7 +1159,7 @@ def parse(
if "format" in text:
raise TypeError("Cannot mix and match text.format with text_format")
-
+ text = copy(text)
text["format"] = _type_to_text_format_param(text_format)
tools = _make_tools(tools)
@@ -1515,6 +1523,154 @@ def cancel(
cast_to=Response,
)
+ def compact(
+ self,
+ *,
+ input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit,
+ instructions: Optional[str] | Omit = omit,
+ model: Union[
+ Literal[
+ "gpt-5.1",
+ "gpt-5.1-2025-11-13",
+ "gpt-5.1-codex",
+ "gpt-5.1-mini",
+ "gpt-5.1-chat-latest",
+ "gpt-5",
+ "gpt-5-mini",
+ "gpt-5-nano",
+ "gpt-5-2025-08-07",
+ "gpt-5-mini-2025-08-07",
+ "gpt-5-nano-2025-08-07",
+ "gpt-5-chat-latest",
+ "gpt-4.1",
+ "gpt-4.1-mini",
+ "gpt-4.1-nano",
+ "gpt-4.1-2025-04-14",
+ "gpt-4.1-mini-2025-04-14",
+ "gpt-4.1-nano-2025-04-14",
+ "o4-mini",
+ "o4-mini-2025-04-16",
+ "o3",
+ "o3-2025-04-16",
+ "o3-mini",
+ "o3-mini-2025-01-31",
+ "o1",
+ "o1-2024-12-17",
+ "o1-preview",
+ "o1-preview-2024-09-12",
+ "o1-mini",
+ "o1-mini-2024-09-12",
+ "gpt-4o",
+ "gpt-4o-2024-11-20",
+ "gpt-4o-2024-08-06",
+ "gpt-4o-2024-05-13",
+ "gpt-4o-audio-preview",
+ "gpt-4o-audio-preview-2024-10-01",
+ "gpt-4o-audio-preview-2024-12-17",
+ "gpt-4o-audio-preview-2025-06-03",
+ "gpt-4o-mini-audio-preview",
+ "gpt-4o-mini-audio-preview-2024-12-17",
+ "gpt-4o-search-preview",
+ "gpt-4o-mini-search-preview",
+ "gpt-4o-search-preview-2025-03-11",
+ "gpt-4o-mini-search-preview-2025-03-11",
+ "chatgpt-4o-latest",
+ "codex-mini-latest",
+ "gpt-4o-mini",
+ "gpt-4o-mini-2024-07-18",
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0301",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ "o1-pro",
+ "o1-pro-2025-03-19",
+ "o3-pro",
+ "o3-pro-2025-06-10",
+ "o3-deep-research",
+ "o3-deep-research-2025-06-26",
+ "o4-mini-deep-research",
+ "o4-mini-deep-research-2025-06-26",
+ "computer-use-preview",
+ "computer-use-preview-2025-03-11",
+ "gpt-5-codex",
+ "gpt-5-pro",
+ "gpt-5-pro-2025-10-06",
+ "gpt-5.1-codex-max",
+ ],
+ str,
+ None,
+ ]
+ | Omit = omit,
+ previous_response_id: Optional[str] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> CompactedResponse:
+ """
+ Compact conversation
+
+ Args:
+ input: Text, image, or file inputs to the model, used to generate a response
+
+ instructions: A system (or developer) message inserted into the model's context. When used
+ along with `previous_response_id`, the instructions from a previous response
+ will not be carried over to the next response. This makes it simple to swap out
+ system (or developer) messages in new responses.
+
+ model: Model ID used to generate the response, like `gpt-5` or `o3`. OpenAI offers a
+ wide range of models with different capabilities, performance characteristics,
+ and price points. Refer to the
+ [model guide](https://platform.openai.com/docs/models) to browse and compare
+ available models.
+
+ previous_response_id: The unique ID of the previous response to the model. Use this to create
+ multi-turn conversations. Learn more about
+ [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+ Cannot be used in conjunction with `conversation`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/responses/compact",
+ body=maybe_transform(
+ {
+ "input": input,
+ "instructions": instructions,
+ "model": model,
+ "previous_response_id": previous_response_id,
+ },
+ response_compact_params.ResponseCompactParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=CompactedResponse,
+ )
+
class AsyncResponses(AsyncAPIResource):
@cached_property
@@ -2507,7 +2663,7 @@ def stream(
if "format" in text:
raise TypeError("Cannot mix and match text.format with text_format")
-
+ text = copy(text)
text["format"] = _type_to_text_format_param(text_format)
api_request = self.create(
@@ -2617,7 +2773,7 @@ async def parse(
if "format" in text:
raise TypeError("Cannot mix and match text.format with text_format")
-
+ text = copy(text)
text["format"] = _type_to_text_format_param(text_format)
tools = _make_tools(tools)
@@ -2981,6 +3137,154 @@ async def cancel(
cast_to=Response,
)
+ async def compact(
+ self,
+ *,
+ input: Union[str, Iterable[ResponseInputItemParam], None] | Omit = omit,
+ instructions: Optional[str] | Omit = omit,
+ model: Union[
+ Literal[
+ "gpt-5.1",
+ "gpt-5.1-2025-11-13",
+ "gpt-5.1-codex",
+ "gpt-5.1-mini",
+ "gpt-5.1-chat-latest",
+ "gpt-5",
+ "gpt-5-mini",
+ "gpt-5-nano",
+ "gpt-5-2025-08-07",
+ "gpt-5-mini-2025-08-07",
+ "gpt-5-nano-2025-08-07",
+ "gpt-5-chat-latest",
+ "gpt-4.1",
+ "gpt-4.1-mini",
+ "gpt-4.1-nano",
+ "gpt-4.1-2025-04-14",
+ "gpt-4.1-mini-2025-04-14",
+ "gpt-4.1-nano-2025-04-14",
+ "o4-mini",
+ "o4-mini-2025-04-16",
+ "o3",
+ "o3-2025-04-16",
+ "o3-mini",
+ "o3-mini-2025-01-31",
+ "o1",
+ "o1-2024-12-17",
+ "o1-preview",
+ "o1-preview-2024-09-12",
+ "o1-mini",
+ "o1-mini-2024-09-12",
+ "gpt-4o",
+ "gpt-4o-2024-11-20",
+ "gpt-4o-2024-08-06",
+ "gpt-4o-2024-05-13",
+ "gpt-4o-audio-preview",
+ "gpt-4o-audio-preview-2024-10-01",
+ "gpt-4o-audio-preview-2024-12-17",
+ "gpt-4o-audio-preview-2025-06-03",
+ "gpt-4o-mini-audio-preview",
+ "gpt-4o-mini-audio-preview-2024-12-17",
+ "gpt-4o-search-preview",
+ "gpt-4o-mini-search-preview",
+ "gpt-4o-search-preview-2025-03-11",
+ "gpt-4o-mini-search-preview-2025-03-11",
+ "chatgpt-4o-latest",
+ "codex-mini-latest",
+ "gpt-4o-mini",
+ "gpt-4o-mini-2024-07-18",
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0301",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ "o1-pro",
+ "o1-pro-2025-03-19",
+ "o3-pro",
+ "o3-pro-2025-06-10",
+ "o3-deep-research",
+ "o3-deep-research-2025-06-26",
+ "o4-mini-deep-research",
+ "o4-mini-deep-research-2025-06-26",
+ "computer-use-preview",
+ "computer-use-preview-2025-03-11",
+ "gpt-5-codex",
+ "gpt-5-pro",
+ "gpt-5-pro-2025-10-06",
+ "gpt-5.1-codex-max",
+ ],
+ str,
+ None,
+ ]
+ | Omit = omit,
+ previous_response_id: Optional[str] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> CompactedResponse:
+ """
+ Compact conversation
+
+ Args:
+ input: Text, image, or file inputs to the model, used to generate a response
+
+ instructions: A system (or developer) message inserted into the model's context. When used
+ along with `previous_response_id`, the instructions from a previous response
+ will not be carried over to the next response. This makes it simple to swap out
+ system (or developer) messages in new responses.
+
+ model: Model ID used to generate the response, like `gpt-5` or `o3`. OpenAI offers a
+ wide range of models with different capabilities, performance characteristics,
+ and price points. Refer to the
+ [model guide](https://platform.openai.com/docs/models) to browse and compare
+ available models.
+
+ previous_response_id: The unique ID of the previous response to the model. Use this to create
+ multi-turn conversations. Learn more about
+ [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+ Cannot be used in conjunction with `conversation`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/responses/compact",
+ body=await async_maybe_transform(
+ {
+ "input": input,
+ "instructions": instructions,
+ "model": model,
+ "previous_response_id": previous_response_id,
+ },
+ response_compact_params.ResponseCompactParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=CompactedResponse,
+ )
+
class ResponsesWithRawResponse:
def __init__(self, responses: Responses) -> None:
@@ -2998,6 +3302,9 @@ def __init__(self, responses: Responses) -> None:
self.cancel = _legacy_response.to_raw_response_wrapper(
responses.cancel,
)
+ self.compact = _legacy_response.to_raw_response_wrapper(
+ responses.compact,
+ )
self.parse = _legacy_response.to_raw_response_wrapper(
responses.parse,
)
@@ -3027,6 +3334,9 @@ def __init__(self, responses: AsyncResponses) -> None:
self.cancel = _legacy_response.async_to_raw_response_wrapper(
responses.cancel,
)
+ self.compact = _legacy_response.async_to_raw_response_wrapper(
+ responses.compact,
+ )
self.parse = _legacy_response.async_to_raw_response_wrapper(
responses.parse,
)
@@ -3056,6 +3366,9 @@ def __init__(self, responses: Responses) -> None:
self.cancel = to_streamed_response_wrapper(
responses.cancel,
)
+ self.compact = to_streamed_response_wrapper(
+ responses.compact,
+ )
@cached_property
def input_items(self) -> InputItemsWithStreamingResponse:
@@ -3082,6 +3395,9 @@ def __init__(self, responses: AsyncResponses) -> None:
self.cancel = async_to_streamed_response_wrapper(
responses.cancel,
)
+ self.compact = async_to_streamed_response_wrapper(
+ responses.compact,
+ )
@cached_property
def input_items(self) -> AsyncInputItemsWithStreamingResponse:
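
As with the existing Responses methods, the wrapper registrations above expose `compact` through the raw- and streaming-response accessors as well. A short sketch of the raw-response form (the response ID is illustrative):

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

raw = client.responses.with_raw_response.compact(previous_response_id="resp_123")
print(raw.headers.get("x-request-id"))  # headers are available alongside the body
compacted = raw.parse()  # -> CompactedResponse
```
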
diff --git a/src/openai/resources/videos.py b/src/openai/resources/videos.py
index 4df5f02004..727091c607 100644
--- a/src/openai/resources/videos.py
+++ b/src/openai/resources/videos.py
@@ -84,11 +84,13 @@ def create(
input_reference: Optional image reference that guides generation.
- model: The video generation model to use. Defaults to `sora-2`.
+ model: The video generation model to use (allowed values: sora-2, sora-2-pro). Defaults
+ to `sora-2`.
- seconds: Clip duration in seconds. Defaults to 4 seconds.
+ seconds: Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds.
- size: Output resolution formatted as width x height. Defaults to 720x1280.
+ size: Output resolution formatted as width x height (allowed values: 720x1280,
+ 1280x720, 1024x1792, 1792x1024). Defaults to 720x1280.
extra_headers: Send extra headers
@@ -437,11 +439,13 @@ async def create(
input_reference: Optional image reference that guides generation.
- model: The video generation model to use. Defaults to `sora-2`.
+ model: The video generation model to use (allowed values: sora-2, sora-2-pro). Defaults
+ to `sora-2`.
- seconds: Clip duration in seconds. Defaults to 4 seconds.
+ seconds: Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds.
- size: Output resolution formatted as width x height. Defaults to 720x1280.
+ size: Output resolution formatted as width x height (allowed values: 720x1280,
+ 1280x720, 1024x1792, 1792x1024). Defaults to 720x1280.
extra_headers: Send extra headers
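
The tightened docstrings above enumerate the accepted values. A hedged sketch that uses them (the prompt is illustrative, and `seconds`/`size` are passed as the string forms listed above):

```python
from openai import OpenAI

client = OpenAI()

video = client.videos.create(
    prompt="A timelapse of clouds rolling over a mountain range",  # illustrative
    model="sora-2",   # allowed values: sora-2, sora-2-pro
    seconds="8",      # allowed values: 4, 8, 12
    size="1280x720",  # allowed values: 720x1280, 1280x720, 1024x1792, 1792x1024
)
print(video.id, video.status)
```
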
diff --git a/src/openai/types/beta/assistant_create_params.py b/src/openai/types/beta/assistant_create_params.py
index 009b0f49e3..38b30f212f 100644
--- a/src/openai/types/beta/assistant_create_params.py
+++ b/src/openai/types/beta/assistant_create_params.py
@@ -62,9 +62,9 @@ class AssistantCreateParams(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -72,6 +72,7 @@ class AssistantCreateParams(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: Optional[AssistantResponseFormatOptionParam]
diff --git a/src/openai/types/beta/assistant_update_params.py b/src/openai/types/beta/assistant_update_params.py
index 432116ad52..8f774c4e6c 100644
--- a/src/openai/types/beta/assistant_update_params.py
+++ b/src/openai/types/beta/assistant_update_params.py
@@ -97,9 +97,9 @@ class AssistantUpdateParams(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -107,6 +107,7 @@ class AssistantUpdateParams(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: Optional[AssistantResponseFormatOptionParam]
diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py
index 74786d7d5c..df789decbc 100644
--- a/src/openai/types/beta/threads/run_create_params.py
+++ b/src/openai/types/beta/threads/run_create_params.py
@@ -111,9 +111,9 @@ class RunCreateParamsBase(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -121,6 +121,7 @@ class RunCreateParamsBase(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: Optional[AssistantResponseFormatOptionParam]
diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py
index e02c06cbb0..f2d55f7ec4 100644
--- a/src/openai/types/chat/completion_create_params.py
+++ b/src/openai/types/chat/completion_create_params.py
@@ -197,9 +197,9 @@ class CompletionCreateParamsBase(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -207,6 +207,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: ResponseFormat
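
Because `xhigh` is only supported for `gpt-5.1-codex-max`, a request using the new level pairs the two explicitly. A minimal sketch against the Chat Completions parameters documented above (model availability on this endpoint is an assumption):

```python
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-5.1-codex-max",  # assumption: the model is enabled for your account
    reasoning_effort="xhigh",   # the newly documented effort level
    messages=[{"role": "user", "content": "Refactor this function for clarity."}],
)
print(completion.choices[0].message.content)
```
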
diff --git a/src/openai/types/container_create_params.py b/src/openai/types/container_create_params.py
index 01a48ac410..d629c24d38 100644
--- a/src/openai/types/container_create_params.py
+++ b/src/openai/types/container_create_params.py
@@ -19,6 +19,9 @@ class ContainerCreateParams(TypedDict, total=False):
file_ids: SequenceNotStr[str]
"""IDs of files to copy to the container."""
+ memory_limit: Literal["1g", "4g", "16g", "64g"]
+ """Optional memory limit for the container. Defaults to "1g"."""
+
class ExpiresAfter(TypedDict, total=False):
anchor: Required[Literal["last_active_at"]]
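
A short sketch of the new parameter, together with the fields now echoed back on the create/list/retrieve response models further below (the container name is illustrative):

```python
from openai import OpenAI

client = OpenAI()

container = client.containers.create(
    name="scratch-space",  # illustrative name
    memory_limit="4g",     # allowed: "1g", "4g", "16g", "64g"; defaults to "1g"
)
print(container.memory_limit, container.last_active_at)
```
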
diff --git a/src/openai/types/container_create_response.py b/src/openai/types/container_create_response.py
index c0ccc45a1c..cbad914283 100644
--- a/src/openai/types/container_create_response.py
+++ b/src/openai/types/container_create_response.py
@@ -38,3 +38,9 @@ class ContainerCreateResponse(BaseModel):
point for the expiration. The minutes is the number of minutes after the anchor
before the container expires.
"""
+
+ last_active_at: Optional[int] = None
+ """Unix timestamp (in seconds) when the container was last active."""
+
+ memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None
+ """The memory limit configured for the container."""
diff --git a/src/openai/types/container_list_response.py b/src/openai/types/container_list_response.py
index 2d9c11d8a4..29416f0941 100644
--- a/src/openai/types/container_list_response.py
+++ b/src/openai/types/container_list_response.py
@@ -38,3 +38,9 @@ class ContainerListResponse(BaseModel):
point for the expiration. The minutes is the number of minutes after the anchor
before the container expires.
"""
+
+ last_active_at: Optional[int] = None
+ """Unix timestamp (in seconds) when the container was last active."""
+
+ memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None
+ """The memory limit configured for the container."""
diff --git a/src/openai/types/container_retrieve_response.py b/src/openai/types/container_retrieve_response.py
index eab291b34f..31fedeac64 100644
--- a/src/openai/types/container_retrieve_response.py
+++ b/src/openai/types/container_retrieve_response.py
@@ -38,3 +38,9 @@ class ContainerRetrieveResponse(BaseModel):
point for the expiration. The minutes is the number of minutes after the anchor
before the container expires.
"""
+
+ last_active_at: Optional[int] = None
+ """Unix timestamp (in seconds) when the container was last active."""
+
+ memory_limit: Optional[Literal["1g", "4g", "16g", "64g"]] = None
+ """The memory limit configured for the container."""
diff --git a/src/openai/types/evals/create_eval_completions_run_data_source.py b/src/openai/types/evals/create_eval_completions_run_data_source.py
index 742c27a775..4236746a17 100644
--- a/src/openai/types/evals/create_eval_completions_run_data_source.py
+++ b/src/openai/types/evals/create_eval_completions_run_data_source.py
@@ -172,9 +172,9 @@ class SamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -182,6 +182,7 @@ class SamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: Optional[SamplingParamsResponseFormat] = None
diff --git a/src/openai/types/evals/create_eval_completions_run_data_source_param.py b/src/openai/types/evals/create_eval_completions_run_data_source_param.py
index 18cd5018b1..751a1432b8 100644
--- a/src/openai/types/evals/create_eval_completions_run_data_source_param.py
+++ b/src/openai/types/evals/create_eval_completions_run_data_source_param.py
@@ -168,9 +168,9 @@ class SamplingParams(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -178,6 +178,7 @@ class SamplingParams(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
response_format: SamplingParamsResponseFormat
diff --git a/src/openai/types/evals/run_cancel_response.py b/src/openai/types/evals/run_cancel_response.py
index b18598b20e..f7fb0ec4ad 100644
--- a/src/openai/types/evals/run_cancel_response.py
+++ b/src/openai/types/evals/run_cancel_response.py
@@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
temperature: Optional[float] = None
@@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int] = None
diff --git a/src/openai/types/evals/run_create_params.py b/src/openai/types/evals/run_create_params.py
index a50433f06d..a70d1923e5 100644
--- a/src/openai/types/evals/run_create_params.py
+++ b/src/openai/types/evals/run_create_params.py
@@ -116,9 +116,9 @@ class DataSourceCreateEvalResponsesRunDataSourceSourceResponses(TypedDict, total
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -126,6 +126,7 @@ class DataSourceCreateEvalResponsesRunDataSourceSourceResponses(TypedDict, total
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
temperature: Optional[float]
@@ -263,9 +264,9 @@ class DataSourceCreateEvalResponsesRunDataSourceSamplingParams(TypedDict, total=
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -273,6 +274,7 @@ class DataSourceCreateEvalResponsesRunDataSourceSamplingParams(TypedDict, total=
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: int
diff --git a/src/openai/types/evals/run_create_response.py b/src/openai/types/evals/run_create_response.py
index 41dac615c7..fb2220b3a1 100644
--- a/src/openai/types/evals/run_create_response.py
+++ b/src/openai/types/evals/run_create_response.py
@@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
temperature: Optional[float] = None
@@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int] = None
diff --git a/src/openai/types/evals/run_list_response.py b/src/openai/types/evals/run_list_response.py
index 61bff95447..adac4ffdc8 100644
--- a/src/openai/types/evals/run_list_response.py
+++ b/src/openai/types/evals/run_list_response.py
@@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
temperature: Optional[float] = None
@@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int] = None
diff --git a/src/openai/types/evals/run_retrieve_response.py b/src/openai/types/evals/run_retrieve_response.py
index 651d7423a9..abdc5ebae5 100644
--- a/src/openai/types/evals/run_retrieve_response.py
+++ b/src/openai/types/evals/run_retrieve_response.py
@@ -103,9 +103,9 @@ class DataSourceResponsesSourceResponses(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -113,6 +113,7 @@ class DataSourceResponsesSourceResponses(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
temperature: Optional[float] = None
@@ -245,9 +246,9 @@ class DataSourceResponsesSamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -255,6 +256,7 @@ class DataSourceResponsesSamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int] = None
diff --git a/src/openai/types/graders/score_model_grader.py b/src/openai/types/graders/score_model_grader.py
index 84686a9642..b3ba6758bb 100644
--- a/src/openai/types/graders/score_model_grader.py
+++ b/src/openai/types/graders/score_model_grader.py
@@ -67,9 +67,9 @@ class SamplingParams(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -77,6 +77,7 @@ class SamplingParams(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int] = None
diff --git a/src/openai/types/graders/score_model_grader_param.py b/src/openai/types/graders/score_model_grader_param.py
index aec7a95ad4..eb1f6e03ac 100644
--- a/src/openai/types/graders/score_model_grader_param.py
+++ b/src/openai/types/graders/score_model_grader_param.py
@@ -73,9 +73,9 @@ class SamplingParams(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -83,6 +83,7 @@ class SamplingParams(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
seed: Optional[int]
diff --git a/src/openai/types/realtime/__init__.py b/src/openai/types/realtime/__init__.py
index 83e81a034a..c2a141d727 100644
--- a/src/openai/types/realtime/__init__.py
+++ b/src/openai/types/realtime/__init__.py
@@ -175,6 +175,9 @@
from .response_function_call_arguments_done_event import (
ResponseFunctionCallArgumentsDoneEvent as ResponseFunctionCallArgumentsDoneEvent,
)
+from .input_audio_buffer_dtmf_event_received_event import (
+ InputAudioBufferDtmfEventReceivedEvent as InputAudioBufferDtmfEventReceivedEvent,
+)
from .realtime_conversation_item_assistant_message import (
RealtimeConversationItemAssistantMessage as RealtimeConversationItemAssistantMessage,
)
diff --git a/src/openai/types/realtime/call_accept_params.py b/src/openai/types/realtime/call_accept_params.py
index d6fc92b8e5..917b71cb0d 100644
--- a/src/openai/types/realtime/call_accept_params.py
+++ b/src/openai/types/realtime/call_accept_params.py
@@ -110,13 +110,18 @@ class CallAcceptParams(TypedDict, total=False):
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
"""
diff --git a/src/openai/types/realtime/input_audio_buffer_dtmf_event_received_event.py b/src/openai/types/realtime/input_audio_buffer_dtmf_event_received_event.py
new file mode 100644
index 0000000000..d61ed4bda7
--- /dev/null
+++ b/src/openai/types/realtime/input_audio_buffer_dtmf_event_received_event.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["InputAudioBufferDtmfEventReceivedEvent"]
+
+
+class InputAudioBufferDtmfEventReceivedEvent(BaseModel):
+ event: str
+ """The telephone keypad that was pressed by the user."""
+
+ received_at: int
+ """UTC Unix Timestamp when DTMF Event was received by server."""
+
+ type: Literal["input_audio_buffer.dtmf_event_received"]
+ """The event type, must be `input_audio_buffer.dtmf_event_received`."""
diff --git a/src/openai/types/realtime/realtime_audio_input_turn_detection.py b/src/openai/types/realtime/realtime_audio_input_turn_detection.py
index d3f4e00316..9b55353884 100644
--- a/src/openai/types/realtime/realtime_audio_input_turn_detection.py
+++ b/src/openai/types/realtime/realtime_audio_input_turn_detection.py
@@ -14,9 +14,14 @@ class ServerVad(BaseModel):
"""Type of turn detection, `server_vad` to turn on simple Server VAD."""
create_response: Optional[bool] = None
- """
- Whether or not to automatically generate a response when a VAD stop event
+ """Whether or not to automatically generate a response when a VAD stop event
occurs.
+
+ If `interrupt_response` is set to `false`, this may fail to create a response if
+ the model is already responding.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
idle_timeout_ms: Optional[int] = None
@@ -37,9 +42,13 @@ class ServerVad(BaseModel):
interrupt_response: Optional[bool] = None
"""
- Whether or not to automatically interrupt any ongoing response with output to
- the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- occurs.
+ Whether or not to automatically interrupt (cancel) any ongoing response with
+ output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ start event occurs. If `true` then the response will be cancelled, otherwise it
+ will continue until complete.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
prefix_padding_ms: Optional[int] = None
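
The expanded docstrings spell out how the two flags interact. A minimal sketch of a `server_vad` turn-detection config in plain-dict form:

```python
# With both flags False, VAD events are still emitted, but the model neither
# auto-creates a response on VAD stop nor cancels one on VAD start.
turn_detection = {
    "type": "server_vad",
    "create_response": False,
    "interrupt_response": False,
}
```
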
diff --git a/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py b/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py
index 09b8cfd159..4ce7640727 100644
--- a/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py
+++ b/src/openai/types/realtime/realtime_audio_input_turn_detection_param.py
@@ -13,9 +13,14 @@ class ServerVad(TypedDict, total=False):
"""Type of turn detection, `server_vad` to turn on simple Server VAD."""
create_response: bool
- """
- Whether or not to automatically generate a response when a VAD stop event
+ """Whether or not to automatically generate a response when a VAD stop event
occurs.
+
+ If `interrupt_response` is set to `false`, this may fail to create a response if
+ the model is already responding.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
idle_timeout_ms: Optional[int]
@@ -36,9 +41,13 @@ class ServerVad(TypedDict, total=False):
interrupt_response: bool
"""
- Whether or not to automatically interrupt any ongoing response with output to
- the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- occurs.
+ Whether or not to automatically interrupt (cancel) any ongoing response with
+ output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ start event occurs. If `true` then the response will be cancelled, otherwise it
+ will continue until complete.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
prefix_padding_ms: int
diff --git a/src/openai/types/realtime/realtime_server_event.py b/src/openai/types/realtime/realtime_server_event.py
index 1605b81a97..ead98f1a54 100644
--- a/src/openai/types/realtime/realtime_server_event.py
+++ b/src/openai/types/realtime/realtime_server_event.py
@@ -42,6 +42,7 @@
from .input_audio_buffer_speech_started_event import InputAudioBufferSpeechStartedEvent
from .input_audio_buffer_speech_stopped_event import InputAudioBufferSpeechStoppedEvent
from .response_function_call_arguments_done_event import ResponseFunctionCallArgumentsDoneEvent
+from .input_audio_buffer_dtmf_event_received_event import InputAudioBufferDtmfEventReceivedEvent
from .response_function_call_arguments_delta_event import ResponseFunctionCallArgumentsDeltaEvent
from .conversation_item_input_audio_transcription_segment import ConversationItemInputAudioTranscriptionSegment
from .conversation_item_input_audio_transcription_delta_event import ConversationItemInputAudioTranscriptionDeltaEvent
@@ -116,6 +117,7 @@ class OutputAudioBufferCleared(BaseModel):
RealtimeErrorEvent,
InputAudioBufferClearedEvent,
InputAudioBufferCommittedEvent,
+ InputAudioBufferDtmfEventReceivedEvent,
InputAudioBufferSpeechStartedEvent,
InputAudioBufferSpeechStoppedEvent,
RateLimitsUpdatedEvent,
diff --git a/src/openai/types/realtime/realtime_session_create_request.py b/src/openai/types/realtime/realtime_session_create_request.py
index 016ae45b67..80cf468dc8 100644
--- a/src/openai/types/realtime/realtime_session_create_request.py
+++ b/src/openai/types/realtime/realtime_session_create_request.py
@@ -110,13 +110,18 @@ class RealtimeSessionCreateRequest(BaseModel):
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
"""
diff --git a/src/openai/types/realtime/realtime_session_create_request_param.py b/src/openai/types/realtime/realtime_session_create_request_param.py
index 8c3998c1ca..578d5a502d 100644
--- a/src/openai/types/realtime/realtime_session_create_request_param.py
+++ b/src/openai/types/realtime/realtime_session_create_request_param.py
@@ -110,13 +110,18 @@ class RealtimeSessionCreateRequestParam(TypedDict, total=False):
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
"""
diff --git a/src/openai/types/realtime/realtime_session_create_response.py b/src/openai/types/realtime/realtime_session_create_response.py
index c1336cd6e4..df69dd7bdb 100644
--- a/src/openai/types/realtime/realtime_session_create_response.py
+++ b/src/openai/types/realtime/realtime_session_create_response.py
@@ -53,9 +53,14 @@ class AudioInputTurnDetectionServerVad(BaseModel):
"""Type of turn detection, `server_vad` to turn on simple Server VAD."""
create_response: Optional[bool] = None
- """
- Whether or not to automatically generate a response when a VAD stop event
+ """Whether or not to automatically generate a response when a VAD stop event
occurs.
+
+ If `interrupt_response` is set to `false`, this may fail to create a response if
+ the model is already responding.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
idle_timeout_ms: Optional[int] = None
@@ -76,9 +81,13 @@ class AudioInputTurnDetectionServerVad(BaseModel):
interrupt_response: Optional[bool] = None
"""
- Whether or not to automatically interrupt any ongoing response with output to
- the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- occurs.
+ Whether or not to automatically interrupt (cancel) any ongoing response with
+ output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ start event occurs. If `true` then the response will be cancelled, otherwise it
+ will continue until complete.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
prefix_padding_ms: Optional[int] = None
@@ -463,13 +472,18 @@ class RealtimeSessionCreateResponse(BaseModel):
limit, the conversation will be truncated, meaning messages (starting from the
oldest) will not be included in the model's context. A 32k context model with
4,096 max output tokens can only include 28,224 tokens in the context before
- truncation occurs. Clients can configure truncation behavior to truncate with a
- lower max token limit, which is an effective way to control token usage and
- cost. Truncation will reduce the number of cached tokens on the next turn
- (busting the cache), since messages are dropped from the beginning of the
- context. However, clients can also configure truncation to retain messages up to
- a fraction of the maximum context size, which will reduce the need for future
- truncations and thus improve the cache rate. Truncation can be disabled
- entirely, which means the server will never truncate but would instead return an
- error if the conversation exceeds the model's input token limit.
+ truncation occurs.
+
+ Clients can configure truncation behavior to truncate with a lower max token
+ limit, which is an effective way to control token usage and cost.
+
+ Truncation will reduce the number of cached tokens on the next turn (busting the
+ cache), since messages are dropped from the beginning of the context. However,
+ clients can also configure truncation to retain messages up to a fraction of the
+ maximum context size, which will reduce the need for future truncations and thus
+ improve the cache rate.
+
+ Truncation can be disabled entirely, which means the server will never truncate
+ but would instead return an error if the conversation exceeds the model's input
+ token limit.
"""
diff --git a/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py b/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py
index 7dc7a8f302..e21844f48f 100644
--- a/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py
+++ b/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection.py
@@ -14,9 +14,14 @@ class ServerVad(BaseModel):
"""Type of turn detection, `server_vad` to turn on simple Server VAD."""
create_response: Optional[bool] = None
- """
- Whether or not to automatically generate a response when a VAD stop event
+ """Whether or not to automatically generate a response when a VAD stop event
occurs.
+
+ If `interrupt_response` is set to `false`, this may fail to create a response if
+ the model is already responding.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
idle_timeout_ms: Optional[int] = None
@@ -37,9 +42,13 @@ class ServerVad(BaseModel):
interrupt_response: Optional[bool] = None
"""
- Whether or not to automatically interrupt any ongoing response with output to
- the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- occurs.
+ Whether or not to automatically interrupt (cancel) any ongoing response with
+ output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ start event occurs. If `true` then the response will be cancelled, otherwise it
+ will continue until complete.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
prefix_padding_ms: Optional[int] = None
diff --git a/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py b/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py
index d899b8c5c1..507c43141e 100644
--- a/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py
+++ b/src/openai/types/realtime/realtime_transcription_session_audio_input_turn_detection_param.py
@@ -13,9 +13,14 @@ class ServerVad(TypedDict, total=False):
"""Type of turn detection, `server_vad` to turn on simple Server VAD."""
create_response: bool
- """
- Whether or not to automatically generate a response when a VAD stop event
+ """Whether or not to automatically generate a response when a VAD stop event
occurs.
+
+ If `interrupt_response` is set to `false`, this may fail to create a response if
+ the model is already responding.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
idle_timeout_ms: Optional[int]
@@ -36,9 +41,13 @@ class ServerVad(TypedDict, total=False):
interrupt_response: bool
"""
- Whether or not to automatically interrupt any ongoing response with output to
- the default conversation (i.e. `conversation` of `auto`) when a VAD start event
- occurs.
+ Whether or not to automatically interrupt (cancel) any ongoing response with
+ output to the default conversation (i.e. `conversation` of `auto`) when a VAD
+ start event occurs. If `true` then the response will be cancelled, otherwise it
+ will continue until complete.
+
+ If both `create_response` and `interrupt_response` are set to `false`, the model
+ will never respond automatically but VAD events will still be emitted.
"""
prefix_padding_ms: int
diff --git a/src/openai/types/responses/__init__.py b/src/openai/types/responses/__init__.py
index e707141d9a..a4d939d9ff 100644
--- a/src/openai/types/responses/__init__.py
+++ b/src/openai/types/responses/__init__.py
@@ -28,6 +28,7 @@
from .custom_tool_param import CustomToolParam as CustomToolParam
from .tool_choice_shell import ToolChoiceShell as ToolChoiceShell
from .tool_choice_types import ToolChoiceTypes as ToolChoiceTypes
+from .compacted_response import CompactedResponse as CompactedResponse
from .easy_input_message import EasyInputMessage as EasyInputMessage
from .response_item_list import ResponseItemList as ResponseItemList
from .tool_choice_custom import ToolChoiceCustom as ToolChoiceCustom
@@ -60,6 +61,7 @@
from .response_create_params import ResponseCreateParams as ResponseCreateParams
from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent
from .response_input_content import ResponseInputContent as ResponseInputContent
+from .response_compact_params import ResponseCompactParams as ResponseCompactParams
from .response_output_message import ResponseOutputMessage as ResponseOutputMessage
from .response_output_refusal import ResponseOutputRefusal as ResponseOutputRefusal
from .response_reasoning_item import ResponseReasoningItem as ResponseReasoningItem
@@ -69,6 +71,7 @@
from .web_search_preview_tool import WebSearchPreviewTool as WebSearchPreviewTool
from .easy_input_message_param import EasyInputMessageParam as EasyInputMessageParam
from .input_token_count_params import InputTokenCountParams as InputTokenCountParams
+from .response_compaction_item import ResponseCompactionItem as ResponseCompactionItem
from .response_completed_event import ResponseCompletedEvent as ResponseCompletedEvent
from .response_retrieve_params import ResponseRetrieveParams as ResponseRetrieveParams
from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent
@@ -108,6 +111,7 @@
from .tool_choice_apply_patch_param import ToolChoiceApplyPatchParam as ToolChoiceApplyPatchParam
from .web_search_preview_tool_param import WebSearchPreviewToolParam as WebSearchPreviewToolParam
from .response_apply_patch_tool_call import ResponseApplyPatchToolCall as ResponseApplyPatchToolCall
+from .response_compaction_item_param import ResponseCompactionItemParam as ResponseCompactionItemParam
from .response_file_search_tool_call import ResponseFileSearchToolCall as ResponseFileSearchToolCall
from .response_mcp_call_failed_event import ResponseMcpCallFailedEvent as ResponseMcpCallFailedEvent
from .response_custom_tool_call_param import ResponseCustomToolCallParam as ResponseCustomToolCallParam
@@ -133,6 +137,7 @@
from .response_mcp_call_in_progress_event import ResponseMcpCallInProgressEvent as ResponseMcpCallInProgressEvent
from .response_reasoning_text_delta_event import ResponseReasoningTextDeltaEvent as ResponseReasoningTextDeltaEvent
from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent as ResponseAudioTranscriptDoneEvent
+from .response_compaction_item_param_param import ResponseCompactionItemParamParam as ResponseCompactionItemParamParam
from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam as ResponseFileSearchToolCallParam
from .response_mcp_list_tools_failed_event import ResponseMcpListToolsFailedEvent as ResponseMcpListToolsFailedEvent
from .response_apply_patch_tool_call_output import ResponseApplyPatchToolCallOutput as ResponseApplyPatchToolCallOutput
diff --git a/src/openai/types/responses/compacted_response.py b/src/openai/types/responses/compacted_response.py
new file mode 100644
index 0000000000..5b333b83c0
--- /dev/null
+++ b/src/openai/types/responses/compacted_response.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .response_usage import ResponseUsage
+from .response_output_item import ResponseOutputItem
+
+__all__ = ["CompactedResponse"]
+
+
+class CompactedResponse(BaseModel):
+ id: str
+ """The unique identifier for the compacted response."""
+
+ created_at: int
+ """Unix timestamp (in seconds) when the compacted conversation was created."""
+
+ object: Literal["response.compaction"]
+ """The object type. Always `response.compaction`."""
+
+ output: List[ResponseOutputItem]
+ """The compacted list of output items.
+
+ This is a list of all user messages, followed by a single compaction item.
+ """
+
+ usage: ResponseUsage
+ """
+ Token accounting for the compaction pass, including cached, reasoning, and total
+ tokens.
+ """
diff --git a/src/openai/types/responses/parsed_response.py b/src/openai/types/responses/parsed_response.py
index c120f4641d..a859710590 100644
--- a/src/openai/types/responses/parsed_response.py
+++ b/src/openai/types/responses/parsed_response.py
@@ -6,7 +6,6 @@
from ..._utils import PropertyInfo
from .response import Response
from ..._models import GenericModel
-from ..._utils._transform import PropertyInfo
from .response_output_item import (
McpCall,
McpListTools,
@@ -19,6 +18,7 @@
from .response_output_message import ResponseOutputMessage
from .response_output_refusal import ResponseOutputRefusal
from .response_reasoning_item import ResponseReasoningItem
+from .response_compaction_item import ResponseCompactionItem
from .response_custom_tool_call import ResponseCustomToolCall
from .response_computer_tool_call import ResponseComputerToolCall
from .response_function_tool_call import ResponseFunctionToolCall
@@ -79,6 +79,7 @@ class ParsedResponseFunctionToolCall(ResponseFunctionToolCall):
McpListTools,
ResponseCodeInterpreterToolCall,
ResponseCustomToolCall,
+ ResponseCompactionItem,
ResponseFunctionShellToolCall,
ResponseFunctionShellToolCallOutput,
ResponseApplyPatchToolCall,
diff --git a/src/openai/types/responses/response_compact_params.py b/src/openai/types/responses/response_compact_params.py
new file mode 100644
index 0000000000..fe38b15a9d
--- /dev/null
+++ b/src/openai/types/responses/response_compact_params.py
@@ -0,0 +1,126 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable, Optional
+from typing_extensions import Literal, TypedDict
+
+from .response_input_item_param import ResponseInputItemParam
+
+__all__ = ["ResponseCompactParams"]
+
+
+class ResponseCompactParams(TypedDict, total=False):
+ input: Union[str, Iterable[ResponseInputItemParam], None]
+ """Text, image, or file inputs to the model, used to generate a response"""
+
+ instructions: Optional[str]
+ """
+ A system (or developer) message inserted into the model's context. When used
+ along with `previous_response_id`, the instructions from a previous response
+ will not be carried over to the next response. This makes it simple to swap out
+ system (or developer) messages in new responses.
+ """
+
+ model: Union[
+ Literal[
+ "gpt-5.1",
+ "gpt-5.1-2025-11-13",
+ "gpt-5.1-codex",
+ "gpt-5.1-mini",
+ "gpt-5.1-chat-latest",
+ "gpt-5",
+ "gpt-5-mini",
+ "gpt-5-nano",
+ "gpt-5-2025-08-07",
+ "gpt-5-mini-2025-08-07",
+ "gpt-5-nano-2025-08-07",
+ "gpt-5-chat-latest",
+ "gpt-4.1",
+ "gpt-4.1-mini",
+ "gpt-4.1-nano",
+ "gpt-4.1-2025-04-14",
+ "gpt-4.1-mini-2025-04-14",
+ "gpt-4.1-nano-2025-04-14",
+ "o4-mini",
+ "o4-mini-2025-04-16",
+ "o3",
+ "o3-2025-04-16",
+ "o3-mini",
+ "o3-mini-2025-01-31",
+ "o1",
+ "o1-2024-12-17",
+ "o1-preview",
+ "o1-preview-2024-09-12",
+ "o1-mini",
+ "o1-mini-2024-09-12",
+ "gpt-4o",
+ "gpt-4o-2024-11-20",
+ "gpt-4o-2024-08-06",
+ "gpt-4o-2024-05-13",
+ "gpt-4o-audio-preview",
+ "gpt-4o-audio-preview-2024-10-01",
+ "gpt-4o-audio-preview-2024-12-17",
+ "gpt-4o-audio-preview-2025-06-03",
+ "gpt-4o-mini-audio-preview",
+ "gpt-4o-mini-audio-preview-2024-12-17",
+ "gpt-4o-search-preview",
+ "gpt-4o-mini-search-preview",
+ "gpt-4o-search-preview-2025-03-11",
+ "gpt-4o-mini-search-preview-2025-03-11",
+ "chatgpt-4o-latest",
+ "codex-mini-latest",
+ "gpt-4o-mini",
+ "gpt-4o-mini-2024-07-18",
+ "gpt-4-turbo",
+ "gpt-4-turbo-2024-04-09",
+ "gpt-4-0125-preview",
+ "gpt-4-turbo-preview",
+ "gpt-4-1106-preview",
+ "gpt-4-vision-preview",
+ "gpt-4",
+ "gpt-4-0314",
+ "gpt-4-0613",
+ "gpt-4-32k",
+ "gpt-4-32k-0314",
+ "gpt-4-32k-0613",
+ "gpt-3.5-turbo",
+ "gpt-3.5-turbo-16k",
+ "gpt-3.5-turbo-0301",
+ "gpt-3.5-turbo-0613",
+ "gpt-3.5-turbo-1106",
+ "gpt-3.5-turbo-0125",
+ "gpt-3.5-turbo-16k-0613",
+ "o1-pro",
+ "o1-pro-2025-03-19",
+ "o3-pro",
+ "o3-pro-2025-06-10",
+ "o3-deep-research",
+ "o3-deep-research-2025-06-26",
+ "o4-mini-deep-research",
+ "o4-mini-deep-research-2025-06-26",
+ "computer-use-preview",
+ "computer-use-preview-2025-03-11",
+ "gpt-5-codex",
+ "gpt-5-pro",
+ "gpt-5-pro-2025-10-06",
+ "gpt-5.1-codex-max",
+ ],
+ str,
+ None,
+ ]
+ """Model ID used to generate the response, like `gpt-5` or `o3`.
+
+ OpenAI offers a wide range of models with different capabilities, performance
+ characteristics, and price points. Refer to the
+ [model guide](https://platform.openai.com/docs/models) to browse and compare
+ available models.
+ """
+
+ previous_response_id: Optional[str]
+ """The unique ID of the previous response to the model.
+
+ Use this to create multi-turn conversations. Learn more about
+ [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+ Cannot be used in conjunction with `conversation`.
+ """
diff --git a/src/openai/types/responses/response_compaction_item.py b/src/openai/types/responses/response_compaction_item.py
new file mode 100644
index 0000000000..dc5f839bb8
--- /dev/null
+++ b/src/openai/types/responses/response_compaction_item.py
@@ -0,0 +1,20 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseCompactionItem"]
+
+
+class ResponseCompactionItem(BaseModel):
+ id: str
+ """The unique ID of the compaction item."""
+
+ encrypted_content: str
+
+ type: Literal["compaction"]
+ """The type of the item. Always `compaction`."""
+
+ created_by: Optional[str] = None
diff --git a/src/openai/types/responses/response_compaction_item_param.py b/src/openai/types/responses/response_compaction_item_param.py
new file mode 100644
index 0000000000..8fdc2a561a
--- /dev/null
+++ b/src/openai/types/responses/response_compaction_item_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseCompactionItemParam"]
+
+
+class ResponseCompactionItemParam(BaseModel):
+ encrypted_content: str
+
+ type: Literal["compaction"]
+ """The type of the item. Always `compaction`."""
+
+ id: Optional[str] = None
+ """The ID of the compaction item."""
diff --git a/src/openai/types/responses/response_compaction_item_param_param.py b/src/openai/types/responses/response_compaction_item_param_param.py
new file mode 100644
index 0000000000..0d12296589
--- /dev/null
+++ b/src/openai/types/responses/response_compaction_item_param_param.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ResponseCompactionItemParamParam"]
+
+
+class ResponseCompactionItemParamParam(TypedDict, total=False):
+ encrypted_content: Required[str]
+
+ type: Required[Literal["compaction"]]
+ """The type of the item. Always `compaction`."""
+
+ id: Optional[str]
+ """The ID of the compaction item."""
diff --git a/src/openai/types/responses/response_function_shell_call_output_content.py b/src/openai/types/responses/response_function_shell_call_output_content.py
index 1429ce9724..e0e2c09ad1 100644
--- a/src/openai/types/responses/response_function_shell_call_output_content.py
+++ b/src/openai/types/responses/response_function_shell_call_output_content.py
@@ -27,10 +27,10 @@ class OutcomeExit(BaseModel):
class ResponseFunctionShellCallOutputContent(BaseModel):
outcome: Outcome
- """The exit or timeout outcome associated with this chunk."""
+ """The exit or timeout outcome associated with this shell call."""
stderr: str
- """Captured stderr output for this chunk of the shell call."""
+ """Captured stderr output for the shell call."""
stdout: str
- """Captured stdout output for this chunk of the shell call."""
+ """Captured stdout output for the shell call."""
diff --git a/src/openai/types/responses/response_function_shell_call_output_content_param.py b/src/openai/types/responses/response_function_shell_call_output_content_param.py
index 6395541cf5..fa065bd4b5 100644
--- a/src/openai/types/responses/response_function_shell_call_output_content_param.py
+++ b/src/openai/types/responses/response_function_shell_call_output_content_param.py
@@ -26,10 +26,10 @@ class OutcomeExit(TypedDict, total=False):
class ResponseFunctionShellCallOutputContentParam(TypedDict, total=False):
outcome: Required[Outcome]
- """The exit or timeout outcome associated with this chunk."""
+ """The exit or timeout outcome associated with this shell call."""
stderr: Required[str]
- """Captured stderr output for this chunk of the shell call."""
+ """Captured stderr output for the shell call."""
stdout: Required[str]
- """Captured stdout output for this chunk of the shell call."""
+ """Captured stdout output for the shell call."""
diff --git a/src/openai/types/responses/response_function_shell_tool_call.py b/src/openai/types/responses/response_function_shell_tool_call.py
index be0a5bcff8..de42cb0640 100644
--- a/src/openai/types/responses/response_function_shell_tool_call.py
+++ b/src/openai/types/responses/response_function_shell_tool_call.py
@@ -20,7 +20,7 @@ class Action(BaseModel):
class ResponseFunctionShellToolCall(BaseModel):
id: str
- """The unique ID of the function shell tool call.
+ """The unique ID of the shell tool call.
Populated when this item is returned via API.
"""
@@ -29,7 +29,7 @@ class ResponseFunctionShellToolCall(BaseModel):
"""The shell commands and limits that describe how to run the tool call."""
call_id: str
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
status: Literal["in_progress", "completed", "incomplete"]
"""The status of the shell call.
diff --git a/src/openai/types/responses/response_input_item.py b/src/openai/types/responses/response_input_item.py
index eaf5396087..103c8634ce 100644
--- a/src/openai/types/responses/response_input_item.py
+++ b/src/openai/types/responses/response_input_item.py
@@ -12,6 +12,7 @@
from .response_computer_tool_call import ResponseComputerToolCall
from .response_function_tool_call import ResponseFunctionToolCall
from .response_function_web_search import ResponseFunctionWebSearch
+from .response_compaction_item_param import ResponseCompactionItemParam
from .response_file_search_tool_call import ResponseFileSearchToolCall
from .response_custom_tool_call_output import ResponseCustomToolCallOutput
from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall
@@ -215,13 +216,13 @@ class ShellCall(BaseModel):
"""The shell commands and limits that describe how to run the tool call."""
call_id: str
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
type: Literal["shell_call"]
- """The type of the item. Always `function_shell_call`."""
+ """The type of the item. Always `shell_call`."""
id: Optional[str] = None
- """The unique ID of the function shell tool call.
+ """The unique ID of the shell tool call.
Populated when this item is returned via API.
"""
@@ -235,7 +236,7 @@ class ShellCall(BaseModel):
class ShellCallOutput(BaseModel):
call_id: str
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
output: List[ResponseFunctionShellCallOutputContent]
"""
@@ -244,10 +245,10 @@ class ShellCallOutput(BaseModel):
"""
type: Literal["shell_call_output"]
- """The type of the item. Always `function_shell_call_output`."""
+ """The type of the item. Always `shell_call_output`."""
id: Optional[str] = None
- """The unique ID of the function shell tool call output.
+ """The unique ID of the shell tool call output.
Populated when this item is returned via API.
"""
@@ -462,6 +463,7 @@ class ItemReference(BaseModel):
ResponseFunctionToolCall,
FunctionCallOutput,
ResponseReasoningItem,
+ ResponseCompactionItemParam,
ImageGenerationCall,
ResponseCodeInterpreterToolCall,
LocalShellCall,
diff --git a/src/openai/types/responses/response_input_item_param.py b/src/openai/types/responses/response_input_item_param.py
index 5c2e81c4de..85d9f92b23 100644
--- a/src/openai/types/responses/response_input_item_param.py
+++ b/src/openai/types/responses/response_input_item_param.py
@@ -13,6 +13,7 @@
from .response_computer_tool_call_param import ResponseComputerToolCallParam
from .response_function_tool_call_param import ResponseFunctionToolCallParam
from .response_function_web_search_param import ResponseFunctionWebSearchParam
+from .response_compaction_item_param_param import ResponseCompactionItemParamParam
from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam
from .response_custom_tool_call_output_param import ResponseCustomToolCallOutputParam
from .response_code_interpreter_tool_call_param import ResponseCodeInterpreterToolCallParam
@@ -216,13 +217,13 @@ class ShellCall(TypedDict, total=False):
"""The shell commands and limits that describe how to run the tool call."""
call_id: Required[str]
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
type: Required[Literal["shell_call"]]
- """The type of the item. Always `function_shell_call`."""
+ """The type of the item. Always `shell_call`."""
id: Optional[str]
- """The unique ID of the function shell tool call.
+ """The unique ID of the shell tool call.
Populated when this item is returned via API.
"""
@@ -236,7 +237,7 @@ class ShellCall(TypedDict, total=False):
class ShellCallOutput(TypedDict, total=False):
call_id: Required[str]
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
output: Required[Iterable[ResponseFunctionShellCallOutputContentParam]]
"""
@@ -245,10 +246,10 @@ class ShellCallOutput(TypedDict, total=False):
"""
type: Required[Literal["shell_call_output"]]
- """The type of the item. Always `function_shell_call_output`."""
+ """The type of the item. Always `shell_call_output`."""
id: Optional[str]
- """The unique ID of the function shell tool call output.
+ """The unique ID of the shell tool call output.
Populated when this item is returned via API.
"""
@@ -461,6 +462,7 @@ class ItemReference(TypedDict, total=False):
ResponseFunctionToolCallParam,
FunctionCallOutput,
ResponseReasoningItemParam,
+ ResponseCompactionItemParamParam,
ImageGenerationCall,
ResponseCodeInterpreterToolCallParam,
LocalShellCall,
diff --git a/src/openai/types/responses/response_input_param.py b/src/openai/types/responses/response_input_param.py
index 365c6b3d7b..bbd8e6af79 100644
--- a/src/openai/types/responses/response_input_param.py
+++ b/src/openai/types/responses/response_input_param.py
@@ -13,6 +13,7 @@
from .response_computer_tool_call_param import ResponseComputerToolCallParam
from .response_function_tool_call_param import ResponseFunctionToolCallParam
from .response_function_web_search_param import ResponseFunctionWebSearchParam
+from .response_compaction_item_param_param import ResponseCompactionItemParamParam
from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam
from .response_custom_tool_call_output_param import ResponseCustomToolCallOutputParam
from .response_code_interpreter_tool_call_param import ResponseCodeInterpreterToolCallParam
@@ -217,13 +218,13 @@ class ShellCall(TypedDict, total=False):
"""The shell commands and limits that describe how to run the tool call."""
call_id: Required[str]
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
type: Required[Literal["shell_call"]]
- """The type of the item. Always `function_shell_call`."""
+ """The type of the item. Always `shell_call`."""
id: Optional[str]
- """The unique ID of the function shell tool call.
+ """The unique ID of the shell tool call.
Populated when this item is returned via API.
"""
@@ -237,7 +238,7 @@ class ShellCall(TypedDict, total=False):
class ShellCallOutput(TypedDict, total=False):
call_id: Required[str]
- """The unique ID of the function shell tool call generated by the model."""
+ """The unique ID of the shell tool call generated by the model."""
output: Required[Iterable[ResponseFunctionShellCallOutputContentParam]]
"""
@@ -246,10 +247,10 @@ class ShellCallOutput(TypedDict, total=False):
"""
type: Required[Literal["shell_call_output"]]
- """The type of the item. Always `function_shell_call_output`."""
+ """The type of the item. Always `shell_call_output`."""
id: Optional[str]
- """The unique ID of the function shell tool call output.
+ """The unique ID of the shell tool call output.
Populated when this item is returned via API.
"""
@@ -462,6 +463,7 @@ class ItemReference(TypedDict, total=False):
ResponseFunctionToolCallParam,
FunctionCallOutput,
ResponseReasoningItemParam,
+ ResponseCompactionItemParamParam,
ImageGenerationCall,
ResponseCodeInterpreterToolCallParam,
LocalShellCall,
diff --git a/src/openai/types/responses/response_output_item.py b/src/openai/types/responses/response_output_item.py
index 906ddbb25e..f0a66e1836 100644
--- a/src/openai/types/responses/response_output_item.py
+++ b/src/openai/types/responses/response_output_item.py
@@ -7,6 +7,7 @@
from ..._models import BaseModel
from .response_output_message import ResponseOutputMessage
from .response_reasoning_item import ResponseReasoningItem
+from .response_compaction_item import ResponseCompactionItem
from .response_custom_tool_call import ResponseCustomToolCall
from .response_computer_tool_call import ResponseComputerToolCall
from .response_function_tool_call import ResponseFunctionToolCall
@@ -173,6 +174,7 @@ class McpApprovalRequest(BaseModel):
ResponseFunctionWebSearch,
ResponseComputerToolCall,
ResponseReasoningItem,
+ ResponseCompactionItem,
ImageGenerationCall,
ResponseCodeInterpreterToolCall,
LocalShellCall,
diff --git a/src/openai/types/responses/tool.py b/src/openai/types/responses/tool.py
index ae8b34b1f4..bb32d4e1ec 100644
--- a/src/openai/types/responses/tool.py
+++ b/src/openai/types/responses/tool.py
@@ -174,7 +174,7 @@ class CodeInterpreter(BaseModel):
"""The code interpreter container.
Can be a container ID or an object that specifies uploaded file IDs to make
- available to your code.
+ available to your code, along with an optional `memory_limit` setting.
"""
type: Literal["code_interpreter"]
diff --git a/src/openai/types/responses/tool_param.py b/src/openai/types/responses/tool_param.py
index 18b044ab8c..779acf0a53 100644
--- a/src/openai/types/responses/tool_param.py
+++ b/src/openai/types/responses/tool_param.py
@@ -174,7 +174,7 @@ class CodeInterpreter(TypedDict, total=False):
"""The code interpreter container.
Can be a container ID or an object that specifies uploaded file IDs to make
- available to your code.
+ available to your code, along with an optional `memory_limit` setting.
"""
type: Required[Literal["code_interpreter"]]
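
A hedged sketch of passing the new `memory_limit` setting on a code interpreter container; the `"auto"` container shape is standard, while the `memory_limit` key and the `"1g"` value are assumptions based on the docstring above and the containers test later in this diff:

```python
from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="gpt-5.1",
    input="Plot a histogram of 1000 random values.",
    tools=[
        {
            "type": "code_interpreter",
            # Auto container with an optional memory limit; "1g" mirrors the
            # value used in tests/api_resources/test_containers.py.
            "container": {
                "type": "auto",
                "file_ids": [],
                "memory_limit": "1g",
            },
        }
    ],
)
print(response.output_text)
```
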
diff --git a/src/openai/types/shared/all_models.py b/src/openai/types/shared/all_models.py
index 3e0b09e2d1..ba8e1d82cf 100644
--- a/src/openai/types/shared/all_models.py
+++ b/src/openai/types/shared/all_models.py
@@ -24,5 +24,6 @@
"gpt-5-codex",
"gpt-5-pro",
"gpt-5-pro-2025-10-06",
+ "gpt-5.1-codex-max",
],
]
diff --git a/src/openai/types/shared/reasoning.py b/src/openai/types/shared/reasoning.py
index cf470ca057..b19476bcb5 100644
--- a/src/openai/types/shared/reasoning.py
+++ b/src/openai/types/shared/reasoning.py
@@ -14,9 +14,9 @@ class Reasoning(BaseModel):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -24,6 +24,7 @@ class Reasoning(BaseModel):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
generate_summary: Optional[Literal["auto", "concise", "detailed"]] = None
diff --git a/src/openai/types/shared/reasoning_effort.py b/src/openai/types/shared/reasoning_effort.py
index c890a133cc..24d8516424 100644
--- a/src/openai/types/shared/reasoning_effort.py
+++ b/src/openai/types/shared/reasoning_effort.py
@@ -5,4 +5,4 @@
__all__ = ["ReasoningEffort"]
-ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high"]]
+ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]]
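
Per the updated docstrings, `xhigh` is currently only supported by `gpt-5.1-codex-max`. A minimal sketch of requesting it on the Responses API:

```python
from openai import OpenAI

client = OpenAI()

# `xhigh` reasoning effort paired with the model that supports it.
response = client.responses.create(
    model="gpt-5.1-codex-max",
    reasoning={"effort": "xhigh"},
    input="Refactor this function to remove the global state.",
)
print(response.output_text)
```
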
diff --git a/src/openai/types/shared/responses_model.py b/src/openai/types/shared/responses_model.py
index 432cb82afd..38cdea9a94 100644
--- a/src/openai/types/shared/responses_model.py
+++ b/src/openai/types/shared/responses_model.py
@@ -24,5 +24,6 @@
"gpt-5-codex",
"gpt-5-pro",
"gpt-5-pro-2025-10-06",
+ "gpt-5.1-codex-max",
],
]
diff --git a/src/openai/types/shared_params/reasoning.py b/src/openai/types/shared_params/reasoning.py
index ad58f70b71..71cb37c65e 100644
--- a/src/openai/types/shared_params/reasoning.py
+++ b/src/openai/types/shared_params/reasoning.py
@@ -15,9 +15,9 @@ class Reasoning(TypedDict, total=False):
"""
Constrains effort on reasoning for
[reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently
- supported values are `none`, `minimal`, `low`, `medium`, and `high`. Reducing
- reasoning effort can result in faster responses and fewer tokens used on
- reasoning in a response.
+ supported values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`.
+ Reducing reasoning effort can result in faster responses and fewer tokens used
+ on reasoning in a response.
- `gpt-5.1` defaults to `none`, which does not perform reasoning. The supported
reasoning values for `gpt-5.1` are `none`, `low`, `medium`, and `high`. Tool
@@ -25,6 +25,7 @@ class Reasoning(TypedDict, total=False):
- All models before `gpt-5.1` default to `medium` reasoning effort, and do not
support `none`.
- The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+ - `xhigh` is currently only supported for `gpt-5.1-codex-max`.
"""
generate_summary: Optional[Literal["auto", "concise", "detailed"]]
diff --git a/src/openai/types/shared_params/reasoning_effort.py b/src/openai/types/shared_params/reasoning_effort.py
index e388eebff1..8518c2b141 100644
--- a/src/openai/types/shared_params/reasoning_effort.py
+++ b/src/openai/types/shared_params/reasoning_effort.py
@@ -7,4 +7,4 @@
__all__ = ["ReasoningEffort"]
-ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high"]]
+ReasoningEffort: TypeAlias = Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]]
diff --git a/src/openai/types/shared_params/responses_model.py b/src/openai/types/shared_params/responses_model.py
index fe34eb0f62..ad44dd6bf7 100644
--- a/src/openai/types/shared_params/responses_model.py
+++ b/src/openai/types/shared_params/responses_model.py
@@ -26,5 +26,6 @@
"gpt-5-codex",
"gpt-5-pro",
"gpt-5-pro-2025-10-06",
+ "gpt-5.1-codex-max",
],
]
diff --git a/src/openai/types/video_create_params.py b/src/openai/types/video_create_params.py
index 527d62d193..c4d3e0851f 100644
--- a/src/openai/types/video_create_params.py
+++ b/src/openai/types/video_create_params.py
@@ -20,10 +20,16 @@ class VideoCreateParams(TypedDict, total=False):
"""Optional image reference that guides generation."""
model: VideoModel
- """The video generation model to use. Defaults to `sora-2`."""
+ """The video generation model to use (allowed values: sora-2, sora-2-pro).
+
+ Defaults to `sora-2`.
+ """
seconds: VideoSeconds
- """Clip duration in seconds. Defaults to 4 seconds."""
+ """Clip duration in seconds (allowed values: 4, 8, 12). Defaults to 4 seconds."""
size: VideoSize
- """Output resolution formatted as width x height. Defaults to 720x1280."""
+ """
+ Output resolution formatted as width x height (allowed values: 720x1280,
+ 1280x720, 1024x1792, 1792x1024). Defaults to 720x1280.
+ """
diff --git a/tests/api_resources/test_containers.py b/tests/api_resources/test_containers.py
index c972f6539d..cf173c7fd5 100644
--- a/tests/api_resources/test_containers.py
+++ b/tests/api_resources/test_containers.py
@@ -38,6 +38,7 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None:
"minutes": 0,
},
file_ids=["string"],
+ memory_limit="1g",
)
assert_matches_type(ContainerCreateResponse, container, path=["response"])
@@ -197,6 +198,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) ->
"minutes": 0,
},
file_ids=["string"],
+ memory_limit="1g",
)
assert_matches_type(ContainerCreateResponse, container, path=["response"])
diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py
index b57e6099c4..14e2d911ef 100644
--- a/tests/api_resources/test_responses.py
+++ b/tests/api_resources/test_responses.py
@@ -12,6 +12,7 @@
from openai._utils import assert_signatures_in_sync
from openai.types.responses import (
Response,
+ CompactedResponse,
)
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -36,7 +37,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
max_output_tokens=0,
max_tool_calls=0,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="gpt-5.1",
parallel_tool_calls=True,
previous_response_id="previous_response_id",
prompt={
@@ -117,7 +118,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
max_output_tokens=0,
max_tool_calls=0,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="gpt-5.1",
parallel_tool_calls=True,
previous_response_id="previous_response_id",
prompt={
@@ -358,6 +359,41 @@ def test_path_params_cancel(self, client: OpenAI) -> None:
"",
)
+ @parametrize
+ def test_method_compact(self, client: OpenAI) -> None:
+ response = client.responses.compact()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ def test_method_compact_with_all_params(self, client: OpenAI) -> None:
+ response = client.responses.compact(
+ input="string",
+ instructions="instructions",
+ model="gpt-5.1",
+ previous_response_id="resp_123",
+ )
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ def test_raw_response_compact(self, client: OpenAI) -> None:
+ http_response = client.responses.with_raw_response.compact()
+
+ assert http_response.is_closed is True
+ assert http_response.http_request.headers.get("X-Stainless-Lang") == "python"
+ response = http_response.parse()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ def test_streaming_response_compact(self, client: OpenAI) -> None:
+ with client.responses.with_streaming_response.compact() as http_response:
+ assert not http_response.is_closed
+ assert http_response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ response = http_response.parse()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ assert cast(Any, http_response.is_closed) is True
+
@pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"])
def test_parse_method_in_sync(sync: bool, client: OpenAI, async_client: AsyncOpenAI) -> None:
@@ -391,7 +427,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
max_output_tokens=0,
max_tool_calls=0,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="gpt-5.1",
parallel_tool_calls=True,
previous_response_id="previous_response_id",
prompt={
@@ -472,7 +508,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
max_output_tokens=0,
max_tool_calls=0,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="gpt-5.1",
parallel_tool_calls=True,
previous_response_id="previous_response_id",
prompt={
@@ -712,3 +748,38 @@ async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None:
await async_client.responses.with_raw_response.cancel(
"",
)
+
+ @parametrize
+ async def test_method_compact(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.responses.compact()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ async def test_method_compact_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.responses.compact(
+ input="string",
+ instructions="instructions",
+ model="gpt-5.1",
+ previous_response_id="resp_123",
+ )
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ async def test_raw_response_compact(self, async_client: AsyncOpenAI) -> None:
+ http_response = await async_client.responses.with_raw_response.compact()
+
+ assert http_response.is_closed is True
+ assert http_response.http_request.headers.get("X-Stainless-Lang") == "python"
+ response = http_response.parse()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_compact(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.responses.with_streaming_response.compact() as http_response:
+ assert not http_response.is_closed
+ assert http_response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ response = await http_response.parse()
+ assert_matches_type(CompactedResponse, response, path=["response"])
+
+ assert cast(Any, http_response.is_closed) is True