From 1721d582af1549039638750ea7b2f9a5e618acfa Mon Sep 17 00:00:00 2001
From: Yashwant Bezawada
Date: Fri, 7 Nov 2025 10:08:18 -0600
Subject: [PATCH 1/2] Fix Celery SoftTimeLimitExceeded exception handling

When using the OpenAI client in Celery tasks with soft time limits, the
client's broad exception handling was catching SoftTimeLimitExceeded and
treating it as a retryable connection error. This prevented Celery tasks
from properly handling timeouts and running cleanup logic.

This change adds a check to identify termination signals (like Celery's
SoftTimeLimitExceeded or asyncio's CancelledError) and re-raises them
immediately without retry. This allows task executors to properly handle
these signals.

Changes:
- Added _should_not_retry() helper to identify termination signals
- Modified sync and async exception handlers to check before retrying
- Added test to verify termination signals are not retried

Fixes #2737
---
 src/openai/_base_client.py | 38 ++++++++++++++++++++++++++++++++++++++
 tests/test_client.py       | 31 +++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/src/openai/_base_client.py b/src/openai/_base_client.py
index 58490e4430..a03005c998 100644
--- a/src/openai/_base_client.py
+++ b/src/openai/_base_client.py
@@ -88,6 +88,36 @@
 log: logging.Logger = logging.getLogger(__name__)
 log.addFilter(SensitiveHeadersFilter())
 
+
+def _should_not_retry(exc: Exception) -> bool:
+    """
+    Check if an exception should propagate immediately without retry.
+
+    This includes task cancellation signals from async frameworks
+    and task executors like Celery that should not be caught and retried.
+
+    Args:
+        exc: The exception to check
+
+    Returns:
+        True if the exception should propagate without retry, False otherwise
+    """
+    exc_class = exc.__class__
+    exc_module = exc_class.__module__
+    exc_name = exc_class.__name__
+
+    # Celery task termination (don't import celery - check by name)
+    # Examples: SoftTimeLimitExceeded, TimeLimitExceeded, Terminated
+    if exc_module.startswith("celery") and ("Limit" in exc_name or "Terminated" in exc_name):
+        return True
+
+    # asyncio cancellation
+    if exc_module == "asyncio" and exc_name == "CancelledError":
+        return True
+
+    return False
+
+
 # TODO: make base page type vars covariant
 SyncPageT = TypeVar("SyncPageT", bound="BaseSyncPage[Any]")
 AsyncPageT = TypeVar("AsyncPageT", bound="BaseAsyncPage[Any]")
@@ -1001,6 +1031,10 @@ def request(
             except Exception as err:
                 log.debug("Encountered Exception", exc_info=True)
 
+                # Check if this is a termination signal that should not be retried
+                if _should_not_retry(err):
+                    raise
+
                 if remaining_retries > 0:
                     self._sleep_for_retry(
                         retries_taken=retries_taken,
@@ -1548,6 +1582,10 @@ async def request(
             except Exception as err:
                 log.debug("Encountered Exception", exc_info=True)
 
+                # Check if this is a termination signal that should not be retried
+                if _should_not_retry(err):
+                    raise
+
                 if remaining_retries > 0:
                     await self._sleep_for_retry(
                         retries_taken=retries_taken,
diff --git a/tests/test_client.py b/tests/test_client.py
index e8d62f17f7..a532f17351 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -885,6 +885,37 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
 
         assert response.http_request.headers.get("x-stainless-retry-count") == "42"
 
+    @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    def test_termination_signal_not_retried(self, respx_mock: MockRouter, client: OpenAI) -> None:
+        """Test that termination signals (like Celery's SoftTimeLimitExceeded) are not retried."""
+        client = client.with_options(max_retries=3)
+
+        # Create a mock exception that mimics Celery's SoftTimeLimitExceeded
+        class MockCelerySoftTimeLimitExceeded(Exception):
+            """Mock of celery.exceptions.SoftTimeLimitExceeded"""
+
+            __module__ = "celery.exceptions"
+            __name__ = "SoftTimeLimitExceeded"
+
+        # Mock the request to raise our termination signal
+        respx_mock.post("/chat/completions").mock(side_effect=MockCelerySoftTimeLimitExceeded("Time limit exceeded"))
+
+        # Verify the exception propagates without retry
+        with pytest.raises(MockCelerySoftTimeLimitExceeded):
+            client.chat.completions.create(
+                messages=[
+                    {
+                        "content": "string",
+                        "role": "developer",
+                    }
+                ],
+                model="gpt-4o",
+            )
+
+        # Verify only one attempt was made (no retries)
+        assert len(respx_mock.calls) == 1
+
     @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
     @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
     @pytest.mark.respx(base_url=base_url)

From 20e8fd0be5922e12251e93d90091bf0830c16ca5 Mon Sep 17 00:00:00 2001
From: Yashwant Bezawada
Date: Fri, 7 Nov 2025 10:27:51 -0600
Subject: [PATCH 2/2] Fix asyncio.CancelledError detection and add test coverage

- Fix module check to use startswith("asyncio") instead of == "asyncio"
  to properly match asyncio.exceptions.CancelledError
- Add test case for asyncio.CancelledError to verify it bypasses the retry loop

Addresses review feedback from chatgpt-codex-connector bot
---
 src/openai/_base_client.py |  2 +-
 tests/test_client.py       | 31 +++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/openai/_base_client.py b/src/openai/_base_client.py
index a03005c998..9b9a22cde5 100644
--- a/src/openai/_base_client.py
+++ b/src/openai/_base_client.py
@@ -112,7 +112,7 @@ def _should_not_retry(exc: Exception) -> bool:
         return True
 
     # asyncio cancellation
-    if exc_module == "asyncio" and exc_name == "CancelledError":
+    if exc_module.startswith("asyncio") and exc_name == "CancelledError":
         return True
 
     return False
diff --git a/tests/test_client.py b/tests/test_client.py
index a532f17351..3df853bf8a 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -916,6 +916,37 @@ class MockCelerySoftTimeLimitExceeded(Exception):
         # Verify only one attempt was made (no retries)
         assert len(respx_mock.calls) == 1
 
+    @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    def test_asyncio_cancelled_error_not_retried(self, respx_mock: MockRouter, client: OpenAI) -> None:
+        """Test that asyncio.CancelledError is not retried."""
+        client = client.with_options(max_retries=3)
+
+        # Mimic asyncio.exceptions.CancelledError. The class must actually be named
+        # CancelledError: assigning __name__ in the class body does not change
+        # exc.__class__.__name__, which is what _should_not_retry() inspects.
+        class CancelledError(Exception):
+            """Mock of asyncio.exceptions.CancelledError (an Exception subclass so the client's handler sees it)."""
+            __module__ = "asyncio.exceptions"
+
+        # Mock the request to raise our cancellation signal
+        respx_mock.post("/chat/completions").mock(side_effect=CancelledError("Task cancelled"))
+
+        # Verify the exception propagates without retry
+        with pytest.raises(CancelledError):
+            client.chat.completions.create(
+                messages=[
+                    {
+                        "content": "string",
+                        "role": "developer",
+                    }
+                ],
+                model="gpt-4o",
+            )
+
+        # Verify only one attempt was made (no retries)
+        assert len(respx_mock.calls) == 1
+
     @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
     @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
     @pytest.mark.respx(base_url=base_url)
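
For context on the behavior the first patch restores, below is a minimal sketch (not part of the patches above) of a Celery task with a soft time limit wrapping an OpenAI call. The broker URL, task body, time limit, and cleanup helper are hypothetical; the point is that with the patched client, SoftTimeLimitExceeded raised during the request reaches the task's own handler instead of being treated as a retryable connection error.

# Illustrative sketch only; assumes the patched client, and the names below are made up.
from celery import Celery
from celery.exceptions import SoftTimeLimitExceeded

from openai import OpenAI

app = Celery("worker", broker="redis://localhost:6379/0")
client = OpenAI()


def cleanup_partial_work() -> None:
    """Placeholder for whatever the task needs to release when it times out."""


@app.task(soft_time_limit=30)
def summarize(text: str) -> str:
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": f"Summarize: {text}"}],
        )
        return response.choices[0].message.content or ""
    except SoftTimeLimitExceeded:
        # With _should_not_retry() in place, the soft-timeout signal propagates
        # here on the first raise; previously the client's broad except handler
        # retried it as a connection error and this block was never reached.
        cleanup_partial_work()
        raise

The gpt-4o model name and 30-second limit are placeholders; only the shape of the task matters.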