From f581f220cda8b270644a413568cdc79e553a748c Mon Sep 17 00:00:00 2001 From: Paul Fischer Date: Fri, 1 Aug 2025 15:22:15 +0200 Subject: [PATCH 1/6] feat(http): add error handling for exporting --- .../exporter/otlp/proto/http/_log_exporter/__init__.py | 6 +++++- .../exporter/otlp/proto/http/metric_exporter/__init__.py | 8 +++++++- .../exporter/otlp/proto/http/trace_exporter/__init__.py | 6 +++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py index 2afdf66002..7122775762 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py @@ -184,7 +184,11 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: serialized_data = encode_logs(batch).SerializeToString() deadline_sec = time() + self._timeout for retry_num in range(_MAX_RETRYS): - resp = self._export(serialized_data, deadline_sec - time()) + try: + resp = self._export(serialized_data, deadline_sec - time()) + except Exception as error: + _logger.error("Failed to export logs batch reason: %s", error) + return LogExportResult.FAILURE if resp.ok: return LogExportResult.SUCCESS # multiplying by a random number between .8 and 1.2 introduces a +/20% jitter to each backoff. 
diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py index c6d657e7ae..81801d1a51 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py @@ -231,7 +231,13 @@ def export( serialized_data = encode_metrics(metrics_data).SerializeToString() deadline_sec = time() + self._timeout for retry_num in range(_MAX_RETRYS): - resp = self._export(serialized_data, deadline_sec - time()) + try: + resp = self._export(serialized_data, deadline_sec - time()) + except Exception as error: + _logger.error( + "Failed to export metrics batch reason: %s", error + ) + return MetricExportResult.FAILURE if resp.ok: return MetricExportResult.SUCCESS # multiplying by a random number between .8 and 1.2 introduces a +/20% jitter to each backoff. 
diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py index 055e829dab..f47eccaa8e 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py @@ -179,7 +179,11 @@ def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult: serialized_data = encode_spans(spans).SerializePartialToString() deadline_sec = time() + self._timeout for retry_num in range(_MAX_RETRYS): - resp = self._export(serialized_data, deadline_sec - time()) + try: + resp = self._export(serialized_data, deadline_sec - time()) + except Exception as error: + _logger.error("Failed to export span batch reason: %s", error) + return SpanExportResult.FAILURE if resp.ok: return SpanExportResult.SUCCESS # multiplying by a random number between .8 and 1.2 introduces a +/20% jitter to each backoff. 
From 2b50d49d7dc232343166d2ae24fffb046ff67c6e Mon Sep 17 00:00:00 2001 From: Paul Fischer Date: Mon, 27 Oct 2025 13:39:31 +0100 Subject: [PATCH 2/6] feat(http_exporter): allow to run retry loop on connection errors --- .../otlp/proto/http/_log_exporter/__init__.py | 28 +++++++++++-------- .../proto/http/metric_exporter/__init__.py | 28 +++++++++++-------- .../proto/http/trace_exporter/__init__.py | 28 +++++++++++-------- 3 files changed, 48 insertions(+), 36 deletions(-) diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py index 7122775762..23fe1020d6 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py @@ -184,30 +184,34 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: serialized_data = encode_logs(batch).SerializeToString() deadline_sec = time() + self._timeout for retry_num in range(_MAX_RETRYS): + # multiplying by a random number between .8 and 1.2 introduces a +/20% jitter to each backoff. + backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2) try: resp = self._export(serialized_data, deadline_sec - time()) + if resp.ok: + return LogExportResult.SUCCESS + if not _is_retryable(resp): + _logger.error( + "Failed to export logs batch code: %s, reason: %s", + resp.status_code, + resp.text, + ) + return LogExportResult.FAILURE except Exception as error: _logger.error("Failed to export logs batch reason: %s", error) - return LogExportResult.FAILURE - if resp.ok: - return LogExportResult.SUCCESS - # multiplying by a random number between .8 and 1.2 introduces a +/20% jitter to each backoff. 
- backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2) + if ( - not _is_retryable(resp) - or retry_num + 1 == _MAX_RETRYS + retry_num + 1 == _MAX_RETRYS or backoff_seconds > (deadline_sec - time()) or self._shutdown ): _logger.error( - "Failed to export logs batch code: %s, reason: %s", - resp.status_code, - resp.text, + "Failed to export logs batch due to timeout," + "max retries or shutdown." ) return LogExportResult.FAILURE _logger.warning( - "Transient error %s encountered while exporting logs batch, retrying in %.2fs.", - resp.reason, + "Transient error encountered while exporting logs batch, retrying in %.2fs.", backoff_seconds, ) shutdown = self._shutdown_is_occuring.wait(backoff_seconds) diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py index 81801d1a51..0d71a6ed16 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py @@ -231,32 +231,36 @@ def export( serialized_data = encode_metrics(metrics_data).SerializeToString() deadline_sec = time() + self._timeout for retry_num in range(_MAX_RETRYS): + # multiplying by a random number between .8 and 1.2 introduces a +/20% jitter to each backoff. 
+ backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2) try: resp = self._export(serialized_data, deadline_sec - time()) + if resp.ok: + return MetricExportResult.SUCCESS + if not _is_retryable(resp): + _logger.error( + "Failed to export metrics batch code: %s, reason: %s", + resp.status_code, + resp.text, + ) + return MetricExportResult.FAILURE except Exception as error: _logger.error( "Failed to export metrics batch reason: %s", error ) - return MetricExportResult.FAILURE - if resp.ok: - return MetricExportResult.SUCCESS - # multiplying by a random number between .8 and 1.2 introduces a +/20% jitter to each backoff. - backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2) + if ( - not _is_retryable(resp) - or retry_num + 1 == _MAX_RETRYS + retry_num + 1 == _MAX_RETRYS or backoff_seconds > (deadline_sec - time()) or self._shutdown ): _logger.error( - "Failed to export metrics batch code: %s, reason: %s", - resp.status_code, - resp.text, + "Failed to export metrics batch due to timeout," + "max retries or shutdown." 
) return MetricExportResult.FAILURE _logger.warning( - "Transient error %s encountered while exporting metrics batch, retrying in %.2fs.", - resp.reason, + "Transient error encountered while exporting metrics batch, retrying in %.2fs.", backoff_seconds, ) shutdown = self._shutdown_in_progress.wait(backoff_seconds) diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py index f47eccaa8e..91583da5a6 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py @@ -179,30 +179,34 @@ def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult: serialized_data = encode_spans(spans).SerializePartialToString() deadline_sec = time() + self._timeout for retry_num in range(_MAX_RETRYS): + # multiplying by a random number between .8 and 1.2 introduces a +/20% jitter to each backoff. + backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2) try: resp = self._export(serialized_data, deadline_sec - time()) + if resp.ok: + return SpanExportResult.SUCCESS + if not _is_retryable(resp): + _logger.error( + "Failed to export span batch code: %s, reason: %s", + resp.status_code, + resp.text, + ) + return SpanExportResult.FAILURE except Exception as error: _logger.error("Failed to export span batch reason: %s", error) - return SpanExportResult.FAILURE - if resp.ok: - return SpanExportResult.SUCCESS - # multiplying by a random number between .8 and 1.2 introduces a +/20% jitter to each backoff. 
- backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2) + if ( - not _is_retryable(resp) - or retry_num + 1 == _MAX_RETRYS + retry_num + 1 == _MAX_RETRYS or backoff_seconds > (deadline_sec - time()) or self._shutdown ): _logger.error( - "Failed to export span batch code: %s, reason: %s", - resp.status_code, - resp.text, + "Failed to export span batch due to timeout," + "max retries or shutdown." ) return SpanExportResult.FAILURE _logger.warning( - "Transient error %s encountered while exporting span batch, retrying in %.2fs.", - resp.reason, + "Transient error encountered while exporting span batch, retrying in %.2fs.", backoff_seconds, ) shutdown = self._shutdown_in_progress.wait(backoff_seconds) From 5126b7194771de43cfceb381ae76a526a8ab9d52 Mon Sep 17 00:00:00 2001 From: Paul Fischer Date: Thu, 6 Nov 2025 11:19:22 +0100 Subject: [PATCH 3/6] feat(http): change error types that are caught --- .../exporter/otlp/proto/http/_log_exporter/__init__.py | 2 +- .../exporter/otlp/proto/http/metric_exporter/__init__.py | 2 +- .../exporter/otlp/proto/http/trace_exporter/__init__.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py index 23fe1020d6..4d787ac3b9 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py @@ -197,7 +197,7 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: resp.text, ) return LogExportResult.FAILURE - except Exception as error: + except requests.exceptions.RequestException as error: _logger.error("Failed to export logs batch reason: %s", error) if ( diff --git 
a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py index 0d71a6ed16..8b869b2e5a 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py @@ -244,7 +244,7 @@ def export( resp.text, ) return MetricExportResult.FAILURE - except Exception as error: + except requests.exceptions.RequestException as error: _logger.error( "Failed to export metrics batch reason: %s", error ) diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py index 91583da5a6..a02b945cdc 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py @@ -192,7 +192,7 @@ def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult: resp.text, ) return SpanExportResult.FAILURE - except Exception as error: + except requests.exceptions.RequestException as error: _logger.error("Failed to export span batch reason: %s", error) if ( From 4fb3977af8a5762878b09bdab3bdf7e322cc5e8c Mon Sep 17 00:00:00 2001 From: Paul Fischer Date: Thu, 6 Nov 2025 11:33:16 +0100 Subject: [PATCH 4/6] refactor(http): introduce variables to unify logging --- .../otlp/proto/http/_log_exporter/__init__.py | 26 ++++++++++++------- .../proto/http/metric_exporter/__init__.py | 25 +++++++++++------- .../proto/http/trace_exporter/__init__.py | 26 ++++++++++++------- 3 files 
changed, 49 insertions(+), 28 deletions(-) diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py index 4d787ac3b9..5236f7da58 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py @@ -190,15 +190,22 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: resp = self._export(serialized_data, deadline_sec - time()) if resp.ok: return LogExportResult.SUCCESS - if not _is_retryable(resp): - _logger.error( - "Failed to export logs batch code: %s, reason: %s", - resp.status_code, - resp.text, - ) - return LogExportResult.FAILURE except requests.exceptions.RequestException as error: - _logger.error("Failed to export logs batch reason: %s", error) + reason = str(error) + retryable = True + status_code = None + else: + reason = resp.reason + retryable = _is_retryable(resp) + status_code = resp.status_code + + if not retryable: + _logger.error( + "Failed to export logs batch code: %s, reason: %s", + status_code, + reason, + ) + return LogExportResult.FAILURE if ( retry_num + 1 == _MAX_RETRYS @@ -211,7 +218,8 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: ) return LogExportResult.FAILURE _logger.warning( - "Transient error encountered while exporting logs batch, retrying in %.2fs.", + "Transient error %s encountered while exporting logs batch, retrying in %.2fs.", + reason, backoff_seconds, ) shutdown = self._shutdown_is_occuring.wait(backoff_seconds) diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py 
b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py index 8b869b2e5a..1618ad7c68 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py @@ -237,18 +237,22 @@ def export( resp = self._export(serialized_data, deadline_sec - time()) if resp.ok: return MetricExportResult.SUCCESS - if not _is_retryable(resp): - _logger.error( - "Failed to export metrics batch code: %s, reason: %s", - resp.status_code, - resp.text, - ) - return MetricExportResult.FAILURE except requests.exceptions.RequestException as error: + reason = str(error) + retryable = True + status_code = None + else: + reason = resp.reason + retryable = _is_retryable(resp) + status_code = resp.status_code + + if not retryable: _logger.error( - "Failed to export metrics batch reason: %s", error + "Failed to export metrics batch code: %s, reason: %s", + status_code, + reason, ) - + return MetricExportResult.FAILURE if ( retry_num + 1 == _MAX_RETRYS or backoff_seconds > (deadline_sec - time()) @@ -260,7 +264,8 @@ def export( ) return MetricExportResult.FAILURE _logger.warning( - "Transient error encountered while exporting metrics batch, retrying in %.2fs.", + "Transient error %s encountered while exporting metrics batch, retrying in %.2fs.", + reason, backoff_seconds, ) shutdown = self._shutdown_in_progress.wait(backoff_seconds) diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py index a02b945cdc..8a974c8462 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py +++ 
b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py @@ -185,15 +185,22 @@ def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult: resp = self._export(serialized_data, deadline_sec - time()) if resp.ok: return SpanExportResult.SUCCESS - if not _is_retryable(resp): - _logger.error( - "Failed to export span batch code: %s, reason: %s", - resp.status_code, - resp.text, - ) - return SpanExportResult.FAILURE except requests.exceptions.RequestException as error: - _logger.error("Failed to export span batch reason: %s", error) + reason = str(error) + retryable = True + status_code = None + else: + reason = resp.reason + retryable = _is_retryable(resp) + status_code = resp.status_code + + if not retryable: + _logger.error( + "Failed to export span batch code: %s, reason: %s", + status_code, + reason, + ) + return SpanExportResult.FAILURE if ( retry_num + 1 == _MAX_RETRYS @@ -206,7 +213,8 @@ def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult: ) return SpanExportResult.FAILURE _logger.warning( - "Transient error encountered while exporting span batch, retrying in %.2fs.", + "Transient error %s encountered while exporting span batch, retrying in %.2fs.", + reason, backoff_seconds, ) shutdown = self._shutdown_in_progress.wait(backoff_seconds) From 41870e181dcae4c0b22906d54c5adc7ed13b436e Mon Sep 17 00:00:00 2001 From: Paul Fischer Date: Mon, 17 Nov 2025 14:27:25 +0100 Subject: [PATCH 5/6] feat(http_exporter): only retry on connection error --- .../exporter/otlp/proto/http/_log_exporter/__init__.py | 5 ++++- .../exporter/otlp/proto/http/metric_exporter/__init__.py | 5 ++++- .../exporter/otlp/proto/http/trace_exporter/__init__.py | 5 ++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py 
b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py index 5236f7da58..92524210da 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py @@ -192,7 +192,10 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult: return LogExportResult.SUCCESS except requests.exceptions.RequestException as error: reason = str(error) - retryable = True + if isinstance(error, ConnectionError): + retryable = True + else: + retryable = False status_code = None else: reason = resp.reason diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py index 1618ad7c68..d3a943bef5 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py @@ -239,7 +239,10 @@ def export( return MetricExportResult.SUCCESS except requests.exceptions.RequestException as error: reason = str(error) - retryable = True + if isinstance(error, ConnectionError): + retryable = True + else: + retryable = False status_code = None else: reason = resp.reason diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py index 8a974c8462..31a9ff978e 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py +++ 
b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py @@ -187,7 +187,10 @@ def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult: return SpanExportResult.SUCCESS except requests.exceptions.RequestException as error: reason = str(error) - retryable = True + if isinstance(error, ConnectionError): + retryable = True + else: + retryable = False status_code = None else: reason = resp.reason From dfeb67b48c4a08e81296a3037bc4bf646203085d Mon Sep 17 00:00:00 2001 From: Paul Fischer Date: Mon, 17 Nov 2025 14:27:54 +0100 Subject: [PATCH 6/6] test(http_exporter): add test case for connection errors while exporting --- .../metrics/test_otlp_metrics_exporter.py | 44 +++++++++++++++++++ .../tests/test_proto_log_exporter.py | 43 ++++++++++++++++++ .../tests/test_proto_span_exporter.py | 43 ++++++++++++++++++ 3 files changed, 130 insertions(+) diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/tests/metrics/test_otlp_metrics_exporter.py b/exporter/opentelemetry-exporter-otlp-proto-http/tests/metrics/test_otlp_metrics_exporter.py index d7a5bed2d4..4057f3c02b 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/tests/metrics/test_otlp_metrics_exporter.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/tests/metrics/test_otlp_metrics_exporter.py @@ -19,7 +19,9 @@ from unittest import TestCase from unittest.mock import ANY, MagicMock, Mock, patch +import requests from requests import Session +from requests.exceptions import ConnectionError from requests.models import Response from opentelemetry.exporter.otlp.proto.common.metrics_encoder import ( @@ -556,6 +558,48 @@ def test_retry_timeout(self, mock_post): warning.records[0].message, ) + @patch.object(Session, "post") + def test_export_no_collector_available_retryable(self, mock_post): + exporter = OTLPMetricExporter(timeout=1.5) + msg = "Server not available." 
+ mock_post.side_effect = ConnectionError(msg) + with self.assertLogs(level=WARNING) as warning: + before = time.time() + # Set timeout to 1.5 seconds + self.assertEqual( + exporter.export(self.metrics["sum_int"]), + MetricExportResult.FAILURE, + ) + after = time.time() + # First call at time 0, second at time 1, then an early return before the second backoff sleep b/c it would exceed timeout. + # Additionally every retry results in two calls, therefore 4. + self.assertEqual(mock_post.call_count, 4) + # There's a +/-20% jitter on each backoff. + self.assertTrue(0.75 < after - before < 1.25) + self.assertIn( + f"Transient error {msg} encountered while exporting metrics batch, retrying in", + warning.records[0].message, + ) + + @patch.object(Session, "post") + def test_export_no_collector_available(self, mock_post): + exporter = OTLPMetricExporter(timeout=1.5) + + mock_post.side_effect = requests.exceptions.RequestException() + with self.assertLogs(level=WARNING) as warning: + # Set timeout to 1.5 seconds + self.assertEqual( + exporter.export(self.metrics["sum_int"]), + MetricExportResult.FAILURE, + ) + # A plain RequestException is not a ConnectionError, so it is not retried: exactly one post is attempted. + self.assertEqual(mock_post.call_count, 1) + # The failure is logged through the non-retryable error message. 
+ self.assertIn( + "Failed to export metrics batch code", + warning.records[0].message, + ) + @patch.object(Session, "post") def test_timeout_set_correctly(self, mock_post): resp = Response() diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_log_exporter.py b/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_log_exporter.py index d136e09ffd..068c2529f8 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_log_exporter.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_log_exporter.py @@ -24,6 +24,7 @@ import requests from google.protobuf.json_format import MessageToDict from requests import Session +from requests.exceptions import ConnectionError from requests.models import Response from opentelemetry._logs import SeverityNumber @@ -484,6 +485,48 @@ def test_retry_timeout(self, mock_post): warning.records[0].message, ) + @patch.object(Session, "post") + def test_export_no_collector_available_retryable(self, mock_post): + exporter = OTLPLogExporter(timeout=1.5) + msg = "Server not available." + mock_post.side_effect = ConnectionError(msg) + with self.assertLogs(level=WARNING) as warning: + before = time.time() + # Set timeout to 1.5 seconds + self.assertEqual( + exporter.export(self._get_sdk_log_data()), + LogExportResult.FAILURE, + ) + after = time.time() + # First call at time 0, second at time 1, then an early return before the second backoff sleep b/c it would exceed timeout. + # Additionally every retry results in two calls, therefore 4. + self.assertEqual(mock_post.call_count, 4) + # There's a +/-20% jitter on each backoff. 
+ self.assertTrue(0.75 < after - before < 1.25) + self.assertIn( + f"Transient error {msg} encountered while exporting logs batch, retrying in", + warning.records[0].message, + ) + + @patch.object(Session, "post") + def test_export_no_collector_available(self, mock_post): + exporter = OTLPLogExporter(timeout=1.5) + + mock_post.side_effect = requests.exceptions.RequestException() + with self.assertLogs(level=WARNING) as warning: + # Set timeout to 1.5 seconds + self.assertEqual( + exporter.export(self._get_sdk_log_data()), + LogExportResult.FAILURE, + ) + # A plain RequestException is not a ConnectionError, so it is not retried: exactly one post is attempted. + self.assertEqual(mock_post.call_count, 1) + # The failure is logged through the non-retryable error message. + self.assertIn( + "Failed to export logs batch code", + warning.records[0].message, + ) + @patch.object(Session, "post") def test_timeout_set_correctly(self, mock_post): resp = Response() diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_span_exporter.py b/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_span_exporter.py index 2d6dea71de..9b250723af 100644 --- a/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_span_exporter.py +++ b/exporter/opentelemetry-exporter-otlp-proto-http/tests/test_proto_span_exporter.py @@ -20,6 +20,7 @@ import requests from requests import Session +from requests.exceptions import ConnectionError from requests.models import Response from opentelemetry.exporter.otlp.proto.http import Compression @@ -304,6 +305,48 @@ def test_retry_timeout(self, mock_post): warning.records[0].message, ) + @patch.object(Session, "post") + def test_export_no_collector_available_retryable(self, mock_post): + exporter = OTLPSpanExporter(timeout=1.5) + msg = "Server not available." 
+ mock_post.side_effect = ConnectionError(msg) + with self.assertLogs(level=WARNING) as warning: + before = time.time() + # Set timeout to 1.5 seconds + self.assertEqual( + exporter.export([BASIC_SPAN]), + SpanExportResult.FAILURE, + ) + after = time.time() + # First call at time 0, second at time 1, then an early return before the second backoff sleep b/c it would exceed timeout. + # Additionally every retry results in two calls, therefore 4. + self.assertEqual(mock_post.call_count, 4) + # There's a +/-20% jitter on each backoff. + self.assertTrue(0.75 < after - before < 1.25) + self.assertIn( + f"Transient error {msg} encountered while exporting span batch, retrying in", + warning.records[0].message, + ) + + @patch.object(Session, "post") + def test_export_no_collector_available(self, mock_post): + exporter = OTLPSpanExporter(timeout=1.5) + + mock_post.side_effect = requests.exceptions.RequestException() + with self.assertLogs(level=WARNING) as warning: + # Set timeout to 1.5 seconds + self.assertEqual( + exporter.export([BASIC_SPAN]), + SpanExportResult.FAILURE, + ) + # A plain RequestException is not a ConnectionError, so it is not retried: exactly one post is attempted. + self.assertEqual(mock_post.call_count, 1) + # The failure is logged through the non-retryable error message. + self.assertIn( + "Failed to export span batch code", + warning.records[0].message, + ) + @patch.object(Session, "post") def test_timeout_set_correctly(self, mock_post): resp = Response()