From ea253494186d466273aeb3a6358dc8884f4c149d Mon Sep 17 00:00:00 2001
From: tammy-baylis-swi <tammy.baylis@solarwinds.com>
Date: Wed, 29 Oct 2025 15:37:51 -0700
Subject: [PATCH 1/8] Fix FlaskInstrumentor http.server.request.duration
 exemplars

---
 .../instrumentation/flask/__init__.py          | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py b/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py
index 57f0f75ca3..140ecb0ef7 100644
--- a/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py
+++ b/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py
@@ -278,6 +278,7 @@ def response_hook(span: Span, status: str, response_headers: List):
 from opentelemetry.semconv.metrics.http_metrics import (
     HTTP_SERVER_REQUEST_DURATION,
 )
+from opentelemetry.trace.propagation import _SPAN_KEY
 from opentelemetry.util._importlib_metadata import version
 from opentelemetry.util.http import (
     get_excluded_urls,
@@ -418,9 +419,20 @@ def _start_response(status, response_headers, *args, **kwargs):
                 if request_route:
                     duration_attrs_new[HTTP_ROUTE] = str(request_route)
 
-                duration_histogram_new.record(
-                    max(duration_s, 0), duration_attrs_new
-                )
+                # Get the span from wrapped_app_environ and re-create context manually
+                # to pass to histogram for exemplars generation
+                span = wrapped_app_environ.get(_ENVIRON_SPAN_KEY)
+                if span:
+                    exemplar_context = context.set_value(_SPAN_KEY, span)
+                    duration_histogram_new.record(
+                        max(duration_s, 0),
+                        duration_attrs_new,
+                        context=exemplar_context,
+                    )
+                else:
+                    duration_histogram_new.record(
+                        max(duration_s, 0), duration_attrs_new
+                    )
         active_requests_counter.add(-1, active_requests_count_attrs)
         return result
 

From f82f6a226b64dda703decdd9db7a45512fb61e41 Mon Sep 17 00:00:00 2001
From: tammy-baylis-swi <tammy.baylis@solarwinds.com>
Date: Thu, 30 Oct 2025 16:28:09 -0700
Subject: [PATCH 2/8] Fix FlaskInstrumentor http.server.duration exemplars

---
 .../instrumentation/flask/__init__.py           | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py b/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py
index 140ecb0ef7..46b2d7d633 100644
--- a/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py
+++ b/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py
@@ -408,9 +408,20 @@ def _start_response(status, response_headers, *args, **kwargs):
                     # http.target to be included in old semantic conventions
                     duration_attrs_old[HTTP_TARGET] = str(request_route)
 
-                duration_histogram_old.record(
-                    max(round(duration_s * 1000), 0), duration_attrs_old
-                )
+                # Get the span from wrapped_app_environ and re-create context manually
+                # to pass to histogram for exemplars generation
+                span = wrapped_app_environ.get(_ENVIRON_SPAN_KEY)
+                if span:
+                    exemplar_context = context.set_value(_SPAN_KEY, span)
+                    duration_histogram_old.record(
+                        max(round(duration_s * 1000), 0),
+                        duration_attrs_old,
+                        context=exemplar_context,
+                    )
+                else:
+                    duration_histogram_old.record(
+                        max(round(duration_s * 1000), 0), duration_attrs_old
+                    )
             if duration_histogram_new:
                 duration_attrs_new = otel_wsgi._parse_duration_attrs(
                     attributes, _StabilityMode.HTTP

From c4341a8eda4a2229e959ff33ec418663da960e51 Mon Sep 17 00:00:00 2001
From: tammy-baylis-swi <tammy.baylis@solarwinds.com>
Date: Fri, 31 Oct 2025 16:59:34 -0700
Subject: [PATCH 3/8] Add unit test

---
 .../tests/test_programmatic.py                | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/instrumentation/opentelemetry-instrumentation-flask/tests/test_programmatic.py b/instrumentation/opentelemetry-instrumentation-flask/tests/test_programmatic.py
index 2da579adda..c769371204 100644
--- a/instrumentation/opentelemetry-instrumentation-flask/tests/test_programmatic.py
+++ b/instrumentation/opentelemetry-instrumentation-flask/tests/test_programmatic.py
@@ -69,6 +69,7 @@
 )
 from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
 from opentelemetry.test.wsgitestutil import WsgiTestBase
+from opentelemetry.trace.propagation import _SPAN_KEY
 from opentelemetry.util.http import (
     OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS,
     OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST,
@@ -810,6 +811,50 @@ def test_flask_metrics_excluded_urls_new_semconv(self):
         self.assertTrue(number_data_point_seen)
         self.assertFalse(histogram_data_point_seen)
 
+    def test_duration_histogram_old_record_with_context(self):
+        with patch("opentelemetry.context.set_value") as mock_set_value:
+            self.client.get("/hello/123")
+
+            # Verify that context.set_value was called for metrics exemplar context
+            # with same trace, span ID as trace
+            mock_set_value.assert_called()
+            call_args = mock_set_value.call_args
+            self.assertEqual(len(call_args[0]), 2)
+            self.assertEqual(call_args[0][0], _SPAN_KEY)
+            span_arg = call_args[0][1]
+            self.assertIsNotNone(span_arg)
+            finished_spans = self.memory_exporter.get_finished_spans()
+            self.assertEqual(len(finished_spans), 1)
+            finished_span = finished_spans[0]
+            self.assertEqual(
+                span_arg.context.trace_id, finished_span.context.trace_id
+            )
+            self.assertEqual(
+                span_arg.context.span_id, finished_span.context.span_id
+            )
+
+    def test_duration_histogram_new_record_with_context_new_semconv(self):
+        with patch("opentelemetry.context.set_value") as mock_set_value:
+            self.client.get("/hello/123")
+
+            # Verify that context.set_value was called for metrics exemplar context
+            # with same trace, span ID as trace
+            mock_set_value.assert_called()
+            call_args = mock_set_value.call_args
+            self.assertEqual(len(call_args[0]), 2)
+            self.assertEqual(call_args[0][0], _SPAN_KEY)
+            span_arg = call_args[0][1]
+            self.assertIsNotNone(span_arg)
+            finished_spans = self.memory_exporter.get_finished_spans()
+            self.assertEqual(len(finished_spans), 1)
+            finished_span = finished_spans[0]
+            self.assertEqual(
+                span_arg.context.trace_id, finished_span.context.trace_id
+            )
+            self.assertEqual(
+                span_arg.context.span_id, finished_span.context.span_id
+            )
+
 
 class TestProgrammaticHooks(InstrumentationTest, WsgiTestBase):
     def setUp(self):

From 4c64b46d5c1241d1d082ff7556c17247d0b26daf Mon Sep 17 00:00:00 2001
From: tammy-baylis-swi <tammy.baylis@solarwinds.com>
Date: Fri, 31 Oct 2025 17:04:31 -0700
Subject: [PATCH 4/8] Changelog

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 024990c91d..c2da4af24d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,6 +23,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   ([#3796](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3796))
 - `opentelemetry-instrumentation-fastapi`: Fix handling of APIRoute subclasses
   ([#3681](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3681))
+- `opentelemetry-instrumentation-flask`: Fix exemplars generation for `http.server.request.duration` and `http.server.duration` metrics
+  ([#3912](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3912))
 
 ### Added
 

From 37ac4fb50106ad86b5c18e4d82102a14c3e4e9b3 Mon Sep 17 00:00:00 2001
From: tammy-baylis-swi <tammy.baylis@solarwinds.com>
Date: Fri, 31 Oct 2025 17:28:02 -0700
Subject: [PATCH 5/8] lint

---
 .../src/opentelemetry/instrumentation/flask/__init__.py          | 1 +
 1 file changed, 1 insertion(+)

diff --git a/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py b/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py
index 46b2d7d633..1ae8f1de7b 100644
--- a/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py
+++ b/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py
@@ -331,6 +331,7 @@ def _rewrapped_app(
     sem_conv_opt_in_mode=_StabilityMode.DEFAULT,
     duration_histogram_new=None,
 ):
+    # pylint: disable=too-many-statements
     def _wrapped_app(wrapped_app_environ, start_response):
         # We want to measure the time for route matching, etc.
         # In theory, we could start the span here and use

From e8ce689c0e8ac2e60ab10c931b34ef9e8bf66806 Mon Sep 17 00:00:00 2001
From: tammy-baylis-swi <tammy.baylis@solarwinds.com>
Date: Fri, 31 Oct 2025 18:29:08 -0700
Subject: [PATCH 6/8] Add flask functional test for exemplars

---
 .../tests/flask/test_flask_functional.py      | 109 ++++++++++++++++++
 .../tests/test-requirements.txt               |   1 +
 tox.ini                                       |   1 +
 3 files changed, 111 insertions(+)
 create mode 100644 tests/opentelemetry-docker-tests/tests/flask/test_flask_functional.py

diff --git a/tests/opentelemetry-docker-tests/tests/flask/test_flask_functional.py b/tests/opentelemetry-docker-tests/tests/flask/test_flask_functional.py
new file mode 100644
index 0000000000..cb593ec6c9
--- /dev/null
+++ b/tests/opentelemetry-docker-tests/tests/flask/test_flask_functional.py
@@ -0,0 +1,109 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from flask import Flask
+
+from opentelemetry import metrics as metrics_api
+from opentelemetry.instrumentation.flask import FlaskInstrumentor
+from opentelemetry.sdk.metrics import AlwaysOnExemplarFilter
+from opentelemetry.test.globals_test import (
+    reset_metrics_globals,
+)
+from opentelemetry.test.test_base import TestBase
+from opentelemetry.trace import (
+    INVALID_SPAN_ID,
+    INVALID_TRACE_ID,
+)
+
+class TestFunctionalFlask(TestBase):
+    def setUp(self):
+        super().setUp()
+        self.memory_exporter.clear()
+        # This is done because set_meter_provider cannot override the
+        # current meter provider.
+        reset_metrics_globals()
+        (
+            self.meter_provider,
+            self.memory_metrics_reader,
+        ) = self.create_meter_provider(
+            exemplar_filter=AlwaysOnExemplarFilter(),
+        )
+        metrics_api.set_meter_provider(self.meter_provider)
+
+        self._app = Flask(__name__)
+        @self._app.route("/test/")
+        def test_route():
+            return "Test response"
+
+        self._client = self._app.test_client()
+
+        FlaskInstrumentor().instrument_app(
+            self._app,
+            meter_provider=self.meter_provider,
+        )
+
+    def tearDown(self):
+        FlaskInstrumentor().uninstrument()
+        super().tearDown()
+
+    def test_duration_metrics_exemplars(self):
+        """Should generate exemplars with trace and span IDs for Flask HTTP requests."""
+        self._client.get("/test/")  
+        self._client.get("/test/")
+        self._client.get("/test/")
+        
+        metrics_data = self.memory_metrics_reader.get_metrics_data()
+        self.assertIsNotNone(metrics_data)
+        self.assertTrue(len(metrics_data.resource_metrics) > 0)
+
+        duration_metric = None
+        metric_names = []
+        for resource_metric in metrics_data.resource_metrics:
+            for scope_metric in resource_metric.scope_metrics:
+                for metric in scope_metric.metrics:
+                    metric_names.append(metric.name)
+                    if metric.name in ["http.server.request.duration", "http.server.duration"]:
+                        duration_metric = metric
+                        break
+                if duration_metric:
+                    break
+            if duration_metric:
+                break
+        
+        self.assertIsNotNone(duration_metric)        
+        data_points = list(duration_metric.data.data_points)
+        self.assertTrue(len(data_points) > 0)
+
+        exemplar_count = 0
+        for data_point in data_points:
+            if hasattr(data_point, 'exemplars') and data_point.exemplars:
+                for exemplar in data_point.exemplars:
+                    exemplar_count += 1
+                    # Exemplar has required fields and valid span context
+                    self.assertIsNotNone(exemplar.value)
+                    self.assertIsNotNone(exemplar.time_unix_nano)
+                    self.assertIsNotNone(exemplar.span_id)
+                    self.assertNotEqual(exemplar.span_id, INVALID_SPAN_ID)
+                    self.assertIsNotNone(exemplar.trace_id)
+                    self.assertNotEqual(exemplar.trace_id, INVALID_TRACE_ID)
+                                        
+                    # Trace and span ID of exemplar are part of finished spans
+                    finished_spans = self.memory_exporter.get_finished_spans()
+                    finished_span_ids = [span.context.span_id for span in finished_spans]
+                    finished_trace_ids = [span.context.trace_id for span in finished_spans]
+                    self.assertIn(exemplar.span_id, finished_span_ids)
+                    self.assertIn(exemplar.trace_id, finished_trace_ids)
+        
+        # At least one exemplar was generated
+        self.assertGreater(exemplar_count, 0)
diff --git a/tests/opentelemetry-docker-tests/tests/test-requirements.txt b/tests/opentelemetry-docker-tests/tests/test-requirements.txt
index 4bee47eed1..365239d960 100644
--- a/tests/opentelemetry-docker-tests/tests/test-requirements.txt
+++ b/tests/opentelemetry-docker-tests/tests/test-requirements.txt
@@ -24,6 +24,7 @@ dockerpty==0.4.1
 docopt==0.6.2
 exceptiongroup==1.2.0
 flaky==3.7.0
+flask==3.0.2
 greenlet==3.0.3
 grpcio==1.63.2
 idna==2.10
diff --git a/tox.ini b/tox.ini
index 0be29fe137..e0155d25e8 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1008,6 +1008,7 @@ deps =
   -e {toxinidir}/instrumentation/opentelemetry-instrumentation-kafka-python
   -e {toxinidir}/instrumentation/opentelemetry-instrumentation-confluent-kafka
   -e {toxinidir}/instrumentation/opentelemetry-instrumentation-dbapi
+  -e {toxinidir}/instrumentation/opentelemetry-instrumentation-flask
   -e {toxinidir}/instrumentation/opentelemetry-instrumentation-mysql
   -e {toxinidir}/instrumentation/opentelemetry-instrumentation-mysqlclient
   -e {toxinidir}/instrumentation/opentelemetry-instrumentation-psycopg

From 5adc27438a9b9dbef402534719b5a5851118536a Mon Sep 17 00:00:00 2001
From: tammy-baylis-swi <tammy.baylis@solarwinds.com>
Date: Fri, 31 Oct 2025 18:31:16 -0700
Subject: [PATCH 7/8] lint

---
 .../tests/flask/test_flask_functional.py      | 29 ++++++++++++-------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/tests/opentelemetry-docker-tests/tests/flask/test_flask_functional.py b/tests/opentelemetry-docker-tests/tests/flask/test_flask_functional.py
index cb593ec6c9..88d60e7cb9 100644
--- a/tests/opentelemetry-docker-tests/tests/flask/test_flask_functional.py
+++ b/tests/opentelemetry-docker-tests/tests/flask/test_flask_functional.py
@@ -26,6 +26,7 @@
     INVALID_TRACE_ID,
 )
 
+
 class TestFunctionalFlask(TestBase):
     def setUp(self):
         super().setUp()
@@ -42,6 +43,7 @@ def setUp(self):
         metrics_api.set_meter_provider(self.meter_provider)
 
         self._app = Flask(__name__)
+
         @self._app.route("/test/")
         def test_route():
             return "Test response"
@@ -59,10 +61,10 @@ def tearDown(self):
 
     def test_duration_metrics_exemplars(self):
         """Should generate exemplars with trace and span IDs for Flask HTTP requests."""
-        self._client.get("/test/")  
         self._client.get("/test/")
         self._client.get("/test/")
-        
+        self._client.get("/test/")
+
         metrics_data = self.memory_metrics_reader.get_metrics_data()
         self.assertIsNotNone(metrics_data)
         self.assertTrue(len(metrics_data.resource_metrics) > 0)
@@ -73,21 +75,24 @@ def test_duration_metrics_exemplars(self):
             for scope_metric in resource_metric.scope_metrics:
                 for metric in scope_metric.metrics:
                     metric_names.append(metric.name)
-                    if metric.name in ["http.server.request.duration", "http.server.duration"]:
+                    if metric.name in [
+                        "http.server.request.duration",
+                        "http.server.duration",
+                    ]:
                         duration_metric = metric
                         break
                 if duration_metric:
                     break
             if duration_metric:
                 break
-        
-        self.assertIsNotNone(duration_metric)        
+
+        self.assertIsNotNone(duration_metric)
         data_points = list(duration_metric.data.data_points)
         self.assertTrue(len(data_points) > 0)
 
         exemplar_count = 0
         for data_point in data_points:
-            if hasattr(data_point, 'exemplars') and data_point.exemplars:
+            if hasattr(data_point, "exemplars") and data_point.exemplars:
                 for exemplar in data_point.exemplars:
                     exemplar_count += 1
                     # Exemplar has required fields and valid span context
@@ -97,13 +102,17 @@ def test_duration_metrics_exemplars(self):
                     self.assertNotEqual(exemplar.span_id, INVALID_SPAN_ID)
                     self.assertIsNotNone(exemplar.trace_id)
                     self.assertNotEqual(exemplar.trace_id, INVALID_TRACE_ID)
-                                        
+
                     # Trace and span ID of exemplar are part of finished spans
                     finished_spans = self.memory_exporter.get_finished_spans()
-                    finished_span_ids = [span.context.span_id for span in finished_spans]
-                    finished_trace_ids = [span.context.trace_id for span in finished_spans]
+                    finished_span_ids = [
+                        span.context.span_id for span in finished_spans
+                    ]
+                    finished_trace_ids = [
+                        span.context.trace_id for span in finished_spans
+                    ]
                     self.assertIn(exemplar.span_id, finished_span_ids)
                     self.assertIn(exemplar.trace_id, finished_trace_ids)
-        
+
         # At least one exemplar was generated
         self.assertGreater(exemplar_count, 0)

From 7384b8bdafce281afffdf59164143009d2c87d59 Mon Sep 17 00:00:00 2001
From: tammy-baylis-swi <tammy.baylis@solarwinds.com>
Date: Fri, 31 Oct 2025 18:38:30 -0700
Subject: [PATCH 8/8] More docker-test deps

---
 tox.ini | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tox.ini b/tox.ini
index e0155d25e8..d85d158529 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1020,6 +1020,8 @@ deps =
   -e {toxinidir}/instrumentation/opentelemetry-instrumentation-aiopg
   -e {toxinidir}/instrumentation/opentelemetry-instrumentation-redis
   -e {toxinidir}/instrumentation/opentelemetry-instrumentation-remoulade
+  -e {toxinidir}/instrumentation/opentelemetry-instrumentation-wsgi
+  -e {toxinidir}/util/opentelemetry-util-http
   opentelemetry-exporter-opencensus@{env:CORE_REPO}\#egg=opentelemetry-exporter-opencensus&subdirectory=exporter/opentelemetry-exporter-opencensus
 
 changedir =