From 62af0eb7f6b795ea6e98e69086ba5bedd552cc66 Mon Sep 17 00:00:00 2001 From: Alexandre Choura Date: Thu, 6 Nov 2025 11:00:40 +0100 Subject: [PATCH 01/12] feat(llmobs): add prompt tracking for reusable prompts in OpenAI integration This update introduces the ability to capture prompt metadata (id, version, variables) for reusable prompts in the OpenAI integration. The changes include enhancements to the `openai_set_meta_tags_from_response` function to validate and store prompt data, as well as new tests to ensure the correct functionality. A new YAML cassette for testing responses with prompt tracking has also been added. --- ddtrace/llmobs/_integrations/utils.py | 19 +++ .../cassettes/v1/response_with_prompt.yaml | 139 ++++++++++++++++++ tests/contrib/openai/test_openai_llmobs.py | 32 ++++ 3 files changed, 190 insertions(+) create mode 100644 tests/contrib/openai/cassettes/v1/response_with_prompt.yaml diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index f431f9342c6..adfc5a9fc31 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -16,6 +16,7 @@ from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED from ddtrace.llmobs._constants import INPUT_MESSAGES +from ddtrace.llmobs._constants import INPUT_PROMPT from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY from ddtrace.llmobs._constants import INPUT_VALUE from ddtrace.llmobs._constants import METADATA @@ -26,6 +27,7 @@ from ddtrace.llmobs._constants import TOOL_DEFINITIONS from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY from ddtrace.llmobs._utils import _get_attr +from ddtrace.llmobs._utils import _validate_prompt from ddtrace.llmobs._utils import load_data_value from ddtrace.llmobs._utils import safe_json from ddtrace.llmobs._utils import safe_load_json @@ -741,6 +743,14 @@ def openai_get_metadata_from_response( def openai_set_meta_tags_from_response(span: Span, kwargs: Dict[str, Any], response: Optional[Any]) -> None: """Extract input/output tags from response and set them as temporary "_ml_obs.meta.*" tags.""" input_data = kwargs.get("input", []) + + # For reusable prompts, input may not be in kwargs, extract from response.instructions + if not input_data and response and "prompt" in kwargs: + instructions = _get_attr(response, "instructions", []) + if instructions: + # Convert OpenAI Pydantic objects to dicts + input_data = load_data_value(instructions) + input_messages = openai_get_input_messages_from_response_input(input_data) if "instructions" in kwargs: @@ -753,6 +763,15 @@ def openai_set_meta_tags_from_response(span: Span, kwargs: Dict[str, Any], respo } ) + if "prompt" in kwargs: + prompt_data = kwargs.get("prompt") + if prompt_data: + try: + validated_prompt = _validate_prompt(prompt_data, strict_validation=False) + span._set_ctx_item(INPUT_PROMPT, validated_prompt) + except (TypeError, ValueError) as e: + logger.debug("Failed to validate prompt for OpenAI response: %s", e) + if span.error or not response: span._set_ctx_item(OUTPUT_MESSAGES, [Message(content="")]) return diff --git a/tests/contrib/openai/cassettes/v1/response_with_prompt.yaml b/tests/contrib/openai/cassettes/v1/response_with_prompt.yaml new file mode 100644 index 00000000000..fc078bf299f --- /dev/null +++ b/tests/contrib/openai/cassettes/v1/response_with_prompt.yaml @@ -0,0 +1,139 @@ +interactions: +- request: + body: 
'{"prompt":{"id":"pmpt_690b24669d8c81948acc0e98da10e6490190feb3a62eee0b","version":"3","variables":{"question":"What + is machine learning?"}}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '140' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 2.3.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 2.3.0 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.18 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//lFhtbyM3Dv6+v4LwJwdwDNvxJnG+HBbtHbC4dlFc7wWHbmFwNPRYjUaa + FSk7vmL/+4HSjN+SXq+fEksjieTz8BGpX98BjGw9eoJRJO7Ws8XjaoFmcVfh8uFh/jCb369m5j1V + s+Xjavk4XxncLMwjLmqi6m65uqtGE90iVL+QkWGb4JnKuImEQvUadW7+cL9Yzh/v3i/yHAtKYl1j + Qts5EqrLogrNcxND8mrXBh1TGbbOWd+MnuDXdwAAow4PFHV9TTtyoaM4egfwNX9MMQad88m5PGD9 + cMq6JkHr+HKWJSYjNngd/ykfUI4BGMmhIz2nJWZsimt5wgQv5NW3n34+Dsbg6JVVvV1/cNN+6LTq + YqX1XZK10IscF5d5HXmC0b9DAowECFty3SY5QGbLgl6m8IMjZAL0vKcIsrUMXxKxRuAJ/rVFAcvQ + otlaT+AIo7e++dPoeM7X/r/Xfic+uvyunx+1+LIOSbK94Zn8RfB1UkJwa4PuEpY21OR0z7C8ba23 + t4vZ4v3tbHk7v+95l/d8jVhPaf4fhDa0fFRCP5pq9X61WD3OF2Z+Rw8nIIY4R0IO6v5pilPbYjxk + 4N9EtxjQcvPbFsxX71cmp9TKmLq+o8eH+4fZ5gFfW/CKI7+VO3+MQAMirxiE3gfBYzb8fDHpQtPF + UL0xMxDv+yvaKJUQOFUbS66GsAGMYjfWWHRgvZBztiFvCMYfPt6AKPsauyMG9S4JRQbZEmBlnZUD + SIDPaTGbm3xA/reGDkUoegb0NbT4TFCTsaw+QIjQRaptSXDYxNBCjYIT2FvZhiRQkdpJL52zxoo7 + QBdDE7FtqYZNiEBotsAdGTUbBPl5Ch89BNlShH2INU9ARYQw+7ePVnRDFupuq8Ot/oWYHPEEDiHB + hqgGekHFjtUf9ICuCdHKtp1kF6xAlayrGawwhL3PkYoeHeS00FUtdpBVgGG8IZQUiW90oiDLMD53 + O8RTSG6mn/1n/1c6gK0J+Ul/zafwLQre1tHuyJdTGOCzB8jxXizg7xFtRlSj9wQIJjhHeX91++jR + mKbNFGyLjXqsvJgAk+cQIRLW1jd8U2LqsCJHdQYig2xCjGRkUKax9cCpo7izTPWRUjfqDdu2c4fB + hn4TRTNvynlt8m+tnl669Zc+dk/QEnKKWDkCFIm2SkIM9CIR8wmZOmqmBgDGnX3Rg6xXALO7k8wG + MCF5KRNQB5Na8jIBEjO9Pvt7DbOGskXZUotiDTrYJN9HNUK5mlKkPqx4hBEkEk3AU4rowJPsQ3ye + aLy6EAV2ZCTEQcVvYL8NTNBhxJZyUuXbof4lsbpWp6jIygAxh5KJVjQZ6mSoZGG0zVaODNPkOIQU + T+sGEmSGLabw3aACenBtm/aKUz++xucpH1RobrlsTTUEX9ieM/6uWAAd2shT+Chldc6mnvUDDziZ + LSD3omFQYMdTqEMz6bXDp5ZijvsOXVL+bkPSUEVriG8mioJBIU1QneUO27yHDwL64xrUf7zFunOv + VGlCJIbkhwzIjJIAG+tr2Nq6Jn8GfYigNVGnqQNj4xSzjJdJLKFVNKsDdCmard7rFW1xZ0OcQKQ6 + mZyytiWvrMFBRHeWEzr7H4Jyi7xkG659+RtZvwnRkHL4zBmVrEaHsi6hES5JjB7I72wMvpBe8FmP + x16Fsmy92DYfm9rkUOyOINIeYw3jpBGzHhpsCTqHB+ubCcRQBbGG1R0T2pZ8na8nIN9YT72c3U1P + AqUSSopm+e5tFes0VgWWo/qCVW/UJncA2RM+FwU+S5vxnjQHVNkiEXDnrP4w5CUGq1dBTvTsqvXF + 1VyQKoP1MDmX0atw/xOd7Z1TH0RrMg03k1ogRXmYhLWaI2bYbynfQidyNeQpZmAZ9uTyVeFpP1Ex + JPJ5h8z8ZZMw1jlcDer1BWFHcWMlX15jCQGMC6yRGAZ9sEwK0IUfTFIgWE7hm9C24ew662/kN26T + 76wnjMPU2JWfkZpIrESdgAuNZbHmbPDmGkui2wqVNcM+F+qolEFfh1aVilhxalSGyMttFUJWvvzd + zStVekNDrxz4dCG8xdGaqDtVPmMT/C64JDnv9MsimTtbHIxkUoyaRccppi9Ji6Fri7455fz4+bYl + 9DyBraWI0WyzeJ1U4Wrpt5epnyUhM2z8wzcfJiC3P376843C934KHzqtgUrxd2VAX4z1xsO4dH1Q + k5QSYAIbzDWdJmnjrbyG65NetOjAoW8SNiqxwSiw6tXQbkhEzw7LlkxebJYe9OgObF8h1RGZ7fmZ + GQc+eNkSW75Ws14/KAIfWKhlGLdhZ5Up5Z6T6xM++NCiO5wchfEmYqonA/LACqOVw/XCJMGHNiSG + HW2t0dJIbRvkTEP+0QMxK+CTV/0WYJLQovQ3bx8rrbRqy0ZzVT9KTNrcHQtgWzJ8KISz0P1GGQgl + c7Isldqg7KH3KUMXtAIzoaW+ZlYdPCvCbdvFsCPoKG5CbFEreGRoQ+zro4rKYtyhdVpUTQBr7LKQ + SBhuHc0CdLf7EF2tPlZOQRlquUhfko2lsNhi4nxbbNHXJuJGszeX1dP/pzM9tr/X7akqq3PkLttQ + 
iam8PHSRdjYkXg+PG+vc3h3b1C6GtpPTw0Rp/rq2k/X9alYtlvf3q/pRe73lIxozo9VjjfMZ3S9X + s/lqtqHqDu8XRDSr+j5stMNoNWB83BVgNDToZ2O/9xxw7Ml+t6X/et7HjnYUuZw0uju9qhRH1wbN + ltbPdHgdhH4uknagZYPjF6c++hgp2mxClNLi1ja1g/un/lpXH89n3JAc1rbWzTeWLl54WOstQ2ux + w6vQBpMrwRixhEgXz0lCbacXfcrj8+msH305AzKzWs4hGIKdv7uOWBXYyuHMm6PhhVfbYE0hYpIw + Ok6cOumRhG591l/PjoPduY0x+SLQ2U3LSpT+7Szld4ITE/3Fk8tiMXk9fvYednQzo1ifFs4uXL1+ + ybl/nL0189bGRw6crV5ebC5B0J1mH2aLYxTzy9L58xAJqs7oAV/fff0vAAAA//8DAHPbpEfNFAAA + headers: + CF-RAY: + - 99a3476d3f40034e-CDG + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 06 Nov 2025 08:39:19 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=TDFxHybuF51a1ZDEYa83d238sg4jXiDN9XNIgltrxEo-1762418359-1.0.1.1-88kU.E2l.a4zKq49TvFPxb4nR1iu55wGnJ97Yl_lzBJOZjKYu35ZtUQaO7KJRYie1el18xqzGP5dxuvEgiNco_Xe62xhIB74gcexFW347jY; + path=/; expires=Thu, 06-Nov-25 09:09:19 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=fr4jipzxLgLJezlGineMyVXLv7wPB5vaLORozEQbmqM-1762418359532-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-staging + openai-processing-ms: + - '7213' + openai-project: + - proj_gt6TQZPRbZfoY2J9AQlEJMpd + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '7217' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999772' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_a545001a07e940499245fc93777c84aa + status: + code: 200 + message: OK +version: 1 diff --git a/tests/contrib/openai/test_openai_llmobs.py b/tests/contrib/openai/test_openai_llmobs.py index 01d4ff46850..996f5bdd6b1 100644 --- a/tests/contrib/openai/test_openai_llmobs.py +++ b/tests/contrib/openai/test_openai_llmobs.py @@ -2150,6 +2150,38 @@ class MathResponse(BaseModel): ) ) + @pytest.mark.skipif( + parse_version(openai_module.version.VERSION) < (1, 87), reason="Reusable prompts only available in openai >= 1.87" + ) + def test_response_with_prompt_tracking(self, openai, mock_llmobs_writer, mock_tracer): + """Test that prompt metadata (id, version, variables) is captured for reusable prompts.""" + with get_openai_vcr(subdirectory_name="v1").use_cassette("response_with_prompt.yaml"): + client = openai.OpenAI() + resp = client.responses.create( + prompt={ + "id": "pmpt_690b24669d8c81948acc0e98da10e6490190feb3a62eee0b", + "version": "3", + "variables": {"question": "What is machine learning?"}, + } + ) + span = mock_tracer.pop_traces()[0][0] + assert mock_llmobs_writer.enqueue.call_count == 1 + + call_args = mock_llmobs_writer.enqueue.call_args[0][0] + + # Verify prompt metadata is captured + assert "prompt" in call_args["meta"]["input"] + actual_prompt = call_args["meta"]["input"]["prompt"] + assert actual_prompt["id"] == "pmpt_690b24669d8c81948acc0e98da10e6490190feb3a62eee0b" + assert actual_prompt["version"] == "3" + assert actual_prompt["variables"] == {"question": "What is machine learning?"} + + # Verify the actual prompt content is captured in input messages + input_messages = call_args["meta"]["input"]["messages"] + assert len(input_messages) >= 1 + assert input_messages[0]["role"] == 
"user" + assert input_messages[0]["content"] == "You are a helpful assistant. Please answer this question: What is machine learning?" + @pytest.mark.parametrize( "ddtrace_global_config", From 848ef66d72a50fb1bc5c963aa991a717e3cc9038 Mon Sep 17 00:00:00 2001 From: Alexandre Choura Date: Thu, 6 Nov 2025 11:02:39 +0100 Subject: [PATCH 02/12] remove comment --- ddtrace/llmobs/_integrations/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index adfc5a9fc31..8d503b389fa 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -748,7 +748,6 @@ def openai_set_meta_tags_from_response(span: Span, kwargs: Dict[str, Any], respo if not input_data and response and "prompt" in kwargs: instructions = _get_attr(response, "instructions", []) if instructions: - # Convert OpenAI Pydantic objects to dicts input_data = load_data_value(instructions) input_messages = openai_get_input_messages_from_response_input(input_data) From 57b296d1c763018bb886b025f93f9d0378cca979 Mon Sep 17 00:00:00 2001 From: Alexandre Choura Date: Thu, 6 Nov 2025 11:05:01 +0100 Subject: [PATCH 03/12] lint --- ddtrace/llmobs/_integrations/utils.py | 4 ++-- tests/contrib/openai/test_openai_llmobs.py | 18 +++++++++++------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index 8d503b389fa..1d4ccd5d5ac 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -743,13 +743,13 @@ def openai_get_metadata_from_response( def openai_set_meta_tags_from_response(span: Span, kwargs: Dict[str, Any], response: Optional[Any]) -> None: """Extract input/output tags from response and set them as temporary "_ml_obs.meta.*" tags.""" input_data = kwargs.get("input", []) - + # For reusable prompts, input may not be in kwargs, extract from response.instructions if not input_data and response and "prompt" in kwargs: instructions = _get_attr(response, "instructions", []) if instructions: input_data = load_data_value(instructions) - + input_messages = openai_get_input_messages_from_response_input(input_data) if "instructions" in kwargs: diff --git a/tests/contrib/openai/test_openai_llmobs.py b/tests/contrib/openai/test_openai_llmobs.py index 996f5bdd6b1..f97d5e7fe50 100644 --- a/tests/contrib/openai/test_openai_llmobs.py +++ b/tests/contrib/openai/test_openai_llmobs.py @@ -2151,36 +2151,40 @@ class MathResponse(BaseModel): ) @pytest.mark.skipif( - parse_version(openai_module.version.VERSION) < (1, 87), reason="Reusable prompts only available in openai >= 1.87" + parse_version(openai_module.version.VERSION) < (1, 87), + reason="Reusable prompts only available in openai >= 1.87", ) def test_response_with_prompt_tracking(self, openai, mock_llmobs_writer, mock_tracer): """Test that prompt metadata (id, version, variables) is captured for reusable prompts.""" with get_openai_vcr(subdirectory_name="v1").use_cassette("response_with_prompt.yaml"): client = openai.OpenAI() - resp = client.responses.create( + client.responses.create( prompt={ "id": "pmpt_690b24669d8c81948acc0e98da10e6490190feb3a62eee0b", "version": "3", "variables": {"question": "What is machine learning?"}, } ) - span = mock_tracer.pop_traces()[0][0] + mock_tracer.pop_traces() assert mock_llmobs_writer.enqueue.call_count == 1 - + call_args = mock_llmobs_writer.enqueue.call_args[0][0] - + # Verify prompt metadata is captured assert "prompt" in 
call_args["meta"]["input"] actual_prompt = call_args["meta"]["input"]["prompt"] assert actual_prompt["id"] == "pmpt_690b24669d8c81948acc0e98da10e6490190feb3a62eee0b" assert actual_prompt["version"] == "3" assert actual_prompt["variables"] == {"question": "What is machine learning?"} - + # Verify the actual prompt content is captured in input messages input_messages = call_args["meta"]["input"]["messages"] assert len(input_messages) >= 1 assert input_messages[0]["role"] == "user" - assert input_messages[0]["content"] == "You are a helpful assistant. Please answer this question: What is machine learning?" + assert ( + input_messages[0]["content"] + == "You are a helpful assistant. Please answer this question: What is machine learning?" + ) @pytest.mark.parametrize( From 27bba9a1fcd176ed89479dbbb0706d240fd14180 Mon Sep 17 00:00:00 2001 From: Alexandre Choura Date: Thu, 6 Nov 2025 15:08:31 +0100 Subject: [PATCH 04/12] feat(llmobs): add chat template extraction from OpenAI response instructions This update introduces a new function, `_extract_chat_template_from_instructions`, which extracts chat templates from OpenAI response instructions by replacing variable values with placeholders. Additionally, the `openai_set_meta_tags_from_response` function has been modified to utilize this new functionality, ensuring that chat templates are included in the prompt data. Tests have been added to verify the correct extraction and formatting of chat templates, including variable placeholders. --- ddtrace/llmobs/_integrations/utils.py | 64 +++++++++++++ .../cassettes/v1/response_with_prompt.yaml | 95 +++++++++---------- tests/contrib/openai/test_openai_llmobs.py | 25 ++++- 3 files changed, 129 insertions(+), 55 deletions(-) diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index 1d4ccd5d5ac..dd059700520 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -740,6 +740,60 @@ def openai_get_metadata_from_response( return metadata +def _extract_chat_template_from_instructions( + instructions: List[Any], variables: Dict[str, Any] +) -> List[Dict[str, str]]: + """ + Extract a chat template from OpenAI response instructions by replacing variable values with placeholders. 
+ + Args: + instructions: List of instruction messages from the OpenAI response + variables: Dictionary of variables used in the prompt + + Returns: + List of chat template messages with placeholders (e.g., {{variable_name}}) + """ + chat_template = [] + + # Create a mapping of variable values to placeholder names + value_to_placeholder = {} + for var_name, var_value in variables.items(): + if hasattr(var_value, "text"): # ResponseInputText + value_str = str(var_value.text) + else: + value_str = str(var_value) + value_to_placeholder[value_str] = "{{" + var_name + "}}" + + for instruction in instructions: + role = _get_attr(instruction, "role", "") + if not role: + continue + + content_items = _get_attr(instruction, "content", []) + if not content_items: + # Skip empty content (e.g., developer role with no content) + continue + + text_parts = [] + for content_item in content_items: + text = _get_attr(content_item, "text", "") + if text: + text_parts.append(str(text)) + + if not text_parts: + continue + + full_text = "".join(text_parts) + + # Replace variable values with placeholders + for value_str, placeholder in value_to_placeholder.items(): + full_text = full_text.replace(value_str, placeholder) + + chat_template.append({"role": role, "content": full_text}) + + return chat_template + + def openai_set_meta_tags_from_response(span: Span, kwargs: Dict[str, Any], response: Optional[Any]) -> None: """Extract input/output tags from response and set them as temporary "_ml_obs.meta.*" tags.""" input_data = kwargs.get("input", []) @@ -766,6 +820,16 @@ def openai_set_meta_tags_from_response(span: Span, kwargs: Dict[str, Any], respo prompt_data = kwargs.get("prompt") if prompt_data: try: + # Extract chat_template from response instructions if available + if response and not prompt_data.get("chat_template") and not prompt_data.get("template"): + instructions = _get_attr(response, "instructions", None) + variables = prompt_data.get("variables", {}) + if instructions and variables: + chat_template = _extract_chat_template_from_instructions(instructions, variables) + if chat_template: + prompt_data = dict(prompt_data) # Make a copy to avoid modifying the original + prompt_data["chat_template"] = chat_template + validated_prompt = _validate_prompt(prompt_data, strict_validation=False) span._set_ctx_item(INPUT_PROMPT, validated_prompt) except (TypeError, ValueError) as e: diff --git a/tests/contrib/openai/cassettes/v1/response_with_prompt.yaml b/tests/contrib/openai/cassettes/v1/response_with_prompt.yaml index fc078bf299f..02a908c5183 100644 --- a/tests/contrib/openai/cassettes/v1/response_with_prompt.yaml +++ b/tests/contrib/openai/cassettes/v1/response_with_prompt.yaml @@ -1,6 +1,6 @@ interactions: - request: - body: '{"prompt":{"id":"pmpt_690b24669d8c81948acc0e98da10e6490190feb3a62eee0b","version":"3","variables":{"question":"What + body: '{"prompt":{"id":"pmpt_690b24669d8c81948acc0e98da10e6490190feb3a62eee0b","version":"4","variables":{"question":"What is machine learning?"}}}' headers: accept: @@ -40,49 +40,44 @@ interactions: response: body: string: !!binary | - H4sIAAAAAAAAAwAAAP//lFhtbyM3Dv6+v4LwJwdwDNvxJnG+HBbtHbC4dlFc7wWHbmFwNPRYjUaa - FSk7vmL/+4HSjN+SXq+fEksjieTz8BGpX98BjGw9eoJRJO7Ws8XjaoFmcVfh8uFh/jCb369m5j1V - s+Xjavk4XxncLMwjLmqi6m65uqtGE90iVL+QkWGb4JnKuImEQvUadW7+cL9Yzh/v3i/yHAtKYl1j - Qts5EqrLogrNcxND8mrXBh1TGbbOWd+MnuDXdwAAow4PFHV9TTtyoaM4egfwNX9MMQad88m5PGD9 - cMq6JkHr+HKWJSYjNngd/ykfUI4BGMmhIz2nJWZsimt5wgQv5NW3n34+Dsbg6JVVvV1/cNN+6LTq - 
YqX1XZK10IscF5d5HXmC0b9DAowECFty3SY5QGbLgl6m8IMjZAL0vKcIsrUMXxKxRuAJ/rVFAcvQ - otlaT+AIo7e++dPoeM7X/r/Xfic+uvyunx+1+LIOSbK94Zn8RfB1UkJwa4PuEpY21OR0z7C8ba23 - t4vZ4v3tbHk7v+95l/d8jVhPaf4fhDa0fFRCP5pq9X61WD3OF2Z+Rw8nIIY4R0IO6v5pilPbYjxk - 4N9EtxjQcvPbFsxX71cmp9TKmLq+o8eH+4fZ5gFfW/CKI7+VO3+MQAMirxiE3gfBYzb8fDHpQtPF - UL0xMxDv+yvaKJUQOFUbS66GsAGMYjfWWHRgvZBztiFvCMYfPt6AKPsauyMG9S4JRQbZEmBlnZUD - SIDPaTGbm3xA/reGDkUoegb0NbT4TFCTsaw+QIjQRaptSXDYxNBCjYIT2FvZhiRQkdpJL52zxoo7 - QBdDE7FtqYZNiEBotsAdGTUbBPl5Ch89BNlShH2INU9ARYQw+7ePVnRDFupuq8Ot/oWYHPEEDiHB - hqgGekHFjtUf9ICuCdHKtp1kF6xAlayrGawwhL3PkYoeHeS00FUtdpBVgGG8IZQUiW90oiDLMD53 - O8RTSG6mn/1n/1c6gK0J+Ul/zafwLQre1tHuyJdTGOCzB8jxXizg7xFtRlSj9wQIJjhHeX91++jR - mKbNFGyLjXqsvJgAk+cQIRLW1jd8U2LqsCJHdQYig2xCjGRkUKax9cCpo7izTPWRUjfqDdu2c4fB - hn4TRTNvynlt8m+tnl669Zc+dk/QEnKKWDkCFIm2SkIM9CIR8wmZOmqmBgDGnX3Rg6xXALO7k8wG - MCF5KRNQB5Na8jIBEjO9Pvt7DbOGskXZUotiDTrYJN9HNUK5mlKkPqx4hBEkEk3AU4rowJPsQ3ye - aLy6EAV2ZCTEQcVvYL8NTNBhxJZyUuXbof4lsbpWp6jIygAxh5KJVjQZ6mSoZGG0zVaODNPkOIQU - T+sGEmSGLabw3aACenBtm/aKUz++xucpH1RobrlsTTUEX9ieM/6uWAAd2shT+Chldc6mnvUDDziZ - LSD3omFQYMdTqEMz6bXDp5ZijvsOXVL+bkPSUEVriG8mioJBIU1QneUO27yHDwL64xrUf7zFunOv - VGlCJIbkhwzIjJIAG+tr2Nq6Jn8GfYigNVGnqQNj4xSzjJdJLKFVNKsDdCmard7rFW1xZ0OcQKQ6 - mZyytiWvrMFBRHeWEzr7H4Jyi7xkG659+RtZvwnRkHL4zBmVrEaHsi6hES5JjB7I72wMvpBe8FmP - x16Fsmy92DYfm9rkUOyOINIeYw3jpBGzHhpsCTqHB+ubCcRQBbGG1R0T2pZ8na8nIN9YT72c3U1P - AqUSSopm+e5tFes0VgWWo/qCVW/UJncA2RM+FwU+S5vxnjQHVNkiEXDnrP4w5CUGq1dBTvTsqvXF - 1VyQKoP1MDmX0atw/xOd7Z1TH0RrMg03k1ogRXmYhLWaI2bYbynfQidyNeQpZmAZ9uTyVeFpP1Ex - JPJ5h8z8ZZMw1jlcDer1BWFHcWMlX15jCQGMC6yRGAZ9sEwK0IUfTFIgWE7hm9C24ew662/kN26T - 76wnjMPU2JWfkZpIrESdgAuNZbHmbPDmGkui2wqVNcM+F+qolEFfh1aVilhxalSGyMttFUJWvvzd - zStVekNDrxz4dCG8xdGaqDtVPmMT/C64JDnv9MsimTtbHIxkUoyaRccppi9Ji6Fri7455fz4+bYl - 9DyBraWI0WyzeJ1U4Wrpt5epnyUhM2z8wzcfJiC3P376843C934KHzqtgUrxd2VAX4z1xsO4dH1Q - k5QSYAIbzDWdJmnjrbyG65NetOjAoW8SNiqxwSiw6tXQbkhEzw7LlkxebJYe9OgObF8h1RGZ7fmZ - GQc+eNkSW75Ws14/KAIfWKhlGLdhZ5Up5Z6T6xM++NCiO5wchfEmYqonA/LACqOVw/XCJMGHNiSG - HW2t0dJIbRvkTEP+0QMxK+CTV/0WYJLQovQ3bx8rrbRqy0ZzVT9KTNrcHQtgWzJ8KISz0P1GGQgl - c7Isldqg7KH3KUMXtAIzoaW+ZlYdPCvCbdvFsCPoKG5CbFEreGRoQ+zro4rKYtyhdVpUTQBr7LKQ - SBhuHc0CdLf7EF2tPlZOQRlquUhfko2lsNhi4nxbbNHXJuJGszeX1dP/pzM9tr/X7akqq3PkLttQ - iam8PHSRdjYkXg+PG+vc3h3b1C6GtpPTw0Rp/rq2k/X9alYtlvf3q/pRe73lIxozo9VjjfMZ3S9X - s/lqtqHqDu8XRDSr+j5stMNoNWB83BVgNDToZ2O/9xxw7Ml+t6X/et7HjnYUuZw0uju9qhRH1wbN - ltbPdHgdhH4uknagZYPjF6c++hgp2mxClNLi1ja1g/un/lpXH89n3JAc1rbWzTeWLl54WOstQ2ux - w6vQBpMrwRixhEgXz0lCbacXfcrj8+msH305AzKzWs4hGIKdv7uOWBXYyuHMm6PhhVfbYE0hYpIw - Ok6cOumRhG591l/PjoPduY0x+SLQ2U3LSpT+7Szld4ITE/3Fk8tiMXk9fvYednQzo1ifFs4uXL1+ - ybl/nL0189bGRw6crV5ebC5B0J1mH2aLYxTzy9L58xAJqs7oAV/fff0vAAAA//8DAHPbpEfNFAAA + H4sIAAAAAAAAA7xYWW8byRF+968o8CGQAYoYHqJFvQTGbhZYJM4GmyyMIDaImu6amVr1MemDFL3w + fw+qhxySkh1vXvImdU/X8dVXF397BTBhPXmASaDYb6u7iqo7Pd9sar1EWlbz9aZSuFrS6k7f3883 + c1xu5tWmwfWqUW82b5rJVET4+ldS6STGu0jDuQqEifQW5W7+Zr1YLdfzu/tyFxOmHOWN8rY3lEgP + j2pUj23w2YldDZpIwzEbw66dPMBvrwAAJj0eKMh7TTsyvqcweQXwuXxMIXi5c9mYcsDupGWrKSGb + eH0bU8gqsXdy/q+iYFADMEmHnkSPpRixHVwrF8q7RC6NLy5fXb1k1+e0TfSUxsfDvZw8wOR7DqQS + /AG+825HIaJYggaSdzQZH3w+/vVxNCB4Qy8QOGLwf3Xgnz4DBgKEjkzfZAMYI8eELs3gb4YwEqCL + ewqQOo7w70xRfHyA9x0m4AgWVceOwBAGx6794+/xO8fR5VfH+4nFp63PqdjrH8ldBVouk/dmq9Bc + U8B6TUZk+tWtZce3i2pxd1utbufrI8eLzJfsOKZP/HryNFW1UZI892pxp5ekqvVcKXW/PgfihHMg + jF7cP1/FbC2Ggyj++MXoDgbY2H7VgtV8tdxUYkGt5zXVzRu1vluuNqvqpQUvOPK1PP3fCHSKyAsG + oXM+4Zh5H68ujW/74Osv3JyI9+4ZbYRKCHVApzrwDWBI3LBiNMAukTHcklMESWjX8o4iiFs5UYiQ + 
OgKs2XA6QPKDTOgxJQouAjoNFh8JNCmOYjA0wVvQmHAKAVNX2I0OahJL6Kk3rDiZA/TBtwGtJQ2N + D0A7CgeIihwG9jP40YEvr/c+6DgFKUeEWuw/+Az7wEkEfsiLaq64uZUUuk0duVtxoxxrCNlQnJYH + DZEuzsRDTGSBnlAiN7jACRpuc6AIPqfyWXaagjmIjiIFvANOEfzezT64D+7PdADWhPFB/hN9iwV8 + jwmLwBEg+OBgMHIJUhCUN0bKmgAENy7bmkKcgoRuCmyxFXsjuegDBELNro1ToKRmr4tggzUZsXff + kQNHpEnPrrS8PccdTesDp84COjSHTxSHGBflyYOmJLaMxhadZiBex32cXbj2TmoB1JmN2HSl8R9C + kbMq/WuOKRZqBSnYPQa0VMh0IwgOISu1ZQjTa7GlpphAYZ9yECb6SKNd1/79VKgakB3paQlVEQUK + 3UDFPpDmoW+JR2dmegFsP4XsIpErKBQP3yE7kKSMwq7nRbfEdz6Dv+eewo4j6TPAxa6TYb9EikN8 + SJ/pdUOzdjaFvvPJR0jYtqSPEKiRpz4cj7Rvj5jMroX/RVRKGlvse1Fd0qy0ILgZeFNgHGoK3BQ7 + 4nMp33lrvYOE8TE+gDLSkBpWJeJwE3u0sIszcD7dyj+vpxCoDRRjuT8B61rofJYABVYUXxcQFzP4 + xcVvYfTeh8cIe04dZHfCasiFASeEng1JHFSOyVsK0OegOmmWHcfkA9MLt35gpyMM44rQxweQYUlw + usrb/w5GjomCWHxzegyRLRsMoy3x9RQ0W3KxzCJSEwPpYUiCm8i2N9yUmiE+DcAsZ/AzsWt8UGTJ + pa8gcwxwfYAUpDJLspeJTWKgiHelEtEegy607smhSSwMSx27R4GsZEVJfGjR0m1vsBiDLbn0HLSS + tXIBPavHCHhMGXaADsjtOHhXDE4eLD6x5U8EKtssFWJHR2PA76TEs6Xi7c+E5nbvgzmnwGWB/JNF + NlCI1rA5Nhg8YiKKYu8TZLdHl0jDsfEOFWAsRDsuIe6DF+gFHm8tOT3QmFzLTmD5K6XG8NMU3lr8 + 5N3rSyE/SsaUl63j8oydtIkgieqoQLSJQ9YC9j3cYE7eYmIFDaqifxjwS0qza6/Ev83JO299jrCj + jpWh53463HGLqVSypmF1+fqHgFkfi7OYNnSsEpka3SO7tkD9vjtII7ClSg4ov+j8vd8LxhbdARKp + znnjWyHNnkAyeOi6Gg9TqZ2F8r5Jexlbo8WQKEzBehliNfYS9WkhZk2iEzBBh07LCgLDIPQkbB0Z + UCosvI1DhsvDYa4YLQOZldhlEkja4PfTF+X3FAoZbwsCOUp4pUsqLmOLyNXI5gCGG4JHoj7KoIFO + OtXs98zM42D+fHCW3mUMmesBOYU87F99oB37HLenFW9bBs9xgO6Dt306r2fDWNrbPm3Xm6perNbr + jb6XOXh1j0pVtLnXOK9ovdpU803VUL3E9YKIqvo4IU52GBhrQ3GUCjA5rQ4XZ99aVMZp8ZvLxufL + CXsiq9igabI675aDo1uFqqPtIx1egnC8CySz8SBg/OI84Y9IUdP4kIbhW3O2J/fPk7+8HvVHbCgd + tqxFeMN0tedG6UmKtolPu3GD2QxgTKSl0NVSncj2FFA6iWzos+p4+nQRyMYHi+kyBCewy3fPEat9 + 5HS48GY0fOBV51kNRMzJT8aL84w/Sb7fXkz+1XjYX9oYshuaeXGToxDl+AtCLhvMmYnuahlczqcv + zy9+FRjdLFHU54fVlavPd8z1avGlmy8JHjlwfj3fLK6kJ5/QXAh/sxxhLEvv5eZKCaXoiIbPrz7/ + BwAA//8DAHhWLkvUEQAA headers: CF-RAY: - - 99a3476d3f40034e-CDG + - 99a4fa22dbb601cc-CDG Connection: - keep-alive Content-Encoding: @@ -90,14 +85,14 @@ interactions: Content-Type: - application/json Date: - - Thu, 06 Nov 2025 08:39:19 GMT + - Thu, 06 Nov 2025 13:36:05 GMT Server: - cloudflare Set-Cookie: - - __cf_bm=TDFxHybuF51a1ZDEYa83d238sg4jXiDN9XNIgltrxEo-1762418359-1.0.1.1-88kU.E2l.a4zKq49TvFPxb4nR1iu55wGnJ97Yl_lzBJOZjKYu35ZtUQaO7KJRYie1el18xqzGP5dxuvEgiNco_Xe62xhIB74gcexFW347jY; - path=/; expires=Thu, 06-Nov-25 09:09:19 GMT; domain=.api.openai.com; HttpOnly; + - __cf_bm=oAc59HaJwUjrUv2uHgTgDkTP1sVynTMJVzliRX11b7o-1762436165-1.0.1.1-STkKgI9BlQHAvGzS.Rqi6UQVssVb5_M5J9QpUZICssvaO35gDy6yDFJo.tYdjVGKAGufaBJ9rwowcVi0u.xMc6oV0zOSTM2nqB6IjkP9W.4; + path=/; expires=Thu, 06-Nov-25 14:06:05 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - - _cfuvid=fr4jipzxLgLJezlGineMyVXLv7wPB5vaLORozEQbmqM-1762418359532-0.0.1.1-604800000; + - _cfuvid=bDZxnxovYk7l9OeXSX6u2DbwKyUR5GDTvi_l5SLAkiY-1762436165819-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None Strict-Transport-Security: - max-age=31536000; includeSubDomains; preload @@ -112,13 +107,13 @@ interactions: openai-organization: - datadog-staging openai-processing-ms: - - '7213' + - '7512' openai-project: - proj_gt6TQZPRbZfoY2J9AQlEJMpd openai-version: - '2020-10-01' x-envoy-upstream-service-time: - - '7217' + - '7514' x-ratelimit-limit-requests: - '30000' x-ratelimit-limit-tokens: @@ -126,13 +121,13 @@ interactions: x-ratelimit-remaining-requests: - '29999' x-ratelimit-remaining-tokens: - - '149999772' + - '149999762' x-ratelimit-reset-requests: - 
2ms x-ratelimit-reset-tokens: - 0s x-request-id: - - req_a545001a07e940499245fc93777c84aa + - req_2409b397395c43bcaa8b763bb736ebf5 status: code: 200 message: OK diff --git a/tests/contrib/openai/test_openai_llmobs.py b/tests/contrib/openai/test_openai_llmobs.py index f97d5e7fe50..b0e2ac84d7f 100644 --- a/tests/contrib/openai/test_openai_llmobs.py +++ b/tests/contrib/openai/test_openai_llmobs.py @@ -2161,7 +2161,7 @@ def test_response_with_prompt_tracking(self, openai, mock_llmobs_writer, mock_tr client.responses.create( prompt={ "id": "pmpt_690b24669d8c81948acc0e98da10e6490190feb3a62eee0b", - "version": "3", + "version": "4", "variables": {"question": "What is machine learning?"}, } ) @@ -2174,15 +2174,30 @@ def test_response_with_prompt_tracking(self, openai, mock_llmobs_writer, mock_tr assert "prompt" in call_args["meta"]["input"] actual_prompt = call_args["meta"]["input"]["prompt"] assert actual_prompt["id"] == "pmpt_690b24669d8c81948acc0e98da10e6490190feb3a62eee0b" - assert actual_prompt["version"] == "3" + assert actual_prompt["version"] == "4" assert actual_prompt["variables"] == {"question": "What is machine learning?"} + # Verify chat_template is extracted with variable placeholders + assert "chat_template" in actual_prompt + chat_template = actual_prompt["chat_template"] + assert len(chat_template) == 2 + # First message: developer role + assert chat_template[0]["role"] == "developer" + assert chat_template[0]["content"] == "Direct & Conversational tone" + # Second message: user role with variable placeholder + assert chat_template[1]["role"] == "user" + assert chat_template[1]["content"] == "You are a helpful assistant. Please answer this question: {{question}}" + # Verify the actual prompt content is captured in input messages input_messages = call_args["meta"]["input"]["messages"] - assert len(input_messages) >= 1 - assert input_messages[0]["role"] == "user" + assert len(input_messages) == 2 + # Developer message + assert input_messages[0]["role"] == "developer" + assert input_messages[0]["content"] == "Direct & Conversational tone" + # User message with rendered variables + assert input_messages[1]["role"] == "user" assert ( - input_messages[0]["content"] + input_messages[1]["content"] == "You are a helpful assistant. Please answer this question: What is machine learning?" 
) From 088613a9d4bb840541fc9b6f1ae3dc612cf38c3d Mon Sep 17 00:00:00 2001 From: Alexandre Choura Date: Fri, 7 Nov 2025 10:07:46 +0100 Subject: [PATCH 05/12] handle `prompt_data` not being a dict-like object + sort and use regex --- ddtrace/llmobs/_integrations/utils.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index dd059700520..9165f662f15 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -764,6 +764,9 @@ def _extract_chat_template_from_instructions( value_str = str(var_value) value_to_placeholder[value_str] = "{{" + var_name + "}}" + sorted_values = sorted(value_to_placeholder.keys(), key=len, reverse=True) + pattern = "|".join(re.escape(v) for v in sorted_values) if sorted_values else None + for instruction in instructions: role = _get_attr(instruction, "role", "") if not role: @@ -785,9 +788,8 @@ def _extract_chat_template_from_instructions( full_text = "".join(text_parts) - # Replace variable values with placeholders - for value_str, placeholder in value_to_placeholder.items(): - full_text = full_text.replace(value_str, placeholder) + if pattern: + full_text = re.sub(pattern, lambda m: value_to_placeholder[m.group(0)], full_text) chat_template.append({"role": role, "content": full_text}) @@ -832,7 +834,7 @@ def openai_set_meta_tags_from_response(span: Span, kwargs: Dict[str, Any], respo validated_prompt = _validate_prompt(prompt_data, strict_validation=False) span._set_ctx_item(INPUT_PROMPT, validated_prompt) - except (TypeError, ValueError) as e: + except (TypeError, ValueError, AttributeError) as e: logger.debug("Failed to validate prompt for OpenAI response: %s", e) if span.error or not response: From a754367a7594fd9ee17402a1c2d3463fecbc3983 Mon Sep 17 00:00:00 2001 From: Alexandre Choura Date: Fri, 7 Nov 2025 11:05:59 +0100 Subject: [PATCH 06/12] fmt + changelog --- ddtrace/llmobs/_integrations/utils.py | 2 +- .../instrument-openai-responses-prompt-d8d0f21a6f21ed4d.yaml | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/instrument-openai-responses-prompt-d8d0f21a6f21ed4d.yaml diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index 9165f662f15..c51bb8e98ca 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -758,7 +758,7 @@ def _extract_chat_template_from_instructions( # Create a mapping of variable values to placeholder names value_to_placeholder = {} for var_name, var_value in variables.items(): - if hasattr(var_value, "text"): # ResponseInputText + if hasattr(var_value, "text"): # ResponseInputText value_str = str(var_value.text) else: value_str = str(var_value) diff --git a/releasenotes/notes/instrument-openai-responses-prompt-d8d0f21a6f21ed4d.yaml b/releasenotes/notes/instrument-openai-responses-prompt-d8d0f21a6f21ed4d.yaml new file mode 100644 index 00000000000..9c40cc40f6d --- /dev/null +++ b/releasenotes/notes/instrument-openai-responses-prompt-d8d0f21a6f21ed4d.yaml @@ -0,0 +1,5 @@ +--- +features: + - | + LLM Observability: The OpenAI integration now captures prompt metadata (id, version, variables, and chat template) + for reusable prompts when using the ``responses`` endpoint (available in OpenAI SDK >= 1.87.0). 
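
A minimal sketch of the flow this release note describes, assuming LLM Observability is enabled via LLMObs.enable() and the OpenAI integration is patched; the ml_app name is an assumption, and the prompt id, version, and variables simply mirror the values recorded in the test cassette:

    # Sketch only: ml_app and the prompt id/version/variables are illustrative,
    # mirroring the recorded cassette rather than a real reusable prompt.
    import openai

    from ddtrace import patch
    from ddtrace.llmobs import LLMObs

    patch(openai=True)                    # instrument the OpenAI SDK
    LLMObs.enable(ml_app="prompt-demo")   # ml_app value assumed for the example

    client = openai.OpenAI()
    client.responses.create(
        prompt={
            "id": "pmpt_690b24669d8c81948acc0e98da10e6490190feb3a62eee0b",
            "version": "4",
            "variables": {"question": "What is machine learning?"},
        }
    )
    # The emitted LLMObs span should carry the prompt id, version, and variables,
    # plus a chat_template reconstructed from the response instructions.
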
From d4ee1407584ebe711b41a53c8f2534e049400117 Mon Sep 17 00:00:00 2001 From: Alexandre Choura Date: Fri, 7 Nov 2025 11:26:21 +0100 Subject: [PATCH 07/12] lint:typing --- ddtrace/llmobs/_integrations/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index c51bb8e98ca..43e07fb6c2a 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -789,7 +789,7 @@ def _extract_chat_template_from_instructions( full_text = "".join(text_parts) if pattern: - full_text = re.sub(pattern, lambda m: value_to_placeholder[m.group(0)], full_text) + full_text = re.sub(pattern, lambda m: value_to_placeholder[str(m.group(0))], full_text) chat_template.append({"role": role, "content": full_text}) From 9da5837d3673302742b66fc5541b3539bff2d024 Mon Sep 17 00:00:00 2001 From: Alexandre Choura Date: Mon, 10 Nov 2025 10:30:47 +0100 Subject: [PATCH 08/12] Add empty value filtering --- ddtrace/llmobs/_integrations/utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index 43e07fb6c2a..667ad0dbec8 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -758,10 +758,15 @@ def _extract_chat_template_from_instructions( # Create a mapping of variable values to placeholder names value_to_placeholder = {} for var_name, var_value in variables.items(): - if hasattr(var_value, "text"): # ResponseInputText + if hasattr(var_value, "text"): value_str = str(var_value.text) else: value_str = str(var_value) + + # Skip empty values + if not value_str: + continue + value_to_placeholder[value_str] = "{{" + var_name + "}}" sorted_values = sorted(value_to_placeholder.keys(), key=len, reverse=True) From 1a5f481c8da495cae127b65ebce38639d38fa5f1 Mon Sep 17 00:00:00 2001 From: Alexandre Choura Date: Mon, 10 Nov 2025 11:32:26 +0100 Subject: [PATCH 09/12] Add a fallback for large patterns + simple test for extract fn --- ddtrace/llmobs/_integrations/utils.py | 26 ++++- tests/llmobs/test_integrations_utils.py | 134 ++++++++++++++++++++++++ 2 files changed, 156 insertions(+), 4 deletions(-) create mode 100644 tests/llmobs/test_integrations_utils.py diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index 667ad0dbec8..a5397523c04 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -746,6 +746,9 @@ def _extract_chat_template_from_instructions( """ Extract a chat template from OpenAI response instructions by replacing variable values with placeholders. + Uses regex-based replacement for performance when patterns are small, but falls back to + iterative string replacement for large patterns to avoid regex performance issues. 
+ Args: instructions: List of instruction messages from the OpenAI response variables: Dictionary of variables used in the prompt @@ -755,22 +758,32 @@ def _extract_chat_template_from_instructions( """ chat_template = [] + # Maximum regex pattern length (number of characters) before falling back to iterative replacement + MAX_REGEX_PATTERN_LENGTH = 5000 + # Create a mapping of variable values to placeholder names value_to_placeholder = {} for var_name, var_value in variables.items(): - if hasattr(var_value, "text"): + if hasattr(var_value, "text"): # ResponseInputText value_str = str(var_value.text) else: value_str = str(var_value) - + # Skip empty values if not value_str: continue - + value_to_placeholder[value_str] = "{{" + var_name + "}}" + # Sort by length (longest first) to handle overlapping values correctly sorted_values = sorted(value_to_placeholder.keys(), key=len, reverse=True) - pattern = "|".join(re.escape(v) for v in sorted_values) if sorted_values else None + + total_pattern_length = ( + sum(len(re.escape(v)) for v in sorted_values) + len(sorted_values) - 1 + ) # -1 because n values need n-1 separators + use_regex = sorted_values and total_pattern_length < MAX_REGEX_PATTERN_LENGTH + + pattern = "|".join(re.escape(v) for v in sorted_values) if use_regex else None for instruction in instructions: role = _get_attr(instruction, "role", "") @@ -793,8 +806,13 @@ def _extract_chat_template_from_instructions( full_text = "".join(text_parts) + # Replace variable values with placeholders if pattern: full_text = re.sub(pattern, lambda m: value_to_placeholder[str(m.group(0))], full_text) + elif sorted_values: + for value_str in sorted_values: + placeholder = value_to_placeholder[value_str] + full_text = full_text.replace(value_str, placeholder) chat_template.append({"role": role, "content": full_text}) diff --git a/tests/llmobs/test_integrations_utils.py b/tests/llmobs/test_integrations_utils.py new file mode 100644 index 00000000000..d5ec21653b5 --- /dev/null +++ b/tests/llmobs/test_integrations_utils.py @@ -0,0 +1,134 @@ +from ddtrace.llmobs._integrations.utils import _extract_chat_template_from_instructions + + +class TestExtractChatTemplateFromInstructions: + """Tests for the _extract_chat_template_from_instructions function.""" + + def test_basic_functionality(self): + """Test basic variable replacement with multiple instructions and roles.""" + instructions = [ + { + "role": "developer", + "content": [{"text": "Be helpful"}], + }, + { + "role": "user", + "content": [{"text": "Hello John, your email is john@example.com"}], + }, + ] + variables = { + "name": "John", + "email": "john@example.com", + } + + result = _extract_chat_template_from_instructions(instructions, variables) + + assert len(result) == 2 + assert result[0]["role"] == "developer" + assert result[0]["content"] == "Be helpful" + assert result[1]["role"] == "user" + assert result[1]["content"] == "Hello {{name}}, your email is {{email}}" + + def test_overlapping_values_and_partial_matches(self): + """Test longest-first matching for overlaps and partial word matches.""" + # Test 1: Overlapping values - longest should win + instructions = [ + { + "role": "user", + "content": [{"text": "The phrase is: AI is cool"}], + } + ] + variables = {"short": "AI", "long": "AI is cool"} + result = _extract_chat_template_from_instructions(instructions, variables) + assert result[0]["content"] == "The phrase is: {{long}}" + + # Test 2: Partial word matches should work (e.g., "test" inside "testing") + instructions = [ + { + "role": 
"user", + "content": [{"text": "We are testing the feature"}], + } + ] + variables = {"action": "test"} + result = _extract_chat_template_from_instructions(instructions, variables) + assert result[0]["content"] == "We are {{action}}ing the feature" + + def test_special_characters_and_escaping(self): + """Test that regex special characters are properly escaped.""" + instructions = [ + { + "role": "user", + "content": [{"text": "The price is $99.99 (plus $5.00 tax)"}], + } + ] + variables = {"price": "$99.99", "tax": "$5.00"} + + result = _extract_chat_template_from_instructions(instructions, variables) + + assert result[0]["content"] == "The price is {{price}} (plus {{tax}} tax)" + + def test_empty_and_edge_cases(self): + """Test empty variables, empty values, and malformed instructions.""" + # Empty variables dict + instructions = [{"role": "user", "content": [{"text": "No variables"}]}] + result = _extract_chat_template_from_instructions(instructions, {}) + assert result[0]["content"] == "No variables" + + # Empty variable values are skipped + instructions = [{"role": "user", "content": [{"text": "Hello world"}]}] + result = _extract_chat_template_from_instructions(instructions, {"empty": "", "greeting": "Hello"}) + assert result[0]["content"] == "{{greeting}} world" + + # Instructions without role or content are skipped + instructions = [ + {"content": [{"text": "No role"}]}, + {"role": "developer", "content": []}, + {"role": "user", "content": [{"text": "Valid"}]}, + ] + result = _extract_chat_template_from_instructions(instructions, {}) + assert len(result) == 1 + assert result[0]["role"] == "user" + + def test_large_pattern_fallback(self): + """Test that large patterns trigger fallback to iterative replacement.""" + # Create variables that combined exceed MAX_REGEX_PATTERN_LENGTH (5000 chars) + # Total pattern: 1500 + 1500 + 3000 + 2 separators = 6002 chars + large_text = "A" * 3000 + instructions = [ + { + "role": "user", + "content": [{"text": f"Here is some text: {large_text}"}], + } + ] + variables = { + "var1": "B" * 1500, + "var2": "C" * 1500, + "large": large_text, + } + + result = _extract_chat_template_from_instructions(instructions, variables) + + assert result[0]["content"] == "Here is some text: {{large}}" + + def test_response_input_text_objects(self): + """Test handling of ResponseInputText objects with .text attribute.""" + + class ResponseInputText: + def __init__(self, text): + self.text = text + + instructions = [ + { + "role": "user", + "content": [ + {"text": "Part one "}, + {"text": "Question: What is AI?"}, + ], + } + ] + variables = {"question": ResponseInputText("What is AI?")} + + result = _extract_chat_template_from_instructions(instructions, variables) + + # Also tests that multiple content items are concatenated + assert result[0]["content"] == "Part one Question: {{question}}" From 535edeffdb338305422f58baca4489ab365800c6 Mon Sep 17 00:00:00 2001 From: Alexandre Choura Date: Mon, 10 Nov 2025 12:55:06 +0100 Subject: [PATCH 10/12] update to use simple longest-first `.replace()` approach --- ddtrace/llmobs/_integrations/utils.py | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index a5397523c04..146cd5620f8 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -746,9 +746,6 @@ def _extract_chat_template_from_instructions( """ Extract a chat template from OpenAI response instructions by 
replacing variable values with placeholders. - Uses regex-based replacement for performance when patterns are small, but falls back to - iterative string replacement for large patterns to avoid regex performance issues. - Args: instructions: List of instruction messages from the OpenAI response variables: Dictionary of variables used in the prompt @@ -758,9 +755,6 @@ def _extract_chat_template_from_instructions( """ chat_template = [] - # Maximum regex pattern length (number of characters) before falling back to iterative replacement - MAX_REGEX_PATTERN_LENGTH = 5000 - # Create a mapping of variable values to placeholder names value_to_placeholder = {} for var_name, var_value in variables.items(): @@ -773,18 +767,11 @@ def _extract_chat_template_from_instructions( if not value_str: continue - value_to_placeholder[value_str] = "{{" + var_name + "}}" + value_to_placeholder[value_str] = f"{{{{{var_name}}}}}" # Sort by length (longest first) to handle overlapping values correctly sorted_values = sorted(value_to_placeholder.keys(), key=len, reverse=True) - total_pattern_length = ( - sum(len(re.escape(v)) for v in sorted_values) + len(sorted_values) - 1 - ) # -1 because n values need n-1 separators - use_regex = sorted_values and total_pattern_length < MAX_REGEX_PATTERN_LENGTH - - pattern = "|".join(re.escape(v) for v in sorted_values) if use_regex else None - for instruction in instructions: role = _get_attr(instruction, "role", "") if not role: @@ -792,7 +779,6 @@ def _extract_chat_template_from_instructions( content_items = _get_attr(instruction, "content", []) if not content_items: - # Skip empty content (e.g., developer role with no content) continue text_parts = [] @@ -806,13 +792,10 @@ def _extract_chat_template_from_instructions( full_text = "".join(text_parts) - # Replace variable values with placeholders - if pattern: - full_text = re.sub(pattern, lambda m: value_to_placeholder[str(m.group(0))], full_text) - elif sorted_values: - for value_str in sorted_values: - placeholder = value_to_placeholder[value_str] - full_text = full_text.replace(value_str, placeholder) + # Replace variable values with placeholders (longest first) + for value_str in sorted_values: + placeholder = value_to_placeholder[value_str] + full_text = full_text.replace(value_str, placeholder) chat_template.append({"role": role, "content": full_text}) From 0e0720a14dd4a5949a0a4e7eb606c83ecc2ec98d Mon Sep 17 00:00:00 2001 From: Alexandre Choura Date: Mon, 10 Nov 2025 16:47:43 +0100 Subject: [PATCH 11/12] remove now out-of-context testcase --- tests/llmobs/test_integrations_utils.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/tests/llmobs/test_integrations_utils.py b/tests/llmobs/test_integrations_utils.py index d5ec21653b5..416391b5770 100644 --- a/tests/llmobs/test_integrations_utils.py +++ b/tests/llmobs/test_integrations_utils.py @@ -89,27 +89,6 @@ def test_empty_and_edge_cases(self): assert len(result) == 1 assert result[0]["role"] == "user" - def test_large_pattern_fallback(self): - """Test that large patterns trigger fallback to iterative replacement.""" - # Create variables that combined exceed MAX_REGEX_PATTERN_LENGTH (5000 chars) - # Total pattern: 1500 + 1500 + 3000 + 2 separators = 6002 chars - large_text = "A" * 3000 - instructions = [ - { - "role": "user", - "content": [{"text": f"Here is some text: {large_text}"}], - } - ] - variables = { - "var1": "B" * 1500, - "var2": "C" * 1500, - "large": large_text, - } - - result = _extract_chat_template_from_instructions(instructions, 
variables) - - assert result[0]["content"] == "Here is some text: {{large}}" - def test_response_input_text_objects(self): """Test handling of ResponseInputText objects with .text attribute.""" From c6c54e831561bae292761c582a56fa03e932cb87 Mon Sep 17 00:00:00 2001 From: Alexandre Choura Date: Thu, 13 Nov 2025 09:32:31 +0100 Subject: [PATCH 12/12] Remove the test class wrapper and convert all methods to functions --- tests/llmobs/test_integrations_utils.py | 217 ++++++++++++------------ 1 file changed, 109 insertions(+), 108 deletions(-) diff --git a/tests/llmobs/test_integrations_utils.py b/tests/llmobs/test_integrations_utils.py index 416391b5770..b117f902468 100644 --- a/tests/llmobs/test_integrations_utils.py +++ b/tests/llmobs/test_integrations_utils.py @@ -1,113 +1,114 @@ from ddtrace.llmobs._integrations.utils import _extract_chat_template_from_instructions -class TestExtractChatTemplateFromInstructions: - """Tests for the _extract_chat_template_from_instructions function.""" - - def test_basic_functionality(self): - """Test basic variable replacement with multiple instructions and roles.""" - instructions = [ - { - "role": "developer", - "content": [{"text": "Be helpful"}], - }, - { - "role": "user", - "content": [{"text": "Hello John, your email is john@example.com"}], - }, - ] - variables = { - "name": "John", - "email": "john@example.com", +def test_basic_functionality(): + """Test basic variable replacement with multiple instructions and roles.""" + instructions = [ + { + "role": "developer", + "content": [{"text": "Be helpful"}], + }, + { + "role": "user", + "content": [{"text": "Hello John, your email is john@example.com"}], + }, + ] + variables = { + "name": "John", + "email": "john@example.com", + } + + result = _extract_chat_template_from_instructions(instructions, variables) + + assert len(result) == 2 + assert result[0]["role"] == "developer" + assert result[0]["content"] == "Be helpful" + assert result[1]["role"] == "user" + assert result[1]["content"] == "Hello {{name}}, your email is {{email}}" + + +def test_overlapping_values_and_partial_matches(): + """Test longest-first matching for overlaps and partial word matches.""" + # Test 1: Overlapping values - longest should win + instructions = [ + { + "role": "user", + "content": [{"text": "The phrase is: AI is cool"}], } + ] + variables = {"short": "AI", "long": "AI is cool"} + result = _extract_chat_template_from_instructions(instructions, variables) + assert result[0]["content"] == "The phrase is: {{long}}" + + # Test 2: Partial word matches should work (e.g., "test" inside "testing") + instructions = [ + { + "role": "user", + "content": [{"text": "We are testing the feature"}], + } + ] + variables = {"action": "test"} + result = _extract_chat_template_from_instructions(instructions, variables) + assert result[0]["content"] == "We are {{action}}ing the feature" + + +def test_special_characters_and_escaping(): + """Test that special characters are handled correctly.""" + instructions = [ + { + "role": "user", + "content": [{"text": "The price is $99.99 (plus $5.00 tax)"}], + } + ] + variables = {"price": "$99.99", "tax": "$5.00"} + + result = _extract_chat_template_from_instructions(instructions, variables) + + assert result[0]["content"] == "The price is {{price}} (plus {{tax}} tax)" + + +def test_empty_and_edge_cases(): + """Test empty variables, empty values, and malformed instructions.""" + # Empty variables dict + instructions = [{"role": "user", "content": [{"text": "No variables"}]}] + result = 
_extract_chat_template_from_instructions(instructions, {}) + assert result[0]["content"] == "No variables" + + # Empty variable values are skipped + instructions = [{"role": "user", "content": [{"text": "Hello world"}]}] + result = _extract_chat_template_from_instructions(instructions, {"empty": "", "greeting": "Hello"}) + assert result[0]["content"] == "{{greeting}} world" + + # Instructions without role or content are skipped + instructions = [ + {"content": [{"text": "No role"}]}, + {"role": "developer", "content": []}, + {"role": "user", "content": [{"text": "Valid"}]}, + ] + result = _extract_chat_template_from_instructions(instructions, {}) + assert len(result) == 1 + assert result[0]["role"] == "user" + + +def test_response_input_text_objects(): + """Test handling of ResponseInputText objects with .text attribute.""" + + class ResponseInputText: + def __init__(self, text): + self.text = text + + instructions = [ + { + "role": "user", + "content": [ + {"text": "Part one "}, + {"text": "Question: What is AI?"}, + ], + } + ] + variables = {"question": ResponseInputText("What is AI?")} + + result = _extract_chat_template_from_instructions(instructions, variables) - result = _extract_chat_template_from_instructions(instructions, variables) - - assert len(result) == 2 - assert result[0]["role"] == "developer" - assert result[0]["content"] == "Be helpful" - assert result[1]["role"] == "user" - assert result[1]["content"] == "Hello {{name}}, your email is {{email}}" - - def test_overlapping_values_and_partial_matches(self): - """Test longest-first matching for overlaps and partial word matches.""" - # Test 1: Overlapping values - longest should win - instructions = [ - { - "role": "user", - "content": [{"text": "The phrase is: AI is cool"}], - } - ] - variables = {"short": "AI", "long": "AI is cool"} - result = _extract_chat_template_from_instructions(instructions, variables) - assert result[0]["content"] == "The phrase is: {{long}}" - - # Test 2: Partial word matches should work (e.g., "test" inside "testing") - instructions = [ - { - "role": "user", - "content": [{"text": "We are testing the feature"}], - } - ] - variables = {"action": "test"} - result = _extract_chat_template_from_instructions(instructions, variables) - assert result[0]["content"] == "We are {{action}}ing the feature" - - def test_special_characters_and_escaping(self): - """Test that regex special characters are properly escaped.""" - instructions = [ - { - "role": "user", - "content": [{"text": "The price is $99.99 (plus $5.00 tax)"}], - } - ] - variables = {"price": "$99.99", "tax": "$5.00"} - - result = _extract_chat_template_from_instructions(instructions, variables) - - assert result[0]["content"] == "The price is {{price}} (plus {{tax}} tax)" - - def test_empty_and_edge_cases(self): - """Test empty variables, empty values, and malformed instructions.""" - # Empty variables dict - instructions = [{"role": "user", "content": [{"text": "No variables"}]}] - result = _extract_chat_template_from_instructions(instructions, {}) - assert result[0]["content"] == "No variables" - - # Empty variable values are skipped - instructions = [{"role": "user", "content": [{"text": "Hello world"}]}] - result = _extract_chat_template_from_instructions(instructions, {"empty": "", "greeting": "Hello"}) - assert result[0]["content"] == "{{greeting}} world" - - # Instructions without role or content are skipped - instructions = [ - {"content": [{"text": "No role"}]}, - {"role": "developer", "content": []}, - {"role": "user", "content": 
[{"text": "Valid"}]}, - ] - result = _extract_chat_template_from_instructions(instructions, {}) - assert len(result) == 1 - assert result[0]["role"] == "user" - - def test_response_input_text_objects(self): - """Test handling of ResponseInputText objects with .text attribute.""" - - class ResponseInputText: - def __init__(self, text): - self.text = text - - instructions = [ - { - "role": "user", - "content": [ - {"text": "Part one "}, - {"text": "Question: What is AI?"}, - ], - } - ] - variables = {"question": ResponseInputText("What is AI?")} - - result = _extract_chat_template_from_instructions(instructions, variables) - - # Also tests that multiple content items are concatenated - assert result[0]["content"] == "Part one Question: {{question}}" + # Also tests that multiple content items are concatenated + assert result[0]["content"] == "Part one Question: {{question}}"