diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py
index f431f9342c6..146cd5620f8 100644
--- a/ddtrace/llmobs/_integrations/utils.py
+++ b/ddtrace/llmobs/_integrations/utils.py
@@ -16,6 +16,7 @@
 from ddtrace.llmobs._constants import DISPATCH_ON_LLM_TOOL_CHOICE
 from ddtrace.llmobs._constants import DISPATCH_ON_TOOL_CALL_OUTPUT_USED
 from ddtrace.llmobs._constants import INPUT_MESSAGES
+from ddtrace.llmobs._constants import INPUT_PROMPT
 from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
 from ddtrace.llmobs._constants import INPUT_VALUE
 from ddtrace.llmobs._constants import METADATA
@@ -26,6 +27,7 @@
 from ddtrace.llmobs._constants import TOOL_DEFINITIONS
 from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
 from ddtrace.llmobs._utils import _get_attr
+from ddtrace.llmobs._utils import _validate_prompt
 from ddtrace.llmobs._utils import load_data_value
 from ddtrace.llmobs._utils import safe_json
 from ddtrace.llmobs._utils import safe_load_json
@@ -738,9 +740,78 @@ def openai_get_metadata_from_response(
     return metadata
 
 
+def _extract_chat_template_from_instructions(
+    instructions: List[Any], variables: Dict[str, Any]
+) -> List[Dict[str, str]]:
+    """
+    Extract a chat template from OpenAI response instructions by replacing variable values with placeholders.
+
+    Args:
+        instructions: List of instruction messages from the OpenAI response
+        variables: Dictionary of variables used in the prompt
+
+    Returns:
+        List of chat template messages with placeholders (e.g., {{variable_name}})
+    """
+    chat_template = []
+
+    # Create a mapping of variable values to placeholder names
+    value_to_placeholder = {}
+    for var_name, var_value in variables.items():
+        if hasattr(var_value, "text"):  # ResponseInputText
+            value_str = str(var_value.text)
+        else:
+            value_str = str(var_value)
+
+        # Skip empty values
+        if not value_str:
+            continue
+
+        value_to_placeholder[value_str] = f"{{{{{var_name}}}}}"
+
+    # Sort by length (longest first) to handle overlapping values correctly
+    sorted_values = sorted(value_to_placeholder.keys(), key=len, reverse=True)
+
+    for instruction in instructions:
+        role = _get_attr(instruction, "role", "")
+        if not role:
+            continue
+
+        content_items = _get_attr(instruction, "content", [])
+        if not content_items:
+            continue
+
+        text_parts = []
+        for content_item in content_items:
+            text = _get_attr(content_item, "text", "")
+            if text:
+                text_parts.append(str(text))
+
+        if not text_parts:
+            continue
+
+        full_text = "".join(text_parts)
+
+        # Replace variable values with placeholders (longest first)
+        for value_str in sorted_values:
+            placeholder = value_to_placeholder[value_str]
+            full_text = full_text.replace(value_str, placeholder)
+
+        chat_template.append({"role": role, "content": full_text})
+
+    return chat_template
+
+
 def openai_set_meta_tags_from_response(span: Span, kwargs: Dict[str, Any], response: Optional[Any]) -> None:
     """Extract input/output tags from response and set them as temporary "_ml_obs.meta.*" tags."""
     input_data = kwargs.get("input", [])
+
+    # For reusable prompts, input may not be in kwargs; extract it from response.instructions
+    if not input_data and response and "prompt" in kwargs:
+        instructions = _get_attr(response, "instructions", [])
+        if instructions:
+            input_data = load_data_value(instructions)
+
     input_messages = openai_get_input_messages_from_response_input(input_data)
 
     if "instructions" in kwargs:
@@ -753,6 +824,25 @@ def openai_set_meta_tags_from_response(span: Span, kwargs: Dict[str, Any], respo
             }
         )
 
+    if "prompt" in kwargs:
"prompt" in kwargs: + prompt_data = kwargs.get("prompt") + if prompt_data: + try: + # Extract chat_template from response instructions if available + if response and not prompt_data.get("chat_template") and not prompt_data.get("template"): + instructions = _get_attr(response, "instructions", None) + variables = prompt_data.get("variables", {}) + if instructions and variables: + chat_template = _extract_chat_template_from_instructions(instructions, variables) + if chat_template: + prompt_data = dict(prompt_data) # Make a copy to avoid modifying the original + prompt_data["chat_template"] = chat_template + + validated_prompt = _validate_prompt(prompt_data, strict_validation=False) + span._set_ctx_item(INPUT_PROMPT, validated_prompt) + except (TypeError, ValueError, AttributeError) as e: + logger.debug("Failed to validate prompt for OpenAI response: %s", e) + if span.error or not response: span._set_ctx_item(OUTPUT_MESSAGES, [Message(content="")]) return diff --git a/releasenotes/notes/instrument-openai-responses-prompt-d8d0f21a6f21ed4d.yaml b/releasenotes/notes/instrument-openai-responses-prompt-d8d0f21a6f21ed4d.yaml new file mode 100644 index 00000000000..9c40cc40f6d --- /dev/null +++ b/releasenotes/notes/instrument-openai-responses-prompt-d8d0f21a6f21ed4d.yaml @@ -0,0 +1,5 @@ +--- +features: + - | + LLM Observability: The OpenAI integration now captures prompt metadata (id, version, variables, and chat template) + for reusable prompts when using the ``responses`` endpoint (available in OpenAI SDK >= 1.87.0). diff --git a/tests/contrib/openai/cassettes/v1/response_with_prompt.yaml b/tests/contrib/openai/cassettes/v1/response_with_prompt.yaml new file mode 100644 index 00000000000..02a908c5183 --- /dev/null +++ b/tests/contrib/openai/cassettes/v1/response_with_prompt.yaml @@ -0,0 +1,134 @@ +interactions: +- request: + body: '{"prompt":{"id":"pmpt_690b24669d8c81948acc0e98da10e6490190feb3a62eee0b","version":"4","variables":{"question":"What + is machine learning?"}}}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '140' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 2.3.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 2.3.0 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.18 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAA7xYWW8byRF+968o8CGQAYoYHqJFvQTGbhZYJM4GmyyMIDaImu6amVr1MemDFL3w + fw+qhxySkh1vXvImdU/X8dVXF397BTBhPXmASaDYb6u7iqo7Pd9sar1EWlbz9aZSuFrS6k7f3883 + c1xu5tWmwfWqUW82b5rJVET4+ldS6STGu0jDuQqEifQW5W7+Zr1YLdfzu/tyFxOmHOWN8rY3lEgP + j2pUj23w2YldDZpIwzEbw66dPMBvrwAAJj0eKMh7TTsyvqcweQXwuXxMIXi5c9mYcsDupGWrKSGb + eH0bU8gqsXdy/q+iYFADMEmHnkSPpRixHVwrF8q7RC6NLy5fXb1k1+e0TfSUxsfDvZw8wOR7DqQS + /AG+825HIaJYggaSdzQZH3w+/vVxNCB4Qy8QOGLwf3Xgnz4DBgKEjkzfZAMYI8eELs3gb4YwEqCL + ewqQOo7w70xRfHyA9x0m4AgWVceOwBAGx6794+/xO8fR5VfH+4nFp63PqdjrH8ldBVouk/dmq9Bc + U8B6TUZk+tWtZce3i2pxd1utbufrI8eLzJfsOKZP/HryNFW1UZI892pxp5ekqvVcKXW/PgfihHMg + jF7cP1/FbC2Ggyj++MXoDgbY2H7VgtV8tdxUYkGt5zXVzRu1vluuNqvqpQUvOPK1PP3fCHSKyAsG + oXM+4Zh5H68ujW/74Osv3JyI9+4ZbYRKCHVApzrwDWBI3LBiNMAukTHcklMESWjX8o4iiFs5UYiQ + OgKs2XA6QPKDTOgxJQouAjoNFh8JNCmOYjA0wVvQmHAKAVNX2I0OahJL6Kk3rDiZA/TBtwGtJQ2N + 
+        D0A7D0A7CDG8ztlyUTW7bspLamkkF/lFnmhr7P50ZL0nE3xssLDYoTHcfH33+zxO8fR5VfH+4nF
+        p63PqdjrH8ldBVouk/dmq9BcU8B6TUZk+tWtZce3i2pxd1utbufrI8eLzJfsOKZP/HryNFW1UZI8
+        93pxp5ekqvVcKXW/PgfihHMgjF7cP1/FbC2Ggyj++MXoDgbY2H7VgtV8tdxUYkGt5zXVzRu1vluu
+        NqvqpQUvOPK1PP3fCHSKyAsGoXM+4Zh5H68ujW/74Osv3JyI9+4ZbYRKCHVApzrwDWBI3LBiNMAu
+        kTHcklMESWjX8o4iiFs5UYiQOgKs2XA6QPKDTOgxJQouAjoNFh8JNCmOYjA0wVvQmHAKAVNX2I0O
+        ahJL6Kk3rDiZA/TBtwGtJQ2ND0A7CgeIihwG9jP40YEvr/c+6DgFKUeEWuw/+Az7wEkEfsiLaq64
+        uZUUuk0duVtxoxxrCNlQnJYHDZEuzsRDTGSBnlAiN7jACRpuc6AIPqfyWXaagjmIjiIFvANOEfze
+        zT64D+7PdADWhPFB/hN9iwV8jwmLwBEg+OBgMHIJUhCUN0bKmgAENy7bmkKcgoRuCmyxFXsjuegD
+        BELNro1ToKRmr4tggzUZsXffkQNHpEnPrrS8PccdTesDp84COjSHTxSHGBflyYOmJLaMxhadZiBe
+        x32cXbj2TmoB1JmN2HSl8R9CkbMq/WuOKRZqBSnYPQa0VMh0IwgOISu1ZQjTa7GlpphAYZ9yECb6
+        SKNd1/79VKgakB3paQlVEQUK3UDFPpDmoW+JR2dmegFsP4XsIpErKBQP3yE7kKSMwq7nRbfEdz6D
+        v+eewo4j6TPAxa6TYb9EikN8SJ/pdUOzdjaFvvPJR0jYtqSPEKiRpz4cj7Rvj5jMroX/RVRKGlvs
+        e1Fd0qy0ILgZeFNgHGoK3BQ74nMp33lrvYOE8TE+gDLSkBpWJeJwE3u0sIszcD7dyj+vpxCoDRRj
+        uT8B61rofJYABVYUXxcQFzP4xcVvYfTeh8cIe04dZHfCasiFASeEng1JHFSOyVsK0OegOmmWHcfk
+        A9MLt35gpyMM44rQxweQYUlwusrb/w5GjomCWHxzegyRLRsMoy3x9RQ0W3KxzCJSEwPpYUiCm8i2
+        N9yUmiE+DcAsZ/AzsWt8UGTJpa8gcwxwfYAUpDJLspeJTWKgiHelEtEegy607smhSSwMSx27R4Gs
+        ZEVJfGjR0m1vsBiDLbn0HLSStXIBPavHCHhMGXaADsjtOHhXDE4eLD6x5U8EKtssFWJHR2PA76TE
+        s6Xi7c+E5nbvgzmnwGWB/JNFNlCI1rA5Nhg8YiKKYu8TZLdHl0jDsfEOFWAsRDsuIe6DF+gFHm8t
+        OT3QmFzLTmD5K6XG8NMU3lr85N3rSyE/SsaUl63j8oydtIkgieqoQLSJQ9YC9j3cYE7eYmIFDaqi
+        fxjwS0qza6/Ev83JO299jrCjjpWh53463HGLqVSypmF1+fqHgFkfi7OYNnSsEpka3SO7tkD9vjtI
+        I7ClSg4ov+j8vd8LxhbdARKpznnjWyHNnkAyeOi6Gg9TqZ2F8r5Jexlbo8WQKEzBehliNfYS9Wkh
+        Zk2iEzBBh07LCgLDIPQkbB0ZUCosvI1DhsvDYa4YLQOZldhlEkja4PfTF+X3FAoZbwsCOUp4pUsq
+        LmOLyNXI5gCGG4JHoj7KoIFOOtXs98zM42D+fHCW3mUMmesBOYU87F99oB37HLenFW9bBs9xgO6D
+        t306r2fDWNrbPm3Xm6perNbrjb6XOXh1j0pVtLnXOK9ovdpU803VUL3E9YKIqvo4IU52GBhrQ3GU
+        CjA5rQ4XZ99aVMZp8ZvLxufLCXsiq9igabI675aDo1uFqqPtIx1egnC8CySz8SBg/OI84Y9IUdP4
+        kIbhW3O2J/fPk7+8HvVHbCgdtqxFeMN0tedG6UmKtolPu3GD2QxgTKSl0NVSncj2FFA6iWzos+p4
+        +nQRyMYHi+kyBCewy3fPEat95HS48GY0fOBV51kNRMzJT8aL84w/Sb7fXkz+1XjYX9oYshuaeXGT
+        oxDl+AtCLhvMmYnuahlczqcvzy9+FRjdLFHU54fVlavPd8z1avGlmy8JHjlwfj3fLK6kJ5/QXAh/
+        sxxhLEvv5eZKCaXoiIbPrz7/BwAA//8DAHhWLkvUEQAA
+    headers:
+      CF-RAY:
+      - 99a4fa22dbb601cc-CDG
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 06 Nov 2025 13:36:05 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=oAc59HaJwUjrUv2uHgTgDkTP1sVynTMJVzliRX11b7o-1762436165-1.0.1.1-STkKgI9BlQHAvGzS.Rqi6UQVssVb5_M5J9QpUZICssvaO35gDy6yDFJo.tYdjVGKAGufaBJ9rwowcVi0u.xMc6oV0zOSTM2nqB6IjkP9W.4;
+        path=/; expires=Thu, 06-Nov-25 14:06:05 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=bDZxnxovYk7l9OeXSX6u2DbwKyUR5GDTvi_l5SLAkiY-1762436165819-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - datadog-staging
+      openai-processing-ms:
+      - '7512'
+      openai-project:
+      - proj_gt6TQZPRbZfoY2J9AQlEJMpd
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '7514'
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999762'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_2409b397395c43bcaa8b763bb736ebf5
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/contrib/openai/test_openai_llmobs.py b/tests/contrib/openai/test_openai_llmobs.py
index 01d4ff46850..b0e2ac84d7f 100644
--- a/tests/contrib/openai/test_openai_llmobs.py
+++ b/tests/contrib/openai/test_openai_llmobs.py
@@ -2150,6 +2150,57 @@ class MathResponse(BaseModel):
             )
         )
 
+    @pytest.mark.skipif(
+        parse_version(openai_module.version.VERSION) < (1, 87),
+        reason="Reusable prompts only available in openai >= 1.87",
+    )
+    def test_response_with_prompt_tracking(self, openai, mock_llmobs_writer, mock_tracer):
+        """Test that prompt metadata (id, version, variables) is captured for reusable prompts."""
+        with get_openai_vcr(subdirectory_name="v1").use_cassette("response_with_prompt.yaml"):
+            client = openai.OpenAI()
+            client.responses.create(
+                prompt={
+                    "id": "pmpt_690b24669d8c81948acc0e98da10e6490190feb3a62eee0b",
+                    "version": "4",
+                    "variables": {"question": "What is machine learning?"},
+                }
+            )
+        mock_tracer.pop_traces()
+        assert mock_llmobs_writer.enqueue.call_count == 1
+
+        call_args = mock_llmobs_writer.enqueue.call_args[0][0]
+
+        # Verify prompt metadata is captured
+        assert "prompt" in call_args["meta"]["input"]
+        actual_prompt = call_args["meta"]["input"]["prompt"]
+        assert actual_prompt["id"] == "pmpt_690b24669d8c81948acc0e98da10e6490190feb3a62eee0b"
+        assert actual_prompt["version"] == "4"
+        assert actual_prompt["variables"] == {"question": "What is machine learning?"}
+
+        # Verify chat_template is extracted with variable placeholders
+        assert "chat_template" in actual_prompt
+        chat_template = actual_prompt["chat_template"]
+        assert len(chat_template) == 2
+        # First message: developer role
+        assert chat_template[0]["role"] == "developer"
+        assert chat_template[0]["content"] == "Direct & Conversational tone"
+        # Second message: user role with variable placeholder
+        assert chat_template[1]["role"] == "user"
+        assert chat_template[1]["content"] == "You are a helpful assistant. Please answer this question: {{question}}"
+
+        # Verify the actual prompt content is captured in input messages
+        input_messages = call_args["meta"]["input"]["messages"]
+        assert len(input_messages) == 2
+        # Developer message
+        assert input_messages[0]["role"] == "developer"
+        assert input_messages[0]["content"] == "Direct & Conversational tone"
+        # User message with rendered variables
+        assert input_messages[1]["role"] == "user"
+        assert (
+            input_messages[1]["content"]
+            == "You are a helpful assistant. Please answer this question: What is machine learning?"
+        )
+
     @pytest.mark.parametrize(
         "ddtrace_global_config",
diff --git a/tests/llmobs/test_integrations_utils.py b/tests/llmobs/test_integrations_utils.py
new file mode 100644
index 00000000000..b117f902468
--- /dev/null
+++ b/tests/llmobs/test_integrations_utils.py
@@ -0,0 +1,114 @@
+from ddtrace.llmobs._integrations.utils import _extract_chat_template_from_instructions
+
+
+def test_basic_functionality():
+    """Test basic variable replacement with multiple instructions and roles."""
+    instructions = [
+        {
+            "role": "developer",
+            "content": [{"text": "Be helpful"}],
+        },
+        {
+            "role": "user",
+            "content": [{"text": "Hello John, your email is john@example.com"}],
+        },
+    ]
+    variables = {
+        "name": "John",
+        "email": "john@example.com",
+    }
+
+    result = _extract_chat_template_from_instructions(instructions, variables)
+
+    assert len(result) == 2
+    assert result[0]["role"] == "developer"
+    assert result[0]["content"] == "Be helpful"
+    assert result[1]["role"] == "user"
+    assert result[1]["content"] == "Hello {{name}}, your email is {{email}}"
+
+
+def test_overlapping_values_and_partial_matches():
+    """Test longest-first matching for overlaps and partial word matches."""
+    # Test 1: Overlapping values - longest should win
+    instructions = [
+        {
+            "role": "user",
+            "content": [{"text": "The phrase is: AI is cool"}],
+        }
+    ]
+    variables = {"short": "AI", "long": "AI is cool"}
+    result = _extract_chat_template_from_instructions(instructions, variables)
+    assert result[0]["content"] == "The phrase is: {{long}}"
+
+    # Test 2: Partial word matches should work (e.g., "test" inside "testing")
+    instructions = [
+        {
+            "role": "user",
+            "content": [{"text": "We are testing the feature"}],
+        }
+    ]
+    variables = {"action": "test"}
+    result = _extract_chat_template_from_instructions(instructions, variables)
+    assert result[0]["content"] == "We are {{action}}ing the feature"
+
+
+def test_special_characters_and_escaping():
+    """Test that special characters are handled correctly."""
+    instructions = [
+        {
+            "role": "user",
+            "content": [{"text": "The price is $99.99 (plus $5.00 tax)"}],
+        }
+    ]
+    variables = {"price": "$99.99", "tax": "$5.00"}
+
+    result = _extract_chat_template_from_instructions(instructions, variables)
+
+    assert result[0]["content"] == "The price is {{price}} (plus {{tax}} tax)"
+
+
+def test_empty_and_edge_cases():
+    """Test empty variables, empty values, and malformed instructions."""
+    # Empty variables dict
+    instructions = [{"role": "user", "content": [{"text": "No variables"}]}]
+    result = _extract_chat_template_from_instructions(instructions, {})
+    assert result[0]["content"] == "No variables"
+
+    # Empty variable values are skipped
+    instructions = [{"role": "user", "content": [{"text": "Hello world"}]}]
+    result = _extract_chat_template_from_instructions(instructions, {"empty": "", "greeting": "Hello"})
+    assert result[0]["content"] == "{{greeting}} world"
+
+    # Instructions without role or content are skipped
+    instructions = [
+        {"content": [{"text": "No role"}]},
+        {"role": "developer", "content": []},
+        {"role": "user", "content": [{"text": "Valid"}]},
+    ]
+    result = _extract_chat_template_from_instructions(instructions, {})
+    assert len(result) == 1
+    assert result[0]["role"] == "user"
+
+
+def test_response_input_text_objects():
+    """Test handling of ResponseInputText objects with .text attribute."""
+
+    class ResponseInputText:
+        def __init__(self, text):
+            self.text = text
+
+    instructions = [
+        {
+            "role": "user",
+            "content": [
+                {"text": "Part one "},
"Question: What is AI?"}, + ], + } + ] + variables = {"question": ResponseInputText("What is AI?")} + + result = _extract_chat_template_from_instructions(instructions, variables) + + # Also tests that multiple content items are concatenated + assert result[0]["content"] == "Part one Question: {{question}}"