From 777e0d13f21d50da00a4062c054c03b254853bce Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Thu, 18 Sep 2025 23:57:49 -0700 Subject: [PATCH 1/6] Add bedrock token counting. --- newrelic/hooks/external_botocore.py | 219 +++++++++++++++--- .../test_bedrock_chat_completion_converse.py | 53 +---- ...st_bedrock_chat_completion_invoke_model.py | 102 +------- .../test_bedrock_embeddings.py | 43 +--- .../_test_bedrock_chat_completion.py | 30 +++ .../_test_bedrock_embeddings.py | 2 + ...st_bedrock_chat_completion_invoke_model.py | 150 +++++------- .../test_bedrock_embeddings.py | 43 +--- .../test_chat_completion_converse.py | 64 +---- tests/testing_support/ml_testing_utils.py | 26 +++ 10 files changed, 325 insertions(+), 407 deletions(-) diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py index d8c18b49db..748b2e5c94 100644 --- a/newrelic/hooks/external_botocore.py +++ b/newrelic/hooks/external_botocore.py @@ -192,6 +192,7 @@ def create_chat_completion_message_event( request_model, request_id, llm_metadata_dict, + all_token_counts, response_id=None, ): if not transaction: @@ -224,6 +225,8 @@ def create_chat_completion_message_event( "vendor": "bedrock", "ingest_source": "Python", } + if all_token_counts: + chat_completion_message_dict["token_count"] = 0 if settings.ai_monitoring.record_content.enabled: chat_completion_message_dict["content"] = content @@ -263,6 +266,8 @@ def create_chat_completion_message_event( "ingest_source": "Python", "is_response": True, } + if all_token_counts: + chat_completion_message_dict["token_count"] = 0 if settings.ai_monitoring.record_content.enabled: chat_completion_message_dict["content"] = content @@ -272,24 +277,21 @@ def create_chat_completion_message_event( transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_message_dict) -def extract_bedrock_titan_text_model_request(request_body, bedrock_attrs): +def extract_bedrock_titan_embedding_model_request(request_body, bedrock_attrs): request_body = json.loads(request_body) - request_config = request_body.get("textGenerationConfig", {}) - input_message_list = [{"role": "user", "content": request_body.get("inputText")}] - - bedrock_attrs["input_message_list"] = input_message_list - bedrock_attrs["request.max_tokens"] = request_config.get("maxTokenCount") - bedrock_attrs["request.temperature"] = request_config.get("temperature") + bedrock_attrs["input"] = request_body.get("inputText") return bedrock_attrs -def extract_bedrock_mistral_text_model_request(request_body, bedrock_attrs): - request_body = json.loads(request_body) - bedrock_attrs["input_message_list"] = [{"role": "user", "content": request_body.get("prompt")}] - bedrock_attrs["request.max_tokens"] = request_body.get("max_tokens") - bedrock_attrs["request.temperature"] = request_body.get("temperature") +def extract_bedrock_titan_embedding_model_response(response_body, bedrock_attrs): + if response_body: + response_body = json.loads(response_body) + + input_tokens = response_body.get("inputTextTokenCount", 0) + bedrock_attrs["response.usage.total_tokens"] = input_tokens + return bedrock_attrs @@ -297,16 +299,34 @@ def extract_bedrock_titan_text_model_response(response_body, bedrock_attrs): if response_body: response_body = json.loads(response_body) + input_tokens = response_body.get("inputTextTokenCount", 0) + completion_tokens = sum(result["tokenCount"] for result in response_body.get("results", [])) + if not completion_tokens: + completion_tokens = 0 + + total_tokens = input_tokens + completion_tokens 
+ output_message_list = [ {"role": "assistant", "content": result["outputText"]} for result in response_body.get("results", []) ] bedrock_attrs["response.choices.finish_reason"] = response_body["results"][0]["completionReason"] + bedrock_attrs["response.usage.completion_tokens"] = completion_tokens + bedrock_attrs["response.usage.prompt_tokens"] = input_tokens + bedrock_attrs["response.usage.total_tokens"] = total_tokens bedrock_attrs["output_message_list"] = output_message_list return bedrock_attrs +def extract_bedrock_mistral_text_model_request(request_body, bedrock_attrs): + request_body = json.loads(request_body) + bedrock_attrs["input_message_list"] = [{"role": "user", "content": request_body.get("prompt")}] + bedrock_attrs["request.max_tokens"] = request_body.get("max_tokens") + bedrock_attrs["request.temperature"] = request_body.get("temperature") + return bedrock_attrs + + def extract_bedrock_mistral_text_model_response(response_body, bedrock_attrs): if response_body: response_body = json.loads(response_body) @@ -319,17 +339,6 @@ def extract_bedrock_mistral_text_model_response(response_body, bedrock_attrs): return bedrock_attrs -def extract_bedrock_titan_text_model_streaming_response(response_body, bedrock_attrs): - if response_body: - if "outputText" in response_body: - bedrock_attrs["output_message_list"] = messages = bedrock_attrs.get("output_message_list", []) - messages.append({"role": "assistant", "content": response_body["outputText"]}) - - bedrock_attrs["response.choices.finish_reason"] = response_body.get("completionReason", None) - - return bedrock_attrs - - def extract_bedrock_mistral_text_model_streaming_response(response_body, bedrock_attrs): if response_body: outputs = response_body.get("outputs") @@ -342,10 +351,42 @@ def extract_bedrock_mistral_text_model_streaming_response(response_body, bedrock return bedrock_attrs -def extract_bedrock_titan_embedding_model_request(request_body, bedrock_attrs): +def extract_bedrock_titan_text_model_request(request_body, bedrock_attrs): request_body = json.loads(request_body) + request_config = request_body.get("textGenerationConfig", {}) + + input_message_list = [{"role": "user", "content": request_body.get("inputText")}] + + bedrock_attrs["input_message_list"] = input_message_list + bedrock_attrs["request.max_tokens"] = request_config.get("maxTokenCount") + bedrock_attrs["request.temperature"] = request_config.get("temperature") + + return bedrock_attrs - bedrock_attrs["input"] = request_body.get("inputText") + +def extract_bedrock_titan_text_model_streaming_response(response_body, bedrock_attrs): + if response_body: + if "outputText" in response_body: + bedrock_attrs["output_message_list"] = messages = bedrock_attrs.get("output_message_list", []) + messages.append({"role": "assistant", "content": response_body["outputText"]}) + + bedrock_attrs["response.choices.finish_reason"] = response_body.get("completionReason", None) + + # Extract token information + invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) + prompt_tokens = invocation_metrics.get("inputTokenCount", 0) + completion_tokens = invocation_metrics.get("outputTokenCount", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = ( + bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens + ) + bedrock_attrs["response.usage.prompt_tokens"] = ( + bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens + ) + bedrock_attrs["response.usage.total_tokens"] = ( + 
bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens + ) return bedrock_attrs @@ -409,6 +450,17 @@ def extract_bedrock_claude_model_response(response_body, bedrock_attrs): output_message_list = [{"role": role, "content": content}] bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason") bedrock_attrs["output_message_list"] = output_message_list + bedrock_attrs[""] = str(response_body.get("id")) + + # Extract token information + token_usage = response_body.get("usage", {}) + if token_usage: + prompt_tokens = token_usage.get("input_tokens", 0) + completion_tokens = token_usage.get("output_tokens", 0) + total_tokens = prompt_tokens + completion_tokens + bedrock_attrs["response.usage.prompt_tokens"] = prompt_tokens + bedrock_attrs["response.usage.completion_tokens"] = completion_tokens + bedrock_attrs["response.usage.total_tokens"] = total_tokens return bedrock_attrs @@ -420,6 +472,23 @@ def extract_bedrock_claude_model_streaming_response(response_body, bedrock_attrs bedrock_attrs["output_message_list"] = [{"role": "assistant", "content": ""}] bedrock_attrs["output_message_list"][0]["content"] += content bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason") + + # Extract token information + invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) + prompt_tokens = invocation_metrics.get("inputTokenCount", 0) + completion_tokens = invocation_metrics.get("outputTokenCount", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = ( + bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens + ) + bedrock_attrs["response.usage.prompt_tokens"] = ( + bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens + ) + bedrock_attrs["response.usage.total_tokens"] = ( + bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens + ) + return bedrock_attrs @@ -440,6 +509,13 @@ def extract_bedrock_llama_model_response(response_body, bedrock_attrs): response_body = json.loads(response_body) output_message_list = [{"role": "assistant", "content": response_body.get("generation")}] + prompt_tokens = response_body.get("prompt_token_count", 0) + completion_tokens = response_body.get("generation_token_count", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = completion_tokens + bedrock_attrs["response.usage.prompt_tokens"] = prompt_tokens + bedrock_attrs["response.usage.total_tokens"] = total_tokens bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason") bedrock_attrs["output_message_list"] = output_message_list @@ -453,6 +529,22 @@ def extract_bedrock_llama_model_streaming_response(response_body, bedrock_attrs) bedrock_attrs["output_message_list"] = [{"role": "assistant", "content": ""}] bedrock_attrs["output_message_list"][0]["content"] += content bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason") + + # Extract token information + invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) + prompt_tokens = invocation_metrics.get("inputTokenCount", 0) + completion_tokens = invocation_metrics.get("outputTokenCount", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = ( + bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens + ) + bedrock_attrs["response.usage.prompt_tokens"] = ( + 
bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens + ) + bedrock_attrs["response.usage.total_tokens"] = ( + bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens + ) return bedrock_attrs @@ -493,12 +585,33 @@ def extract_bedrock_cohere_model_streaming_response(response_body, bedrock_attrs bedrock_attrs["response.choices.finish_reason"] = response_body["generations"][0]["finish_reason"] bedrock_attrs["response_id"] = str(response_body.get("id")) + # Extract token information + invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) + prompt_tokens = invocation_metrics.get("inputTokenCount", 0) + completion_tokens = invocation_metrics.get("outputTokenCount", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = ( + bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens + ) + bedrock_attrs["response.usage.prompt_tokens"] = ( + bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens + ) + bedrock_attrs["response.usage.total_tokens"] = ( + bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens + ) + return bedrock_attrs NULL_EXTRACTOR = lambda *args: {} # noqa: E731 # Empty extractor that returns nothing MODEL_EXTRACTORS = [ # Order is important here, avoiding dictionaries - ("amazon.titan-embed", extract_bedrock_titan_embedding_model_request, NULL_EXTRACTOR, NULL_EXTRACTOR), + ( + "amazon.titan-embed", + extract_bedrock_titan_embedding_model_request, + extract_bedrock_titan_embedding_model_response, + NULL_EXTRACTOR, + ), ("cohere.embed", extract_bedrock_cohere_embedding_model_request, NULL_EXTRACTOR, NULL_EXTRACTOR), ( "amazon.titan", @@ -801,6 +914,7 @@ def _wrap_bedrock_runtime_converse(wrapped, instance, args, kwargs): try: # For aioboto3 clients, this will call make_api_call instrumentation in external_aiobotocore response = wrapped(*args, **kwargs) + except Exception as exc: handle_bedrock_exception( exc, False, model, span_id, trace_id, request_extractor, {}, ft, transaction, kwargs, is_converse=True @@ -848,6 +962,10 @@ def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, sp for result in response.get("output").get("message").get("content", []) ] + response_prompt_tokens = response.get("usage", {}).get("inputTokens") if response else None + response_completion_tokens = response.get("usage", {}).get("outputTokens") if response else None + response_total_tokens = response.get("usage", {}).get("totalTokens") if response else None + bedrock_attrs = { "request_id": response_headers.get("x-amzn-requestid"), "model": model, @@ -858,6 +976,9 @@ def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, sp "request.max_tokens": kwargs.get("inferenceConfig", {}).get("maxTokens", None), "request.temperature": kwargs.get("inferenceConfig", {}).get("temperature", None), "input_message_list": input_message_list, + "response.usage.prompt_tokens": response_prompt_tokens, + "response.usage.completion_tokens": response_completion_tokens, + "response.usage.total_tokens": response_total_tokens, } return bedrock_attrs @@ -1014,23 +1135,28 @@ def handle_embedding_event(transaction, bedrock_attrs): model = bedrock_attrs.get("model", None) input_ = bedrock_attrs.get("input") + response_total_tokens = bedrock_attrs.get("response.usage.total_tokens", None) + + total_tokens = ( + settings.ai_monitoring.llm_token_count_callback(model, input_) + if settings.ai_monitoring.llm_token_count_callback and input_ + else 
response_total_tokens + ) + embedding_dict = { "vendor": "bedrock", "ingest_source": "Python", "id": embedding_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(model, input_) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "request_id": request_id, "duration": bedrock_attrs.get("duration", None), "request.model": model, "response.model": model, + "response.usage.total_tokens": total_tokens, "error": bedrock_attrs.get("error", None), } + embedding_dict.update(llm_metadata_dict) if settings.ai_monitoring.record_content.enabled: @@ -1041,6 +1167,7 @@ def handle_embedding_event(transaction, bedrock_attrs): def handle_chat_completion_event(transaction, bedrock_attrs): + settings = transaction.settings or global_settings() chat_completion_id = str(uuid.uuid4()) # Grab LLM-related custom attributes off of the transaction to store as metadata on LLM events custom_attrs_dict = transaction._custom_params @@ -1055,12 +1182,35 @@ def handle_chat_completion_event(transaction, bedrock_attrs): response_id = bedrock_attrs.get("response_id", None) model = bedrock_attrs.get("model", None) + response_prompt_tokens = bedrock_attrs.get("response.usage.prompt_tokens", None) + response_completion_tokens = bedrock_attrs.get("response.usage.completion_tokens", None) + response_total_tokens = bedrock_attrs.get("response.usage.total_tokens", None) + input_message_list = bedrock_attrs.get("input_message_list", []) output_message_list = bedrock_attrs.get("output_message_list", []) number_of_messages = ( len(input_message_list) + len(output_message_list) ) or None # If 0, attribute will be set to None and removed + input_message_content = " ".join([msg.get("content", "") for msg in input_message_list if msg.get("content")]) + prompt_tokens = ( + settings.ai_monitoring.llm_token_count_callback(model, input_message_content) + if settings.ai_monitoring.llm_token_count_callback and input_message_content + else response_prompt_tokens + ) + + output_message_content = " ".join([msg.get("content", "") for msg in output_message_list if msg.get("content")]) + completion_tokens = ( + settings.ai_monitoring.llm_token_count_callback(model, output_message_content) + if settings.ai_monitoring.llm_token_count_callback and output_message_content + else response_completion_tokens + ) + total_tokens = ( + prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens + ) + + all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens) + chat_completion_summary_dict = { "vendor": "bedrock", "ingest_source": "Python", @@ -1078,6 +1228,12 @@ def handle_chat_completion_event(transaction, bedrock_attrs): "response.choices.finish_reason": bedrock_attrs.get("response.choices.finish_reason", None), "error": bedrock_attrs.get("error", None), } + + if all_token_counts: + chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens + chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens + chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens + chat_completion_summary_dict.update(llm_metadata_dict) chat_completion_summary_dict = {k: v for k, v in chat_completion_summary_dict.items() if v is not None} transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) @@ -1092,6 +1248,7 @@ def handle_chat_completion_event(transaction, bedrock_attrs): request_model=model, request_id=request_id, 
llm_metadata_dict=llm_metadata_dict, + all_token_counts=all_token_counts, response_id=response_id, ) diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py index da9c5818e7..87dfa1f1b6 100644 --- a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py +++ b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py @@ -17,7 +17,7 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -49,6 +49,9 @@ "duration": None, # Response time varies each test run "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.usage.prompt_tokens": 26, + "response.usage.completion_tokens": 100, + "response.usage.total_tokens": 126, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "max_tokens", @@ -70,6 +73,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -88,6 +92,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -106,6 +111,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -189,7 +195,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model): - @validate_custom_events(add_token_count_to_events(chat_completion_expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(chat_completion_expected_events)) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -476,46 +482,3 @@ def _test(): converse_invalid_model(loop, bedrock_converse_server) _test() - - -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_converse_server, loop, set_trace_info -): - """ - A request is made to the server with invalid credentials. botocore will reach out to the server and receive an - UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer - events. The error response can also be parsed, and will be included as attributes on the recorded exception. 
- """ - - @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion_incorrect_access_key_with_token_count", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count") - def _test(): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - converse_incorrect_access_key(loop, bedrock_converse_server, monkeypatch) - - _test() diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py b/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py index 65cb276c77..159fe9ddda 100644 --- a/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py +++ b/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py @@ -34,7 +34,8 @@ ) from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, + add_token_count_streaming_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -206,7 +207,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_events, expected_metrics): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_counts_to_chat_events((add_token_count_streaming_events(expected_events)))) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=3) @validate_transaction_metrics( @@ -455,51 +456,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token( - monkeypatch, - bedrock_server, - exercise_model, - set_trace_info, - expected_invalid_access_key_error_events, - expected_metrics, -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=expected_metrics, - rollup_metrics=expected_metrics, - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion") - def _test(): - 
monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100) - - _test() - - def invoke_model_malformed_request_body(loop, bedrock_server, response_streaming): async def _coro(): with pytest.raises(_client_error): @@ -798,58 +754,6 @@ async def _test(): loop.run_until_complete(_test()) -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_expected_streaming_error_events)) -@validate_custom_event_count(count=2) -@validate_error_trace_attributes( - _event_stream_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "error.message": "Malformed input request, please reformat your input and try again.", - "error.code": "ValidationException", - }, - }, - forgone_params={"agent": (), "intrinsic": (), "user": ("http.statusCode")}, -) -@validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - rollup_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, -) -@background_task(name="test_bedrock_chat_completion") -def test_bedrock_chat_completion_error_streaming_exception_with_token_count(loop, bedrock_server, set_trace_info): - """ - Duplicate of test_bedrock_chat_completion_error_streaming_exception, but with token callback being set. - - See the original test for a description of the error case. 
- """ - - async def _test(): - with pytest.raises(_event_stream_error): - model = "amazon.titan-text-express-v1" - body = (chat_completion_payload_templates[model] % ("Streaming Exception", 0.7, 100)).encode("utf-8") - - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - response = await bedrock_server.invoke_model_with_response_stream( - body=body, modelId=model, accept="application/json", contentType="application/json" - ) - - body = response.get("body") - async for resp in body: - assert resp - - loop.run_until_complete(_test()) - - def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server): assert bedrock_server._nr_wrapped diff --git a/tests/external_aiobotocore/test_bedrock_embeddings.py b/tests/external_aiobotocore/test_bedrock_embeddings.py index 96b930feb5..dacfbb4eed 100644 --- a/tests/external_aiobotocore/test_bedrock_embeddings.py +++ b/tests/external_aiobotocore/test_bedrock_embeddings.py @@ -27,7 +27,7 @@ ) from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -164,7 +164,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_embedding_with_token_count(set_trace_info, exercise_model, expected_events): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_count_to_embedding_events(expected_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_bedrock_embedding", @@ -289,45 +289,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_embedding_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_server, exercise_model, set_trace_info, expected_invalid_access_key_error_events -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_embedding", - scoped_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - rollup_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_embedding") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token") - - _test() - - @reset_core_stats_engine() @validate_custom_events(embedding_expected_malformed_request_body_events) @validate_custom_event_count(count=1) diff --git a/tests/external_botocore/_test_bedrock_chat_completion.py 
b/tests/external_botocore/_test_bedrock_chat_completion.py index 155b6c993c..6b65af8cb2 100644 --- a/tests/external_botocore/_test_bedrock_chat_completion.py +++ b/tests/external_botocore/_test_bedrock_chat_completion.py @@ -97,6 +97,9 @@ "duration": None, # Response time varies each test run "request.model": "amazon.titan-text-express-v1", "response.model": "amazon.titan-text-express-v1", + "response.usage.completion_tokens": 32, + "response.usage.total_tokens": 44, + "response.usage.prompt_tokens": 12, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "FINISH", @@ -118,6 +121,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -136,6 +140,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -335,6 +340,9 @@ "duration": None, # Response time varies each test run "request.model": "meta.llama2-13b-chat-v1", "response.model": "meta.llama2-13b-chat-v1", + "response.usage.prompt_tokens": 17, + "response.usage.completion_tokens": 69, + "response.usage.total_tokens": 86, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "stop", @@ -356,6 +364,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -374,6 +383,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -919,6 +929,9 @@ "duration": None, # Response time varies each test run "request.model": "amazon.titan-text-express-v1", "response.model": "amazon.titan-text-express-v1", + "response.usage.completion_tokens": 35, + "response.usage.total_tokens": 47, + "response.usage.prompt_tokens": 12, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "FINISH", @@ -940,6 +953,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -958,6 +972,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -978,6 +993,9 @@ "duration": None, # Response time varies each test run "request.model": "anthropic.claude-instant-v1", "response.model": "anthropic.claude-instant-v1", + "response.usage.completion_tokens": 99, + "response.usage.prompt_tokens": 19, + "response.usage.total_tokens": 118, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "stop_sequence", @@ -999,6 +1017,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "anthropic.claude-instant-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1017,6 +1036,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "anthropic.claude-instant-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1038,6 +1058,9 @@ "duration": None, # Response time varies each test run "request.model": "cohere.command-text-v14", "response.model": "cohere.command-text-v14", + "response.usage.completion_tokens": 91, + 
"response.usage.total_tokens": 100, + "response.usage.prompt_tokens": 9, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "COMPLETE", @@ -1059,6 +1082,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "cohere.command-text-v14", "vendor": "bedrock", "ingest_source": "Python", @@ -1077,6 +1101,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "cohere.command-text-v14", "vendor": "bedrock", "ingest_source": "Python", @@ -1097,6 +1122,9 @@ "duration": None, # Response time varies each test run "request.model": "meta.llama2-13b-chat-v1", "response.model": "meta.llama2-13b-chat-v1", + "response.usage.prompt_tokens": 17, + "response.usage.completion_tokens": 100, + "response.usage.total_tokens": 117, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "length", @@ -1118,6 +1146,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1136,6 +1165,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", diff --git a/tests/external_botocore/_test_bedrock_embeddings.py b/tests/external_botocore/_test_bedrock_embeddings.py index f5c227b9c3..af544af001 100644 --- a/tests/external_botocore/_test_bedrock_embeddings.py +++ b/tests/external_botocore/_test_bedrock_embeddings.py @@ -33,6 +33,7 @@ "response.model": "amazon.titan-embed-text-v1", "request.model": "amazon.titan-embed-text-v1", "request_id": "11233989-07e8-4ecb-9ba6-79601ba6d8cc", + "response.usage.total_tokens": 6, "vendor": "bedrock", "ingest_source": "Python", }, @@ -52,6 +53,7 @@ "response.model": "amazon.titan-embed-g1-text-02", "request.model": "amazon.titan-embed-g1-text-02", "request_id": "b10ac895-eae3-4f07-b926-10b2866c55ed", + "response.usage.total_tokens": 6, "vendor": "bedrock", "ingest_source": "Python", }, diff --git a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py index 94a88e7a56..9d005e46c5 100644 --- a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py +++ b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ import json import os from io import BytesIO @@ -35,7 +36,8 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, + add_token_count_streaming_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -129,6 +131,14 @@ def expected_events(model_id, response_streaming): return chat_completion_expected_events[model_id] +@pytest.fixture(scope="module") +def expected_events(model_id, response_streaming): + if response_streaming: + return chat_completion_streaming_expected_events[model_id] + else: + return chat_completion_expected_events[model_id] + + @pytest.fixture(scope="module") def expected_metrics(response_streaming): if response_streaming: @@ -200,7 +210,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_events, expected_metrics): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(add_token_count_streaming_events(expected_events))) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=3) @validate_transaction_metrics( @@ -438,49 +448,50 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token( - monkeypatch, - bedrock_server, - exercise_model, - set_trace_info, - expected_invalid_access_key_error_events, - expected_metrics, -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=expected_metrics, - rollup_metrics=expected_metrics, - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100) - - _test() +# +# @reset_core_stats_engine() +# @override_llm_token_callback_settings(llm_token_count_callback) +# def test_bedrock_chat_completion_error_incorrect_access_key_with_token( +# monkeypatch, +# bedrock_server, +# exercise_model, +# set_trace_info, +# expected_invalid_access_key_error_events, +# expected_metrics, +# ): +# @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) +# @validate_error_trace_attributes( +# _client_error_name, +# exact_attrs={ +# "agent": {}, +# "intrinsic": {}, 
+# "user": { +# "http.statusCode": 403, +# "error.message": "The security token included in the request is invalid.", +# "error.code": "UnrecognizedClientException", +# }, +# }, +# ) +# @validate_transaction_metrics( +# name="test_bedrock_chat_completion", +# scoped_metrics=expected_metrics, +# rollup_metrics=expected_metrics, +# custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], +# background_task=True, +# ) +# @background_task(name="test_bedrock_chat_completion") +# def _test(): +# monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") +# +# with pytest.raises(_client_error): # not sure where this exception actually comes from +# set_trace_info() +# add_custom_attribute("llm.conversation_id", "my-awesome-id") +# add_custom_attribute("llm.foo", "bar") +# add_custom_attribute("non_llm_attr", "python-agent") +# +# exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100) +# +# _test() @reset_core_stats_engine() @@ -762,55 +773,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_streaming_exception_with_token_count(bedrock_server, set_trace_info): - """ - Duplicate of test_bedrock_chat_completion_error_streaming_exception, but with token callback being set. - - See the original test for a description of the error case. - """ - - @validate_custom_events(add_token_count_to_events(chat_completion_expected_streaming_error_events)) - @validate_custom_event_count(count=2) - @validate_error_trace_attributes( - _event_stream_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "error.message": "Malformed input request, please reformat your input and try again.", - "error.code": "ValidationException", - }, - }, - forgone_params={"agent": (), "intrinsic": (), "user": ("http.statusCode")}, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - rollup_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion") - def _test(): - with pytest.raises(_event_stream_error): - model = "amazon.titan-text-express-v1" - body = (chat_completion_payload_templates[model] % ("Streaming Exception", 0.7, 100)).encode("utf-8") - - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - response = bedrock_server.invoke_model_with_response_stream( - body=body, modelId=model, accept="application/json", contentType="application/json" - ) - list(response["body"]) # Iterate - - _test() - - def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server): assert bedrock_server._nr_wrapped diff --git a/tests/external_botocore/test_bedrock_embeddings.py b/tests/external_botocore/test_bedrock_embeddings.py index 417e24b2d9..de2cb201e7 100644 --- a/tests/external_botocore/test_bedrock_embeddings.py +++ b/tests/external_botocore/test_bedrock_embeddings.py @@ -28,7 +28,7 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - 
add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -161,7 +161,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_embedding_with_token_count(set_trace_info, exercise_model, expected_events): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_count_to_embedding_events(expected_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_bedrock_embedding", @@ -286,45 +286,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_embedding_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_server, exercise_model, set_trace_info, expected_invalid_access_key_error_events -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_embedding", - scoped_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - rollup_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_embedding") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token") - - _test() - - @reset_core_stats_engine() def test_bedrock_embedding_error_malformed_request_body(bedrock_server, set_trace_info): """ diff --git a/tests/external_botocore/test_chat_completion_converse.py b/tests/external_botocore/test_chat_completion_converse.py index 96ead41dd7..2d38d6b4a4 100644 --- a/tests/external_botocore/test_chat_completion_converse.py +++ b/tests/external_botocore/test_chat_completion_converse.py @@ -17,7 +17,7 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -49,6 +49,9 @@ "duration": None, # Response time varies each test run "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.usage.prompt_tokens": 26, + "response.usage.completion_tokens": 100, + "response.usage.total_tokens": 126, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "max_tokens", @@ -70,6 +73,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -88,6 +92,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 
0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -106,6 +111,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -185,7 +191,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model): - @validate_custom_events(add_token_count_to_events(chat_completion_expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(chat_completion_expected_events)) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -468,57 +474,3 @@ def _test(): assert response _test() - - -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_converse_server, exercise_model, set_trace_info -): - """ - A request is made to the server with invalid credentials. botocore will reach out to the server and receive an - UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer - events. The error response can also be parsed, and will be included as attributes on the recorded exception. - """ - - @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion_incorrect_access_key_with_token_count", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count") - def _test(): - monkeypatch.setattr(bedrock_converse_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - message = [{"role": "user", "content": [{"text": "Invalid Token"}]}] - - response = bedrock_converse_server.converse( - modelId="anthropic.claude-3-sonnet-20240229-v1:0", - messages=message, - inferenceConfig={"temperature": 0.7, "maxTokens": 100}, - ) - - assert response - - _test() diff --git a/tests/testing_support/ml_testing_utils.py b/tests/testing_support/ml_testing_utils.py index 0e7307bfb0..5df269b7a0 100644 --- a/tests/testing_support/ml_testing_utils.py +++ b/tests/testing_support/ml_testing_utils.py @@ -37,6 +37,32 @@ def add_token_count_to_events(expected_events): return events +def add_token_count_to_embedding_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmEmbedding": + event[1]["response.usage.total_tokens"] = 105 + return events + + +def 
add_token_count_streaming_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmChatCompletionMessage": + event[1]["token_count"] = 0 + return events + + +def add_token_counts_to_chat_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmChatCompletionSummary": + event[1]["response.usage.prompt_tokens"] = 105 + event[1]["response.usage.completion_tokens"] = 105 + event[1]["response.usage.total_tokens"] = 210 + return events + + def events_sans_content(event): new_event = copy.deepcopy(event) for _event in new_event: From 7b415dc6c36fb9f12cf21e15e258af2924dd5244 Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Tue, 7 Oct 2025 17:32:41 +0000 Subject: [PATCH 2/6] [MegaLinter] Apply linters fixes --- .../test_bedrock_chat_completion_invoke_model.py | 4 ++-- .../test_bedrock_chat_completion_invoke_model.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py b/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py index 159fe9ddda..e3a897d0c8 100644 --- a/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py +++ b/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py @@ -34,8 +34,8 @@ ) from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_counts_to_chat_events, add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -207,7 +207,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_events, expected_metrics): - @validate_custom_events(add_token_counts_to_chat_events((add_token_count_streaming_events(expected_events)))) + @validate_custom_events(add_token_counts_to_chat_events(add_token_count_streaming_events(expected_events))) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=3) @validate_transaction_metrics( diff --git a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py index 9d005e46c5..7a471b950e 100644 --- a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py +++ b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py @@ -36,8 +36,8 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_counts_to_chat_events, add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, From 47202b1450166992af09689eb47cebbf956c55f1 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Thu, 18 Sep 2025 23:57:49 -0700 Subject: [PATCH 3/6] Add bedrock token counting. 
---
 newrelic/hooks/external_botocore.py           | 219 +++++++++++++++---
 .../test_bedrock_chat_completion_converse.py  |  53 +----
 ...st_bedrock_chat_completion_invoke_model.py | 102 +-------
 .../test_bedrock_embeddings.py                |  43 +---
 .../_test_bedrock_chat_completion.py          |  30 +++
 .../_test_bedrock_embeddings.py               |   2 +
 ...st_bedrock_chat_completion_invoke_model.py | 150 +++++-------
 .../test_bedrock_embeddings.py                |  43 +---
 .../test_chat_completion_converse.py          |  64 +----
 9 files changed, 299 insertions(+), 407 deletions(-)
extract_bedrock_cohere_model_streaming_response(response_body, bedrock_attrs bedrock_attrs["response.choices.finish_reason"] = response_body["generations"][0]["finish_reason"] bedrock_attrs["response_id"] = str(response_body.get("id")) + # Extract token information + invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) + prompt_tokens = invocation_metrics.get("inputTokenCount", 0) + completion_tokens = invocation_metrics.get("outputTokenCount", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = ( + bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens + ) + bedrock_attrs["response.usage.prompt_tokens"] = ( + bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens + ) + bedrock_attrs["response.usage.total_tokens"] = ( + bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens + ) + return bedrock_attrs NULL_EXTRACTOR = lambda *args: {} # noqa: E731 # Empty extractor that returns nothing MODEL_EXTRACTORS = [ # Order is important here, avoiding dictionaries - ("amazon.titan-embed", extract_bedrock_titan_embedding_model_request, NULL_EXTRACTOR, NULL_EXTRACTOR), + ( + "amazon.titan-embed", + extract_bedrock_titan_embedding_model_request, + extract_bedrock_titan_embedding_model_response, + NULL_EXTRACTOR, + ), ("cohere.embed", extract_bedrock_cohere_embedding_model_request, NULL_EXTRACTOR, NULL_EXTRACTOR), ( "amazon.titan", @@ -801,6 +914,7 @@ def _wrap_bedrock_runtime_converse(wrapped, instance, args, kwargs): try: # For aioboto3 clients, this will call make_api_call instrumentation in external_aiobotocore response = wrapped(*args, **kwargs) + except Exception as exc: handle_bedrock_exception( exc, False, model, span_id, trace_id, request_extractor, {}, ft, transaction, kwargs, is_converse=True @@ -848,6 +962,10 @@ def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, sp for result in response.get("output").get("message").get("content", []) ] + response_prompt_tokens = response.get("usage", {}).get("inputTokens") if response else None + response_completion_tokens = response.get("usage", {}).get("outputTokens") if response else None + response_total_tokens = response.get("usage", {}).get("totalTokens") if response else None + bedrock_attrs = { "request_id": response_headers.get("x-amzn-requestid"), "model": model, @@ -858,6 +976,9 @@ def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, sp "request.max_tokens": kwargs.get("inferenceConfig", {}).get("maxTokens", None), "request.temperature": kwargs.get("inferenceConfig", {}).get("temperature", None), "input_message_list": input_message_list, + "response.usage.prompt_tokens": response_prompt_tokens, + "response.usage.completion_tokens": response_completion_tokens, + "response.usage.total_tokens": response_total_tokens, } return bedrock_attrs @@ -1014,23 +1135,28 @@ def handle_embedding_event(transaction, bedrock_attrs): model = bedrock_attrs.get("model", None) input_ = bedrock_attrs.get("input") + response_total_tokens = bedrock_attrs.get("response.usage.total_tokens", None) + + total_tokens = ( + settings.ai_monitoring.llm_token_count_callback(model, input_) + if settings.ai_monitoring.llm_token_count_callback and input_ + else response_total_tokens + ) + embedding_dict = { "vendor": "bedrock", "ingest_source": "Python", "id": embedding_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(model, input_) - if 
settings.ai_monitoring.llm_token_count_callback - else None - ), "request_id": request_id, "duration": bedrock_attrs.get("duration", None), "request.model": model, "response.model": model, + "response.usage.total_tokens": total_tokens, "error": bedrock_attrs.get("error", None), } + embedding_dict.update(llm_metadata_dict) if settings.ai_monitoring.record_content.enabled: @@ -1041,6 +1167,7 @@ def handle_embedding_event(transaction, bedrock_attrs): def handle_chat_completion_event(transaction, bedrock_attrs): + settings = transaction.settings or global_settings() chat_completion_id = str(uuid.uuid4()) # Grab LLM-related custom attributes off of the transaction to store as metadata on LLM events custom_attrs_dict = transaction._custom_params @@ -1055,12 +1182,35 @@ def handle_chat_completion_event(transaction, bedrock_attrs): response_id = bedrock_attrs.get("response_id", None) model = bedrock_attrs.get("model", None) + response_prompt_tokens = bedrock_attrs.get("response.usage.prompt_tokens", None) + response_completion_tokens = bedrock_attrs.get("response.usage.completion_tokens", None) + response_total_tokens = bedrock_attrs.get("response.usage.total_tokens", None) + input_message_list = bedrock_attrs.get("input_message_list", []) output_message_list = bedrock_attrs.get("output_message_list", []) number_of_messages = ( len(input_message_list) + len(output_message_list) ) or None # If 0, attribute will be set to None and removed + input_message_content = " ".join([msg.get("content", "") for msg in input_message_list if msg.get("content")]) + prompt_tokens = ( + settings.ai_monitoring.llm_token_count_callback(model, input_message_content) + if settings.ai_monitoring.llm_token_count_callback and input_message_content + else response_prompt_tokens + ) + + output_message_content = " ".join([msg.get("content", "") for msg in output_message_list if msg.get("content")]) + completion_tokens = ( + settings.ai_monitoring.llm_token_count_callback(model, output_message_content) + if settings.ai_monitoring.llm_token_count_callback and output_message_content + else response_completion_tokens + ) + total_tokens = ( + prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens + ) + + all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens) + chat_completion_summary_dict = { "vendor": "bedrock", "ingest_source": "Python", @@ -1078,6 +1228,12 @@ def handle_chat_completion_event(transaction, bedrock_attrs): "response.choices.finish_reason": bedrock_attrs.get("response.choices.finish_reason", None), "error": bedrock_attrs.get("error", None), } + + if all_token_counts: + chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens + chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens + chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens + chat_completion_summary_dict.update(llm_metadata_dict) chat_completion_summary_dict = {k: v for k, v in chat_completion_summary_dict.items() if v is not None} transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) @@ -1092,6 +1248,7 @@ def handle_chat_completion_event(transaction, bedrock_attrs): request_model=model, request_id=request_id, llm_metadata_dict=llm_metadata_dict, + all_token_counts=all_token_counts, response_id=response_id, ) diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py index 
da9c5818e7..87dfa1f1b6 100644 --- a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py +++ b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py @@ -17,7 +17,7 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -49,6 +49,9 @@ "duration": None, # Response time varies each test run "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.usage.prompt_tokens": 26, + "response.usage.completion_tokens": 100, + "response.usage.total_tokens": 126, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "max_tokens", @@ -70,6 +73,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -88,6 +92,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -106,6 +111,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -189,7 +195,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model): - @validate_custom_events(add_token_count_to_events(chat_completion_expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(chat_completion_expected_events)) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -476,46 +482,3 @@ def _test(): converse_invalid_model(loop, bedrock_converse_server) _test() - - -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_converse_server, loop, set_trace_info -): - """ - A request is made to the server with invalid credentials. botocore will reach out to the server and receive an - UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer - events. The error response can also be parsed, and will be included as attributes on the recorded exception. 
- """ - - @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion_incorrect_access_key_with_token_count", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count") - def _test(): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - converse_incorrect_access_key(loop, bedrock_converse_server, monkeypatch) - - _test() diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py b/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py index 65cb276c77..159fe9ddda 100644 --- a/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py +++ b/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py @@ -34,7 +34,8 @@ ) from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, + add_token_count_streaming_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -206,7 +207,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_events, expected_metrics): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_counts_to_chat_events((add_token_count_streaming_events(expected_events)))) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=3) @validate_transaction_metrics( @@ -455,51 +456,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token( - monkeypatch, - bedrock_server, - exercise_model, - set_trace_info, - expected_invalid_access_key_error_events, - expected_metrics, -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=expected_metrics, - rollup_metrics=expected_metrics, - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion") - def _test(): - 
monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100) - - _test() - - def invoke_model_malformed_request_body(loop, bedrock_server, response_streaming): async def _coro(): with pytest.raises(_client_error): @@ -798,58 +754,6 @@ async def _test(): loop.run_until_complete(_test()) -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_expected_streaming_error_events)) -@validate_custom_event_count(count=2) -@validate_error_trace_attributes( - _event_stream_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "error.message": "Malformed input request, please reformat your input and try again.", - "error.code": "ValidationException", - }, - }, - forgone_params={"agent": (), "intrinsic": (), "user": ("http.statusCode")}, -) -@validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - rollup_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, -) -@background_task(name="test_bedrock_chat_completion") -def test_bedrock_chat_completion_error_streaming_exception_with_token_count(loop, bedrock_server, set_trace_info): - """ - Duplicate of test_bedrock_chat_completion_error_streaming_exception, but with token callback being set. - - See the original test for a description of the error case. 
- """ - - async def _test(): - with pytest.raises(_event_stream_error): - model = "amazon.titan-text-express-v1" - body = (chat_completion_payload_templates[model] % ("Streaming Exception", 0.7, 100)).encode("utf-8") - - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - response = await bedrock_server.invoke_model_with_response_stream( - body=body, modelId=model, accept="application/json", contentType="application/json" - ) - - body = response.get("body") - async for resp in body: - assert resp - - loop.run_until_complete(_test()) - - def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server): assert bedrock_server._nr_wrapped diff --git a/tests/external_aiobotocore/test_bedrock_embeddings.py b/tests/external_aiobotocore/test_bedrock_embeddings.py index 96b930feb5..dacfbb4eed 100644 --- a/tests/external_aiobotocore/test_bedrock_embeddings.py +++ b/tests/external_aiobotocore/test_bedrock_embeddings.py @@ -27,7 +27,7 @@ ) from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -164,7 +164,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_embedding_with_token_count(set_trace_info, exercise_model, expected_events): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_count_to_embedding_events(expected_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_bedrock_embedding", @@ -289,45 +289,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_embedding_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_server, exercise_model, set_trace_info, expected_invalid_access_key_error_events -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_embedding", - scoped_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - rollup_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_embedding") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token") - - _test() - - @reset_core_stats_engine() @validate_custom_events(embedding_expected_malformed_request_body_events) @validate_custom_event_count(count=1) diff --git a/tests/external_botocore/_test_bedrock_chat_completion.py 
b/tests/external_botocore/_test_bedrock_chat_completion.py index 155b6c993c..6b65af8cb2 100644 --- a/tests/external_botocore/_test_bedrock_chat_completion.py +++ b/tests/external_botocore/_test_bedrock_chat_completion.py @@ -97,6 +97,9 @@ "duration": None, # Response time varies each test run "request.model": "amazon.titan-text-express-v1", "response.model": "amazon.titan-text-express-v1", + "response.usage.completion_tokens": 32, + "response.usage.total_tokens": 44, + "response.usage.prompt_tokens": 12, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "FINISH", @@ -118,6 +121,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -136,6 +140,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -335,6 +340,9 @@ "duration": None, # Response time varies each test run "request.model": "meta.llama2-13b-chat-v1", "response.model": "meta.llama2-13b-chat-v1", + "response.usage.prompt_tokens": 17, + "response.usage.completion_tokens": 69, + "response.usage.total_tokens": 86, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "stop", @@ -356,6 +364,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -374,6 +383,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -919,6 +929,9 @@ "duration": None, # Response time varies each test run "request.model": "amazon.titan-text-express-v1", "response.model": "amazon.titan-text-express-v1", + "response.usage.completion_tokens": 35, + "response.usage.total_tokens": 47, + "response.usage.prompt_tokens": 12, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "FINISH", @@ -940,6 +953,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -958,6 +972,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -978,6 +993,9 @@ "duration": None, # Response time varies each test run "request.model": "anthropic.claude-instant-v1", "response.model": "anthropic.claude-instant-v1", + "response.usage.completion_tokens": 99, + "response.usage.prompt_tokens": 19, + "response.usage.total_tokens": 118, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "stop_sequence", @@ -999,6 +1017,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "anthropic.claude-instant-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1017,6 +1036,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "anthropic.claude-instant-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1038,6 +1058,9 @@ "duration": None, # Response time varies each test run "request.model": "cohere.command-text-v14", "response.model": "cohere.command-text-v14", + "response.usage.completion_tokens": 91, + 
"response.usage.total_tokens": 100, + "response.usage.prompt_tokens": 9, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "COMPLETE", @@ -1059,6 +1082,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "cohere.command-text-v14", "vendor": "bedrock", "ingest_source": "Python", @@ -1077,6 +1101,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "cohere.command-text-v14", "vendor": "bedrock", "ingest_source": "Python", @@ -1097,6 +1122,9 @@ "duration": None, # Response time varies each test run "request.model": "meta.llama2-13b-chat-v1", "response.model": "meta.llama2-13b-chat-v1", + "response.usage.prompt_tokens": 17, + "response.usage.completion_tokens": 100, + "response.usage.total_tokens": 117, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "length", @@ -1118,6 +1146,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1136,6 +1165,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", diff --git a/tests/external_botocore/_test_bedrock_embeddings.py b/tests/external_botocore/_test_bedrock_embeddings.py index f5c227b9c3..af544af001 100644 --- a/tests/external_botocore/_test_bedrock_embeddings.py +++ b/tests/external_botocore/_test_bedrock_embeddings.py @@ -33,6 +33,7 @@ "response.model": "amazon.titan-embed-text-v1", "request.model": "amazon.titan-embed-text-v1", "request_id": "11233989-07e8-4ecb-9ba6-79601ba6d8cc", + "response.usage.total_tokens": 6, "vendor": "bedrock", "ingest_source": "Python", }, @@ -52,6 +53,7 @@ "response.model": "amazon.titan-embed-g1-text-02", "request.model": "amazon.titan-embed-g1-text-02", "request_id": "b10ac895-eae3-4f07-b926-10b2866c55ed", + "response.usage.total_tokens": 6, "vendor": "bedrock", "ingest_source": "Python", }, diff --git a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py index 94a88e7a56..9d005e46c5 100644 --- a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py +++ b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ import json import os from io import BytesIO @@ -35,7 +36,8 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -129,6 +131,9 @@ def expected_events(model_id, response_streaming): - return chat_completion_expected_events[model_id] + if response_streaming: + return chat_completion_streaming_expected_events[model_id] + else: + return chat_completion_expected_events[model_id] @pytest.fixture(scope="module") def expected_metrics(response_streaming): if response_streaming: @@ -200,7 +205,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_events, expected_metrics): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(add_token_count_streaming_events(expected_events))) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=3) @validate_transaction_metrics( @@ -438,49 +443,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token( - monkeypatch, - bedrock_server, - exercise_model, - set_trace_info, - expected_invalid_access_key_error_events, - expected_metrics, -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=expected_metrics, - rollup_metrics=expected_metrics, - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100) - - _test()
@reset_core_stats_engine() @@ -762,55 +773,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_streaming_exception_with_token_count(bedrock_server, set_trace_info): - """ - Duplicate of test_bedrock_chat_completion_error_streaming_exception, but with token callback being set. - - See the original test for a description of the error case. - """ - - @validate_custom_events(add_token_count_to_events(chat_completion_expected_streaming_error_events)) - @validate_custom_event_count(count=2) - @validate_error_trace_attributes( - _event_stream_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "error.message": "Malformed input request, please reformat your input and try again.", - "error.code": "ValidationException", - }, - }, - forgone_params={"agent": (), "intrinsic": (), "user": ("http.statusCode")}, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - rollup_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion") - def _test(): - with pytest.raises(_event_stream_error): - model = "amazon.titan-text-express-v1" - body = (chat_completion_payload_templates[model] % ("Streaming Exception", 0.7, 100)).encode("utf-8") - - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - response = bedrock_server.invoke_model_with_response_stream( - body=body, modelId=model, accept="application/json", contentType="application/json" - ) - list(response["body"]) # Iterate - - _test() def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server): assert bedrock_server._nr_wrapped diff --git a/tests/external_botocore/test_bedrock_embeddings.py b/tests/external_botocore/test_bedrock_embeddings.py index 417e24b2d9..de2cb201e7 100644 --- a/tests/external_botocore/test_bedrock_embeddings.py +++ b/tests/external_botocore/test_bedrock_embeddings.py @@ -28,7 +28,7 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( -
add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -161,7 +161,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_embedding_with_token_count(set_trace_info, exercise_model, expected_events): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_count_to_embedding_events(expected_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_bedrock_embedding", @@ -286,45 +286,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_embedding_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_server, exercise_model, set_trace_info, expected_invalid_access_key_error_events -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_embedding", - scoped_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - rollup_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_embedding") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token") - - _test() - - @reset_core_stats_engine() def test_bedrock_embedding_error_malformed_request_body(bedrock_server, set_trace_info): """ diff --git a/tests/external_botocore/test_chat_completion_converse.py b/tests/external_botocore/test_chat_completion_converse.py index 96ead41dd7..2d38d6b4a4 100644 --- a/tests/external_botocore/test_chat_completion_converse.py +++ b/tests/external_botocore/test_chat_completion_converse.py @@ -17,7 +17,7 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -49,6 +49,9 @@ "duration": None, # Response time varies each test run "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.usage.prompt_tokens": 26, + "response.usage.completion_tokens": 100, + "response.usage.total_tokens": 126, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "max_tokens", @@ -70,6 +73,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -88,6 +92,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 
0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -106,6 +111,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -185,7 +191,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model): - @validate_custom_events(add_token_count_to_events(chat_completion_expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(chat_completion_expected_events)) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -468,57 +474,3 @@ def _test(): assert response _test() - - -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_converse_server, exercise_model, set_trace_info -): - """ - A request is made to the server with invalid credentials. botocore will reach out to the server and receive an - UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer - events. The error response can also be parsed, and will be included as attributes on the recorded exception. - """ - - @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion_incorrect_access_key_with_token_count", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count") - def _test(): - monkeypatch.setattr(bedrock_converse_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - message = [{"role": "user", "content": [{"text": "Invalid Token"}]}] - - response = bedrock_converse_server.converse( - modelId="anthropic.claude-3-sonnet-20240229-v1:0", - messages=message, - inferenceConfig={"temperature": 0.7, "maxTokens": 100}, - ) - - assert response - - _test() From 8109abc01cc52a39fc2b614e2dbbfbd99d643244 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Tue, 7 Oct 2025 14:27:17 -0700 Subject: [PATCH 4/6] Add safeguards when grabbing token counts. 
--- newrelic/hooks/external_botocore.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py index 748b2e5c94..a11064ac36 100644 --- a/newrelic/hooks/external_botocore.py +++ b/newrelic/hooks/external_botocore.py @@ -300,14 +300,11 @@ def extract_bedrock_titan_text_model_response(response_body, bedrock_attrs): response_body = json.loads(response_body) input_tokens = response_body.get("inputTextTokenCount", 0) - completion_tokens = sum(result["tokenCount"] for result in response_body.get("results", [])) - if not completion_tokens: - completion_tokens = 0 - + completion_tokens = sum(result.get("tokenCount", 0) for result in response_body.get("results", [])) total_tokens = input_tokens + completion_tokens output_message_list = [ - {"role": "assistant", "content": result["outputText"]} for result in response_body.get("results", []) + {"role": "assistant", "content": result.get("outputText")} for result in response_body.get("results", []) ] bedrock_attrs["response.choices.finish_reason"] = response_body["results"][0]["completionReason"] From 74198ea6927a37961eb6dd72d6969117778303b5 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Mon, 10 Nov 2025 09:51:21 -0800 Subject: [PATCH 5/6] Remove extra None defaults. --- newrelic/hooks/external_botocore.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py index a11064ac36..08497fe50d 100644 --- a/newrelic/hooks/external_botocore.py +++ b/newrelic/hooks/external_botocore.py @@ -1132,7 +1132,7 @@ def handle_embedding_event(transaction, bedrock_attrs): model = bedrock_attrs.get("model", None) input_ = bedrock_attrs.get("input") - response_total_tokens = bedrock_attrs.get("response.usage.total_tokens", None) + response_total_tokens = bedrock_attrs.get("response.usage.total_tokens") total_tokens = ( settings.ai_monitoring.llm_token_count_callback(model, input_) @@ -1179,9 +1179,9 @@ def handle_chat_completion_event(transaction, bedrock_attrs): response_id = bedrock_attrs.get("response_id", None) model = bedrock_attrs.get("model", None) - response_prompt_tokens = bedrock_attrs.get("response.usage.prompt_tokens", None) - response_completion_tokens = bedrock_attrs.get("response.usage.completion_tokens", None) - response_total_tokens = bedrock_attrs.get("response.usage.total_tokens", None) + response_prompt_tokens = bedrock_attrs.get("response.usage.prompt_tokens") + response_completion_tokens = bedrock_attrs.get("response.usage.completion_tokens") + response_total_tokens = bedrock_attrs.get("response.usage.total_tokens") input_message_list = bedrock_attrs.get("input_message_list", []) output_message_list = bedrock_attrs.get("output_message_list", []) From 81cfd2ad881bbb02067e04ffdaedca4d4dd96326 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Tue, 11 Nov 2025 14:37:22 -0800 Subject: [PATCH 6/6] Cleanup default None checks. 
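
dict.get(key) already returns None when the key is missing, so the explicit
None default is redundant and only adds noise. A quick illustration of the
equivalence (plain Python semantics, nothing agent-specific):

    bedrock_attrs = {}
    assert bedrock_attrs.get("span_id") is None
    assert bedrock_attrs.get("span_id") == bedrock_attrs.get("span_id", None)
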
--- newrelic/hooks/external_botocore.py | 48 ++++++++++++++--------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py index 08497fe50d..a3da091284 100644 --- a/newrelic/hooks/external_botocore.py +++ b/newrelic/hooks/external_botocore.py @@ -344,7 +344,7 @@ def extract_bedrock_mistral_text_model_streaming_response(response_body, bedrock "output_message_list", [{"role": "assistant", "content": ""}] ) bedrock_attrs["output_message_list"][0]["content"] += outputs[0].get("text", "") - bedrock_attrs["response.choices.finish_reason"] = outputs[0].get("stop_reason", None) + bedrock_attrs["response.choices.finish_reason"] = outputs[0].get("stop_reason") return bedrock_attrs @@ -367,7 +367,7 @@ def extract_bedrock_titan_text_model_streaming_response(response_body, bedrock_a bedrock_attrs["output_message_list"] = messages = bedrock_attrs.get("output_message_list", []) messages.append({"role": "assistant", "content": response_body["outputText"]}) - bedrock_attrs["response.choices.finish_reason"] = response_body.get("completionReason", None) + bedrock_attrs["response.choices.finish_reason"] = response_body.get("completionReason") # Extract token information invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) @@ -660,8 +660,8 @@ def handle_bedrock_exception( input_message_list = [] bedrock_attrs["input_message_list"] = input_message_list - bedrock_attrs["request.max_tokens"] = kwargs.get("inferenceConfig", {}).get("maxTokens", None) - bedrock_attrs["request.temperature"] = kwargs.get("inferenceConfig", {}).get("temperature", None) + bedrock_attrs["request.max_tokens"] = kwargs.get("inferenceConfig", {}).get("maxTokens") + bedrock_attrs["request.temperature"] = kwargs.get("inferenceConfig", {}).get("temperature") try: request_extractor(request_body, bedrock_attrs) @@ -970,8 +970,8 @@ def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, sp "trace_id": trace_id, "response.choices.finish_reason": response.get("stopReason"), "output_message_list": output_message_list, - "request.max_tokens": kwargs.get("inferenceConfig", {}).get("maxTokens", None), - "request.temperature": kwargs.get("inferenceConfig", {}).get("temperature", None), + "request.max_tokens": kwargs.get("inferenceConfig", {}).get("maxTokens"), + "request.temperature": kwargs.get("inferenceConfig", {}).get("temperature"), "input_message_list": input_message_list, "response.usage.prompt_tokens": response_prompt_tokens, "response.usage.completion_tokens": response_completion_tokens, @@ -1126,10 +1126,10 @@ def handle_embedding_event(transaction, bedrock_attrs): custom_attrs_dict = transaction._custom_params llm_metadata_dict = {key: value for key, value in custom_attrs_dict.items() if key.startswith("llm.")} - span_id = bedrock_attrs.get("span_id", None) - trace_id = bedrock_attrs.get("trace_id", None) - request_id = bedrock_attrs.get("request_id", None) - model = bedrock_attrs.get("model", None) + span_id = bedrock_attrs.get("span_id") + trace_id = bedrock_attrs.get("trace_id") + request_id = bedrock_attrs.get("request_id") + model = bedrock_attrs.get("model") input_ = bedrock_attrs.get("input") response_total_tokens = bedrock_attrs.get("response.usage.total_tokens") @@ -1147,11 +1147,11 @@ def handle_embedding_event(transaction, bedrock_attrs): "span_id": span_id, "trace_id": trace_id, "request_id": request_id, - "duration": bedrock_attrs.get("duration", None), + "duration": 
bedrock_attrs.get("duration"), "request.model": model, "response.model": model, "response.usage.total_tokens": total_tokens, - "error": bedrock_attrs.get("error", None), + "error": bedrock_attrs.get("error"), } embedding_dict.update(llm_metadata_dict) @@ -1173,11 +1173,11 @@ def handle_chat_completion_event(transaction, bedrock_attrs): llm_context_attrs = getattr(transaction, "_llm_context_attrs", None) if llm_context_attrs: llm_metadata_dict.update(llm_context_attrs) - span_id = bedrock_attrs.get("span_id", None) - trace_id = bedrock_attrs.get("trace_id", None) - request_id = bedrock_attrs.get("request_id", None) - response_id = bedrock_attrs.get("response_id", None) - model = bedrock_attrs.get("model", None) + span_id = bedrock_attrs.get("span_id") + trace_id = bedrock_attrs.get("trace_id") + request_id = bedrock_attrs.get("request_id") + response_id = bedrock_attrs.get("response_id") + model = bedrock_attrs.get("model") response_prompt_tokens = bedrock_attrs.get("response.usage.prompt_tokens") response_completion_tokens = bedrock_attrs.get("response.usage.completion_tokens") @@ -1189,14 +1189,14 @@ def handle_chat_completion_event(transaction, bedrock_attrs): len(input_message_list) + len(output_message_list) ) or None # If 0, attribute will be set to None and removed - input_message_content = " ".join([msg.get("content", "") for msg in input_message_list if msg.get("content")]) + input_message_content = " ".join([msg.get("content") for msg in input_message_list if msg.get("content")]) prompt_tokens = ( settings.ai_monitoring.llm_token_count_callback(model, input_message_content) if settings.ai_monitoring.llm_token_count_callback and input_message_content else response_prompt_tokens ) - output_message_content = " ".join([msg.get("content", "") for msg in output_message_list if msg.get("content")]) + output_message_content = " ".join([msg.get("content") for msg in output_message_list if msg.get("content")]) completion_tokens = ( settings.ai_monitoring.llm_token_count_callback(model, output_message_content) if settings.ai_monitoring.llm_token_count_callback and output_message_content @@ -1216,14 +1216,14 @@ def handle_chat_completion_event(transaction, bedrock_attrs): "trace_id": trace_id, "request_id": request_id, "response_id": response_id, - "duration": bedrock_attrs.get("duration", None), - "request.max_tokens": bedrock_attrs.get("request.max_tokens", None), - "request.temperature": bedrock_attrs.get("request.temperature", None), + "duration": bedrock_attrs.get("duration"), + "request.max_tokens": bedrock_attrs.get("request.max_tokens"), + "request.temperature": bedrock_attrs.get("request.temperature"), "request.model": model, "response.model": model, # Duplicate data required by the UI "response.number_of_messages": number_of_messages, - "response.choices.finish_reason": bedrock_attrs.get("response.choices.finish_reason", None), - "error": bedrock_attrs.get("error", None), + "response.choices.finish_reason": bedrock_attrs.get("response.choices.finish_reason"), + "error": bedrock_attrs.get("error"), } if all_token_counts: