 import logging
 from typing import Dict, Union, List, Optional
 from typing_extensions import overload, override
-from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
+from azure.ai.evaluation._exceptions import (
+    EvaluationException,
+    ErrorBlame,
+    ErrorCategory,
+    ErrorTarget,
+)
 from azure.ai.evaluation._evaluators._common import PromptyEvaluatorBase
 from azure.ai.evaluation._common._experimental import experimental



 @experimental
-class _ToolSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
-    """The Tool Success evaluator determines whether tool calls done by an AI agent includes failures or not.
+class _ToolCallSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
+    """The Tool Call Success evaluator determines whether tool calls done by an AI agent includes failures or not.

     This evaluator focuses solely on tool call results and tool definitions, disregarding user's query to
     the agent, conversation history and agent's final response. Although tool definitions is optional,
@@ -36,34 +41,34 @@ class _ToolSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):

     .. admonition:: Example:
         .. literalinclude:: ../samples/evaluation_samples_evaluate.py
-            :start-after: [START tool_success_evaluator]
-            :end-before: [END tool_success_evaluator]
+            :start-after: [START tool_call_success_evaluator]
+            :end-before: [END tool_call_success_evaluator]
             :language: python
             :dedent: 8
-            :caption: Initialize and call a _ToolSuccessEvaluator with a tool definitions and response.
+            :caption: Initialize and call a _ToolCallSuccessEvaluator with a tool definitions and response.

     .. admonition:: Example using Azure AI Project URL:

         .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
-            :start-after: [START tool_success_evaluator]
-            :end-before: [END tool_success_evaluator]
+            :start-after: [START tool_call_success_evaluator]
+            :end-before: [END tool_call_success_evaluator]
             :language: python
             :dedent: 8
-            :caption: Initialize and call a _ToolSuccessEvaluator using Azure AI Project URL in the following
+            :caption: Initialize and call a _ToolCallSuccessEvaluator using Azure AI Project URL in the following
                 format https://{resource_name}.services.ai.azure.com/api/projects/{project_name}

     """

-    _PROMPTY_FILE = "tool_success.prompty"
-    _RESULT_KEY = "tool_success"
+    _PROMPTY_FILE = "tool_call_success.prompty"
+    _RESULT_KEY = "tool_call_success"
     _OPTIONAL_PARAMS = ["tool_definitions"]

-    id = "azureai://built-in/evaluators/tool_success"
+    id = "azureai://built-in/evaluators/tool_call_success"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""

     @override
     def __init__(self, model_config, *, credential=None, **kwargs):
-        """Initialize the Tool Success evaluator."""
+        """Initialize the Tool Call Success evaluator."""
         current_dir = os.path.dirname(__file__)
         prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
         super().__init__(
@@ -86,7 +91,7 @@ def __call__(
8691 """Evaluate tool call success for a given response, and optionally tool definitions.
8792
8893 Example with list of messages:
89- evaluator = _ToolSuccessEvaluator (model_config)
94+ evaluator = _ToolCallSuccessEvaluator (model_config)
9095 response = [{'createdAt': 1700000070, 'run_id': '0', 'role': 'assistant',
9196 'content': [{'type': 'text', 'text': '**Day 1:** Morning: Visit Louvre Museum (9 AM - 12 PM)...'}]}]
9297
@@ -97,7 +102,7 @@ def __call__(
         :paramtype response: Union[str, List[dict]]
         :keyword tool_definitions: Optional tool definitions to use for evaluation.
         :paramtype tool_definitions: Union[dict, List[dict]]
-        :return: A dictionary with the tool success evaluation results.
+        :return: A dictionary with the Tool Call Success evaluation results.
         :rtype: Dict[str, Union[str, float]]
         """

@@ -116,7 +121,7 @@ def __call__( # pylint: disable=docstring-missing-param

     @override
     async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[str, float]]:  # type: ignore[override]
-        """Do Tool Success evaluation.
+        """Do Tool Call Success evaluation.

         :param eval_input: The input to the evaluator. Expected to contain whatever inputs are
             needed for the _flow method
@@ -126,19 +131,19 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[str, float]]: # t
126131 """
127132 if "response" not in eval_input :
128133 raise EvaluationException (
129- message = "response is a required input to the Tool Success evaluator." ,
130- internal_message = "response is a required input to the Tool Success evaluator." ,
134+ message = "response is a required input to the Tool Call Success evaluator." ,
135+ internal_message = "response is a required input to the Tool Call Success evaluator." ,
131136 blame = ErrorBlame .USER_ERROR ,
132137 category = ErrorCategory .MISSING_FIELD ,
133- target = ErrorTarget .TOOL_SUCCESS_EVALUATOR ,
138+ target = ErrorTarget .TOOL_CALL_SUCCESS_EVALUATOR ,
134139 )
135140 if eval_input ["response" ] is None or eval_input ["response" ] == []:
136141 raise EvaluationException (
137- message = "response cannot be None or empty for the Tool Success evaluator." ,
138- internal_message = "response cannot be None or empty for the Tool Success evaluator." ,
142+ message = "response cannot be None or empty for the Tool Call Success evaluator." ,
143+ internal_message = "response cannot be None or empty for the Tool Call Success evaluator." ,
139144 blame = ErrorBlame .USER_ERROR ,
140145 category = ErrorCategory .INVALID_VALUE ,
-                target=ErrorTarget.TOOL_SUCCESS_EVALUATOR,
+                target=ErrorTarget.TOOL_CALL_SUCCESS_EVALUATOR,
142147 )
143148
144149 eval_input ["tool_calls" ] = _reformat_tool_calls_results (eval_input ["response" ], logger )