Commit d0c3eea

Rename Tool Success to Tool Call Success (#43810)
* rename
* update import
* updates
* updates
* black

1 parent bb35e87
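For context, a minimal before/after sketch of the import change this rename implies, based only on the module paths visible in the diffs below (the class is private and also re-exported from the package root per the __init__.py diff):

# Before this commit (old private module and class name):
# from azure.ai.evaluation._evaluators._tool_success import _ToolSuccessEvaluator

# After this commit the renamed class lives in the renamed module and,
# per the __init__.py diff below, is importable from the package root:
from azure.ai.evaluation import _ToolCallSuccessEvaluator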

10 files changed: +328 −95 lines

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/__init__.py
Lines changed: 2 additions & 0 deletions

@@ -33,6 +33,7 @@
 from ._evaluators._tool_call_accuracy import ToolCallAccuracyEvaluator
 from ._evaluators._document_retrieval import DocumentRetrievalEvaluator
 from ._evaluators._tool_output_utilization import _ToolOutputUtilizationEvaluator
+from ._evaluators._tool_call_success import _ToolCallSuccessEvaluator
 from ._model_configurations import (
     AzureAIProject,
     AzureOpenAIModelConfiguration,
@@ -133,6 +134,7 @@ def lazy_import():
     "UngroundedAttributesEvaluator",
     "ToolCallAccuracyEvaluator",
     "_ToolOutputUtilizationEvaluator",
+    "_ToolCallSuccessEvaluator",
     "AzureOpenAIGrader",
     "AzureOpenAILabelGrader",
     "AzureOpenAIStringCheckGrader",

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/constants.py
Lines changed: 1 addition & 1 deletion

@@ -20,7 +20,7 @@
     "tool_output_utilization",
     "task_completion",
     "tool_input_accuracy",
-    "tool_success",
+    "tool_call_success",
     "tool_call_accuracy",
 ]

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_constants.py
Lines changed: 0 additions & 1 deletion

@@ -143,7 +143,6 @@ class _EvaluatorMetricMapping:
         "hate_unfairness": ["hate_unfairness"],
         "tool_input_accuracy": ["tool_input_accuracy"],
         "task_completion": ["task_completion"],
-        "tool_success": ["tool_success"],
         "tool_call_success": ["tool_call_success"],
         "tool_selection": ["tool_selection"],
         "tool_output_utilization": ["tool_output_utilization"],

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_eval_mapping.py
Lines changed: 8 additions & 4 deletions

@@ -12,10 +12,14 @@
 # Import all evals
 from azure.ai.evaluation._evaluators._eci._eci import ECIEvaluator
 from azure.ai.evaluation._evaluators._task_completion import _TaskCompletionEvaluator
-from azure.ai.evaluation._evaluators._tool_input_accuracy import _ToolInputAccuracyEvaluator
+from azure.ai.evaluation._evaluators._tool_input_accuracy import (
+    _ToolInputAccuracyEvaluator,
+)
 from azure.ai.evaluation._evaluators._tool_selection import _ToolSelectionEvaluator
-from azure.ai.evaluation._evaluators._tool_success import _ToolSuccessEvaluator
-from azure.ai.evaluation._evaluators._task_navigation_efficiency import _TaskNavigationEfficiencyEvaluator
+from azure.ai.evaluation._evaluators._tool_call_success import _ToolCallSuccessEvaluator
+from azure.ai.evaluation._evaluators._task_navigation_efficiency import (
+    _TaskNavigationEfficiencyEvaluator,
+)
 from azure.ai.evaluation import (
     BleuScoreEvaluator,
     CodeVulnerabilityEvaluator,
@@ -77,7 +81,7 @@
     ToolCallAccuracyEvaluator: "tool_call_accuracy",
     _ToolInputAccuracyEvaluator: "tool_input_accuracy",
     _ToolSelectionEvaluator: "tool_selection",
-    _ToolSuccessEvaluator: "tool_success",
+    _ToolCallSuccessEvaluator: "tool_call_success",
     UngroundedAttributesEvaluator: "ungrounded_attributes",
     ViolenceEvaluator: "violence",
 }
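This table is what routes an evaluator class to its cloud metric name. A minimal lookup sketch restating the entries visible in the diff (the dict's actual variable name in _eval_mapping.py is not shown above, so EVAL_CLASS_MAP is an assumed stand-in):

# Hedged sketch; EVAL_CLASS_MAP is an assumed name, entries restate the diff above.
from azure.ai.evaluation._evaluators._tool_call_success import _ToolCallSuccessEvaluator
from azure.ai.evaluation._evaluators._tool_selection import _ToolSelectionEvaluator

EVAL_CLASS_MAP = {
    _ToolSelectionEvaluator: "tool_selection",
    _ToolCallSuccessEvaluator: "tool_call_success",
}

# After this commit, the renamed class resolves to the renamed metric.
assert EVAL_CLASS_MAP[_ToolCallSuccessEvaluator] == "tool_call_success"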

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_success/__init__.py renamed to sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_success/__init__.py
Lines changed: 2 additions & 2 deletions

@@ -2,6 +2,6 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 
-from ._tool_success import _ToolSuccessEvaluator
+from ._tool_call_success import _ToolCallSuccessEvaluator
 
-__all__ = ["_ToolSuccessEvaluator"]
+__all__ = ["_ToolCallSuccessEvaluator"]

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_success/_tool_success.py renamed to sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_success/_tool_call_success.py
Lines changed: 27 additions & 22 deletions

@@ -6,7 +6,12 @@
 import logging
 from typing import Dict, Union, List, Optional
 from typing_extensions import overload, override
-from azure.ai.evaluation._exceptions import EvaluationException, ErrorBlame, ErrorCategory, ErrorTarget
+from azure.ai.evaluation._exceptions import (
+    EvaluationException,
+    ErrorBlame,
+    ErrorCategory,
+    ErrorTarget,
+)
 from azure.ai.evaluation._evaluators._common import PromptyEvaluatorBase
 from azure.ai.evaluation._common._experimental import experimental
 
@@ -15,8 +20,8 @@
 
 
 @experimental
-class _ToolSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
-    """The Tool Success evaluator determines whether tool calls done by an AI agent includes failures or not.
+class _ToolCallSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
+    """The Tool Call Success evaluator determines whether tool calls done by an AI agent includes failures or not.
 
     This evaluator focuses solely on tool call results and tool definitions, disregarding user's query to
     the agent, conversation history and agent's final response. Although tool definitions is optional,
@@ -36,34 +41,34 @@ class _ToolSuccessEvaluator(PromptyEvaluatorBase[Union[str, float]]):
 
     .. admonition:: Example:
         .. literalinclude:: ../samples/evaluation_samples_evaluate.py
-            :start-after: [START tool_success_evaluator]
-            :end-before: [END tool_success_evaluator]
+            :start-after: [START tool_call_success_evaluator]
+            :end-before: [END tool_call_success_evaluator]
             :language: python
            :dedent: 8
-            :caption: Initialize and call a _ToolSuccessEvaluator with a tool definitions and response.
+            :caption: Initialize and call a _ToolCallSuccessEvaluator with a tool definitions and response.
 
     .. admonition:: Example using Azure AI Project URL:
 
         .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
-            :start-after: [START tool_success_evaluator]
-            :end-before: [END tool_success_evaluator]
+            :start-after: [START tool_call_success_evaluator]
+            :end-before: [END tool_call_success_evaluator]
             :language: python
            :dedent: 8
-            :caption: Initialize and call a _ToolSuccessEvaluator using Azure AI Project URL in the following
+            :caption: Initialize and call a _ToolCallSuccessEvaluator using Azure AI Project URL in the following
                 format https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
 
     """
 
-    _PROMPTY_FILE = "tool_success.prompty"
-    _RESULT_KEY = "tool_success"
+    _PROMPTY_FILE = "tool_call_success.prompty"
+    _RESULT_KEY = "tool_call_success"
     _OPTIONAL_PARAMS = ["tool_definitions"]
 
-    id = "azureai://built-in/evaluators/tool_success"
+    id = "azureai://built-in/evaluators/tool_call_success"
     """Evaluator identifier, experimental and to be used only with evaluation in cloud."""
 
     @override
     def __init__(self, model_config, *, credential=None, **kwargs):
-        """Initialize the Tool Success evaluator."""
+        """Initialize the Tool Call Success evaluator."""
         current_dir = os.path.dirname(__file__)
         prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
         super().__init__(
@@ -86,7 +91,7 @@ def __call__(
         """Evaluate tool call success for a given response, and optionally tool definitions.
 
         Example with list of messages:
-            evaluator = _ToolSuccessEvaluator(model_config)
+            evaluator = _ToolCallSuccessEvaluator(model_config)
            response = [{'createdAt': 1700000070, 'run_id': '0', 'role': 'assistant',
            'content': [{'type': 'text', 'text': '**Day 1:** Morning: Visit Louvre Museum (9 AM - 12 PM)...'}]}]
 
@@ -97,7 +102,7 @@ def __call__(
         :paramtype response: Union[str, List[dict]]
         :keyword tool_definitions: Optional tool definitions to use for evaluation.
         :paramtype tool_definitions: Union[dict, List[dict]]
-        :return: A dictionary with the tool success evaluation results.
+        :return: A dictionary with the Tool Call Success evaluation results.
         :rtype: Dict[str, Union[str, float]]
         """
 
@@ -116,7 +121,7 @@ def __call__(  # pylint: disable=docstring-missing-param
 
     @override
     async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[str, float]]:  # type: ignore[override]
-        """Do Tool Success evaluation.
+        """Do Tool Call Success evaluation.
 
         :param eval_input: The input to the evaluator. Expected to contain whatever inputs are
            needed for the _flow method
@@ -126,19 +131,19 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[str, float]]:  # type: ignore[override]
        """
        if "response" not in eval_input:
            raise EvaluationException(
-                message="response is a required input to the Tool Success evaluator.",
-                internal_message="response is a required input to the Tool Success evaluator.",
+                message="response is a required input to the Tool Call Success evaluator.",
+                internal_message="response is a required input to the Tool Call Success evaluator.",
                blame=ErrorBlame.USER_ERROR,
                category=ErrorCategory.MISSING_FIELD,
-                target=ErrorTarget.TOOL_SUCCESS_EVALUATOR,
+                target=ErrorTarget.TOOL_CALL_SUCCESS_EVALUATOR,
            )
        if eval_input["response"] is None or eval_input["response"] == []:
            raise EvaluationException(
-                message="response cannot be None or empty for the Tool Success evaluator.",
-                internal_message="response cannot be None or empty for the Tool Success evaluator.",
+                message="response cannot be None or empty for the Tool Call Success evaluator.",
+                internal_message="response cannot be None or empty for the Tool Call Success evaluator.",
                blame=ErrorBlame.USER_ERROR,
                category=ErrorCategory.INVALID_VALUE,
-                target=ErrorTarget.TOOL_SUCCESS_EVALUATOR,
+                target=ErrorTarget.TOOL_CALL_SUCCESS_EVALUATOR,
            )
 
        eval_input["tool_calls"] = _reformat_tool_calls_results(eval_input["response"], logger)
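To make the docstring example above concrete, here is a hedged usage sketch. The endpoint, deployment, and key values are placeholders; the result key follows the _RESULT_KEY shown in the diff, though the full shape of the returned dictionary is not shown in this commit:

from azure.ai.evaluation import AzureOpenAIModelConfiguration, _ToolCallSuccessEvaluator

# Placeholder configuration; substitute a real Azure OpenAI resource.
model_config = AzureOpenAIModelConfiguration(
    azure_endpoint="https://<resource>.openai.azure.com",
    azure_deployment="<deployment>",
    api_key="<api-key>",
)

evaluator = _ToolCallSuccessEvaluator(model_config)

# Agent response in the list-of-messages form from the docstring example.
response = [{"createdAt": 1700000070, "run_id": "0", "role": "assistant",
             "content": [{"type": "text",
                          "text": "**Day 1:** Morning: Visit Louvre Museum (9 AM - 12 PM)..."}]}]

result = evaluator(response=response)
print(result["tool_call_success"])  # key per _RESULT_KEY; the result may carry more fields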
sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_success/tool_success.prompty renamed to sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_success/tool_call_success.prompty
Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 ---
-name: Tool Success
+name: Tool Call Success
 description: Evaluates whether a Tool call was successful or resulted in a technical error
 model:
   api: chat

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_exceptions.py
Lines changed: 1 addition & 1 deletion

@@ -85,7 +85,7 @@ class ErrorTarget(Enum):
     SIMILARITY_EVALUATOR = "SimilarityEvaluator"
     FLUENCY_EVALUATOR = "FluencyEvaluator"
     RETRIEVAL_EVALUATOR = "RetrievalEvaluator"
-    TOOL_SUCCESS_EVALUATOR = "_ToolSuccessEvaluator"
+    TOOL_CALL_SUCCESS_EVALUATOR = "_ToolCallSuccessEvaluator"
     TASK_ADHERENCE_EVALUATOR = "TaskAdherenceEvaluator"
     TASK_COMPLETION_EVALUATOR = "_TaskCompletionEvaluator"
     INDIRECT_ATTACK_EVALUATOR = "IndirectAttackEvaluator"
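A short sketch of the validation behavior this rename touches: per the _do_eval diff above, an empty response should surface as a user-error EvaluationException tagged with the renamed target. This is a hedged illustration, not a test from this commit; model_config is the placeholder from the usage sketch above:

from azure.ai.evaluation import _ToolCallSuccessEvaluator
from azure.ai.evaluation._exceptions import EvaluationException

evaluator = _ToolCallSuccessEvaluator(model_config)  # model_config as sketched earlier
try:
    evaluator(response=[])  # empty response is rejected in _do_eval
except EvaluationException as exc:
    # Expected message per the diff:
    # "response cannot be None or empty for the Tool Call Success evaluator."
    print(exc)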
