Skip to content

Commit 06e07f4

Browse files
Neehar Duvvuri
authored and committed
fix relevance and prompty test
1 parent 4f05fb5 commit 06e07f4

File tree

2 files changed

+10
-2
lines changed

2 files changed

+10
-2
lines changed

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_relevance/_relevance.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,8 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]: # t
177177
eval_input["query"] = reformat_conversation_history(eval_input["query"], logger)
178178
if not isinstance(eval_input["response"], str):
179179
eval_input["response"] = reformat_agent_response(eval_input["response"], logger)
180-
llm_output = await self._flow(timeout=self._LLM_CALL_TIMEOUT, **eval_input)
180+
result = await self._flow(timeout=self._LLM_CALL_TIMEOUT, **eval_input)
181+
llm_output = result["llm_output"]
181182
score = math.nan
182183

183184
if isinstance(llm_output, dict):
@@ -191,6 +192,13 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]: # t
191192
f"{self._result_key}_reason": reason,
192193
f"{self._result_key}_result": binary_result,
193194
f"{self._result_key}_threshold": self._threshold,
195+
f"{self._result_key}_prompt_tokens": result.get("input_token_count", 0),
196+
f"{self._result_key}_completion_tokens": result.get("output_token_count", 0),
197+
f"{self._result_key}_total_tokens": result.get("total_token_count", 0),
198+
f"{self._result_key}_finish_reason": result.get("finish_reason", ""),
199+
f"{self._result_key}_model": result.get("model_id", ""),
200+
f"{self._result_key}_sample_input": result.get("sample_input", ""),
201+
f"{self._result_key}_sample_output": result.get("sample_output", ""),
194202
}
195203

196204
binary_result = self._get_binary_result(score)

sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_prompty_async.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ async def test_first_match_text_json(self, prompty_config: Dict[str, Any], outpu
169169
# Should have only first name, and answer
170170
assert "lastName" not in llm_output
171171
else:
172-
assert "lastName" in result
172+
assert "lastName" in llm_output
173173
assert llm_output["lastName"] == "Doh"
174174

175175
@pytest.mark.asyncio

0 commit comments

Comments (0)