Commit 29d44b3

Neehar Duvvuri authored and committed

change prompty output to dict

1 parent d0323df

File tree

2 files changed: +59, -31 lines
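At a glance, the change replaces the prompty flow's positional 8-tuple return value with a single dict. A minimal sketch of the new shape, using the key names that appear in the diffs below (the example values are made up):

```python
# Before: order-sensitive positional unpacking (what the old call sites did).
# llm_output, input_token_count, output_token_count, total_token_count, \
#     finish_reason, model_id, sample_input, sample_output = await flow(**inputs)

# After: one dict, keyed by name. Values here are illustrative only.
prompty_output_dict = {
    "llm_output": "<S0>...</S0><S1>...</S1><S2>4</S2>",  # str, Mapping, or async generator
    "input_token_count": 120,
    "output_token_count": 48,
    "total_token_count": 168,
    "finish_reason": "stop",
    "model_id": "gpt-4o",  # hypothetical model name
    "sample_input": "...",
    "sample_output": "...",
}
```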

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_prompty_eval.py

Lines changed: 10 additions & 2 deletions
```diff
@@ -133,12 +133,20 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]:  # t
                 target=ErrorTarget.CONVERSATION,
             )
         # Call the prompty flow to get the evaluation result.
-        llm_output, input_token_count, output_token_count, total_token_count, finish_reason, model_id, sample_input, sample_output = await self._flow(
+        prompty_output_dict = await self._flow(
             timeout=self._LLM_CALL_TIMEOUT, **eval_input
         )

         score = math.nan
-        if llm_output:
+        if prompty_output_dict:
+            llm_output = prompty_output_dict.get("llm_output", "")
+            input_token_count = prompty_output_dict.get("input_token_count", 0)
+            output_token_count = prompty_output_dict.get("output_token_count", 0)
+            total_token_count = prompty_output_dict.get("total_token_count", 0)
+            finish_reason = prompty_output_dict.get("finish_reason", "")
+            model_id = prompty_output_dict.get("model_id", "")
+            sample_input = prompty_output_dict.get("sample_input", "")
+            sample_output = prompty_output_dict.get("sample_output", "")
             # Parse out score and reason from evaluators known to possess them.
             if self._result_key in PROMPT_BASED_REASON_EVALUATORS:
                 score, reason = parse_quality_evaluator_reason_score(llm_output)
```
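Because the evaluator unpacks the dict with `.get()` and defaults, a flow that omits a key degrades to a benign default instead of raising. A quick illustration of that behavior (the partial dict below is a made-up input, not from the commit):

```python
# Made-up partial output: only two of the eight keys are present.
partial = {"llm_output": "<S0>...</S0>", "finish_reason": "stop"}

llm_output = partial.get("llm_output", "")               # "<S0>...</S0>"
input_token_count = partial.get("input_token_count", 0)  # missing -> 0
model_id = partial.get("model_id", "")                   # missing -> ""
```

Contrast this with the old tuple contract, where any arity change in the flow's return value would have raised a ValueError at every unpacking call site.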

sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_prompty_async.py

Lines changed: 49 additions & 29 deletions
```diff
@@ -81,13 +81,15 @@ def test_load_images(self, prompty_config: Dict[str, Any]):
     @pytest.mark.asyncio
     async def test_first_match_text(self, prompty_config: Dict[str, Any]):
         prompty = AsyncPrompty(COHERENCE_PROMPTY, **prompty_config)
-        result, _, _, _, _, _, _, _ = await prompty(query="What is the capital of France?", response="France capital Paris")
+        result = await prompty(query="What is the capital of France?", response="France capital Paris")
+        assert isinstance(result, dict)
+        llm_output = result["llm_output"]

         # We expect an output string that contains <S0>chain of thoughts</S0> <S1>explanation<S1> <S2>int_score</S2>
-        assert isinstance(result, str)
+        assert isinstance(llm_output, str)
         matched = re.match(
             r"^\s*<S0>(.*)</S0>\s*<S1>(.*)</S1>\s*<S2>(.*)</S2>\s*$",
-            result,
+            llm_output,
             re.MULTILINE | re.DOTALL,
         )
         assert matched
```
```diff
@@ -99,19 +101,27 @@ async def test_first_match_text(self, prompty_config: Dict[str, Any]):
     @pytest.mark.asyncio
     async def test_first_match_image(self, prompty_config: Dict[str, Any]):
         prompty = AsyncPrompty(IMAGE_PROMPTY, **prompty_config)
-        result, _, _, _, _, _, _, _ = await prompty(image="image1.jpg", question="What is this a picture of?")
-        assert isinstance(result, str)
-        assert "apple" in result.lower()
+        result = await prompty(image="image1.jpg", question="What is this a picture of?")
+        assert isinstance(result, dict)
+        llm_output = result["llm_output"]
+        assert isinstance(llm_output, AsyncGenerator)
+        combined = ""
+        async for chunk in llm_output:
+            assert isinstance(chunk, str)
+            combined += chunk
+        assert "apple" in combined

     @pytest.mark.asyncio
     async def test_first_match_text_streaming(self, prompty_config: Dict[str, Any]):
         prompty_config["model"]["parameters"]["stream"] = True
         prompty = AsyncPrompty(BASIC_PROMPTY, **prompty_config)
-        result, _, _, _, _, _, _, _ = await prompty(firstName="Bob", question="What is the capital of France?")
+        result = await prompty(firstName="Bob", question="What is the capital of France?")
+        assert isinstance(result, dict)
+        llm_output = result["llm_output"]

-        assert isinstance(result, AsyncGenerator)
+        assert isinstance(llm_output, AsyncGenerator)
         combined = ""
-        async for chunk in result:
+        async for chunk in llm_output:
             assert isinstance(chunk, str)
             combined += chunk

```

```diff
@@ -122,11 +132,13 @@ async def test_first_match_text_streaming(self, prompty_config: Dict[str, Any]):
     async def test_first_match_image_streaming(self, prompty_config: Dict[str, Any]):
         prompty_config["model"]["parameters"]["stream"] = True
         prompty = AsyncPrompty(IMAGE_PROMPTY, **prompty_config)
-        result, _, _, _, _, _, _, _ = await prompty(image="image1.jpg", question="What is this a picture of?")
+        result = await prompty(image="image1.jpg", question="What is this a picture of?")
+        assert isinstance(result, dict)
+        llm_output = result["llm_output"]

-        assert isinstance(result, AsyncGenerator)
+        assert isinstance(llm_output, AsyncGenerator)
         combined = ""
-        async for chunk in result:
+        async for chunk in llm_output:
             assert isinstance(chunk, str)
             combined += chunk

```

```diff
@@ -143,20 +155,22 @@ async def test_first_match_image_streaming(self, prompty_config: Dict[str, Any]):
     async def test_first_match_text_json(self, prompty_config: Dict[str, Any], outputs: Mapping[str, Any]):
         prompty_config["outputs"] = outputs
         prompty = AsyncPrompty(JSON_PROMPTY, **prompty_config)
-        result, _, _, _, _, _, _, _ = await prompty(question="What is the capital of France?")
+        result = await prompty(question="What is the capital of France?")
+        assert isinstance(result, dict)
+        llm_output = result["llm_output"]

-        assert isinstance(result, Mapping)
-        assert "firstName" in result
-        assert result["firstName"] == "John"
-        assert "answer" in result
-        assert "Paris" in result["answer"]
+        assert isinstance(llm_output, Mapping)
+        assert "firstName" in llm_output
+        assert llm_output["firstName"] == "John"
+        assert "answer" in llm_output
+        assert "Paris" in llm_output["answer"]

         if outputs:
-            # Should ahve only first name, and answer
-            assert "lastName" not in result
+            # Should have only first name, and answer
+            assert "lastName" not in llm_output
         else:
-            assert "lastName" in result
-            assert result["lastName"] == "Doh"
+            assert "lastName" in llm_output
+            assert llm_output["lastName"] == "Doh"

     @pytest.mark.asyncio
     async def test_first_match_text_json_missing(self, prompty_config: Dict[str, Any]):
```
```diff
@@ -170,20 +184,24 @@ async def test_first_match_text_json_missing(self, prompty_config: Dict[str, Any]):
     async def test_first_match_text_json_streaming(self, prompty_config: Dict[str, Any]):
         prompty_config["model"]["parameters"]["stream"] = True
         prompty = AsyncPrompty(JSON_PROMPTY, **prompty_config)
-        result, _, _, _, _, _, _, _ = await prompty(
+        result = await prompty(
             question="What is the capital of France?", firstName="Barbra", lastName="Streisand"
         )
-        assert isinstance(result, Mapping)
-        assert result["firstName"] == "Barbra"
-        assert result["lastName"] == "Streisand"
-        assert "Paris" in result["answer"]
+        assert isinstance(result, dict)
+        llm_output = result["llm_output"]
+        assert isinstance(llm_output, Mapping)
+        assert llm_output["firstName"] == "Barbra"
+        assert llm_output["lastName"] == "Streisand"
+        assert "Paris" in llm_output["answer"]

     @pytest.mark.asyncio
     async def test_full_text(self, prompty_config: Dict[str, Any]):
         prompty_config["model"]["response"] = "full"
         prompty = AsyncPrompty(BASIC_PROMPTY, **prompty_config)
-        result, _, _, _, _, _, _, _ = await prompty(firstName="Bob", question="What is the capital of France?")
-        assert isinstance(result, ChatCompletion)
-        response: str = result.choices[0].message.content or ""
+        result = await prompty(firstName="Bob", question="What is the capital of France?")
+        assert isinstance(result, dict)
+        llm_output = result["llm_output"]
+        assert isinstance(llm_output, ChatCompletion)
+        response: str = llm_output.choices[0].message.content or ""
         assert "Bob" in response
         assert "Paris" in response
```
