Skip to content

Commit 201af13

Browse files
Neehar Duvvuri
authored and committed
merge
2 parents 36e45f6 + eec6eb2 commit 201af13

File tree

4 files changed

+19
-12
lines changed

4 files changed

+19
-12
lines changed

sdk/evaluation/azure-ai-evaluation/assets.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,5 +2,5 @@
22
"AssetsRepo": "Azure/azure-sdk-assets",
33
"AssetsRepoPrefixPath": "python",
44
"TagPrefix": "python/evaluation/azure-ai-evaluation",
5-
"Tag": "python/evaluation/azure-ai-evaluation_d7b00f22b8"
5+
"Tag": "python/evaluation/azure-ai-evaluation_5bef6dc713"
66
}

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -533,6 +533,13 @@ def _parse_content_harm_response(
533533
except Exception: # pylint: disable=broad-exception-caught
534534
harm_response = response[metric_name]
535535

536+
total_tokens = 0
537+
prompt_tokens = 0
538+
completion_tokens = 0
539+
finish_reason = ""
540+
sample_input = ""
541+
sample_output = ""
542+
model = ""
536543
if harm_response != "" and isinstance(harm_response, dict):
537544
# check if "output" is one key in harm_response
538545
if "output" in harm_response:

sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_mass_evaluate.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -388,7 +388,7 @@ def test_evaluate_multimodal(
388388
# imageurls_with_target has 1 extra column: outputs.conversation due to the target mapping
389389
assert len(row_result_df.keys()) >= 33
390390
else:
391-
assert len(row_result_df.keys()) == 32
391+
assert len(row_result_df.keys()) == 88
392392
known_keys = [
393393
"outputs.content_safety.hate_unfairness",
394394
"outputs.content_safety.hate_unfairness_score",

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1156,21 +1156,21 @@ def run_test():
11561156
assert "metrics" in converted_results
11571157
assert "rows" in converted_results
11581158
assert "studio_url" in converted_results
1159-
assert "evaluation_results_list" in converted_results
1160-
assert "evaluation_summary" in converted_results
1159+
assert "_evaluation_results_list" in converted_results
1160+
assert "_evaluation_summary" in converted_results
11611161

11621162
# Verify metrics preserved
11631163
assert converted_results["metrics"]["overall_score"] == 0.75
11641164

11651165
# Verify studio URL preserved
11661166
assert converted_results["studio_url"] == "https://test-studio.com"
11671167

1168-
# Verify evaluation_results_list is same as rows (converted format)
1169-
assert len(converted_results["evaluation_results_list"]) == len(test_rows)
1170-
assert len(converted_results["evaluation_results_list"]) == len(converted_results["rows"])
1168+
# Verify _evaluation_results_list is same as rows (converted format)
1169+
assert len(converted_results["_evaluation_results_list"]) == len(test_rows)
1170+
assert len(converted_results["_evaluation_results_list"]) == len(converted_results["rows"])
11711171

11721172
# Verify conversion structure for each row
1173-
for i, converted_row in enumerate(converted_results["evaluation_results_list"]):
1173+
for i, converted_row in enumerate(converted_results["_evaluation_results_list"]):
11741174
# Check RunOutputItem structure
11751175
assert "object" in converted_row
11761176
assert converted_row["object"] == "eval.run.output_item"
@@ -1213,8 +1213,8 @@ def run_test():
12131213
assert "name" in result
12141214
assert "metric" in result
12151215

1216-
# Verify evaluation summary structure
1217-
summary = converted_results["evaluation_summary"]
1216+
# Verify _evaluation_summary structure
1217+
summary = converted_results["_evaluation_summary"]
12181218
assert "result_counts" in summary
12191219
assert "per_model_usage" in summary
12201220
assert "per_testing_criteria_results" in summary
@@ -1262,8 +1262,8 @@ def run_test():
12621262
empty_converted = empty_results
12631263

12641264
assert len(empty_converted["rows"]) == 0
1265-
assert len(empty_converted["evaluation_results_list"]) == 0
1266-
assert empty_converted["evaluation_summary"]["result_counts"]["total"] == 0
1265+
assert len(empty_converted["_evaluation_results_list"]) == 0
1266+
assert empty_converted["_evaluation_summary"]["result_counts"]["total"] == 0
12671267

12681268

12691269
@pytest.mark.unittest

0 commit comments

Comments
 (0)