Skip to content

Commit b8b4fa7

Browse files
authored
Allow to update item_schema from external (#43863)
* Allow to update item_schema from external for some use cases like data synthesizing * Enhance copy to deepcopy * Fix linter run Black
1 parent 37670b3 commit b8b4fa7

File tree

2 files changed

+131
-4
lines changed

2 files changed

+131
-4
lines changed

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate_aoai.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -152,14 +152,14 @@ def _begin_single_aoai_evaluation(
152152
grader_name_list = []
153153
grader_list = []
154154

155-
data_source = {}
156-
data_source_config = {}
155+
data_source: Dict[str, Any] = {}
156+
data_source_config: Dict[str, Any] = {}
157157

158158
if kwargs.get("data_source_config") is not None:
159-
data_source_config = kwargs.get("data_source_config")
159+
data_source_config = kwargs.get("data_source_config", {})
160160

161161
if kwargs.get("data_source") is not None:
162-
data_source = kwargs.get("data_source")
162+
data_source = kwargs.get("data_source", {})
163163

164164
# It's expected that all graders supplied for a single eval run use the same credentials
165165
# so grab a client from the first grader.
@@ -176,6 +176,10 @@ def _begin_single_aoai_evaluation(
176176

177177
# Create eval group
178178
LOGGER.info(f"AOAI: Creating eval group with {len(grader_list)} testing criteria...")
179+
180+
# Merge any externally supplied item schema (for data generated outside the Eval SDK) into data_source_config
181+
_combine_item_schemas(data_source_config, kwargs)
182+
179183
eval_group_info = client.evals.create(
180184
data_source_config=data_source_config, testing_criteria=grader_list, metadata={"is_foundry_eval": "true"}
181185
)
@@ -212,6 +216,20 @@ def _begin_single_aoai_evaluation(
212216
)
213217

214218

219+
def _combine_item_schemas(data_source_config: Dict[str, Any], kwargs: Dict[str, Any]) -> None:
    """Merge an externally supplied item schema into ``data_source_config`` in place.

    Properties listed under ``kwargs["item_schema"]["properties"]`` that the
    config's own ``item_schema`` does not define yet are copied over. A newly
    added property is also appended to the config's ``required`` list when the
    external schema lists it as required. Properties the config already
    defines are left untouched, including their required status.

    The call is a no-op when ``kwargs`` carries no usable ``item_schema``
    (missing, not a dict, or without a ``properties`` dict) or when
    ``data_source_config`` has no ``item_schema`` dict to merge into.

    :param data_source_config: Data source configuration to update in place.
    :param kwargs: Keyword arguments that may carry an ``item_schema`` entry.
    """
    external_schema = kwargs.get("item_schema")
    if not isinstance(external_schema, dict):
        # Covers both a missing key and an explicit ``item_schema=None``.
        return

    external_properties = external_schema.get("properties")
    if not isinstance(external_properties, dict):
        return

    target_schema = data_source_config.get("item_schema")
    if not isinstance(target_schema, dict):
        return

    external_required = external_schema.get("required") or ()

    for key, definition in external_properties.items():
        # Created lazily so a schema without "properties" is only touched
        # when there is actually something to add.
        target_properties = target_schema.setdefault("properties", {})
        if key in target_properties:
            continue

        target_properties[key] = definition

        if key in external_required:
            # "required" is optional in a schema; create it on first use and
            # never list the same key twice.
            target_required = target_schema.setdefault("required", [])
            if key not in target_required:
                target_required.append(key)
231+
232+
215233
def _get_evaluation_run_results(all_run_info: List[OAIEvalRunCreationInfo]) -> Tuple[pd.DataFrame, Dict[str, Any]]:
216234
"""
217235
Get the results of an OAI evaluation run, formatted in a way that is easy for the rest of the evaluation
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
import pytest
2+
import copy
3+
from azure.ai.evaluation._evaluate._evaluate_aoai import _combine_item_schemas
4+
5+
6+
@pytest.fixture
def default_data_source_config():
    """Provide a baseline custom data source config with required string fields ``id`` and ``text``."""
    field_names = ("id", "text")
    item_schema = {
        "type": "object",
        # Each field gets its own dict so no definition object is shared.
        "properties": {name: {"type": "string"} for name in field_names},
        "required": list(field_names),
    }
    return {
        "type": "custom",
        "item_schema": item_schema,
        "include_sample_schema": False,
    }
20+
21+
22+
class TestCombineItemSchemas:
    """Unit tests for _combine_item_schemas"""

    def test_combine_item_schemas_success(self, default_data_source_config):
        """New external properties are merged and the required one is appended."""
        config = copy.deepcopy(default_data_source_config)
        external_schema = {
            "properties": {
                "metadata": {"type": "object"},
                "timestamp": {"type": "string", "format": "date-time"},
            },
            "required": ["metadata"],
        }

        _combine_item_schemas(config, {"item_schema": external_schema})

        merged = config["item_schema"]
        assert merged["properties"] == {
            "id": {"type": "string"},
            "text": {"type": "string"},
            "metadata": {"type": "object"},
            "timestamp": {"type": "string", "format": "date-time"},
        }
        assert merged["required"] == ["id", "text", "metadata"]

    def test_combine_item_schemas_without_item_schema(self, default_data_source_config):
        """The config is unchanged when kwargs has no item_schema or one without properties."""
        config = copy.deepcopy(default_data_source_config)
        unchanged_properties = {
            "id": {"type": "string"},
            "text": {"type": "string"},
        }
        unchanged_required = ["id", "text"]

        # First: no "item_schema" in kwargs; second: "item_schema" without "properties".
        for external_kwargs in ({}, {"item_schema": {}}):
            _combine_item_schemas(config, external_kwargs)
            assert config["item_schema"]["properties"] == unchanged_properties
            assert config["item_schema"]["required"] == unchanged_required

    def test_combine_item_schemas_with_empty_external_properties(self, default_data_source_config):
        """An external schema with empty properties and required leaves the config unchanged."""
        config = copy.deepcopy(default_data_source_config)
        external_schema = {"properties": {}, "required": []}

        _combine_item_schemas(config, {"item_schema": external_schema})

        merged = config["item_schema"]
        assert merged["properties"] == {
            "id": {"type": "string"},
            "text": {"type": "string"},
        }
        assert merged["required"] == ["id", "text"]

    def test_combine_item_schemas_with_external_properties_without_required(self, default_data_source_config):
        """External properties without a required list are merged without touching required."""
        config = copy.deepcopy(default_data_source_config)
        external_schema = {
            "properties": {
                "metadata": {"type": "object"},
                "timestamp": {"type": "string", "format": "date-time"},
            },
        }

        _combine_item_schemas(config, {"item_schema": external_schema})

        merged = config["item_schema"]
        assert merged["properties"] == {
            "id": {"type": "string"},
            "text": {"type": "string"},
            "metadata": {"type": "object"},
            "timestamp": {"type": "string", "format": "date-time"},
        }
        assert merged["required"] == ["id", "text"]

0 commit comments

Comments
 (0)