 from azure.core.credentials_async import AsyncTokenCredential
 from azure.ai.projects.models import AgentReference, PromptAgentDefinition
 from azure.ai.projects.models import FileSearchTool, AzureAISearchAgentTool, Tool, AgentVersionObject, AzureAISearchToolResource, AISearchIndexResource
+
+from azure.ai.projects.models import (
+    PromptAgentDefinition,
+    EvaluationRule,
+    ContinuousEvaluationRuleAction,
+    EvaluationRuleFilter,
+    EvaluationRuleEventType,
+    EvaluatorCategory,
+    EvaluatorDefinitionType
+)
+
+
 from openai import AsyncOpenAI
 from dotenv import load_dotenv

@@ -174,6 +186,95 @@ async def create_agent(ai_project: AIProjectClient,
     )
     return agent

+async def initialize_eval(project_client: AIProjectClient, agent: AgentVersionObject):
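+    """Create a prompt-based custom evaluator and a continuous evaluation rule that scores this agent's responses."""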
+    print("Creating a single evaluator version - Prompt based (json style)")
+    prompt_evaluator = await project_client.evaluators.create_version(
+        name="my_custom_evaluator_prompt",
+        evaluator_version={
+            "name": "my_custom_evaluator_prompt",
+            "categories": [EvaluatorCategory.QUALITY],
+            "display_name": "my_custom_evaluator_prompt",
+            "description": "Custom evaluator for groundedness",
+            "definition": {
+                "type": EvaluatorDefinitionType.PROMPT,
+                "prompt_text": """
+You are a Groundedness Evaluator.
+
+Your task is to evaluate how well the given response is grounded in the provided ground truth.
+Groundedness means the response's statements are factually supported by the ground truth.
+Evaluate factual alignment only; ignore grammar, fluency, or completeness.
+
+---
+
+### Input:
+Query:
+{query}
+
+Response:
+{response}
+
+Ground Truth:
+{ground_truth}
+
+---
+
+### Scoring Scale (1-5):
+5 → Fully grounded. All claims supported by ground truth.
+4 → Mostly grounded. Minor unsupported details.
+3 → Partially grounded. About half the claims supported.
+2 → Mostly ungrounded. Only a few details supported.
+1 → Not grounded. Almost all information unsupported.
+
+---
+
+### Output should be an integer:
+<integer from 1 to 5>
+""",
+                "init_parameters": {
+                    "type": "object",
+                    "properties": {"deployment_name": {"type": "string"}, "threshold": {"type": "number"}},
+                    "required": ["deployment_name"],
+                },
+                "data_schema": {
+                    "type": "object",
+                    "properties": {
+                        "query": {"type": "string"},
+                        "response": {"type": "string"},
+                        "ground_truth": {"type": "string"},
+                    },
+                    "required": ["query", "response", "ground_truth"],
+                },
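+                # The evaluator reports a single ordinal metric on a 1-5 scale; higher scores mean better grounding.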
+                "metrics": {
+                    "groundedness": {
+                        "type": "ordinal",
+                        "desirable_direction": "increase",
+                        "min_value": 1,
+                        "max_value": 5,
+                    }
+                },
+            },
+        },
+    )
+
+    print(f"Evaluator version created (id: {prompt_evaluator.id}, name: {prompt_evaluator.name})")
+
+    print("Creating continuous evaluation rule to run evaluator on agent responses")
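+    # The rule runs the evaluator each time this agent completes a response, capped at 10 runs per hour.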
+    continuous_eval_rule = await project_client.evaluation_rules.create_or_update(
+        id="my-continuous-eval-rule",
+        evaluation_rule=EvaluationRule(
+            display_name="My Continuous Eval Rule",
+            description="An eval rule that runs on agent response completions",
+            action=ContinuousEvaluationRuleAction(eval_id=prompt_evaluator.id, max_hourly_runs=10),
+            event_type=EvaluationRuleEventType.RESPONSE_COMPLETED,
+            filter=EvaluationRuleFilter(agent_name=agent.name),
+            enabled=True,
+        ),
+    )
+    print(
+        f"Continuous Evaluation Rule created (id: {continuous_eval_rule.id}, name: {continuous_eval_rule.display_name})"
+    )
+
+

 async def initialize_resources():
     try:
@@ -196,7 +297,6 @@ async def initialize_resources():
                 agent_version = agentID.split(":")[1]
                 agent_obj = await ai_project.agents.retrieve_version(agent_name, agent_version)
                 logger.info(f"Found agent by ID: {agent_obj.id}")
-                return
             except Exception as e:
                 logger.warning(
                     "Could not retrieve agent by AZURE_EXISTING_AGENT_ID = "
@@ -222,6 +322,8 @@ async def initialize_resources():

         os.environ["AZURE_EXISTING_AGENT_ID"] = agent_obj.id

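+        # Set up continuous evaluation for the agent created (or retrieved) above.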
+        await initialize_eval(ai_project, agent_obj)
+
     except Exception as e:
         logger.info("Error creating agent: {e}", exc_info=True)
         raise RuntimeError(f"Failed to create the agent: {e}")