@@ -534,12 +534,12 @@ async def format_stream(llm_response: AsyncStream[ChatCompletionChunk]) -> Async
534534 "finish_reason" : "" ,
535535 "model_id" : "" ,
536536 "sample_input" : "" ,
537- "sample_output" : ""
537+ "sample_output" : "" ,
538538 }
539539
540540 if not is_first_choice :
541541 to_ret ["llm_output" ] = response
-        return to_ret # we don't actually use this code path since streaming is not used, so set token counts to 0
+        return to_ret  # we don't actually use this code path since streaming is not used, so set token counts to 0
 
     is_json_format = isinstance(response_format, dict) and response_format.get("type") == "json_object"
     if isinstance(response, AsyncStream):
@@ -555,22 +555,26 @@ async def format_stream(llm_response: AsyncStream[ChatCompletionChunk]) -> Async
             response.usage.completion_tokens if response.usage and response.usage.completion_tokens else 0
         )
         total_token_count = response.usage.total_tokens if response.usage and response.usage.total_tokens else 0
-        finish_reason = response.choices[0].finish_reason if response.choices and response.choices[0].finish_reason else ""
+        finish_reason = (
+            response.choices[0].finish_reason if response.choices and response.choices[0].finish_reason else ""
+        )
         model_id = response.model if response.model else ""
-        sample_output_list = [{"role": response.choices[0].message.role, "content": response.choices[0].message.content}] if (response.choices and response.choices[0].message.content
-            and response.choices[0].message.role) else []
+        sample_output_list = (
+            [{"role": response.choices[0].message.role, "content": response.choices[0].message.content}]
+            if (response.choices and response.choices[0].message.content and response.choices[0].message.role)
+            else []
+        )
         sample_output = json.dumps(sample_output_list)
         input_str = f"{json.dumps(inputs)}" if inputs else ""
         if inputs and len(inputs) > 0:
-            sample_input_json = []
+            sample_input_json = []
             msg = ChatCompletionUserMessageParam(
                 role="user",
                 content=input_str,
             )
             sample_input_json.append(msg)
             sample_input = json.dumps(sample_input_json)
 
-
     # When calling function/tool, function_call/tool_call response will be returned as a field in message,
     # so we need return message directly. Otherwise, we only return content.
     # https://platform.openai.com/docs/api-reference/chat/object#chat/object-choices
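The comment above refers to the shape of chat completion choices in the OpenAI API: tool and function calls are carried on the message object rather than in its text content. A minimal sketch of the branch it describes, with illustrative variable names not taken from this commit:

choice = response.choices[0]
if choice.message.tool_calls or choice.message.function_call:
    # Tool/function call arguments live on the message itself,
    # so the whole message must be returned.
    llm_output = choice.message
else:
    # Plain completions only need the text content.
    llm_output = choice.message.content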
@@ -589,6 +593,7 @@ async def format_stream(llm_response: AsyncStream[ChatCompletionChunk]) -> Async
     to_ret["sample_output"] = sample_output
     return to_ret
 
+
 def openai_error_retryable(
     error: OpenAIError, retry: int, entity_retry: List[int], max_entity_retries: int
 ) -> Tuple[bool, float]:
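For context, a minimal sketch of how a helper with this signature might be driven from a call site. The (bool, float) return is assumed to mean (should_retry, backoff_seconds), and the loop, the make_request callable, and the entity_retry bookkeeping below are illustrative assumptions, not code from this commit:

import asyncio
from typing import Awaitable, Callable, List

from openai import OpenAIError


async def call_with_retries(
    make_request: Callable[[], Awaitable[str]],
    max_retries: int = 3,
    max_entity_retries: int = 2,
) -> str:
    entity_retry: List[int] = [0]  # per-entity retry counts (assumed semantics of List[int])
    for retry in range(max_retries):
        try:
            return await make_request()
        except OpenAIError as error:
            should_retry, delay = openai_error_retryable(error, retry, entity_retry, max_entity_retries)
            if not should_retry:
                raise
            await asyncio.sleep(delay)  # back off before the next attempt
    raise RuntimeError("retries exhausted")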