@@ -27,7 +27,7 @@ async def quality_response_async_mock(*args, **kwargs):
2727
2828
2929async def quality_no_response_async_mock ():
30- return "1"
30+ return { "llm_output" : "1" }
3131
3232
3333@pytest .mark .usefixtures ("mock_model_config" )
@@ -73,9 +73,21 @@ def test_similarity_evaluator_keys(self, mock_model_config):
7373 )
7474 assert result ["similarity" ] == result ["gpt_similarity" ] == 1
7575 # Expect 11 keys: the 4 core similarity keys plus 7 token-usage/metadata keys
76- assert len (result ) == 4
76+ assert len (result ) == 11
7777 # Verify all expected keys are present
78- assert set (result .keys ()) == {"similarity" , "gpt_similarity" , "similarity_result" , "similarity_threshold" }
78+ assert set (result .keys ()) == {
79+ "similarity" ,
80+ "gpt_similarity" ,
81+ "similarity_result" ,
82+ "similarity_threshold" ,
83+ "similarity_prompt_tokens" ,
84+ "similarity_completion_tokens" ,
85+ "similarity_total_tokens" ,
86+ "similarity_finish_reason" ,
87+ "similarity_model" ,
88+ "similarity_sample_input" ,
89+ "similarity_sample_output" ,
90+ }
7991
8092 def test_retrieval_evaluator_keys (self , mock_model_config ):
8193 retrieval_eval = RetrievalEvaluator (model_config = mock_model_config )
0 commit comments