@@ -345,9 +345,7 @@ async def _complete_chat_stream(self, messages: List[dict], is_thinking: bool, *
345345 if chunk .choices [0 ].finish_reason :
346346 break
347347
348- os .makedirs (".recorded_inference" , exist_ok = True )
349- with open (file , "wb" ) as f :
350- f .write (dill .dumps (chunks ))
348+ await self ._store_cache (file , chunks )
351349 else :
352350 chunks = await self .router ().acompletion (self .name , messages = messages , stream = True , ** kwargs )
353351
@@ -389,9 +387,7 @@ async def _complete_chat(self, messages: List[dict], **kwargs):
389387 self .logger .info (f"Finished processing messages with model '{ self .original_name } '" )
390388
391389 if _cache_inference :
392- os .makedirs (".recorded_inference" , exist_ok = True )
393- with open (file , "wb" ) as f :
394- f .write (dill .dumps (response ))
390+ await self ._store_cache (file , response )
395391
396392 end_think_tag = response .choices [0 ].message .content .find ("</think>" )
397393 if end_think_tag != - 1 :
@@ -424,6 +420,11 @@ async def embeddings(self, text: List[str], **kwargs) -> List[List[float]]:
424420
425421 return [embedding ["embedding" ] for embedding in embedding .data ]
426422
async def _store_cache(self, file, data):
    """Serialize *data* with dill and persist it to *file*.

    Ensures the ``.recorded_inference`` cache directory exists before
    writing. The mkdir and the write are blocking filesystem calls, so
    they are pushed onto the default thread-pool executor instead of
    running directly on the event loop (the original implementation
    blocked the loop while dumping potentially large payloads).

    Args:
        file: Destination path for the pickled payload.
        data: Any dill-serializable object (response or chunk list).
    """
    import asyncio  # local import: keeps the file-level import block untouched

    def _write():
        # Directory may not exist on first run of a recording session.
        os.makedirs(".recorded_inference", exist_ok=True)
        with open(file, "wb") as f:
            f.write(dill.dumps(data))

    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, _write)
427+
427428 def _hash_completion_request (self , model_name : str , messages : List [dict ], stream : bool , kwargs : dict ) -> str :
428429 request = json .dumps ({"model" : model_name , "messages" : messages , "stream" : stream , ** kwargs }, sort_keys = True )
429430 return hashlib .sha256 (request .encode ("utf-8" )).hexdigest ()
0 commit comments