@@ -345,9 +345,7 @@ async def _complete_chat_stream(self, messages: List[dict], is_thinking: bool, *
345345 if chunk .choices [0 ].finish_reason :
346346 break
347347
348- os .makedirs (".recorded_inference" , exist_ok = True )
349- with open (file , "wb" ) as f :
350- f .write (dill .dumps (chunks ))
348+ await self ._store_cache (file , chunks )
351349 else :
352350 chunks = await self .router ().acompletion (self .name , messages = messages , stream = True , ** kwargs )
353351
@@ -389,9 +387,7 @@ async def _complete_chat(self, messages: List[dict], **kwargs):
389387 self .logger .info (f"Finished processing messages with model '{ self .original_name } '" )
390388
391389 if _cache_inference :
392- os .makedirs (".recorded_inference" , exist_ok = True )
393- with open (file , "wb" ) as f :
394- f .write (dill .dumps (response ))
390+ await self ._store_cache (file , response )
395391
396392 end_think_tag = response .choices [0 ].message .content .find ("</think>" )
397393 if end_think_tag != - 1 :
@@ -424,6 +420,11 @@ async def embeddings(self, text: List[str], **kwargs) -> List[List[float]]:
424420
425421 return [embedding ["embedding" ] for embedding in embedding .data ]
426422
async def _store_cache(self, file, data):
    """Serialize *data* with dill and persist it to *file*.

    Ensures the ``.recorded_inference`` cache directory exists before
    writing. The mkdir and the write are blocking filesystem calls, so
    they are pushed onto the default thread-pool executor instead of
    running directly on the event loop (the original implementation
    blocked the loop while dumping potentially large payloads).

    Args:
        file: Destination path for the pickled payload.
        data: Any dill-serializable object (response or chunk list).
    """
    import asyncio  # local import: keeps the file-level import block untouched

    def _write():
        # Directory may not exist on first run of a recording session.
        os.makedirs(".recorded_inference", exist_ok=True)
        with open(file, "wb") as f:
            f.write(dill.dumps(data))

    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, _write)
427+
427428 def _hash_completion_request (self , model_name : str , messages : List [dict ], stream : bool , kwargs : dict ) -> str :
428429 request = json .dumps ({"model" : model_name , "messages" : messages , "stream" : stream , ** kwargs }, sort_keys = True )
429430 return hashlib .sha256 (request .encode ("utf-8" )).hexdigest ()
0 commit comments