@@ -198,6 +198,7 @@ def __init__(
198198 A Llama instance.
199199 """
200200 self .verbose = verbose
201+ self ._stack = contextlib .ExitStack ()
201202
202203 set_verbose (verbose )
203204
@@ -365,8 +366,6 @@ def __init__(
365366 if not os .path .exists (model_path ):
366367 raise ValueError (f"Model path does not exist: { model_path } " )
367368
368- self ._stack = contextlib .ExitStack ()
369-
370369 self ._model = self ._stack .enter_context (
371370 contextlib .closing (
372371 _LlamaModel (
@@ -420,6 +419,15 @@ def __init__(
420419 raise RuntimeError (
421420 f"Failed to initialize LoRA adapter from lora path: { self .lora_path } "
422421 )
422+
423+ def free_lora_adapter ():
424+ if self ._lora_adapter is None :
425+ return
426+ llama_cpp .llama_lora_adapter_free (self ._lora_adapter )
427+ self ._lora_adapter = None
428+
429+ self ._stack .callback (free_lora_adapter )
430+
423431 assert self ._ctx .ctx is not None
424432 if llama_cpp .llama_lora_adapter_set (
425433 self ._ctx .ctx , self ._lora_adapter , self .lora_scale
@@ -2085,14 +2093,9 @@ def pooling_type(self) -> str:
20852093
def close(self) -> None:
    """Explicitly free the model from memory.

    Unwinds ``self._stack`` (a ``contextlib.ExitStack``), which runs every
    cleanup registered on it in reverse order of registration.

    Calling this more than once is harmless: after the first call the exit
    stack is empty, so later calls have nothing left to unwind.
    """
    # close() is also reached from __del__, which Python may invoke on an
    # instance whose __init__ never ran (e.g. one created via __new__).
    # Look the stack up defensively so that path stays silent instead of
    # raising AttributeError from the finalizer.
    stack = getattr(self, "_stack", None)
    if stack is not None:
        stack.close()
20912097
def __del__(self) -> None:
    """Finalizer: release held resources by delegating to ``close()``."""
    # No cleanup is performed here directly; everything goes through
    # close() so explicit and implicit teardown share one code path.
    self.close()
20972100
20982101 @staticmethod
0 commit comments