@@ -2681,7 +2681,6 @@ def generate_streaming(tools, functions, function_call, prompt):
         usage=completion["usage"],
     )
 
-
 class Llava15ChatHandler:
     DEFAULT_SYSTEM_MESSAGE: Optional[str] = (
         "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."
@@ -2722,9 +2721,9 @@ class Llava15ChatHandler:
         "{% endif %}"
     )
 
-    def __init__(self, clip_model_path: str, llama_model: Optional[llama.Llama] = None, verbose: bool = True):
+    def __init__(self, clip_model_path: str, verbose: bool = True):
         import llama_cpp.mtmd_cpp as mtmd_cpp
-
+
         self.clip_model_path = clip_model_path
         self.verbose = verbose
         self._mtmd_cpp = mtmd_cpp
@@ -2769,15 +2768,6 @@ def mtmd_free():
 
         self._exit_stack.callback(mtmd_free)
 
-    def __call__(self, *args, **kwargs):
-        if self.clip_ctx is None:
-            # Initialize MTMD context with the llama model from the first argument
-            if len(args) > 0 and isinstance(args[0], llama.Llama):
-                self.initialize_mtmd_context(args[0])
-            else:
-                raise ValueError("MTMD context not initialized. Please call initialize_mtmd_context with a llama model first.")
-        return super().__call__(*args, **kwargs)
-
     def load_image(self, image_url: str) -> bytes:
         return self._load_image(image_url)
 
@@ -3062,26 +3052,6 @@ def __call__(
         )
         return _convert_completion_to_chat(completion_or_chunks, stream=stream)
 
-    def eval_image(self, llama: llama.Llama, image_url: str):
-        image_bytes = self.load_image(image_url)
-        embed = self._embed_image_bytes(image_bytes, llama.context_params.n_threads_batch)
-        if llama.n_tokens + embed.contents.n_image_pos > llama.n_ctx():
-            raise ValueError(
-                f"Prompt exceeds n_ctx: {llama.n_tokens + embed.contents.n_image_pos} > {llama.n_ctx()}"
-            )
-        n_past = ctypes.c_int(llama.n_tokens)
-        n_past_p = ctypes.pointer(n_past)
-        with suppress_stdout_stderr(disable=self.verbose):
-            self._mtmd_cpp.mtmd_cpp_eval_image_embed(
-                llama.ctx,
-                embed,
-                llama.n_batch,
-                n_past_p,
-            )
-        # Required to avoid issues with hf tokenizer
-        llama.input_ids[llama.n_tokens : n_past.value] = -1
-        llama.n_tokens = n_past.value
-
     @staticmethod
     def _load_image(image_url: str) -> bytes:
         # TODO: Add Pillow support for other image formats beyond (jpg, png)
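With this change, `Llava15ChatHandler` is constructed from the clip model path alone: the `llama_model` constructor argument, the lazy `__call__` initialization, and the `eval_image` helper are removed. A minimal usage sketch, assuming placeholder model file paths and the standard llama-cpp-python multimodal chat-completion flow; the MTMD context is assumed to be set up internally rather than by the caller:

```python
# Sketch of constructing the handler with the new (clip_model_path, verbose) signature.
# Model paths below are placeholders, not files shipped with this change.
from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

chat_handler = Llava15ChatHandler(clip_model_path="mmproj-model-f16.gguf", verbose=True)

llm = Llama(
    model_path="llava-v1.5-7b.Q4_K_M.gguf",  # placeholder path
    chat_handler=chat_handler,
    n_ctx=2048,  # leave room for the image embedding plus the text prompt
)

response = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "You are an assistant that describes images."},
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}},
                {"type": "text", "text": "What is shown in this image?"},
            ],
        },
    ]
)
print(response["choices"][0]["message"]["content"])
```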