1717 MultiModalUUIDDict ,
1818)
1919from vllm .multimodal .processing import BaseMultiModalProcessor
20- from vllm .transformers_utils .tokenizer import AnyTokenizer
20+ from vllm .transformers_utils .tokenizer import AnyTokenizer , init_tokenizer_from_configs
21+ from vllm .utils .jsontree import json_iter_leaves
2122
2223from .data import (
2324 DecoderOnlyInputs ,
@@ -44,17 +45,20 @@ class InputPreprocessor:
# Diff view of InputPreprocessor.__init__: lines marked '-' belong to the
# removed version, lines marked '+' to the added version, and unmarked
# lines are shared by both.
4445 def __init__ (
4546 self ,
4647 model_config : ModelConfig ,
# Removed: the caller no longer passes a tokenizer argument.
47- tokenizer : Optional [AnyTokenizer ],
4848 mm_registry : MultiModalRegistry = MULTIMODAL_REGISTRY ,
4949 mm_processor_cache : Optional [BaseMultiModalProcessorCache ] = None ,
5050 ) -> None :
5151 super ().__init__ ()
5252
5353 self .model_config = model_config
# Removed: assignment from the dropped `tokenizer` parameter.
54- self .tokenizer = tokenizer
5554 self .mm_registry = mm_registry
5655 self .mm_processor_cache = mm_processor_cache
5756
# Added: the tokenizer is now resolved inside the constructor. It is left
# as None when model_config.skip_tokenizer_init is truthy; otherwise it is
# built with init_tokenizer_from_configs(model_config).
57+ if model_config .skip_tokenizer_init :
58+ self .tokenizer = None
59+ else :
60+ self .tokenizer = init_tokenizer_from_configs (model_config )
61+
5862 def get_tokenizer (self ) -> AnyTokenizer :
5963 if self .tokenizer is None :
6064 raise ValueError (
@@ -273,7 +277,10 @@ def _process_multimodal(
273277 mm_hashes = mm_input ["mm_hashes" ]
274278
275279 # Validate that all mm items have a string as their hash
276- if not contains_only_strings (mm_hashes ):
280+ contains_only_strings = all (
281+ isinstance (leaf , str ) for leaf in json_iter_leaves (mm_hashes )
282+ )
283+ if not contains_only_strings :
277284 raise ValueError (
278285 f"mm_hashes must contain only strings, got: { mm_hashes } . "
279286 "This is likely due to an incorrect custom implementation of "
@@ -693,15 +700,3 @@ def preprocess(
def clear_cache(self) -> None:
    """Empty the multimodal processor cache if one is attached.

    Does nothing when ``self.mm_processor_cache`` is ``None``.
    """
    cache = self.mm_processor_cache
    if cache is None:
        return
    cache.clear_cache()
696-
697-
def contains_only_strings(obj: object):
    """Report whether ``obj`` is built solely from string leaves.

    Accepted shapes are a plain string, a flat list whose items are all
    strings, or a dict whose values each satisfy this same check. Any
    other object, including a list holding a non-string item, is rejected.
    """
    if isinstance(obj, dict):
        # Dict values may themselves be strings, lists of strings, or dicts.
        for value in obj.values():
            if not contains_only_strings(value):
                return False
        return True
    if isinstance(obj, list):
        # Lists are treated as leaves: items are not recursed into.
        return not any(not isinstance(item, str) for item in obj)
    return isinstance(obj, str)
0 commit comments