File tree Expand file tree Collapse file tree 2 files changed +27
-6
lines changed
tests/entrypoints/offline_mode Expand file tree Collapse file tree 2 files changed +27
-6
lines changed Original file line number Diff line number Diff line change 2323 "max_num_seqs" : 64 ,
2424 "tensor_parallel_size" : 1 ,
2525 },
26+ {
27+ "model" : "Qwen/Qwen3-0.6B" ,
28+ "enforce_eager" : True ,
29+ "gpu_memory_utilization" : 0.50 ,
30+ "max_model_len" : 64 ,
31+ "max_num_batched_tokens" : 64 ,
32+ "max_num_seqs" : 64 ,
33+ "tensor_parallel_size" : 1 ,
34+ "tokenizer" : "Qwen/Qwen3-4B" ,
35+ },
2636 {
2737 "model" : "mistralai/Mistral-7B-Instruct-v0.1" ,
2838 "enforce_eager" : True ,
Original file line number Diff line number Diff line change @@ -581,15 +581,26 @@ def __post_init__(self):
581581 from vllm .plugins import load_general_plugins
582582
583583 load_general_plugins ()
584- # when use hf offline,replace model id to local model path
584+ # When using HF offline mode, replace model and tokenizer IDs with local model paths
585585 if huggingface_hub .constants .HF_HUB_OFFLINE :
586586 model_id = self .model
587587 self .model = get_model_path (self .model , self .revision )
588- logger .info (
589- "HF_HUB_OFFLINE is True, replace model_id [%s] to model_path [%s]" ,
590- model_id ,
591- self .model ,
592- )
588+ if model_id is not self .model :
589+ logger .info (
590+ "HF_HUB_OFFLINE is True, replace model_id [%s] to model_path [%s]" ,
591+ model_id ,
592+ self .model ,
593+ )
594+ if self .tokenizer is not None :
595+ tokenizer_id = self .tokenizer
596+ self .tokenizer = get_model_path (self .tokenizer , self .tokenizer_revision )
597+ if tokenizer_id is not self .tokenizer :
598+ logger .info (
599+ "HF_HUB_OFFLINE is True, replace tokenizer_id [%s] "
600+ "to tokenizer_path [%s]" ,
601+ tokenizer_id ,
602+ self .tokenizer ,
603+ )
593604
594605 @staticmethod
595606 def add_cli_args (parser : FlexibleArgumentParser ) -> FlexibleArgumentParser :
You can’t perform that action at this time.
0 commit comments