From 1e05f120f087872a1255d79a376a76052fb20b4c Mon Sep 17 00:00:00 2001
From: Tsukasa OI
Date: Fri, 28 Nov 2025 13:13:35 +0000
Subject: [PATCH 1/2] Log offline model path replacement only when `model` changes

When HF_HUB_OFFLINE is true, vLLM always rewrites the model ID/path to
the corresponding local path. However, this does not mean the ID/path
is necessarily altered. If `model` already points to a local directory
or a GGUF file, the value remains unchanged and there is no need to
inform the user via the log.

This change updates the offline-conversion logging to emit a message
only when the value of `model` actually changes.

Signed-off-by: Tsukasa OI
---
 vllm/engine/arg_utils.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 31825980f3a1..1af33a2710b9 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -585,11 +585,12 @@ def __post_init__(self):
         if huggingface_hub.constants.HF_HUB_OFFLINE:
             model_id = self.model
             self.model = get_model_path(self.model, self.revision)
-            logger.info(
-                "HF_HUB_OFFLINE is True, replace model_id [%s] to model_path [%s]",
-                model_id,
-                self.model,
-            )
+            if model_id != self.model:
+                logger.info(
+                    "HF_HUB_OFFLINE is True, replace model_id [%s] to model_path [%s]",
+                    model_id,
+                    self.model,
+                )
 
     @staticmethod
     def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:

From d6de5f3070c073321d24173d76c29b09573cc6c5 Mon Sep 17 00:00:00 2001
From: Tsukasa OI
Date: Fri, 28 Nov 2025 13:27:29 +0000
Subject: [PATCH 2/2] Perform offline path replacement on `tokenizer`

If `tokenizer` is a Hugging Face model, vLLM attempts to access Hugging
Face even if the tokenizer is already available offline. This prevents
specifying a Hugging Face model as `tokenizer` in offline mode (i.e.
when `HF_HUB_OFFLINE` is true).

With this commit, vLLM performs offline path replacement also on
`tokenizer`, not only on `model`.
A test case is added because the error related to offline mode only
occurs when the model and the tokenizer are different.

Signed-off-by: Tsukasa OI
---
 tests/entrypoints/offline_mode/test_offline_mode.py | 10 ++++++++++
 vllm/engine/arg_utils.py                            | 12 +++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/tests/entrypoints/offline_mode/test_offline_mode.py b/tests/entrypoints/offline_mode/test_offline_mode.py
index 25e663f3af0e..539ff89abe9c 100644
--- a/tests/entrypoints/offline_mode/test_offline_mode.py
+++ b/tests/entrypoints/offline_mode/test_offline_mode.py
@@ -23,6 +23,16 @@
         "max_num_seqs": 64,
         "tensor_parallel_size": 1,
     },
+    {
+        "model": "Qwen/Qwen3-0.6B",
+        "enforce_eager": True,
+        "gpu_memory_utilization": 0.50,
+        "max_model_len": 64,
+        "max_num_batched_tokens": 64,
+        "max_num_seqs": 64,
+        "tensor_parallel_size": 1,
+        "tokenizer": "Qwen/Qwen3-4B",
+    },
     {
         "model": "mistralai/Mistral-7B-Instruct-v0.1",
         "enforce_eager": True,
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 1af33a2710b9..186a2a414187 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -581,7 +581,7 @@ def __post_init__(self):
         from vllm.plugins import load_general_plugins
 
         load_general_plugins()
-        # when use hf offline,replace model id to local model path
+        # When in HF offline mode, replace model and tokenizer IDs with local paths
         if huggingface_hub.constants.HF_HUB_OFFLINE:
             model_id = self.model
             self.model = get_model_path(self.model, self.revision)
@@ -591,6 +591,16 @@
                 model_id,
                 self.model,
             )
+            if self.tokenizer is not None:
+                tokenizer_id = self.tokenizer
+                self.tokenizer = get_model_path(self.tokenizer, self.tokenizer_revision)
+                if tokenizer_id != self.tokenizer:
+                    logger.info(
+                        "HF_HUB_OFFLINE is True, replace tokenizer_id [%s] "
+                        "to tokenizer_path [%s]",
+                        tokenizer_id,
+                        self.tokenizer,
+                    )
 
     @staticmethod
     def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: