From 1e05f120f087872a1255d79a376a76052fb20b4c Mon Sep 17 00:00:00 2001
From: Tsukasa OI
Date: Fri, 28 Nov 2025 13:13:35 +0000
Subject: [PATCH 1/2] Log offline model path replacement only when `model` changes

When HF_HUB_OFFLINE is true, vLLM always rewrites the model ID/path to
the corresponding local path. However, this does not mean the ID/path
is necessarily altered. If `model` already points to a local directory
or a GGUF file, the value remains unchanged and there is no need to
inform the user via the log.

This change updates the offline-conversion logging to emit a message
only when the value of `model` actually changes.

Signed-off-by: Tsukasa OI
---
 vllm/engine/arg_utils.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 31825980f3a1..1af33a2710b9 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -585,11 +585,12 @@ def __post_init__(self):
         if huggingface_hub.constants.HF_HUB_OFFLINE:
             model_id = self.model
             self.model = get_model_path(self.model, self.revision)
-            logger.info(
-                "HF_HUB_OFFLINE is True, replace model_id [%s] to model_path [%s]",
-                model_id,
-                self.model,
-            )
+            if model_id != self.model:
+                logger.info(
+                    "HF_HUB_OFFLINE is True, replace model_id [%s] to model_path [%s]",
+                    model_id,
+                    self.model,
+                )
 
     @staticmethod
     def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:

From d6de5f3070c073321d24173d76c29b09573cc6c5 Mon Sep 17 00:00:00 2001
From: Tsukasa OI
Date: Fri, 28 Nov 2025 13:27:29 +0000
Subject: [PATCH 2/2] Perform offline path replacement on `tokenizer`

If `tokenizer` is a Hugging Face model, vLLM attempts to access Hugging
Face even if the tokenizer is already available offline. This prevents
specifying a Hugging Face model as `tokenizer` in offline mode (i.e.
when `HF_HUB_OFFLINE` is true).

With this commit, vLLM performs offline path replacement also on
`tokenizer`, not only on `model`.
A test case is added because the error related to offline mode only
occurs when the model and the tokenizer are different.

Signed-off-by: Tsukasa OI
---
 tests/entrypoints/offline_mode/test_offline_mode.py | 10 ++++++++++
 vllm/engine/arg_utils.py                            | 12 +++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/tests/entrypoints/offline_mode/test_offline_mode.py b/tests/entrypoints/offline_mode/test_offline_mode.py
index 25e663f3af0e..539ff89abe9c 100644
--- a/tests/entrypoints/offline_mode/test_offline_mode.py
+++ b/tests/entrypoints/offline_mode/test_offline_mode.py
@@ -23,6 +23,16 @@
         "max_num_seqs": 64,
         "tensor_parallel_size": 1,
     },
+    {
+        "model": "Qwen/Qwen3-0.6B",
+        "enforce_eager": True,
+        "gpu_memory_utilization": 0.50,
+        "max_model_len": 64,
+        "max_num_batched_tokens": 64,
+        "max_num_seqs": 64,
+        "tensor_parallel_size": 1,
+        "tokenizer": "Qwen/Qwen3-4B",
+    },
     {
         "model": "mistralai/Mistral-7B-Instruct-v0.1",
         "enforce_eager": True,
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 1af33a2710b9..186a2a414187 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -581,7 +581,7 @@ def __post_init__(self):
         from vllm.plugins import load_general_plugins
 
         load_general_plugins()
-        # when use hf offline,replace model id to local model path
+        # When in HF offline mode, replace model and tokenizer IDs with local paths
         if huggingface_hub.constants.HF_HUB_OFFLINE:
             model_id = self.model
             self.model = get_model_path(self.model, self.revision)
@@ -591,6 +591,16 @@
                 model_id,
                 self.model,
             )
+            if self.tokenizer is not None:
+                tokenizer_id = self.tokenizer
+                self.tokenizer = get_model_path(self.tokenizer, self.tokenizer_revision)
+                if tokenizer_id != self.tokenizer:
+                    logger.info(
+                        "HF_HUB_OFFLINE is True, replace tokenizer_id [%s] "
+                        "to tokenizer_path [%s]",
+                        tokenizer_id,
+                        self.tokenizer,
+                    )
 
     @staticmethod
     def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: