Skip to content

Commit cec275e

Browse files
GuanH and Isotr0py authored
[Bugfix] resolve Qwen3-VL GPTQModel quantized model loading failure (#28663)
Signed-off-by: GuanH <guansdrailib@gmail.com>
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
1 parent e2741f6 commit cec275e

File tree

2 files changed

+6
-3
lines changed

2 files changed

+6
-3
lines changed

vllm/model_executor/models/qwen3_vl.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1138,7 +1138,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
1138  1138          self.config = config
1139  1139
1140  1140          self.quant_config = quant_config
1141     -          self.model = Qwen3LLMModel(vllm_config=vllm_config, prefix=prefix)
      1141+         self.model = Qwen3LLMModel(
      1142+             vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
      1143+         )
1142  1144
1143  1145          if get_pp_group().is_last_rank:
1144  1146              if config.tie_word_embeddings:

vllm/model_executor/models/utils.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -117,9 +117,10 @@ class AutoWeightsLoader:
117   117         environment variable `VLLM_LOGGING_LEVEL=DEBUG`.
118   118         """
119   119
120      -        # Models trained using early version ColossalAI
121      -        # may include these tensors in checkpoint. Skip them.
      120+        # Models trained using early version ColossalAI or quantized by
      121+        # GPTQModel may include these tensors in checkpoint. Skip them.
122   122        ROTARY_EMBEDS_UNUSED_WEIGHTS = [
      123+            "rotary_pos_emb.inv_freq",
123   124            "rotary_emb.inv_freq",
124   125            "rotary_emb.cos_cached",
125   126            "rotary_emb.sin_cached",

0 commit comments

Comments (0)