fix: register torchair models in register_model() instead of in the torchair model runner's __init__

shen-shanshan · shen-shanshan · commit 0bb70dc32461 · 2025-12-01T02:07:22.000Z
Signed-off-by: Shanshan Shen <87969357+shen-shanshan@users.noreply.github.com>
diff --git a/vllm_ascend/__init__.py b/vllm_ascend/__init__.py
@@ -23,8 +23,8 @@ def register():
 
 
 def register_model():
-    from .models import register_model
-    register_model()
+    from vllm_ascend.torchair.utils import register_torchair_model
+    register_torchair_model()
 
 
 def register_connector():
diff --git a/vllm_ascend/torchair/torchair_model_runner.py b/vllm_ascend/torchair/torchair_model_runner.py
@@ -39,8 +39,8 @@
 from vllm_ascend.torchair.utils import (
     TORCHAIR_CACHE_DIR, TorchairCommonAttentionMetadata,
     check_torchair_cache_exist, converting_weight_acl_format,
-    register_torchair_model, torchair_ops_patch,
-    torchair_quant_method_register, write_kv_cache_bytes_to_file)
+    torchair_ops_patch, torchair_quant_method_register,
+    write_kv_cache_bytes_to_file)
 from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_ND, ACL_FORMAT_FRACTAL_NZ,
                                AscendDeviceType, get_ascend_device_type)
 from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
@@ -60,7 +60,6 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
             None, None, vllm_config, device)
         self.use_sparse = hasattr(self.model_config.hf_config, "index_topk")
 
-        register_torchair_model()
         torchair_ops_patch()
         torchair_quant_method_register()
         if self.enable_shared_expert_dp: