Skip to content

Commit 708ebbc

Browse files
committed
upgrade torch-npu version
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent 136ea9f commit 708ebbc

File tree

12 files changed

+15
-34
lines changed

12 files changed

+15
-34
lines changed

CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -22,9 +22,9 @@ find_package(Torch REQUIRED)
2222

2323
run_python(TORCH_VERSION
2424
"import torch; print(torch.__version__)" "Failed to locate torch path")
25-
# check torch version is 2.7.1
26-
if(NOT ${TORCH_VERSION} VERSION_EQUAL "2.7.1")
27-
message(FATAL_ERROR "Expected PyTorch version 2.7.1, but found ${TORCH_VERSION}")
25+
# check torch version is 2.8.0
26+
if(NOT ${TORCH_VERSION} VERSION_EQUAL "2.8.0")
27+
message(FATAL_ERROR "Expected PyTorch version 2.8.0, but found ${TORCH_VERSION}")
2828
endif()
2929

3030
set(RUN_MODE "npu" CACHE STRING "cpu/sim/npu")

README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ By using vLLM Ascend plugin, popular open-source models, including Transformer-l
4343
- Software:
4444
* Python >= 3.10, < 3.12
4545
* CANN >= 8.3.rc1 (Ascend HDK version refers to [here](https://www.hiascend.com/document/detail/zh/canncommercial/83RC1/releasenote/releasenote_0000.html))
46-
* PyTorch == 2.7.1, torch-npu == 2.7.1
46+
* PyTorch == 2.8.0, torch-npu == 2.8.0
4747
* vLLM (the same version as vllm-ascend)
4848

4949
## Getting Started

README.zh.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ vLLM 昇腾插件 (`vllm-ascend`) 是一个由社区维护的让vLLM在Ascend NP
4444
- 软件:
4545
* Python >= 3.10, < 3.12
4646
* CANN >= 8.3.rc1 (Ascend HDK 版本参考[这里](https://www.hiascend.com/document/detail/zh/canncommercial/83RC1/releasenote/releasenote_0000.html))
47-
* PyTorch == 2.7.1, torch-npu == 2.7.1
47+
* PyTorch == 2.8.0, torch-npu == 2.8.0
4848
* vLLM (与vllm-ascend版本一致)
4949

5050
## 开始使用

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,8 @@ requires = [
1515
"setuptools>=64",
1616
"setuptools-scm>=8",
1717
"transformers<=4.57.1",
18-
"torch-npu==2.7.1",
19-
"torch==2.7.1",
18+
"torch-npu==2.8.0",
19+
"torch==2.8.0",
2020
"torchvision",
2121
"wheel",
2222
"msgpack",

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ scipy
1111
pandas
1212
setuptools>=64
1313
setuptools-scm>=8
14-
torch==2.7.1
14+
torch==2.8.0
1515
torchvision
1616
wheel
1717
pandas-stubs
@@ -27,6 +27,6 @@ numba
2727
# Install torch_npu
2828
#--pre
2929
#--extra-index-url https://mirrors.huaweicloud.com/ascend/repos/pypi
30-
torch-npu==2.7.1
30+
torch-npu==2.8.0
3131

3232
transformers<=4.57.1

tests/e2e/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -270,7 +270,7 @@ class VllmRunner:
270270
def __init__(
271271
self,
272272
model_name: str,
273-
task: TaskOption = "auto",
273+
runner: str = "auto",
274274
tokenizer_name: Optional[str] = None,
275275
tokenizer_mode: str = "auto",
276276
# Use smaller max model length, otherwise bigger model cannot run due
@@ -288,7 +288,7 @@ def __init__(
288288
) -> None:
289289
self.model = LLM(
290290
model=model_name,
291-
task=task,
291+
runner=runner,
292292
tokenizer=tokenizer_name,
293293
tokenizer_mode=tokenizer_mode,
294294
trust_remote_code=True,

tests/e2e/singlecard/test_bge_model.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ def test_bge_model_correctness():
2828
model_name = snapshot_download("BAAI/bge-m3")
2929
with VllmRunner(
3030
model_name,
31-
task="embed",
31+
runner="pooling",
3232
enforce_eager=True,
3333
) as vllm_runner:
3434
vllm_outputs = vllm_runner.encode(queries)

tests/e2e/singlecard/test_embedding.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ def test_embed_models_correctness():
2828
model_name = snapshot_download("Qwen/Qwen3-Embedding-0.6B")
2929
with VllmRunner(
3030
model_name,
31-
task="embed",
31+
runner="pooling",
3232
enforce_eager=False,
3333
) as vllm_runner:
3434
vllm_outputs = vllm_runner.encode(queries)

tests/e2e/singlecard/test_embedding_aclgraph.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,14 +34,14 @@ def test_aclgrpah_embed_models_correctness(model_name):
3434

3535
with VllmRunner(
3636
model_name,
37-
task="embed",
37+
runner="pooling",
3838
enforce_eager=False,
3939
) as vllm_aclgraph_runner:
4040
vllm_aclgraph_outputs = vllm_aclgraph_runner.encode(queries)
4141

4242
with VllmRunner(
4343
model_name,
44-
task="embed",
44+
runner="pooling",
4545
enforce_eager=True,
4646
) as vllm_runner:
4747
vllm_outputs = vllm_runner.encode(queries)

tests/ut/core/test_scheduler.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,6 @@ def create_scheduler(self, mock_compute_encoder_budget):
123123

124124
model_config = ModelConfig(
125125
model=MODEL,
126-
task="auto",
127126
tokenizer=MODEL,
128127
tokenizer_mode="auto",
129128
trust_remote_code=True,
@@ -838,7 +837,6 @@ def create_scheduler(self, mock_compute_encoder_budget):
838837

839838
model_config = ModelConfig(
840839
model=MODEL,
841-
task="auto",
842840
tokenizer=MODEL,
843841
tokenizer_mode="auto",
844842
trust_remote_code=True,

0 commit comments

Comments (0)