Skip to content

Commit 708ebbc

Browse files
committed
upgrade torch-npu version
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent 136ea9f commit 708ebbc

File tree

12 files changed

+15
-34
lines changed

12 files changed

+15
-34
lines changed

CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -22,9 +22,9 @@ find_package(Torch REQUIRED)
2222

2323
run_python(TORCH_VERSION
2424
"import torch; print(torch.__version__)" "Failed to locate torch path")
25-
# check torch version is 2.7.1
26-
if(NOT ${TORCH_VERSION} VERSION_EQUAL "2.7.1")
27-
message(FATAL_ERROR "Expected PyTorch version 2.7.1, but found ${TORCH_VERSION}")
25+
# check torch version is 2.8.0
26+
if(NOT ${TORCH_VERSION} VERSION_EQUAL "2.8.0")
27+
message(FATAL_ERROR "Expected PyTorch version 2.8.0, but found ${TORCH_VERSION}")
2828
endif()
2929

3030
set(RUN_MODE "npu" CACHE STRING "cpu/sim/npu")

README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ By using vLLM Ascend plugin, popular open-source models, including Transformer-l
4343
- Software:
4444
* Python >= 3.10, < 3.12
4545
* CANN >= 8.3.rc1 (Ascend HDK version refers to [here](https://www.hiascend.com/document/detail/zh/canncommercial/83RC1/releasenote/releasenote_0000.html))
46-
* PyTorch == 2.7.1, torch-npu == 2.7.1
46+
* PyTorch == 2.8.0, torch-npu == 2.8.0
4747
* vLLM (the same version as vllm-ascend)
4848

4949
## Getting Started

README.zh.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ vLLM 昇腾插件 (`vllm-ascend`) 是一个由社区维护的让vLLM在Ascend NP
4444
- 软件:
4545
* Python >= 3.10, < 3.12
4646
* CANN >= 8.3.rc1 (Ascend HDK 版本参考[这里](https://www.hiascend.com/document/detail/zh/canncommercial/83RC1/releasenote/releasenote_0000.html))
47-
* PyTorch == 2.7.1, torch-npu == 2.7.1
47+
* PyTorch == 2.8.0, torch-npu == 2.8.0
4848
* vLLM (与vllm-ascend版本一致)
4949

5050
## 开始使用

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,8 @@ requires = [
1515
"setuptools>=64",
1616
"setuptools-scm>=8",
1717
"transformers<=4.57.1",
18-
"torch-npu==2.7.1",
19-
"torch==2.7.1",
18+
"torch-npu==2.8.0",
19+
"torch==2.8.0",
2020
"torchvision",
2121
"wheel",
2222
"msgpack",

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ scipy
1111
pandas
1212
setuptools>=64
1313
setuptools-scm>=8
14-
torch==2.7.1
14+
torch==2.8.0
1515
torchvision
1616
wheel
1717
pandas-stubs
@@ -27,6 +27,6 @@ numba
2727
# Install torch_npu
2828
#--pre
2929
#--extra-index-url https://mirrors.huaweicloud.com/ascend/repos/pypi
30-
torch-npu==2.7.1
30+
torch-npu==2.8.0
3131

3232
transformers<=4.57.1

tests/e2e/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -270,7 +270,7 @@ class VllmRunner:
270270
def __init__(
271271
self,
272272
model_name: str,
273-
task: TaskOption = "auto",
273+
runner: str = "auto",
274274
tokenizer_name: Optional[str] = None,
275275
tokenizer_mode: str = "auto",
276276
# Use smaller max model length, otherwise bigger model cannot run due
@@ -288,7 +288,7 @@ def __init__(
288288
) -> None:
289289
self.model = LLM(
290290
model=model_name,
291-
task=task,
291+
runner=runner,
292292
tokenizer=tokenizer_name,
293293
tokenizer_mode=tokenizer_mode,
294294
trust_remote_code=True,

tests/e2e/singlecard/test_bge_model.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ def test_bge_model_correctness():
2828
model_name = snapshot_download("BAAI/bge-m3")
2929
with VllmRunner(
3030
model_name,
31-
task="embed",
31+
runner="pooling",
3232
enforce_eager=True,
3333
) as vllm_runner:
3434
vllm_outputs = vllm_runner.encode(queries)

tests/e2e/singlecard/test_embedding.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ def test_embed_models_correctness():
2828
model_name = snapshot_download("Qwen/Qwen3-Embedding-0.6B")
2929
with VllmRunner(
3030
model_name,
31-
task="embed",
31+
runner="pooling",
3232
enforce_eager=False,
3333
) as vllm_runner:
3434
vllm_outputs = vllm_runner.encode(queries)

tests/e2e/singlecard/test_embedding_aclgraph.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,14 +34,14 @@ def test_aclgrpah_embed_models_correctness(model_name):
3434

3535
with VllmRunner(
3636
model_name,
37-
task="embed",
37+
runner="pooling",
3838
enforce_eager=False,
3939
) as vllm_aclgraph_runner:
4040
vllm_aclgraph_outputs = vllm_aclgraph_runner.encode(queries)
4141

4242
with VllmRunner(
4343
model_name,
44-
task="embed",
44+
runner="pooling",
4545
enforce_eager=True,
4646
) as vllm_runner:
4747
vllm_outputs = vllm_runner.encode(queries)

tests/ut/core/test_scheduler.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,6 @@ def create_scheduler(self, mock_compute_encoder_budget):
123123

124124
model_config = ModelConfig(
125125
model=MODEL,
126-
task="auto",
127126
tokenizer=MODEL,
128127
tokenizer_mode="auto",
129128
trust_remote_code=True,
@@ -838,7 +837,6 @@ def create_scheduler(self, mock_compute_encoder_budget):
838837

839838
model_config = ModelConfig(
840839
model=MODEL,
841-
task="auto",
842840
tokenizer=MODEL,
843841
tokenizer_mode="auto",
844842
trust_remote_code=True,

0 commit comments

Comments (0)