Skip to content

Commit 38bd952

Browse files
[Model] Add qwen3Next support in Main (#4596)
### What this PR does / why we need it? Add Qwen3Next support in main ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.11.2 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.2 --------- Signed-off-by: SunnyLee219 <3294305115@qq.com>
1 parent 3f81c4b commit 38bd952

File tree

4 files changed

+7
-3
lines changed

4 files changed

+7
-3
lines changed

.github/workflows/_e2e_test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,4 +286,4 @@ jobs:
286286
VLLM_USE_MODELSCOPE: True
287287
run: |
288288
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
289-
#pytest -sv tests/e2e/multicard/test_qwen3_next.py
289+
pytest -sv tests/e2e/multicard/test_qwen3_next.py

tests/e2e/multicard/test_qwen3_next.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import os
2525
from unittest.mock import patch
2626

27+
import pytest
2728
from modelscope import snapshot_download # type: ignore
2829

2930
from tests.e2e.conftest import VllmRunner
@@ -63,6 +64,7 @@ def test_models_distributed_Qwen3_NEXT_TP4_FULL_DECODE_ONLY():
6364
del vllm_model
6465

6566

67+
@pytest.mark.skip
6668
def test_models_distributed_Qwen3_NEXT_MTP_TP4_SIMILARITY():
6769
example_prompts = [
6870
"Hello, my name is",
@@ -113,6 +115,7 @@ def test_models_distributed_Qwen3_NEXT_MTP_TP4_SIMILARITY():
113115

114116

115117
# TODO: will conduct accuracy verification after the subsequent version becomes stable
118+
@pytest.mark.skip
116119
@patch.dict(os.environ, {"HCCL_BUFFSIZE": "1024"})
117120
def test_models_distributed_Qwen3_NEXT_W8A8DYNAMIC_WITH_EP():
118121
example_prompts = [

vllm_ascend/ops/triton/mamba/casual_conv1d.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
# and https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/layers/mamba/ops/causal_conv1d.py
88
# mypy: ignore-errors
99

10-
from typing import Optional, Union
10+
from typing import Any, Optional, Union
1111

1212
import torch
1313
import torch.nn.functional as F
@@ -72,6 +72,7 @@ def causal_conv1d_fn(
7272
conv_states: Optional[torch.Tensor] = None,
7373
activation: Optional[str] = "silu",
7474
pad_slot_id: int = PAD_SLOT_ID,
75+
metadata: Optional[Any] = None,
7576
):
7677
"""
7778
x: (batch, dim, seqlen) or (dim,cu_seq_len) for varlen

vllm_ascend/patch/worker/patch_triton.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,4 @@
1111
vllm.model_executor.layers.mamba.ops.causal_conv1d.causal_conv1d_fn = causal_conv1d_fn
1212
vllm.model_executor.layers.fla.ops.fused_recurrent.fused_recurrent_gated_delta_rule_fwd_kernel = fused_recurrent_gated_delta_rule_fwd_kernel
1313
vllm.model_executor.layers.fla.ops.layernorm_guard.LayerNormFn = LayerNormFn
14-
vllm.model_executor.layers.fla.ops.chunk.chunk_gated_delta_rule = chunk_gated_delta_rule
14+
vllm.model_executor.layers.fla.ops.chunk_gated_delta_rule = chunk_gated_delta_rule

0 commit comments

Comments
 (0)