Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/_e2e_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -286,4 +286,4 @@ jobs:
VLLM_USE_MODELSCOPE: True
run: |
. /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
#pytest -sv tests/e2e/multicard/test_qwen3_next.py
pytest -sv tests/e2e/multicard/test_qwen3_next.py
3 changes: 3 additions & 0 deletions tests/e2e/multicard/test_qwen3_next.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import os
from unittest.mock import patch

import pytest
from modelscope import snapshot_download # type: ignore

from tests.e2e.conftest import VllmRunner
Expand Down Expand Up @@ -63,6 +64,7 @@ def test_models_distributed_Qwen3_NEXT_TP4_FULL_DECODE_ONLY():
del vllm_model


@pytest.mark.skip
def test_models_distributed_Qwen3_NEXT_MTP_TP4_SIMILARITY():
example_prompts = [
"Hello, my name is",
Expand Down Expand Up @@ -113,6 +115,7 @@ def test_models_distributed_Qwen3_NEXT_MTP_TP4_SIMILARITY():


# TODO: conduct accuracy verification once a subsequent version becomes stable
@pytest.mark.skip
@patch.dict(os.environ, {"HCCL_BUFFSIZE": "1024"})
def test_models_distributed_Qwen3_NEXT_W8A8DYNAMIC_WITH_EP():
example_prompts = [
Expand Down
3 changes: 2 additions & 1 deletion vllm_ascend/ops/triton/mamba/casual_conv1d.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# and https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/layers/mamba/ops/causal_conv1d.py
# mypy: ignore-errors

from typing import Optional, Union
from typing import Any, Optional, Union

import torch
import torch.nn.functional as F
Expand Down Expand Up @@ -72,6 +72,7 @@ def causal_conv1d_fn(
conv_states: Optional[torch.Tensor] = None,
activation: Optional[str] = "silu",
pad_slot_id: int = PAD_SLOT_ID,
metadata: Optional[Any] = None,
):
"""
x: (batch, dim, seqlen) or (dim,cu_seq_len) for varlen
Expand Down
2 changes: 1 addition & 1 deletion vllm_ascend/patch/worker/patch_triton.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@
vllm.model_executor.layers.mamba.ops.causal_conv1d.causal_conv1d_fn = causal_conv1d_fn
vllm.model_executor.layers.fla.ops.fused_recurrent.fused_recurrent_gated_delta_rule_fwd_kernel = fused_recurrent_gated_delta_rule_fwd_kernel
vllm.model_executor.layers.fla.ops.layernorm_guard.LayerNormFn = LayerNormFn
vllm.model_executor.layers.fla.ops.chunk.chunk_gated_delta_rule = chunk_gated_delta_rule
vllm.model_executor.layers.fla.ops.chunk_gated_delta_rule = chunk_gated_delta_rule
Loading