Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,6 @@ target_link_libraries(
opapi
)

target_link_options(vllm_ascend_C PRIVATE "-Wl,-rpath,$ORIGIN:$ORIGIN/lib")
target_link_options(vllm_ascend_C PRIVATE "-Wl,-rpath,$ORIGIN:$ORIGIN/lib:$ORIGIN/_cann_ops_custom/vendors/vllm-ascend/op_api/lib")

install(TARGETS vllm_ascend_C vllm_ascend_kernels DESTINATION ${VLLM_ASCEND_INSTALL_PATH})
2 changes: 1 addition & 1 deletion csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ option(BUILD_OPEN_PROJECT "Build open ascend ops project." ON)
option(ENABLE_CCACHE "Enable ccache capability" ON)
set(ASCEND_COMPUTE_UNIT "ascend910b" CACHE STRING "soc that need to be compiled")
set(ASCEND_OP_NAME "ALL" CACHE STRING "operators that need to be compiled")
set(VENDOR_NAME "customize" CACHE STRING "vendor name")
set(VENDOR_NAME "vllm-ascend" CACHE STRING "vendor name")

include(cmake/config.cmake)
include(cmake/func.cmake)
Expand Down
1 change: 0 additions & 1 deletion csrc/build_aclnn.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,3 @@ bash build.sh -n $CUSTOM_OPS -c $SOC_ARG

# install custom ops to vllm_ascend/_cann_ops_custom
./output/CANN-custom_ops*.run --install-path=$ROOT_DIR/vllm_ascend/_cann_ops_custom
source $ROOT_DIR/vllm_ascend/_cann_ops_custom/vendors/customize/bin/set_env.bash
6 changes: 3 additions & 3 deletions csrc/torch_binding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -568,9 +568,9 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor> grouped_matmul_swiglu_quant_weigh
int m = x_size[0];
int k = x_size[1];

at::Tensor output = at::zeros({m, n/2}, x.options().dtype(at::kChar));
at::Tensor output_scale = at::zeros({m}, x.options().dtype(at::kFloat));
at::Tensor output_offset = at::zeros({m}, x.options().dtype(at::kFloat));
at::Tensor output = at::empty({m, n/2}, x.options().dtype(at::kChar));
at::Tensor output_scale = at::empty({m}, x.options().dtype(at::kFloat));
at::Tensor output_offset = at::empty({m}, x.options().dtype(at::kFloat));
Comment on lines +571 to +573
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

The size of these output tensors depends on n, which is calculated on line 567 from the weight TensorList (weight[0].sizes()[1]). This is unsafe because if the weight TensorList is empty, accessing weight[0] will cause a crash. It's crucial to add a check to ensure weight is not empty before its elements are accessed.

For example, you could add the following check before line 567:

TORCH_CHECK(!weight.empty(), "weight tensor list cannot be empty");


EXEC_NPU_CMD(
aclnnGroupedMatmulSwigluQuantWeightNzTensorList,
Expand Down
21 changes: 0 additions & 21 deletions vllm_ascend/platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,27 +38,6 @@
prefill_context_parallel_enable, update_aclgraph_sizes,
update_cudagraph_capture_sizes, update_default_aclgraph_sizes)

# set custom ops path
CUR_DIR = os.path.dirname(os.path.realpath(__file__))
CUSTOM_OPP_PATH = os.path.join(CUR_DIR, "vllm_ascend", "_cann_ops_custom",
"vendors", "customize")
CUSTOM_LIB_PATH = os.path.join(CUSTOM_OPP_PATH, "op_api", "lib")

if os.path.exists(CUSTOM_OPP_PATH):
current_cust_opp_path = os.environ.get("ASCEND_CUSTOM_OPP_PATH", "")
if current_cust_opp_path:
os.environ[
"ASCEND_CUSTOM_OPP_PATH"] = f"{CUSTOM_OPP_PATH}:{current_cust_opp_path}"
else:
os.environ["ASCEND_CUSTOM_OPP_PATH"] = CUSTOM_OPP_PATH

if os.path.exists(CUSTOM_LIB_PATH):
current_lib_path = os.environ.get("LD_LIBRARY_PATH", "")
if current_lib_path:
os.environ["LD_LIBRARY_PATH"] = f"{CUSTOM_LIB_PATH}:{current_lib_path}"
else:
os.environ["LD_LIBRARY_PATH"] = CUSTOM_LIB_PATH

if TYPE_CHECKING:
from vllm.config import ModelConfig, VllmConfig
from vllm.utils import FlexibleArgumentParser
Expand Down
13 changes: 13 additions & 0 deletions vllm_ascend/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,19 @@ def enable_custom_op():
Ensure that ASCEND_RT_VISIBLE_DEVICES can be dynamically modified before torch.npu.set_device().
"""
global _CUSTOM_OP_ENABLED

# set custom ops path
CUR_DIR = os.path.dirname(os.path.realpath(__file__))
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@wangxiyuan This line causes the test tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ to fail.
os.path.realpath involves system calls that are not supported by torch.compile (Dynamo) during graph capture. This triggers a runtime crash when graph mode is enabled.
Consider moving the CUR_DIR calculation to module level (global scope) so that it is computed once at import time; this avoids both the tracing error and the unnecessary recalculation on every call.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I found it. Thanks for the reminder.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hukongyi I think #4675 should fix the CI.

CUSTOM_OPP_PATH = os.path.join(CUR_DIR, "_cann_ops_custom", "vendors",
"vllm-ascend")
if os.path.exists(CUSTOM_OPP_PATH):
current_cust_opp_path = os.environ.get("ASCEND_CUSTOM_OPP_PATH", "")
if current_cust_opp_path:
os.environ[
"ASCEND_CUSTOM_OPP_PATH"] = f"{CUSTOM_OPP_PATH}:{current_cust_opp_path}"
else:
os.environ["ASCEND_CUSTOM_OPP_PATH"] = CUSTOM_OPP_PATH

if _CUSTOM_OP_ENABLED is not None:
return _CUSTOM_OP_ENABLED
try:
Expand Down
Loading