Skip to content

Commit 8f3616f

Browse files
Remove old cutlass mla (#23961)
Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com>
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
1 parent 47f670b commit 8f3616f

File tree

6 files changed

+10
-345
lines changed

6 files changed

+10
-345
lines changed

CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -308,7 +308,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
308308
"csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu"
309309
"csrc/sparse/cutlass/sparse_scaled_mm_entry.cu"
310310
"csrc/cutlass_extensions/common.cpp"
311-
"csrc/attention/mla/cutlass_mla_entry.cu"
312311
"csrc/quantization/fp8/per_token_group_quant.cu")
313312

314313
set_gencode_flags_for_srcs(
@@ -595,7 +594,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
595594
cuda_archs_loose_intersection(MLA_ARCHS "10.0a" "${CUDA_ARCHS}")
596595
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND MLA_ARCHS)
597596
set(SRCS
598-
"csrc/attention/mla/cutlass_mla_kernels.cu"
599597
"csrc/attention/mla/sm100_cutlass_mla_kernel.cu")
600598
set_gencode_flags_for_srcs(
601599
SRCS "${SRCS}"

csrc/attention/mla/cutlass_mla_entry.cu

Lines changed: 0 additions & 38 deletions
This file was deleted.

csrc/attention/mla/cutlass_mla_kernels.cu

Lines changed: 0 additions & 225 deletions
This file was deleted.

csrc/torch_bindings.cpp

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -510,13 +510,6 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
510510
ops.def("cutlass_sparse_compress(Tensor a) -> Tensor[]");
511511
ops.impl("cutlass_sparse_compress", &cutlass_sparse_compress);
512512

513-
// CUTLASS MLA decode
514-
ops.def(
515-
"cutlass_mla_decode(Tensor! out, Tensor q_nope, Tensor q_pe,"
516-
" Tensor kv_c_and_k_pe_cache, Tensor seq_lens,"
517-
" Tensor page_table, float scale) -> ()");
518-
ops.impl("cutlass_mla_decode", torch::kCUDA, &cutlass_mla_decode);
519-
520513
// SM100 CUTLASS MLA decode
521514
ops.def(
522515
"sm100_cutlass_mla_decode(Tensor! out, Tensor! lse, Tensor q_nope,"

vllm/_custom_ops.py

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1823,15 +1823,6 @@ def flash_mla_with_kvcache(
18231823
return out, softmax_lse
18241824

18251825

1826-
def cutlass_mla_decode(out: torch.Tensor, q_nope: torch.Tensor,
1827-
q_pe: torch.Tensor, kv_c_and_k_pe_cache: torch.Tensor,
1828-
seq_lens: torch.Tensor, page_table: torch.Tensor,
1829-
scale: float) -> torch.Tensor:
1830-
torch.ops._C.cutlass_mla_decode(out, q_nope, q_pe, kv_c_and_k_pe_cache,
1831-
seq_lens, page_table, scale)
1832-
return out
1833-
1834-
18351826
def sm100_cutlass_mla_decode(out: torch.Tensor, lse: torch.Tensor,
18361827
q_nope: torch.Tensor, q_pe: torch.Tensor,
18371828
kv_c_and_k_pe_cache: torch.Tensor,

0 commit comments

Comments
 (0)