
Commit 3fd1fb0

Revert "[LoRA] Support FusedMoE LoRA Triton kernel for mxfp4 (#28971)" (#29697)
Signed-off-by: Huamin Li <3ericli@gmail.com>
1 parent: a51f418 · commit: 3fd1fb0

File tree

4 files changed (+11 −440 lines)


tests/kernels/moe/test_modular_oai_triton_moe.py

Lines changed: 0 additions & 250 deletions
This file was deleted.

vllm/lora/layers/fused_moe.py

Lines changed: 9 additions & 26 deletions
@@ -20,24 +20,15 @@
     _get_config_dtype_str,
 )
 from vllm.model_executor.layers.fused_moe.fused_marlin_moe import (
-    MarlinExperts,
+    modular_marlin_fused_moe,
 )
 from vllm.model_executor.layers.fused_moe.fused_moe import (
-    TritonExperts,
+    modular_triton_fused_moe,
     try_get_optimal_moe_config,
 )
 from vllm.model_executor.layers.fused_moe.fused_moe_modular_method import (
     FusedMoEModularMethod,
 )
-from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import (
-    UnfusedOAITritonExperts,
-)
-from vllm.model_executor.layers.fused_moe.modular_kernel import (
-    FusedMoEModularKernel,
-)
-from vllm.model_executor.layers.fused_moe.prepare_finalize import (
-    MoEPrepareAndFinalizeNoEP,
-)
 
 from .utils import _get_lora_device
 
@@ -123,23 +114,15 @@ def _inject_lora_into_fused_moe(self):
         self.base_layer.ensure_moe_quant_config_init()
         quant_config = self.base_layer.quant_method.moe_quant_config
 
-        prepare_finalize = MoEPrepareAndFinalizeNoEP()
-        m_fused_moe_fn = FusedMoEModularKernel(
-            prepare_finalize,
-            self.base_layer.quant_method.select_gemm_impl(
-                prepare_finalize, self.base_layer
-            ),
-            self.base_layer.shared_experts,
-            getattr(self.base_layer, "shared_experts_stream", None),
-        )
-        if quant_config.use_mxfp4_w4a16:
-            assert isinstance(
-                m_fused_moe_fn.fused_experts, (MarlinExperts, UnfusedOAITritonExperts)
+        m_fused_moe_fn = (
+            modular_triton_fused_moe(
+                quant_config, shared_experts=self.base_layer.shared_experts
             )
-        else:
-            assert isinstance(
-                m_fused_moe_fn.fused_experts, (MarlinExperts, TritonExperts)
+            if not quant_config.use_mxfp4_w4a16
+            else modular_marlin_fused_moe(
+                quant_config, shared_experts=self.base_layer.shared_experts
             )
+        )
 
         def fwd_decorator(layer, func):
             def wrapper(*args, **kwargs):
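
Note: for reference, a minimal sketch of the dispatch this revert restores, assuming only the call signatures visible in the '+' lines above; select_modular_fused_moe is a hypothetical helper named here for illustration and is not part of vLLM.

# Hypothetical helper illustrating the restored selection logic.
# The modular_triton_fused_moe / modular_marlin_fused_moe calls mirror the
# '+' lines in the diff above; the helper itself is an assumption.
from vllm.model_executor.layers.fused_moe.fused_marlin_moe import (
    modular_marlin_fused_moe,
)
from vllm.model_executor.layers.fused_moe.fused_moe import (
    modular_triton_fused_moe,
)


def select_modular_fused_moe(quant_config, shared_experts=None):
    # mxfp4 w4a16 weights go back through the Marlin experts path;
    # every other quant config uses the plain Triton experts path.
    if quant_config.use_mxfp4_w4a16:
        return modular_marlin_fused_moe(quant_config, shared_experts=shared_experts)
    return modular_triton_fused_moe(quant_config, shared_experts=shared_experts)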
