1 file changed in vllm/model_executor/layers/quantization: +6 -4 lines changed
Columns: original file line number | diff line number | diff line change
@@ -196,9 +196,10 @@ def get_quant_method(
196196 # TODO: Add support for MXFP4 Linear Method.
197197 # MXFP4 LinearMethod is available in AMD-Quark, refer to that implementation
198198 # if you are interested in enabling MXFP4 here.
199- logger .warning_once (
199+ logger .debug_once (
200200 "MXFP4 linear layer is not implemented - falling back to "
201- "UnquantizedLinearMethod."
201+ "UnquantizedLinearMethod." ,
202+ scope = "local" ,
202203 )
203204 return UnquantizedLinearMethod ()
204205 elif isinstance (layer , FusedMoE ):
@@ -208,9 +209,10 @@ def get_quant_method(
208209 return Mxfp4MoEMethod (layer .moe_config )
209210 elif isinstance (layer , Attention ):
210211 # TODO: Add support for MXFP4 Attention.
211- logger .warning_once (
212+ logger .debug_once (
212213 "MXFP4 attention layer is not implemented. "
213- "Skipping quantization for this layer."
214+ "Skipping quantization for this layer." ,
215+ scope = "local" ,
214216 )
215217 return None
216218
You can’t perform that action at this time.
0 commit comments