 from transformers.models.qwen3_vl.configuration_qwen3_vl import \
     Qwen3VLVisionConfig
 from vllm.attention.backends.registry import AttentionBackendEnum
-from vllm.attention.layer import check_upstream_fa_availability
 from vllm.model_executor.layers.activation import _ACTIVATION_REGISTRY
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.layers.rotary_embedding import get_rope
@@ -133,12 +132,6 @@ def __init__(
             dtype=torch.get_default_dtype(),
             attn_backend_override=attn_backend_override,
         )
-        use_upstream_fa = False
-        if (self.attn_backend != AttentionBackendEnum.FLASH_ATTN
-                and self.attn_backend != AttentionBackendEnum.ROCM_AITER_FA
-                and check_upstream_fa_availability(torch.get_default_dtype())):
-            self.attn_backend = AttentionBackendEnum.FLASH_ATTN
-            use_upstream_fa = True
 
         if self.attn_backend not in {
             AttentionBackendEnum.FLASH_ATTN,
@@ -159,7 +152,6 @@ def __init__(
                 prefix=f"{prefix}.blocks.{layer_idx}",
                 use_data_parallel=use_data_parallel,
                 attn_backend=self.attn_backend,
-                use_upstream_fa=use_upstream_fa,
             ) for layer_idx in range(vision_config.depth)
         ])
 
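For context, here is a minimal, self-contained sketch of the fallback path this commit deletes: when the selected vision-attention backend was neither FLASH_ATTN nor ROCM_AITER_FA but upstream flash-attn was usable, the backend was silently switched to FLASH_ATTN and a `use_upstream_fa` flag was threaded into every vision block. The enum and helper below are stand-ins for the vLLM internals named in the diff (`AttentionBackendEnum`, `check_upstream_fa_availability`), not the real API.

```python
# Sketch of the removed fallback, using local stand-ins rather than vLLM's classes.
from enum import Enum, auto

import torch


class AttentionBackendEnum(Enum):
    # Stand-in for vllm.attention.backends.registry.AttentionBackendEnum.
    FLASH_ATTN = auto()
    ROCM_AITER_FA = auto()
    TORCH_SDPA = auto()


def check_upstream_fa_availability(dtype: torch.dtype) -> bool:
    # Stand-in for vllm.attention.layer.check_upstream_fa_availability:
    # treat upstream flash-attn as available if the package imports and the
    # default dtype is half precision.
    try:
        import flash_attn  # noqa: F401
    except ImportError:
        return False
    return dtype in (torch.float16, torch.bfloat16)


def resolve_backend(
    selected: AttentionBackendEnum,
) -> tuple[AttentionBackendEnum, bool]:
    # Pre-change behaviour mirrored from the deleted lines: upgrade to
    # FLASH_ATTN and report use_upstream_fa=True when upstream FA is usable.
    use_upstream_fa = False
    if (selected != AttentionBackendEnum.FLASH_ATTN
            and selected != AttentionBackendEnum.ROCM_AITER_FA
            and check_upstream_fa_availability(torch.get_default_dtype())):
        selected = AttentionBackendEnum.FLASH_ATTN
        use_upstream_fa = True
    return selected, use_upstream_fa


if __name__ == "__main__":
    backend, use_upstream_fa = resolve_backend(AttentionBackendEnum.TORCH_SDPA)
    print(backend, use_upstream_fa)
```

After this commit, the backend chosen by the registry is used as-is, and the vision blocks no longer accept a `use_upstream_fa` argument.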