1 file changed: +5 -1 lines changed
Original file line number Diff line number Diff line change
99import vllm .envs as envs
1010from vllm .logger import init_logger
1111from vllm .utils import DEFAULT_MAX_NUM_BATCHED_TOKENS
12- from vllm .v1 .attention .backends .utils import set_kv_cache_layout
1312
1413from .interface import DeviceCapability , Platform , PlatformEnum , _Backend
1514
@@ -164,11 +163,16 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
164163 vllm_config .scheduler_config .max_num_batched_tokens = max (
165164 vllm_config .scheduler_config .max_model_len ,
166165 DEFAULT_MAX_NUM_BATCHED_TOKENS )
166+ from vllm .v1 .attention .backends .utils import set_kv_cache_layout
167167
168168 set_kv_cache_layout ("NHD" )
169169 logger .info ("Setting VLLM_KV_CACHE_LAYOUT to 'NHD' for XPU; "
170170 "only NHD layout is supported by XPU attention kernels." )
171171
172+ @classmethod
173+ def support_hybrid_kv_cache (cls ) -> bool :
174+ return True
175+
172176 @classmethod
173177 def is_pin_memory_available (cls ):
174178 return True
You can’t perform that action at this time.
0 commit comments