
Commit 0abc794

[caching] Add enable_prompt_embeds and cpu_offload_gb to compile hashes. (#29435)
Signed-off-by: zhxchen17 <zhxchen17@fb.com>
1 parent 4e57c65 · commit 0abc794

File tree: 2 files changed (+1, -4 lines)


vllm/config/cache.py

Lines changed: 1 addition & 3 deletions
```diff
@@ -144,7 +144,7 @@ class CacheConfig:
 
     kv_offloading_backend: KVOffloadingBackend | None = None
     """The backend to use for KV cache offloading. Supported backends include
-    'native' (vLLM native CPU offloading), 'lmcache' This option must be used
+    'native' (vLLM native CPU offloading), 'lmcache' This option must be used
     together with kv_offloading_size."""
 
     def compute_hash(self) -> str:
@@ -167,8 +167,6 @@ def compute_hash(self) -> str:
             "num_gpu_blocks_override",
             "enable_prefix_caching",
             "prefix_caching_hash_algo",
-            # `cpu_offload_gb` does not use `torch.compile` yet.
-            "cpu_offload_gb",
             "cpu_kvcache_space_bytes",
             "mamba_page_size_padded",
             # Post-init/derived counters
```
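For context, each vLLM config section computes a hash over the fields that can affect `torch.compile` artifacts, skipping fields known not to matter; deleting `cpu_offload_gb` (and, below, `enable_prompt_embeds`) from the skip list makes those options part of the hash. A minimal sketch of this exclusion-list pattern, with hypothetical field names and a hypothetical `SketchCacheConfig` class rather than vLLM's real `CacheConfig`:

```python
import hashlib
from dataclasses import dataclass, fields


# Minimal sketch of an exclusion-list config hash, loosely modeled on the
# compute_hash pattern touched by this commit. Field names are illustrative,
# not vLLM's actual CacheConfig.
@dataclass
class SketchCacheConfig:
    block_size: int = 16
    cpu_offload_gb: float = 0.0                 # now included in the hash
    num_gpu_blocks_override: int | None = None  # still excluded

    def compute_hash(self) -> str:
        # Fields that cannot change torch.compile output are skipped so
        # their values don't invalidate the compile cache needlessly.
        excluded = {"num_gpu_blocks_override"}
        factors = [
            (f.name, getattr(self, f.name))
            for f in fields(self)
            if f.name not in excluded
        ]
        return hashlib.sha256(repr(sorted(factors)).encode()).hexdigest()
```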

vllm/config/model.py

Lines changed: 0 additions & 1 deletion
```diff
@@ -345,7 +345,6 @@ def compute_hash(self) -> str:
             "logprobs_mode",
             "disable_cascade_attn",
             "skip_tokenizer_init",
-            "enable_prompt_embeds",
             "served_model_name",
             "config_format",
             "hf_token",
```
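Reusing the hypothetical `SketchCacheConfig` above, the intended effect of this commit can be illustrated: configs that differ only in a now-included field hash differently, while excluded fields still leave the hash untouched. The same reasoning applies to `enable_prompt_embeds` in `ModelConfig`:

```python
# Two configs differing only in cpu_offload_gb now produce distinct hashes,
# so a compiled artifact cached under the old hash is not incorrectly
# reused after the offload setting changes.
a = SketchCacheConfig(cpu_offload_gb=0.0)
b = SketchCacheConfig(cpu_offload_gb=4.0)
assert a.compute_hash() != b.compute_hash()

# Changing a still-excluded field leaves the hash unchanged.
c = SketchCacheConfig(num_gpu_blocks_override=128)
assert c.compute_hash() == a.compute_hash()
```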
