
Commit 0abc794

[caching] Add enable_prompt_embeds and cpu_offload_gb to compile hashes. (#29435)
Signed-off-by: zhxchen17 <zhxchen17@fb.com>
1 parent 4e57c65 · commit 0abc794

File tree: 2 files changed (+1, -4 lines)


vllm/config/cache.py

Lines changed: 1 addition & 3 deletions
```diff
@@ -144,7 +144,7 @@ class CacheConfig:
 
     kv_offloading_backend: KVOffloadingBackend | None = None
     """The backend to use for KV cache offloading. Supported backends include
-    'native' (vLLM native CPU offloading), 'lmcache' This option must be used
+    'native' (vLLM native CPU offloading), 'lmcache' This option must be used
     together with kv_offloading_size."""
 
     def compute_hash(self) -> str:
@@ -167,8 +167,6 @@ def compute_hash(self) -> str:
             "num_gpu_blocks_override",
             "enable_prefix_caching",
             "prefix_caching_hash_algo",
-            # `cpu_offload_gb` does not use `torch.compile` yet.
-            "cpu_offload_gb",
             "cpu_kvcache_space_bytes",
             "mamba_page_size_padded",
             # Post-init/derived counters
```
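For context, each vLLM config section computes a hash over the fields that can affect `torch.compile` artifacts, skipping fields known not to matter; deleting `cpu_offload_gb` (and, below, `enable_prompt_embeds`) from the skip list makes those options part of the hash. A minimal sketch of this exclusion-list pattern, with hypothetical field names and a hypothetical `SketchCacheConfig` class rather than vLLM's real `CacheConfig`:

```python
import hashlib
from dataclasses import dataclass, fields


# Minimal sketch of an exclusion-list config hash, loosely modeled on the
# compute_hash pattern touched by this commit. Field names are illustrative,
# not vLLM's actual CacheConfig.
@dataclass
class SketchCacheConfig:
    block_size: int = 16
    cpu_offload_gb: float = 0.0                 # now included in the hash
    num_gpu_blocks_override: int | None = None  # still excluded

    def compute_hash(self) -> str:
        # Fields that cannot change torch.compile output are skipped so
        # their values don't invalidate the compile cache needlessly.
        excluded = {"num_gpu_blocks_override"}
        factors = [
            (f.name, getattr(self, f.name))
            for f in fields(self)
            if f.name not in excluded
        ]
        return hashlib.sha256(repr(sorted(factors)).encode()).hexdigest()
```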

vllm/config/model.py

Lines changed: 0 additions & 1 deletion
```diff
@@ -345,7 +345,6 @@ def compute_hash(self) -> str:
             "logprobs_mode",
             "disable_cascade_attn",
             "skip_tokenizer_init",
-            "enable_prompt_embeds",
             "served_model_name",
             "config_format",
             "hf_token",
```
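Reusing the hypothetical `SketchCacheConfig` above, the intended effect of this commit can be illustrated: configs that differ only in a now-included field hash differently, while excluded fields still leave the hash untouched. The same reasoning applies to `enable_prompt_embeds` in `ModelConfig`:

```python
# Two configs differing only in cpu_offload_gb now produce distinct hashes,
# so a compiled artifact cached under the old hash is not incorrectly
# reused after the offload setting changes.
a = SketchCacheConfig(cpu_offload_gb=0.0)
b = SketchCacheConfig(cpu_offload_gb=4.0)
assert a.compute_hash() != b.compute_hash()

# Changing a still-excluded field leaves the hash unchanged.
c = SketchCacheConfig(num_gpu_blocks_override=128)
assert c.compute_hash() == a.compute_hash()
```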
