From 64808d38b2e7c8853962c69c520eb0df7a725531 Mon Sep 17 00:00:00 2001
From: rainyfly <1435317881@qq.com>
Date: Sun, 30 Nov 2025 16:20:29 +0800
Subject: [PATCH] [Feature] Enable prefix caching for mtp

---
 fastdeploy/engine/args_utils.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fastdeploy/engine/args_utils.py b/fastdeploy/engine/args_utils.py
index 3f7952fc5aa..a98429b1845 100644
--- a/fastdeploy/engine/args_utils.py
+++ b/fastdeploy/engine/args_utils.py
@@ -486,8 +486,6 @@ def __post_init__(self):
             self.tokenizer = self.model
         if self.splitwise_role == "decode":
             self.enable_prefix_caching = False
-        if self.speculative_config is not None:
-            self.enable_prefix_caching = False
         if not current_platform.is_cuda() and not current_platform.is_xpu() and not current_platform.is_intel_hpu():
             self.enable_prefix_caching = False
         # if self.dynamic_load_weight: