diff --git a/fastdeploy/engine/args_utils.py b/fastdeploy/engine/args_utils.py index 3f7952fc5aa..a98429b1845 100644 --- a/fastdeploy/engine/args_utils.py +++ b/fastdeploy/engine/args_utils.py @@ -486,8 +486,6 @@ def __post_init__(self): self.tokenizer = self.model if self.splitwise_role == "decode": self.enable_prefix_caching = False - if self.speculative_config is not None: - self.enable_prefix_caching = False if not current_platform.is_cuda() and not current_platform.is_xpu() and not current_platform.is_intel_hpu(): self.enable_prefix_caching = False # if self.dynamic_load_weight: