[bug fix] kvstar: fix delta KV cache block selection (#341)

saki-daisuki · web-flow · commit a1d9058e65c7 · 2025-11-05T16:33:29.000+08:00
* kvstar: fix delta KV cache block selection

* clean code

* adapt to the inner attn_begin API

* adapt to the inner attn_finish API
diff --git a/ucm/sparse/kvstar/multistep.py b/ucm/sparse/kvstar/multistep.py
@@ -1,7 +1,7 @@
 import enum
 import math
 from dataclasses import dataclass, field
-from typing import Dict, List, Union
+from typing import Dict, List, Optional, Union
 
 import torch
 from vllm.config import VllmConfig
@@ -352,6 +352,7 @@ def attention_begin(
         key: torch.Tensor,
         value: torch.Tensor,
         forward_context: ForwardContext,
+        phase: Optional[str] = None,
     ) -> None:
         index_in_batch = self.req_meta.index_in_batch
         query_start_loc = self.req_meta.query_start_loc
@@ -446,6 +447,9 @@ def load_retrieve_result_async(self, load_step, candidate_swap_vllm_block_ids):
         retrieve_result_hash_list = self.step_group_retrieve_result.get(
             need_retrieve_record
         ).copy()
+        fixed_origin_candidate_swap_vllm_block_ids = (
+            candidate_swap_vllm_block_ids.copy()
+        )
         if need_retrieve_record != "prefill" or load_step == 1:
             if len(self.layer_wise_pre_swap_area_block_hashes) == 0:
                 self.layer_wise_pre_swap_area_block_hashes = {
@@ -456,7 +460,7 @@ def load_retrieve_result_async(self, load_step, candidate_swap_vllm_block_ids):
                 }
             else:
                 already_matched_record = {}
-                for logic_blk_id in candidate_swap_vllm_block_ids:
+                for logic_blk_id in fixed_origin_candidate_swap_vllm_block_ids:
                     if (
                         logic_blk_id in self.layer_wise_pre_swap_area_block_hashes
                         and self.layer_wise_pre_swap_area_block_hashes[logic_blk_id]
@@ -540,6 +544,7 @@ def attention_finished(
         value: torch.Tensor,
         attn_output: torch.Tensor,
         forward_context: ForwardContext,
+        phase: Optional[str] = None,
     ) -> None:
         if self.req_meta.stage != ReqStage.PREFILL:
             if (