2 files changed: +13 -0 lines changed
@@ -152,6 +152,7 @@ def updata_req_state(
152152 self .calc_block_table = []
153153 self .calc_repre_slot_mapping = []
154154 if len (self .repre_slot_mapping ) > len (self .blocks ):
155+ self .topk_buf_tmp = None
155156 self .repre_slot_mapping = self .repre_slot_mapping [: len (self .blocks )]
156157
157158 def _get_sparse_and_free_block (self ):
@@ -265,6 +266,8 @@ def get_model_input(
265266 input_batch .req_id_to_index [req_id ],
266267 )
267268 for new_req in scheduler_output .scheduled_new_reqs :
269+ if new_req .req_id in self .gsa_stats :
270+ del self .gsa_stats [new_req .req_id ]
268271 self .gsa_stats [new_req .req_id ] = GSAReqStat (new_req .req_id )
269272 self .gsa_stats [new_req .req_id ].add_req_new (
270273 scheduler_output .num_scheduled_tokens [new_req .req_id ],
@@ -517,6 +517,16 @@ def _no_gsa_input_deal(
517517 self .is_gsa_req_id [req_id ]
518518 and gsa_metadata .gsa_stats [req_id ].topk_buf_tmp != None
519519 ):
520+ if (torch .max (gsa_metadata .gsa_stats [req_id ].topk_buf_tmp ) >
521+ (len (self .block_table_list_bs [index ]) - 1 )
522+ ):
523+ self .gsa_seq_len [:, bs_index ] = gsa_metadata .gsa_stats [
524+ req_id
525+ ].get_seq_len ()
526+ self .use_block_table [
527+ :, bs_index , : len (gsa_metadata .gsa_stats [req_id ].blocks )
528+ ] = one_block_table
529+ continue
520530 remain_slot = (
521531 gsa_metadata .gsa_stats [req_id ].get_seq_len () % self .block_size
522532 )
You can’t perform that action at this time.
0 commit comments