@@ -101,7 +101,6 @@ struct ModelInputParams {
101101 params.block_tables = safe_to (block_tables, device, true );
102102 params.kv_seq_lens_vec = kv_seq_lens_vec;
103103 params.q_seq_lens_vec = q_seq_lens_vec;
104- params.decode_seq_range = decode_seq_range;
105104
106105 params.input_embedding = safe_to (input_embedding, device);
107106
@@ -153,7 +152,8 @@ struct ModelInputParams {
153152 << " , q_max_seq_len is " << q_max_seq_len;
154153 LOG (INFO) << " ModelInputParams: kv_seq_lens_vec is " << kv_seq_lens_vec;
155154 LOG (INFO) << " ModelInputParams: q_seq_lens_vec is " << q_seq_lens_vec;
156- LOG (INFO) << " ModelInputParams: decode_seq_range is " << decode_seq_range;
155+ LOG (INFO) << " ModelInputParams: batch_forward_type is "
156+ << batch_forward_type.to_string ();
157157 print_tensor (kv_seq_lens, " ModelInputParams: kv_seq_lens" , 4 );
158158 print_tensor (q_seq_lens, " ModelInputParams: q_seq_lens" , 4 );
159159 print_tensor (new_cache_slots, " ModelInputParams: new_cache_slots" , 4 );
@@ -172,15 +172,7 @@ struct ModelInputParams {
172172 torch::Tensor kv_seq_lens;
173173 std::vector<int > kv_seq_lens_vec;
174174 std::vector<int > q_seq_lens_vec;
175- // Range of decode sequence indices in the batch [start, end].
176- // Decode sequences are identified by q_seq_lens == 1,
177- // prefill sequences by q_seq_lens > 1 .
178- // Used to determine whether to use prefill_node_ or
179- // decode_node_ in NPU layers
180- // Values: {-1, -1} if no decode requests (all prefill),
181- // {0, batch_size-1} if all decode requests,
182- // {start_idx, end_idx} if mixed prefill/decode requests
183- std::pair<int , int > decode_seq_range;
175+
184176 // max length for qkv.
185177 int32_t kv_max_seq_len = 0 ;
186178 int32_t q_max_seq_len = 0 ;
0 commit comments