@@ -523,6 +523,8 @@ def record_recv_token(self, cur_time: float = None):
523523 cur_time = time .time () if cur_time is None else cur_time
524524 self .engine_recv_latest_token_time = cur_time
525525 self .llm_engine_recv_latest_token_timestamp = cur_time
526+ self .model_execute_time = cur_time - self .arrival_time
527+ self .model_forward_time = cur_time - self .inference_start_time
526528
527529 def record_decode_recv_second_token (self ):
528530 cur_time = time .time ()
@@ -540,7 +542,6 @@ def cal_cost_time(self):
540542 self .first_token_time = self .engine_recv_first_token_time - self .inference_start_time
541543 self .time_in_queue = time .time () - self .preprocess_end_time
542544 self .preprocess_cost_time = self .preprocess_end_time - self .preprocess_start_time
543- self .model_execute_time = self .engine_recv_first_token_time - self .inference_start_time
544545 self .request_start_time = self .arrival_time
545546
546547 # for compatibility with old metrics
@@ -623,10 +624,12 @@ def add(self, next_output: RequestOutput) -> None:
623624 self .outputs .index = next_output .outputs .index
624625 self .outputs .token_ids .extend (next_output .outputs .token_ids )
625626
626- if next_output .metrics .arrival_time is not None and self .metrics .inference_start_time is not None :
627- self .metrics .model_forward_time = next_output .metrics .arrival_time - self .metrics .inference_start_time
628- if next_output .metrics .arrival_time is not None and self .metrics .arrival_time is not None :
629- self .metrics .model_execute_time = next_output .metrics .arrival_time - self .metrics .arrival_time
627+ if next_output .metrics .model_forward_time is not None :
628+ self .metrics .model_forward_time = next_output .metrics .model_forward_time
629+ if next_output .metrics .model_execute_time is not None :
630+ self .metrics .model_execute_time = next_output .metrics .model_execute_time
631+ if next_output .metrics .engine_recv_latest_token_time is not None :
632+ self .metrics .engine_recv_latest_token_time = next_output .metrics .engine_recv_latest_token_time
630633 if next_output .outputs .top_logprobs is not None :
631634 self .outputs .top_logprobs .logprob_token_ids .extend (next_output .outputs .top_logprobs .logprob_token_ids )
632635 self .outputs .top_logprobs .logprobs .extend (next_output .outputs .top_logprobs .logprobs )
0 commit comments