diff --git a/vllm/v1/metrics/stats.py b/vllm/v1/metrics/stats.py index 4e9db98db0bc..2e06bcfe0b66 100644 --- a/vllm/v1/metrics/stats.py +++ b/vllm/v1/metrics/stats.py @@ -194,6 +194,9 @@ class RequestStateStats: # Track if this request is corrupted (NaNs in logits) is_corrupted: bool = False + # Inter-token latencies recorded for this request + inter_token_latencies: list[float] = field(default_factory=list) + @dataclass class FinishedRequestStats: @@ -283,6 +286,7 @@ def update_from_output( else: itl = engine_core_timestamp - req_stats.last_token_ts self.inter_token_latencies_iter.append(itl) + req_stats.inter_token_latencies.append(itl) req_stats.last_token_ts = engine_core_timestamp