From 2ae96645b7ed4ac19c1e1857bdb17aa428d0b8aa Mon Sep 17 00:00:00 2001 From: Jayden Yu Date: Fri, 14 Nov 2025 13:54:59 -0800 Subject: [PATCH] Add TTIT tracking array to RequestStateStats to trunk Summary: This allows us to track TTITs (inter-token latencies) internally at token generation time with our own tracking, instead of relying on Prometheus (vLLM's officially supported way to track TTIT). Furthermore, TTFT is already saved to RequestStateStats but TTIT is not; this change removes that discrepancy. Test Plan: N/A (this only adds a field and appends each computed inter-token latency to it in update_from_output) Differential Revision: D86552981 --- vllm/v1/metrics/stats.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vllm/v1/metrics/stats.py b/vllm/v1/metrics/stats.py index 4e9db98db0bc..2e06bcfe0b66 100644 --- a/vllm/v1/metrics/stats.py +++ b/vllm/v1/metrics/stats.py @@ -194,6 +194,9 @@ class RequestStateStats: # Track if this request is corrupted (NaNs in logits) is_corrupted: bool = False + # list of ttit's + inter_token_latencies: list[float] = field(default_factory=list) + @dataclass class FinishedRequestStats: @@ -283,6 +286,7 @@ def update_from_output( else: itl = engine_core_timestamp - req_stats.last_token_ts self.inter_token_latencies_iter.append(itl) + req_stats.inter_token_latencies.append(itl) req_stats.last_token_ts = engine_core_timestamp