File tree Expand file tree Collapse file tree 3 files changed +24
-11
lines changed Expand file tree Collapse file tree 3 files changed +24
-11
lines changed Original file line number Diff line number Diff line change @@ -86,3 +86,16 @@ def test_max_model_len():
8686 # It can be less if generation finishes due to other reasons (e.g., EOS)
8787 # before reaching the absolute model length limit.
8888 assert num_total_tokens <= max_model_len
89+
90+
def test_log_stats():
    """Check that generated outputs carry metrics when stat logging is on.

    Builds an ``LLM`` with ``disable_log_stats=False``, generates
    completions for ``PROMPTS`` and asserts that every returned output has
    a non-``None`` ``metrics`` attribute.
    """
    llm = LLM(
        model=MODEL_NAME,
        disable_log_stats=False,
        gpu_memory_utilization=0.10,
        enforce_eager=True,  # reduce test time
    )
    # NOTE(review): sampling_params=None presumably selects the default
    # sampling parameters -- confirm against LLM.generate.
    outputs = llm.generate(PROMPTS, sampling_params=None)

    # all() is True for an empty iterable, so guard against the check below
    # passing vacuously when nothing was generated.
    assert outputs, "expected at least one output from llm.generate"

    # disable_log_stats is False, every output should have metrics
    assert all(output.metrics is not None for output in outputs)
Original file line number Diff line number Diff line change 1414from vllm .lora .request import LoRARequest
1515from vllm .multimodal .inputs import MultiModalPlaceholderDict
1616from vllm .sequence import RequestMetrics
17+ from vllm .v1 .metrics .stats import RequestStateStats
1718
1819logger = init_logger (__name__ )
1920
@@ -108,7 +109,7 @@ def __init__(
108109 prompt_logprobs : Optional [PromptLogprobs ],
109110 outputs : list [CompletionOutput ],
110111 finished : bool ,
111- metrics : Optional [RequestMetrics ] = None ,
112+ metrics : Optional [Union [ RequestMetrics , RequestStateStats ] ] = None ,
112113 lora_request : Optional [LoRARequest ] = None ,
113114 encoder_prompt : Optional [str ] = None ,
114115 encoder_prompt_token_ids : Optional [list [int ]] = None ,
Original file line number Diff line number Diff line change @@ -248,16 +248,15 @@ def _new_request_output(
248248 if prompt_token_ids is None and self .prompt_embeds is not None :
249249 prompt_token_ids = [0 ] * len (self .prompt_embeds )
250250
251- return RequestOutput (
252- request_id = request_id ,
253- prompt = self .prompt ,
254- prompt_token_ids = prompt_token_ids ,
255- prompt_logprobs = prompt_logprobs ,
256- outputs = cast (list [CompletionOutput ], outputs ),
257- finished = finished ,
258- kv_transfer_params = kv_transfer_params ,
259- num_cached_tokens = self .num_cached_tokens ,
260- )
251+ return RequestOutput (request_id = request_id ,
252+ prompt = self .prompt ,
253+ prompt_token_ids = prompt_token_ids ,
254+ prompt_logprobs = prompt_logprobs ,
255+ outputs = cast (list [CompletionOutput ], outputs ),
256+ finished = finished ,
257+ kv_transfer_params = kv_transfer_params ,
258+ num_cached_tokens = self .num_cached_tokens ,
259+ metrics = self .stats )
261260
262261 def _new_completion_output (
263262 self ,
You can’t perform that action at this time.
0 commit comments