
Commit ec1339c

fix mypy
Signed-off-by: Nick Hill <nhill@redhat.com>
1 parent 9cde494 commit ec1339c

File tree

1 file changed: +9 -4 lines changed


vllm/v1/engine/core.py

Lines changed: 9 additions & 4 deletions
@@ -11,7 +11,7 @@
 from contextlib import ExitStack, contextmanager
 from inspect import isclass, signature
 from logging import DEBUG
-from typing import Any, TypeVar
+from typing import Any, TypeVar, cast
 
 import msgspec
 import zmq
@@ -382,12 +382,17 @@ def step_with_batch_queue(
         deferred_scheduler_output = None
         if self.scheduler.has_requests():
             scheduler_output = self.scheduler.schedule()
-            future = self.model_executor.execute_model(scheduler_output, non_block=True)
+            exec_future = self.model_executor.execute_model(
+                scheduler_output, non_block=True
+            )
             if not self.ec_producer:
                 model_executed = scheduler_output.total_num_scheduled_tokens > 0
 
-                if model_executed:
-                    future.add_done_callback(self._log_err_callback(scheduler_output))
+                if not model_executed:
+                    # No sampling required (no requests scheduled).
+                    future = cast(Future[ModelRunnerOutput], exec_future)
+                else:
+                    exec_future.add_done_callback(self._log_err_callback(scheduler_output))
 
             if not scheduler_output.pending_structured_output_tokens:
                 # We aren't waiting for any tokens, get any grammar output
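
For readers outside the vLLM codebase: the mypy fix works by narrowing the union-typed result of the non-blocking execute_model() call with typing.cast, so that Future-specific calls type-check on the narrowed variable. Below is a minimal, self-contained sketch of that pattern only; ModelRunnerOutput, execute_model, and log_err_callback here are simplified hypothetical stand-ins, not vLLM's real implementations.

from concurrent.futures import Future
from typing import Union, cast


class ModelRunnerOutput:
    """Hypothetical stand-in for vLLM's real ModelRunnerOutput."""


def execute_model(non_block: bool = False) -> Union[ModelRunnerOutput, Future[ModelRunnerOutput]]:
    """Hypothetical stand-in: returns a Future when called non-blocking."""
    if non_block:
        fut: Future[ModelRunnerOutput] = Future()
        fut.set_result(ModelRunnerOutput())
        return fut
    return ModelRunnerOutput()


def log_err_callback(fut: Future) -> None:
    # Log (here: print) any exception raised by the deferred execution.
    exc = fut.exception()
    if exc is not None:
        print(f"model execution failed: {exc!r}")


exec_future = execute_model(non_block=True)

# mypy infers the union type for exec_future; cast() narrows it to the
# Future we know we received here, so Future-only methods type-check.
future = cast(Future[ModelRunnerOutput], exec_future)
future.add_done_callback(log_err_callback)
print(future.result())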
