File tree Expand file tree Collapse file tree 1 file changed +6
-0
lines changed Expand file tree Collapse file tree 1 file changed +6
-0
lines changed Original file line number Diff line number Diff line change 1616from vllm .platforms import current_platform
1717from vllm .sequence import ExecuteModelRequest , IntermediateTensors
1818from vllm .utils import get_ip
19+ from vllm .v1 .outputs import AsyncModelRunnerOutput
1920from vllm .v1 .worker .worker_base import WorkerWrapperBase
2021
2122if TYPE_CHECKING :
@@ -142,6 +143,11 @@ def execute_model_ray(
142143 # but may still be finished requests.
143144 assert not output or not output .req_ids
144145 output = scheduler_output , None
146+ # Ensure outputs crossing Ray compiled DAG are serializable.
147+ # AsyncModelRunnerOutput holds CUDA events and cannot be
148+ # pickled.
149+ if isinstance (output , AsyncModelRunnerOutput ):
150+ output = output .get_output ()
145151 return output
146152
147153 def override_env_vars (self , vars : Dict [str , str ]):
You can’t perform that action at this time.
0 commit comments