Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions fastdeploy/engine/sched/resource_manager_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,9 +648,11 @@ def _allocate_decode_and_extend():
break

request = self.waiting[0]
if (self._is_mm_request(request) and self.exist_mm_prefill(scheduled_reqs)) or (
paddle.is_compiled_with_xpu() and self.exist_prefill(scheduled_reqs)
):
if (
not envs.FD_ENABLE_MAX_PREFILL
and self._is_mm_request(request)
and self.exist_mm_prefill(scheduled_reqs)
) or (paddle.is_compiled_with_xpu() and self.exist_prefill(scheduled_reqs)):
break
if request.status == RequestStatus.WAITING:
result = self._waiting_async_process(request)
Expand Down
3 changes: 3 additions & 0 deletions fastdeploy/entrypoints/engine_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
ParameterError,
StatefulSemaphore,
api_server_logger,
to_tensor,
)


Expand Down Expand Up @@ -401,6 +402,8 @@ def _send_task(self, task):
if not self.enable_mm:
self.zmq_client.send_json(task)
else:
if envs.FD_ENABLE_E2W_TENSOR_CONVERT:
to_tensor([task])
self.zmq_client.send_pyobj(task)

def valid_parameters(self, data):
Expand Down
22 changes: 14 additions & 8 deletions fastdeploy/worker/gpu_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,12 +424,14 @@ def _apply_mm_inputs(self, request: Request, multi_vision_inputs: dict, rope_3d_
multi_vision_inputs["grid_thw_lst"].extend(
inputs["grid_thw"][request.num_image_start : request.num_image_end]
)
multi_vision_inputs["cu_seqlens"].extend(
inputs["vit_seqlen"][request.num_image_start : request.num_image_end]
)
multi_vision_inputs["vit_position_ids_lst"].extend(
inputs["vit_position_ids"][request.num_image_start : request.num_image_end]
)
if hasattr(inputs, "vit_seqlen"):
multi_vision_inputs["cu_seqlens"].extend(
inputs["vit_seqlen"][request.num_image_start : request.num_image_end]
)
if hasattr(inputs, "vit_position_ids"):
multi_vision_inputs["vit_seqlens"].extend(
inputs["vit_seqlen"][request.num_image_start : request.num_image_end]
)
else:
vision_inputs = inputs
if self.encoder_cache:
Expand Down Expand Up @@ -2672,8 +2674,12 @@ def extract_vision_features_ernie(self, inputs: list[paddle.Tensor]) -> paddle.T

def extract_vision_features_qwen(self, inputs: list[paddle.Tensor]) -> paddle.Tensor:
assert inputs["images"] is not None
grid_thw = inputs["grid_thw"]
images = inputs["images"]
if envs.FD_ENABLE_MAX_PREFILL:
images = paddle.concat(inputs["images_lst"]).cast("bfloat16")
grid_thw = paddle.to_tensor(inputs["grid_thw_lst"], dtype="int64")
else:
grid_thw = inputs["grid_thw"]
images = inputs["images"]
with paddle.amp.auto_cast(
True,
custom_black_list=self.amp_black,
Expand Down
Loading