Skip to content

Commit 7aeac97

Browse files
authored
[https://nvbugs/5622938][fix] Use async send_requests_to_next_pp. (#9041)
Signed-off-by: Yuxian Qiu <142763828+yuxianq@users.noreply.github.com>
1 parent 6bf4e59 commit 7aeac97

File tree

1 file changed

+6 -9 lines changed

1 file changed

+6
-9
lines changed

tensorrt_llm/_torch/pyexecutor/executor_request_queue.py

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,7 @@ def __init__(self, dist: Distributed, enable_attention_dp: bool,
6666
self.start_times = {}
6767
self.active = True
6868
self.batch_wait_timeout_ms = batch_wait_timeout_ms
69+
self.send_requests_handler = None
6970

7071
# State tracking
7172
self.num_fetch_requests = 0
@@ -609,15 +610,11 @@ def _broadcast_new_requests(
609610

610611
if not self.dist.is_last_pp_rank:
611612
with nvtx_range("send_requests_to_next_pp"):
612-
if self._disable_mpi:
613-
isend_payload = self.dist.isend_object(
614-
payloads,
615-
self.dist.next_pp_rank,
616-
tag,
617-
)
618-
isend_payload.wait()
619-
else:
620-
self.dist.send_object(payloads, self.dist.next_pp_rank, tag)
613+
if self.send_requests_handler is not None:
614+
with nvtx_range("wait_prev_send_requests_handler"):
615+
self.send_requests_handler.wait()
616+
self.send_requests_handler = self.dist.isend_object(
617+
payloads, self.dist.next_pp_rank, tag)
621618

622619
return payloads
623620

0 commit comments

Comments
 (0)