diff --git a/tensorrt_llm/_torch/pyexecutor/executor_request_queue.py b/tensorrt_llm/_torch/pyexecutor/executor_request_queue.py index 7ba9943451a..4c415bb4dea 100644 --- a/tensorrt_llm/_torch/pyexecutor/executor_request_queue.py +++ b/tensorrt_llm/_torch/pyexecutor/executor_request_queue.py @@ -50,7 +50,7 @@ class ExecutorRequestQueue: def __init__(self, dist: Distributed, enable_attention_dp: bool, max_batch_size: int, max_beam_width: int, max_num_active_requests: int, enable_iter_perf_stats: bool, - batch_wait_timeout_ms: float, is_disaggregated: bool): + batch_wait_timeout_ms: float): self.dist = dist self.request_queue: queue.Queue[RequestQueueItem] = queue.Queue() self.waiting_queue: deque[RequestQueueItem] = deque() @@ -59,7 +59,6 @@ def __init__(self, dist: Distributed, enable_attention_dp: bool, self.max_batch_size = max_batch_size self.max_beam_width = max_beam_width self.max_num_active_requests = max_num_active_requests - self.is_disaggregated = is_disaggregated self.enqueue_lock = threading.Lock() self.next_request_id = max_batch_size self.enable_iter_perf_stats = enable_iter_perf_stats diff --git a/tensorrt_llm/_torch/pyexecutor/py_executor.py b/tensorrt_llm/_torch/pyexecutor/py_executor.py index 3eb9181f6c1..0272f0ed8fb 100644 --- a/tensorrt_llm/_torch/pyexecutor/py_executor.py +++ b/tensorrt_llm/_torch/pyexecutor/py_executor.py @@ -254,7 +254,6 @@ def __init__(self, max_num_active_requests=self.max_num_active_requests, enable_iter_perf_stats=self.enable_iter_perf_stats, batch_wait_timeout_ms=self.batch_wait_timeout_ms, - is_disaggregated=kv_cache_transceiver is not None, ) self.executor_request_queue.set_exclude_last_generation_logits( self.disable_overlap_scheduler, self.dist.pp_size) diff --git a/tests/unittest/_torch/executor/test_executor_request_queue.py b/tests/unittest/_torch/executor/test_executor_request_queue.py index e54c2c5bf7c..c09347e033d 100644 --- a/tests/unittest/_torch/executor/test_executor_request_queue.py +++ b/tests/unittest/_torch/executor/test_executor_request_queue.py @@ -42,8 +42,7 @@ def executor_queue(mock_dist): max_beam_width=1, max_num_active_requests=16, enable_iter_perf_stats=True, - batch_wait_timeout_ms=0.0, - is_disaggregated=False) + batch_wait_timeout_ms=0.0) @pytest.fixture @@ -55,8 +54,7 @@ def integration_queue(mock_dist): max_beam_width=2, max_num_active_requests=8, enable_iter_perf_stats=True, - batch_wait_timeout_ms=0.0, - is_disaggregated=False) + batch_wait_timeout_ms=0.0) def test_executor_queue_init(executor_queue, mock_dist): @@ -65,7 +63,6 @@ def test_executor_queue_init(executor_queue, mock_dist): assert not executor_queue.enable_attention_dp assert executor_queue.max_beam_width == 1 assert executor_queue.max_num_active_requests == 16 - assert not executor_queue.is_disaggregated assert executor_queue.next_request_id == 8 assert executor_queue.enable_iter_perf_stats assert executor_queue.active @@ -124,8 +121,7 @@ def test_merge_helix_requests_with_padding(mock_dist): max_beam_width=1, max_num_active_requests=16, enable_iter_perf_stats=True, - batch_wait_timeout_ms=0.0, - is_disaggregated=True) + batch_wait_timeout_ms=0.0) # Mock _should_exclude_last_generation_logits. with patch.object(executor_queue, @@ -181,8 +177,7 @@ def test_merge_helix_requests_without_padding(mock_dist): max_beam_width=1, max_num_active_requests=16, enable_iter_perf_stats=True, - batch_wait_timeout_ms=0.0, - is_disaggregated=True) + batch_wait_timeout_ms=0.0) # Mock _should_exclude_last_generation_logits. with patch.object(executor_queue, @@ -235,8 +230,7 @@ def test_merge_helix_requests_insufficient_blocks_error(mock_dist): max_beam_width=1, max_num_active_requests=16, enable_iter_perf_stats=True, - batch_wait_timeout_ms=0.0, - is_disaggregated=True) + batch_wait_timeout_ms=0.0) with pytest.raises( ValueError, @@ -598,8 +592,7 @@ def attention_dp_queue(mock_dist_attention_dp): max_beam_width=2, max_num_active_requests=8, enable_iter_perf_stats=True, - batch_wait_timeout_ms=0.0, - is_disaggregated=False) + batch_wait_timeout_ms=0.0) # Initialize all_ranks_num_active_requests return queue