We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent bbc2b08 commit 3c4e6a4Copy full SHA for 3c4e6a4
tensorrt_llm/_torch/attention_backend/sparse/dsa.py
@@ -432,6 +432,7 @@ def __post_init__(self):
432
dtype=torch.int32,
433
capture_graph=capture_graph,
434
)
435
+ # TODO: remove these expanded buffers when fp8_paged_mqa_logits supports MTP > 1.
436
self.kv_lens_expanded_cuda = self.get_empty(
437
self.cuda_graph_buffers,
438
(self.max_num_sequences * (1 + self.max_draft_tokens), ),
0 commit comments