We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent d87042d · commit f201d10 — Copy full SHA for f201d10
tensorrt_llm/_torch/attention_backend/sparse/dsa.py
@@ -432,6 +432,7 @@ def __post_init__(self):
             dtype=torch.int32,
             capture_graph=capture_graph,
         )
+        # TODO: remove these expanded buffers when fp8_paged_mqa_logits supports MTP > 1.
         self.kv_lens_expanded_cuda = self.get_empty(
             self.cuda_graph_buffers,
             (self.max_num_sequences * (1 + self.max_draft_tokens), ),
0 commit comments