Skip to content

Commit 1a56722

Browse files
authored
[None][fix] Remove unnecessary attention workspace memory check (#9064)
Signed-off-by: Jiagan Cheng <jiaganc@nvidia.com>
1 parent fd703fb commit 1a56722

File tree

1 file changed

+1 -10 lines changed

1 file changed

+1 -10 lines changed

cpp/tensorrt_llm/thop/attentionOp.cpp

Lines changed: 1 addition & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -816,16 +816,6 @@ void attention(torch::Tensor q, std::optional<torch::Tensor> k, std::optional<to
816816
= runner->getWorkspaceSize(*op, num_tokens, max_attention_window_size, num_gen_tokens, max_blocks_per_sequence);
817817
TLLM_LOG_TRACE("Expected workspace size is %ld bytes", workspace_size);
818818

819-
if (workspace_size >= (16l << 30))
820-
{
821-
auto const [free_mem, total_mem] = tensorrt_llm::common::getDeviceMemoryInfo(false);
822-
if (workspace_size >= static_cast<int64_t const>(free_mem))
823-
{
824-
throw std::runtime_error("attention workspace size " + std::to_string(workspace_size)
825-
+ " bytes, exceeds available CUDA memory " + std::to_string(free_mem) + " bytes");
826-
}
827-
}
828-
829819
torch::Tensor workspace;
830820
if (workspace_.has_value())
831821
{
@@ -839,6 +829,7 @@ void attention(torch::Tensor q, std::optional<torch::Tensor> k, std::optional<to
839829
}
840830
else
841831
{
832+
TLLM_LOG_TRACE("Allocate new attention workspace with size %ld bytes", workspace_size);
842833
workspace = torch::empty({workspace_size}, torch::dtype(torch::kByte).device(qkv_or_q.device()));
843834
}
844835

0 commit comments

Comments
 (0)