Skip to content

Commit 4681136

Browse files
authored
[bugfix] fix gsa coredump (#265)
* [bugfix] fix gsa coredump
* [bugfix] fix build_sparse_meta param lost
* clean code
1 parent ad5b285 commit 4681136

File tree

3 files changed: 6 additions (+6) and 16 deletions (-16)

3 files changed

+6
-16
lines changed

ucm/sparse/gsa/gsa.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -497,9 +497,7 @@ def init_topk_cal(
497497
self.gsa_offload_ops = gsa_offload_ops.CalKpreAndTopk(
498498
self.layer_num, block_size, MAX_BS, att_num_heads, head_size
499499
)
500-
self.gsa_offload_ops.set_kpre_method_param(
501-
int(max_model_len / block_size) * MAX_BS, kv_num_heads, 1
502-
)
500+
self.gsa_offload_ops.set_kpre_method_param(kv_num_heads, 1)
503501
self.gsa_offload_ops.set_kpre_cache(prefetch_engine.kpre_caches)
504502
self.is_cal_kpre = [False] * self.layer_num
505503
self.gsa_q_cache = torch.zeros(
@@ -868,10 +866,7 @@ def execute_finished(self):
868866
)
869867

870868
def build_sparse_meta(
871-
self,
872-
scheduler_output: SchedulerOutput,
873-
requests,
874-
input_batch,
869+
self, scheduler_output: SchedulerOutput, requests, input_batch, attn_metadata
875870
) -> None:
876871
self.gsa_metadata = self.build_gsa_metadata(
877872
scheduler_output, requests, input_batch

ucm/sparse/gsa/offload_ops/include/cal_kpre_and_topk.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class __attribute__((visibility("hidden"))) CalKpreAndTopk
4646
public:
4747
CalKpreAndTopk(uint32_t layerNum, uint32_t blockSize, uint32_t maxBs, uint32_t numHeads, uint32_t headSize);
4848
~CalKpreAndTopk();
49-
void SetKpreMethodParam(uint32_t maxBlockNum, uint32_t numHeads, uint32_t numKpre);
49+
void SetKpreMethodParam(uint32_t numHeads, uint32_t numKpre);
5050
void SetKpreCache(std::vector<torch::Tensor>& kpreCache);
5151
void SetTopkCache(std::vector<torch::Tensor>& topkCache, std::vector<uint32_t>& topkLens);
5252
void SetCommonParam(std::vector<uint32_t>& calTopkIdx, std::vector<bool>& isDecode);

ucm/sparse/gsa/offload_ops/src/cal_kpre_and_topk.cpp

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,10 @@ CalKpreAndTopk::CalKpreAndTopk(uint32_t layerNum, uint32_t blockSize, uint32_t m
2727
m_count = 0;
2828
}
2929

30-
void CalKpreAndTopk::SetKpreMethodParam(uint32_t maxBlockNum, uint32_t numHeads, uint32_t numKpre)
30+
void CalKpreAndTopk::SetKpreMethodParam(uint32_t numHeads, uint32_t numKpre)
3131
{
32-
// m_kNumHeads = numHeads;
33-
// m_numKpre = numKpre;
34-
// auto optionsForKCache = torch::TensorOptions().device("cpu").dtype(torch::kFloat32);
35-
// for (uint32_t i = 0; i < m_layerNum; i++) {
36-
// torch::Tensor layerKCache = torch::zeros({maxBlockNum, m_kNumHeads, m_blockSize, m_headSize}, optionsForKCache);
37-
// m_kCache.push_back(layerKCache);
38-
// }
32+
m_kNumHeads = numHeads;
33+
m_numKpre = numKpre;
3934
}
4035

4136
void CalKpreAndTopk::SetKpreCache(std::vector<torch::Tensor>& kpreCache)

0 commit comments

Comments
 (0)