@@ -162,6 +162,8 @@ int main(int argc, char ** argv) {
162162 return 1 ;
163163 }
164164
165+ llama_kv_cache * kv = llama_get_kv_cache (ctx);
166+
165167 const llama_vocab * vocab = llama_model_get_vocab (model);
166168
167169 LOG_INF (" %s: llama threadpool init, n_threads = %d\n " , __func__, (int ) params.cpuparams .n_threads );
@@ -306,7 +308,7 @@ int main(int argc, char ** argv) {
306308 }
307309
308310 // remove any "future" tokens that we might have inherited from the previous session
309- llama_kv_cache_seq_rm (ctx , -1 , n_matching_session_tokens, -1 );
311+ llama_kv_cache_seq_rm (kv , -1 , n_matching_session_tokens, -1 );
310312 }
311313
312314 LOG_DBG (" recalculate the cached logits (check): embd_inp.size() %zu, n_matching_session_tokens %zu, embd_inp.size() %zu, session_tokens.size() %zu\n " ,
@@ -543,8 +545,8 @@ int main(int argc, char ** argv) {
543545 LOG_DBG (" context full, swapping: n_past = %d, n_left = %d, n_ctx = %d, n_keep = %d, n_discard = %d\n " ,
544546 n_past, n_left, n_ctx, params.n_keep , n_discard);
545547
546- llama_kv_cache_seq_rm (ctx , 0 , params.n_keep , params.n_keep + n_discard);
547- llama_kv_cache_seq_add (ctx , 0 , params.n_keep + n_discard, n_past, -n_discard);
548+ llama_kv_cache_seq_rm (kv , 0 , params.n_keep , params.n_keep + n_discard);
549+ llama_kv_cache_seq_add (kv , 0 , params.n_keep + n_discard, n_past, -n_discard);
548550
549551 n_past -= n_discard;
550552
@@ -567,9 +569,9 @@ int main(int argc, char ** argv) {
567569 LOG_DBG (" div: [%6d, %6d] / %6d -> [%6d, %6d]\n " , ga_i + ib*bd, ga_i + ib*bd + ga_w, ga_n, (ga_i + ib*bd)/ga_n, (ga_i + ib*bd + ga_w)/ga_n);
568570 LOG_DBG (" shift: [%6d, %6d] + %6d -> [%6d, %6d]\n " , ga_i + ib*bd + ga_w, n_past + ib*bd, dd, ga_i + ib*bd + ga_w + dd, n_past + ib*bd + dd);
569571
570- llama_kv_cache_seq_add (ctx , 0 , ga_i, n_past, ib*bd);
571- llama_kv_cache_seq_div (ctx , 0 , ga_i + ib*bd, ga_i + ib*bd + ga_w, ga_n);
572- llama_kv_cache_seq_add (ctx , 0 , ga_i + ib*bd + ga_w, n_past + ib*bd, dd);
572+ llama_kv_cache_seq_add (kv , 0 , ga_i, n_past, ib*bd);
573+ llama_kv_cache_seq_div (kv , 0 , ga_i + ib*bd, ga_i + ib*bd + ga_w, ga_n);
574+ llama_kv_cache_seq_add (kv , 0 , ga_i + ib*bd + ga_w, n_past + ib*bd, dd);
573575
574576 n_past -= bd;
575577
0 commit comments