@@ -582,7 +582,7 @@ def eval(self, tokens: Sequence[int]):
582582 Args:
583583 tokens: The list of tokens to evaluate.
584584 """
585- self._ctx.kv_self_seq_rm(-1, self.n_tokens, -1)
585+ self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1)
586586 for i in range(0, len(tokens), self.n_batch):
587587 batch = tokens[i : min(len(tokens), i + self.n_batch)]
588588 n_past = self.n_tokens
@@ -890,7 +890,7 @@ def generate(
890890
891891 if sample_idx < self.n_tokens and token != self._input_ids[sample_idx]:
892892 self.n_tokens = sample_idx
893- self._ctx.kv_self_seq_rm(-1, self.n_tokens, -1)
893+ self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1)
894894 break
895895
896896 if self.draft_model is not None:
@@ -986,7 +986,7 @@ def embed(
986986 data: Union[List[List[float]], List[List[List[float]]]] = []
987987
988988 def decode_batch(seq_sizes: List[int]):
989- llama_cpp.llama_kv_self_clear(self._ctx.ctx)
989+ llama_cpp.llama_kv_cache_clear(self._ctx.ctx)
990990 self._ctx.decode(self._batch)
991991 self._batch.reset()
992992
@@ -1057,7 +1057,7 @@ def decode_batch(seq_sizes: List[int]):
10571057
10581058 output = data[0] if isinstance(input, str) else data
10591059
1060- llama_cpp.llama_kv_self_clear(self._ctx.ctx)
1060+ llama_cpp.llama_kv_cache_clear(self._ctx.ctx)
10611061 self.reset()
10621062
10631063 if return_count :
0 commit comments