@@ -582,7 +582,7 @@ def eval(self, tokens: Sequence[int]):
582582 Args:
583583 tokens: The list of tokens to evaluate.
584584 """
585- self._ctx.kv_self_seq_rm(-1, self.n_tokens, -1)
585+ self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1)
586586 for i in range(0, len(tokens), self.n_batch):
587587 batch = tokens[i : min(len(tokens), i + self.n_batch)]
588588 n_past = self.n_tokens
@@ -890,7 +890,7 @@ def generate(
890890
891891 if sample_idx < self.n_tokens and token != self._input_ids[sample_idx]:
892892 self.n_tokens = sample_idx
893- self._ctx.kv_self_seq_rm(-1, self.n_tokens, -1)
893+ self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1)
894894 break
895895
896896 if self.draft_model is not None:
@@ -986,7 +986,7 @@ def embed(
986986 data: Union[List[List[float]], List[List[List[float]]]] = []
987987
988988 def decode_batch(seq_sizes: List[int]):
989- llama_cpp.llama_kv_self_clear(self._ctx.ctx)
989+ llama_cpp.llama_kv_cache_clear(self._ctx.ctx)
990990 self._ctx.decode(self._batch)
991991 self._batch.reset()
992992
@@ -1057,7 +1057,7 @@ def decode_batch(seq_sizes: List[int]):
10571057
10581058 output = data[0] if isinstance(input, str) else data
10591059
1060- llama_cpp.llama_kv_self_clear(self._ctx.ctx)
1060+ llama_cpp.llama_kv_cache_clear(self._ctx.ctx)
10611061 self.reset()
10621062
10631063 if return_count :
0 commit comments