@@ -201,25 +201,6 @@ def llama_apply_lora_from_file(
 _lib.llama_apply_lora_from_file.restype = c_int
 
 
-# Returns the KV cache that will contain the context for the
-# ongoing prediction with the model.
-def llama_get_kv_cache(ctx: llama_context_p):
-    return _lib.llama_get_kv_cache(ctx)
-
-
-_lib.llama_get_kv_cache.argtypes = [llama_context_p]
-_lib.llama_get_kv_cache.restype = POINTER(c_uint8)
-
-
-# Returns the size of the KV cache
-def llama_get_kv_cache_size(ctx: llama_context_p) -> c_size_t:
-    return _lib.llama_get_kv_cache_size(ctx)
-
-
-_lib.llama_get_kv_cache_size.argtypes = [llama_context_p]
-_lib.llama_get_kv_cache_size.restype = c_size_t
-
-
 # Returns the number of tokens in the KV cache
 def llama_get_kv_cache_token_count(ctx: llama_context_p) -> c_int:
     return _lib.llama_get_kv_cache_token_count(ctx)
@@ -229,17 +210,6 @@ def llama_get_kv_cache_token_count(ctx: llama_context_p) -> c_int:
 _lib.llama_get_kv_cache_token_count.restype = c_int
 
 
-# Sets the KV cache containing the current context for the model
-def llama_set_kv_cache(
-    ctx: llama_context_p, kv_cache, n_size: c_size_t, n_token_count: c_int
-):
-    return _lib.llama_set_kv_cache(ctx, kv_cache, n_size, n_token_count)
-
-
-_lib.llama_set_kv_cache.argtypes = [llama_context_p, POINTER(c_uint8), c_size_t, c_int]
-_lib.llama_set_kv_cache.restype = None
-
-
 # Returns the size in bytes of the state (rng, logits, embedding and kv_cache)
 def llama_get_state_size(ctx: llama_context_p) -> c_size_t:
     return _lib.llama_get_state_size(ctx)
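
The removed per-cache getters and setters are superseded by the whole-state API that this hunk keeps: llama_get_state_size reports the buffer size needed to snapshot the rng, logits, embedding, and kv_cache together. A minimal snapshot/restore sketch follows, assuming ctx is an initialized llama_context_p and that llama_copy_state_data and llama_set_state_data are bound elsewhere in this module with the usual llama.cpp signatures; this usage is illustrative and not part of the diff.

from ctypes import c_uint8

# Sketch only: llama_copy_state_data / llama_set_state_data are assumed
# to be bound in the same style as the functions above.
n_state = llama_get_state_size(ctx)    # total bytes for rng, logits, embedding, kv_cache
state_buf = (c_uint8 * n_state)()      # ctypes buffer to hold the snapshot
llama_copy_state_data(ctx, state_buf)  # save the full context state

# ... evaluate more tokens with ctx ...

llama_set_state_data(ctx, state_buf)   # roll the context back to the snapshot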