Commit 77bb6c9
llamacpp wrong default value passed for f16_kv (langchain-ai#3320)
Fixes the default f16_kv value in llamacpp; the wrong default was being passed through to llama-cpp-python.
See: https://github.com/abetlen/llama-cpp-python/blob/ba3959eafd38080f3bf3028746406f350a8ef793/llama_cpp/llama.py#L33
Fixes langchain-ai#3241
Fixes langchain-ai#3301
1 parent 3a1bdce commit 77bb6c9
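
For context, the linked llama.py line is why True is the correct default: llama_cpp.Llama's constructor itself defaults f16_kv to True. A one-line excerpt of that default (reproduced from the linked upstream file, not from this commit):

f16_kv: bool = True,  # from llama_cpp.Llama.__init__ at the linked revision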

File tree: 1 file changed, +3 −3 lines changed


langchain/llms/llamacpp.py

Lines changed: 3 additions & 3 deletions
@@ -31,13 +31,13 @@ class LlamaCpp(LLM):
     """Token context window."""
 
     n_parts: int = Field(-1, alias="n_parts")
-    """Number of parts to split the model into. 
+    """Number of parts to split the model into.
     If -1, the number of parts is automatically determined."""
 
     seed: int = Field(-1, alias="seed")
     """Seed. If -1, a random seed is used."""
 
-    f16_kv: bool = Field(False, alias="f16_kv")
+    f16_kv: bool = Field(True, alias="f16_kv")
     """Use half-precision for key/value cache."""
 
     logits_all: bool = Field(False, alias="logits_all")
@@ -50,7 +50,7 @@ class LlamaCpp(LLM):
     """Force system to keep model in RAM."""
 
     n_threads: Optional[int] = Field(None, alias="n_threads")
-    """Number of threads to use. 
+    """Number of threads to use.
     If None, the number of threads is automatically determined."""
 
     n_batch: Optional[int] = Field(8, alias="n_batch")
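
Note that the two docstring edits only strip trailing whitespace; the substantive change is the f16_kv default. A minimal usage sketch of the corrected behavior (the model path below is a placeholder, not part of this commit): after this fix, LlamaCpp forwards f16_kv=True to llama_cpp.Llama by default, matching llama-cpp-python's own default.

from langchain.llms import LlamaCpp

# Placeholder path; point this at a real local GGML model file.
llm = LlamaCpp(model_path="./models/ggml-model-q4_0.bin")
# f16_kv now defaults to True, so the key/value cache uses half precision.

# Opting out explicitly still works by passing the field directly:
llm_fp32 = LlamaCpp(model_path="./models/ggml-model-q4_0.bin", f16_kv=False)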
