
Commit 6e39272

Add verbose attribute to LLM models for debugging purposes.
1 parent 0458ea3 commit 6e39272

File tree

3 files changed, +30 −6 lines changed

ads/llm/langchain/plugins/base.py

Lines changed: 20 additions & 0 deletions
@@ -44,6 +44,26 @@ class BaseLLM(LLM):
     stop: Optional[List[str]] = None
     """Stop words to use when generating. Model output is cut off at the first occurrence of any of these substrings."""
 
+    verbose: int = 0
+    """Verbosity level for debugging purposes.
+    The LLM implementation should print debugging information based on the verbosity level:
+    0 - No debugging information
+    1 - Print the prompt and the response (completion) from the LLM
+    2 - In addition to the prompt and the response (completion), also print the request parameters (payload).
+    """
+
+    def _print_request(self, prompt, params):
+        if self.verbose >= 1:
+            print(f"LLM API Request:\n{prompt}")
+        if self.verbose >= 2:
+            print(f"LLM API Parameters:\n{params}")
+
+    def _print_response(self, completion, response):
+        if self.verbose >= 1:
+            print(f"LLM API Completion:\n{completion}")
+        if self.verbose >= 2:
+            print(f"LLM API Response:\n{response}")
+
 
 class GenerativeAiClientModel(BaseModel):
     client: Any #: :meta private:
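
For reference, the two hooks behave like the self-contained sketch below. VerboseDemo is hypothetical and only mirrors the _print_request/_print_response logic from this commit: level 1 prints the prompt and completion, while level 2 additionally prints the request parameters and the raw response.

# Illustrative sketch only: VerboseDemo is hypothetical; it mirrors the
# verbose hooks on BaseLLM but is not part of the ADS code base.
class VerboseDemo:
    def __init__(self, verbose: int = 0):
        self.verbose = verbose

    def _print_request(self, prompt, params):
        # verbose >= 1 prints the prompt; verbose >= 2 also prints the payload
        if self.verbose >= 1:
            print(f"LLM API Request:\n{prompt}")
        if self.verbose >= 2:
            print(f"LLM API Parameters:\n{params}")

    def _print_response(self, completion, response):
        # verbose >= 1 prints the completion; verbose >= 2 also the raw response
        if self.verbose >= 1:
            print(f"LLM API Completion:\n{completion}")
        if self.verbose >= 2:
            print(f"LLM API Response:\n{response}")


demo = VerboseDemo(verbose=2)
demo._print_request("Translate 'hello'.", {"max_tokens": 16})
demo._print_response("Bonjour", {"generated_text": "Bonjour"})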

ads/llm/langchain/plugins/llm_gen_ai.py

Lines changed: 4 additions & 1 deletion
@@ -146,6 +146,7 @@ def _call(
         """
 
         params = self._invocation_params(stop, **kwargs)
+        self._print_request(prompt, params)
 
         try:
             response = (
@@ -163,7 +164,9 @@ def _call(
             )
             raise
 
-        return self._process_response(response, params.get("num_generations", 1))
+        completion = self._process_response(response, params.get("num_generations", 1))
+        self._print_response(completion, response)
+        return completion
 
     def _process_response(self, response: Any, num_generations: int = 1) -> str:
         if self.task == Task.SUMMARY_TEXT:
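
As a usage sketch, assuming the plugin class defined in llm_gen_ai.py is named GenerativeAI and accepts a compartment_id argument (neither is shown in this diff), the new attribute would be enabled like this:

# Hypothetical usage; the class name GenerativeAI and the compartment_id
# argument are assumptions not confirmed by this diff.
from ads.llm.langchain.plugins.llm_gen_ai import GenerativeAI

llm = GenerativeAI(
    compartment_id="<compartment_ocid>",  # placeholder
    verbose=1,  # print the prompt and completion for each call
)
print(llm("Tell me a joke."))
# verbose=2 would additionally print the invocation parameters and raw response.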

ads/llm/langchain/plugins/llm_md.py

Lines changed: 6 additions & 5 deletions
@@ -94,8 +94,11 @@ def _call(
         """
         params = self._invocation_params(stop, **kwargs)
         body = self._construct_json_body(prompt, params)
+        self._print_request(prompt, params)
         response = self.send_request(data=body, endpoint=self.endpoint)
-        return self._process_response(response)
+        completion = self._process_response(response)
+        self._print_response(completion, response)
+        return completion
 
     def send_request(
         self,
@@ -134,9 +137,7 @@ def send_request(
         request_kwargs["headers"] = header
         request_kwargs["auth"] = self.auth.get("signer")
         timeout = kwargs.pop("timeout", DEFAULT_TIME_OUT)
-        response = requests.post(
-            endpoint, timeout=timeout, **request_kwargs, **kwargs
-        )
+        response = requests.post(endpoint, timeout=timeout, **request_kwargs, **kwargs)
 
         try:
             response.raise_for_status()
@@ -205,7 +206,7 @@ def _construct_json_body(self, prompt, params):
         }
 
     def _process_response(self, response_json: dict):
-        return str(response_json.get("generated_text", response_json))
+        return str(response_json.get("generated_text", response_json)) + "\n"
 
 
 class ModelDeploymentVLLM(ModelDeploymentLLM):
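
A similar sketch for the model deployment plugins. The class name ModelDeploymentVLLM and the endpoint attribute appear in this diff, but the constructor call and the endpoint URL format are assumptions:

# Hypothetical usage; only the class name and the endpoint/verbose
# attributes are visible in this diff.
from ads.llm.langchain.plugins.llm_md import ModelDeploymentVLLM

llm = ModelDeploymentVLLM(
    endpoint="https://<md_host>/<md_ocid>/predict",  # placeholder endpoint
    verbose=2,  # print prompt, parameters, completion, and raw response
)
print(llm("What is OCI Data Science?"))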
