
Commit 2cbd1eb

gustavocidornelas authored and whoseoyster committed
Add support for OpenAI assistants
1 parent f939d97 commit 2cbd1eb

File tree

CHANGELOG.md
openlayer/llm_monitors.py
openlayer/services/data_streamer.py

3 files changed: +102 -1 lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 ## Unreleased
 
 ### Added
+* Added support for OpenAI assistants. The `llm_monitor` now supports monitoring OpenAI assistant runs with the function `monitor_thread_run`.
 * Added the ability to use the `llm_monitor.OpenAIMonitor` as a context manager.
 * Added `openlayer_inference_pipeline_id` as an optional parameter to the `OpenAIMonitor`. This is an alternative to the `openlayer_inference_pipeline_name` and `openlayer_inference_project_name` parameters for identifying the inference pipeline on the platform.
 * Added `monitor_output_only` as an argument to the OpenAI `llm_monitor`. If set to `True`, the monitor will only record the output of the model, and not the input.
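
Taken together, the entries above suggest the following usage pattern. A minimal sketch, assuming an `OpenAIMonitor` constructor that accepts the parameters named in this changelog (the `client` keyword and the placeholder id are assumptions, not verbatim from the library):

```python
import openai

from openlayer import llm_monitors

openai_client = openai.OpenAI()

# Identify the inference pipeline directly by id -- the new alternative to the
# openlayer_inference_pipeline_name / openlayer_inference_project_name pair.
monitor = llm_monitors.OpenAIMonitor(
    client=openai_client,  # assumed parameter name
    openlayer_inference_pipeline_id="<pipeline-id>",  # placeholder
    monitor_output_only=True,  # record only model outputs, not inputs
)

# New: the monitor works as a context manager, so monitoring is switched on
# only for the duration of the block.
with monitor:
    openai_client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}],
    )
```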

openlayer/llm_monitors.py

Lines changed: 98 additions & 0 deletions
@@ -477,3 +477,101 @@ def data_config(self) -> Dict[str, any]:
     def data(self) -> pd.DataFrame:
         """Dataframe accumulated after monitoring was switched on."""
         return self.df
+
+    def monitor_thread_run(self, run: openai.types.beta.threads.run.Run) -> None:
+        """Monitor a run from an OpenAI assistant.
+
+        Once the run is completed, the thread data is published to Openlayer,
+        along with the latency, cost, and number of tokens used."""
+        self._type_check_run(run)
+
+        # Do nothing if the run is not completed
+        if run.status != "completed":
+            return
+
+        try:
+            # Extract vars
+            run_vars = self._extract_run_vars(run)
+
+            # Convert thread to prompt
+            messages = self.openai_client.beta.threads.messages.list(
+                thread_id=run_vars["openai_thread_id"], order="asc"
+            )
+            populated_prompt = self.thread_messages_to_prompt(messages)
+            prompt, input_variables = self.format_input(populated_prompt)
+
+            # Data
+            input_data = {
+                **input_variables,
+                **{
+                    "output": prompt[-1]["content"],
+                    "tokens": run_vars["total_num_tokens"],
+                    "latency": run_vars["latency"],
+                    "cost": run_vars["cost"],
+                    "thread_id": run_vars["openai_thread_id"],
+                    "assistant_id": run_vars["openai_assistant_id"],
+                    "timestamp": run_vars["timestamp"],
+                },
+            }
+
+            # Config
+            config = self.data_config.copy()
+            config["inputVariableNames"] = list(input_variables.keys())
+            config["prompt"] = prompt[:-1]  # Remove the last message (the output)
+            config["timestampColumnName"] = "timestamp"
+
+            self.data_streamer.stream_data(data=input_data, config=config)
+            print("Data published to Openlayer.")
+        # pylint: disable=broad-except
+        except Exception as e:
+            print(f"Failed to monitor run. {e}")
+
+    def _type_check_run(self, run: openai.types.beta.threads.run.Run) -> None:
+        """Validate the run object."""
+        if not isinstance(run, openai.types.beta.threads.run.Run):
+            raise ValueError(f"Expected a Run object, but got {type(run)}.")
+
+    def _extract_run_vars(
+        self, run: openai.types.beta.threads.run.Run
+    ) -> Dict[str, any]:
+        """Extract the variables from the run object."""
+        return {
+            "openai_thread_id": run.thread_id,
+            "openai_assistant_id": run.assistant_id,
+            "latency": (run.completed_at - run.created_at) * 1000,  # Convert to ms
+            "timestamp": run.created_at,  # Unix timestamp, in seconds
+            "num_input_tokens": run.usage["prompt_tokens"],
+            "num_output_tokens": run.usage["completion_tokens"],
+            "total_num_tokens": run.usage["total_tokens"],
+            "cost": self.get_cost_estimate(
+                model=run.model,
+                num_input_tokens=run.usage["prompt_tokens"],
+                num_output_tokens=run.usage["completion_tokens"],
+            ),
+        }
+
+    @staticmethod
+    def thread_messages_to_prompt(
+        messages: List[openai.types.beta.threads.thread_message.ThreadMessage],
+    ) -> List[Dict[str, str]]:
+        """Given a list of ThreadMessages, return their contents in the `prompt`
+        format, i.e., a list of dicts with 'role' and 'content' keys."""
+        prompt = []
+        for message in list(messages):
+            role = message.role
+            contents = message.content
+
+            for content in contents:
+                content_type = content.type
+                if content_type == "text":
+                    text_content = content.text.value
+                if content_type == "image_file":
+                    text_content = content.image_file.file_id
+
+                prompt.append(
+                    {
+                        "role": role,
+                        "content": text_content,
+                    }
+                )
+        return prompt
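
A hedged usage sketch for the new `monitor_thread_run` method: create a run through the OpenAI assistants beta API, poll until it reaches a terminal state, then hand the run to the monitor. The OpenAI calls follow the public beta API; the `OpenAIMonitor` constructor arguments are assumptions:

```python
import time

import openai

from openlayer import llm_monitors

client = openai.OpenAI()
monitor = llm_monitors.OpenAIMonitor(client=client)  # constructor args assumed

# Standard assistants beta flow: create a thread, then start a run on it.
thread = client.beta.threads.create(
    messages=[{"role": "user", "content": "What is the capital of France?"}]
)
run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id="<assistant-id>",  # placeholder for a real assistant id
)

# Poll until the run leaves its non-terminal states. monitor_thread_run
# returns without publishing anything unless run.status == "completed".
while run.status in ("queued", "in_progress", "cancelling"):
    time.sleep(1)
    run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)

monitor.monitor_thread_run(run)  # publishes thread data, latency, cost, tokens
```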

openlayer/services/data_streamer.py

Lines changed: 3 additions & 1 deletion
@@ -129,7 +129,9 @@ def _load_inference_pipeline(self) -> None:
             name=self.openlayer_project_name,
             task_type=tasks.TaskType.LLM,
         )
-        inference_pipeline = project.create_inference_pipeline()
+        inference_pipeline = project.create_inference_pipeline(
+            name=self.openlayer_inference_pipeline_name
+        )
 
         self.inference_pipeline = inference_pipeline
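
The fix above matters when the caller supplies a pipeline name: previously `create_inference_pipeline()` was called with no arguments, so the name was silently dropped. A sketch of the call path, assuming the class and parameter names suggested by the attributes in this hunk:

```python
from openlayer.services.data_streamer import DataStreamer  # class name assumed

# Parameter names inferred from the self.* attributes in the hunk above.
streamer = DataStreamer(
    openlayer_project_name="my-llm-project",
    openlayer_inference_pipeline_name="production",
)
# If the project has no pipeline yet, _load_inference_pipeline now creates one
# named "production" instead of an unnamed default.
```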
