Skip to content

Commit 5e29153

Browse files
gustavocidornelas authored and whoseoyster committed
Completes OPEN-5861 Client's llm_monitors should support pricing using models from Azure
1 parent 3276f64 commit 5e29153

File tree

3 files changed

+63
-11
lines changed

3 files changed

+63
-11
lines changed

openlayer/constants.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,21 @@
107107
"output": 0.12e-3,
108108
},
109109
}
110+
# Azure OpenAI pricing table, in USD per single token.
# Microsoft quotes prices per 1k tokens; the e-3 factor converts them
# to a per-token figure so cost = rate * token_count.
# Last update: 2024-03-26
AZURE_OPENAI_COST_PER_TOKEN = {
    "babbage-002": {"input": 0.0004e-3, "output": 0.0004e-3},
    "davinci-002": {"input": 0.002e-3, "output": 0.002e-3},
    "gpt-35-turbo": {"input": 0.0005e-3, "output": 0.0015e-3},
    "gpt-35-turbo-0125": {"input": 0.0005e-3, "output": 0.0015e-3},
    "gpt-35-turbo-instruct": {"input": 0.0015e-3, "output": 0.002e-3},
    "gpt-4-turbo": {"input": 0.01e-3, "output": 0.03e-3},
    "gpt-4-turbo-vision": {"input": 0.01e-3, "output": 0.03e-3},
    "gpt-4-8k": {"input": 0.03e-3, "output": 0.06e-3},
    "gpt-4-32k": {"input": 0.06e-3, "output": 0.12e-3},
}

openlayer/llm_monitors.py

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -165,12 +165,12 @@ def modified_create_chat_completion(*args, **kwargs) -> str:
165165
else:
166166
output_data = None
167167
cost = self.get_cost_estimate(
168-
model=kwargs.get("model"),
168+
model=response.model,
169169
num_input_tokens=response.usage.prompt_tokens,
170170
num_output_tokens=response.usage.completion_tokens,
171171
)
172172

173-
tracer.add_openai_chat_completion_step_to_trace(
173+
self._add_to_trace(
174174
end_time=end_time,
175175
inputs={
176176
"prompt": kwargs["messages"],
@@ -181,10 +181,9 @@ def modified_create_chat_completion(*args, **kwargs) -> str:
181181
cost=cost,
182182
prompt_tokens=response.usage.prompt_tokens,
183183
completion_tokens=response.usage.completion_tokens,
184-
model=kwargs.get("model"),
184+
model=response.model,
185185
model_parameters=kwargs.get("model_parameters"),
186186
raw_output=response.model_dump(),
187-
provider="OpenAI",
188187
)
189188
# pylint: disable=broad-except
190189
except Exception as e:
@@ -269,7 +268,7 @@ def stream_chunks():
269268
),
270269
)
271270

272-
tracer.add_openai_chat_completion_step_to_trace(
271+
self._add_to_trace(
273272
end_time=end_time,
274273
inputs={
275274
"prompt": kwargs["messages"],
@@ -290,7 +289,6 @@ def stream_chunks():
290289
else None
291290
)
292291
},
293-
provider="OpenAI",
294292
)
295293
# pylint: disable=broad-except
296294
except Exception as e:
@@ -318,12 +316,12 @@ def modified_create_completion(*args, **kwargs):
318316
output_data = choices[0].text.strip()
319317
num_of_tokens = int(response.usage.total_tokens / len(prompts))
320318
cost = self.get_cost_estimate(
321-
model=kwargs.get("model"),
319+
model=response.model,
322320
num_input_tokens=response.usage.prompt_tokens,
323321
num_output_tokens=response.usage.completion_tokens,
324322
)
325323

326-
tracer.add_openai_chat_completion_step_to_trace(
324+
self._add_to_trace(
327325
end_time=end_time,
328326
inputs={
329327
"prompt": [{"role": "user", "content": input_data}],
@@ -334,10 +332,9 @@ def modified_create_completion(*args, **kwargs):
334332
cost=cost,
335333
prompt_tokens=response.usage.prompt_tokens,
336334
completion_tokens=response.usage.completion_tokens,
337-
model=kwargs.get("model"),
335+
model=response.model,
338336
model_parameters=kwargs.get("model_parameters"),
339337
raw_output=response.model_dump(),
340-
provider="OpenAI",
341338
)
342339
# pylint: disable=broad-except
343340
except Exception as e:
@@ -347,6 +344,13 @@ def modified_create_completion(*args, **kwargs):
347344

348345
return modified_create_completion
349346

347+
def _add_to_trace(self, **kwargs) -> None:
348+
"""Add a step to the trace."""
349+
tracer.add_openai_chat_completion_step_to_trace(
350+
**kwargs,
351+
provider="OpenAI",
352+
)
353+
350354
@staticmethod
351355
def _split_list(lst: List, n_parts: int) -> List[List]:
352356
"""Split a list into n_parts."""
@@ -486,3 +490,32 @@ def thread_messages_to_prompt(
486490
}
487491
)
488492
return prompt
493+
494+
495+
class AzureOpenAIMonitor(OpenAIMonitor):
    """Monitor for Azure OpenAI completion calls.

    Behaves like ``OpenAIMonitor`` but prices tokens using the Azure
    pricing table and tags trace steps with the Azure provider name.
    """

    def __init__(
        self,
        client=None,
    ) -> None:
        # Construction is identical to the base monitor; kept explicit so
        # the Azure entry point is discoverable.
        super().__init__(client)

    @staticmethod
    def get_cost_estimate(
        num_input_tokens: int, num_output_tokens: int, model: str
    ) -> float:
        """Return the estimated cost in USD for `model` and the given token
        counts, or None when the model is absent from the Azure pricing table.

        NOTE(review): this returns None for unknown models, so the return
        annotation arguably should be Optional[float]; left as ``float`` to
        avoid touching the module's typing imports.
        """
        # Single lookup instead of `in` check + subscript.
        cost_per_token = constants.AZURE_OPENAI_COST_PER_TOKEN.get(model)
        if cost_per_token is None:
            # Unknown model: report no estimate rather than a wrong number.
            return None
        return (
            cost_per_token["input"] * num_input_tokens
            + cost_per_token["output"] * num_output_tokens
        )

    def _add_to_trace(self, **kwargs) -> None:
        """Add a chat-completion step to the trace, tagged as Azure OpenAI."""
        tracer.add_openai_chat_completion_step_to_trace(
            **kwargs,
            name="Azure OpenAI Chat Completion",
            provider="Azure OpenAI",
        )

openlayer/tracing/tracer.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@ def create_step(
101101
def add_openai_chat_completion_step_to_trace(**kwargs) -> None:
    """Add an OpenAI chat-completion step to the current trace.

    An optional ``name`` kwarg overrides the default step name (used by the
    Azure monitor). Note that all kwargs — including ``name`` and
    ``provider`` when present — are also logged onto the step itself.
    """
    step_name = kwargs.get("name", "OpenAI Chat Completion")
    with create_step(
        step_type=enums.StepType.CHAT_COMPLETION,
        name=step_name,
    ) as step:
        step.log(**kwargs)
107108

0 commit comments

Comments
 (0)