chore: move cost estimation logic to the backend

gustavocidornelas · whoseoyster · commit b9e113481e57 · 2024-07-05T12:14:18.000-07:00
diff --git a/src/openlayer/lib/constants.py b/src/openlayer/lib/constants.py
diff --git a/src/openlayer/lib/integrations/anthropic_tracer.py b/src/openlayer/lib/integrations/anthropic_tracer.py
@@ -23,7 +23,6 @@ def trace_anthropic(
     - end_time: The time when the completion was received.
     - latency: The time it took to generate the completion.
     - tokens: The total number of tokens used to generate the completion.
-    - cost: The estimated cost of the completion.
     - prompt_tokens: The number of tokens in the prompt.
     - completion_tokens: The number of tokens in the completion.
     - model: The model used to generate the completion.
@@ -152,15 +151,12 @@ def stream_chunks(
                 collected_function_call["inputs"] = json.loads(collected_function_call["inputs"])
                 output_data = collected_function_call
 
-            cost = 0
-
             trace_args = create_trace_args(
                 end_time=end_time,
                 inputs={"prompt": kwargs["messages"]},
                 output=output_data,
                 latency=latency,
                 tokens=num_of_completion_tokens,
-                cost=cost,
                 prompt_tokens=num_of_prompt_tokens,
                 completion_tokens=num_of_completion_tokens,
                 model=kwargs.get("model"),
@@ -206,14 +202,12 @@ def handle_non_streaming_create(
     # Try to add step to the trace
     try:
         output_data = parse_non_streaming_output_data(response)
-        cost = 0
         trace_args = create_trace_args(
             end_time=end_time,
             inputs={"prompt": kwargs["messages"]},
             output=output_data,
             latency=(end_time - start_time) * 1000,
             tokens=response.usage.input_tokens + response.usage.output_tokens,
-            cost=cost,
             prompt_tokens=response.usage.input_tokens,
             completion_tokens=response.usage.output_tokens,
             model=response.model,
@@ -275,7 +269,6 @@ def create_trace_args(
     output: str,
     latency: float,
     tokens: int,
-    cost: float,
     prompt_tokens: int,
     completion_tokens: int,
     model: str,
@@ -291,7 +284,6 @@ def create_trace_args(
         "output": output,
         "latency": latency,
         "tokens": tokens,
-        "cost": cost,
         "prompt_tokens": prompt_tokens,
         "completion_tokens": completion_tokens,
         "model": model,
diff --git a/src/openlayer/lib/integrations/langchain_callback.py b/src/openlayer/lib/integrations/langchain_callback.py
@@ -7,7 +7,6 @@
 from langchain import schema as langchain_schema
 from langchain.callbacks.base import BaseCallbackHandler
 
-from .. import constants
 from ..tracing import tracer
 
 LANGCHAIN_TO_OPENLAYER_PROVIDER_MAP = {"openai-chat": "OpenAI"}
@@ -27,7 +26,6 @@ def __init__(self, **kwargs: Any) -> None:
         self.provider: str = None
         self.model: Optional[str] = None
         self.model_parameters: Dict[str, Any] = None
-        self.cost: Optional[float] = None
         self.prompt_tokens: int = None
         self.completion_tokens: int = None
         self.total_tokens: int = None
@@ -87,10 +85,6 @@ def on_llm_end(self, response: langchain_schema.LLMResult, **kwargs: Any) -> Any
         if response.llm_output and "token_usage" in response.llm_output:
             self.prompt_tokens = response.llm_output["token_usage"].get("prompt_tokens", 0)
             self.completion_tokens = response.llm_output["token_usage"].get("completion_tokens", 0)
-            self.cost = self._get_cost_estimate(
-                num_input_tokens=self.prompt_tokens,
-                num_output_tokens=self.completion_tokens,
-            )
             self.total_tokens = response.llm_output["token_usage"].get("total_tokens", 0)
 
         for generations in response.generations:
@@ -99,13 +93,6 @@ def on_llm_end(self, response: langchain_schema.LLMResult, **kwargs: Any) -> Any
 
         self._add_to_trace()
 
-    def _get_cost_estimate(self, num_input_tokens: int, num_output_tokens: int) -> float:
-        """Returns the cost estimate for a given model and number of tokens."""
-        if self.model not in constants.OPENAI_COST_PER_TOKEN:
-            return None
-        cost_per_token = constants.OPENAI_COST_PER_TOKEN[self.model]
-        return cost_per_token["input"] * num_input_tokens + cost_per_token["output"] * num_output_tokens
-
     def _add_to_trace(self) -> None:
         """Adds to the trace."""
         name = PROVIDER_TO_STEP_NAME.get(self.provider, "Chat Completion Model")
@@ -114,7 +101,6 @@ def _add_to_trace(self) -> None:
             provider=self.provider,
             inputs={"prompt": self.prompt},
             output=self.output,
-            cost=self.cost,
             tokens=self.total_tokens,
             latency=self.latency,
             start_time=self.start_time,
diff --git a/src/openlayer/lib/integrations/openai_tracer.py b/src/openlayer/lib/integrations/openai_tracer.py
@@ -8,7 +8,6 @@
 
 import openai
 
-from .. import constants
 from ..tracing import tracer
 
 logger = logging.getLogger(__name__)
@@ -24,7 +23,6 @@ def trace_openai(
     - end_time: The time when the completion was received.
     - latency: The time it took to generate the completion.
     - tokens: The total number of tokens used to generate the completion.
-    - cost: The estimated cost of the completion.
     - prompt_tokens: The number of tokens in the prompt.
     - completion_tokens: The number of tokens in the completion.
     - model: The model used to generate the completion.
@@ -161,20 +159,13 @@ def stream_chunks(
             else:
                 collected_function_call["arguments"] = json.loads(collected_function_call["arguments"])
                 output_data = collected_function_call
-            completion_cost = estimate_cost(
-                model=kwargs.get("model"),
-                prompt_tokens=0,
-                completion_tokens=(num_of_completion_tokens if num_of_completion_tokens else 0),
-                is_azure_openai=is_azure_openai,
-            )
 
             trace_args = create_trace_args(
                 end_time=end_time,
                 inputs={"prompt": kwargs["messages"]},
                 output=output_data,
                 latency=latency,
                 tokens=num_of_completion_tokens,
-                cost=completion_cost,
                 prompt_tokens=0,
                 completion_tokens=num_of_completion_tokens,
                 model=kwargs.get("model"),
@@ -196,21 +187,6 @@ def stream_chunks(
             )
 
 
-def estimate_cost(
-    prompt_tokens: int,
-    completion_tokens: int,
-    model: str,
-    is_azure_openai: bool = False,
-) -> float:
-    """Returns the cost estimate for a given OpenAI model and number of tokens."""
-    if is_azure_openai and model in constants.AZURE_OPENAI_COST_PER_TOKEN:
-        cost_per_token = constants.AZURE_OPENAI_COST_PER_TOKEN[model]
-    elif model in constants.OPENAI_COST_PER_TOKEN:
-        cost_per_token = constants.OPENAI_COST_PER_TOKEN[model]
-        return cost_per_token["input"] * prompt_tokens + cost_per_token["output"] * completion_tokens
-    return None
-
-
 def get_model_parameters(kwargs: Dict[str, Any]) -> Dict[str, Any]:
     """Gets the model parameters from the kwargs."""
     return {
@@ -234,7 +210,6 @@ def create_trace_args(
     output: str,
     latency: float,
     tokens: int,
-    cost: float,
     prompt_tokens: int,
     completion_tokens: int,
     model: str,
@@ -250,7 +225,6 @@ def create_trace_args(
         "output": output,
         "latency": latency,
         "tokens": tokens,
-        "cost": cost,
         "prompt_tokens": prompt_tokens,
         "completion_tokens": completion_tokens,
         "model": model,
@@ -300,19 +274,12 @@ def handle_non_streaming_create(
     # Try to add step to the trace
     try:
         output_data = parse_non_streaming_output_data(response)
-        cost = estimate_cost(
-            model=response.model,
-            prompt_tokens=response.usage.prompt_tokens,
-            completion_tokens=response.usage.completion_tokens,
-            is_azure_openai=is_azure_openai,
-        )
         trace_args = create_trace_args(
             end_time=end_time,
             inputs={"prompt": kwargs["messages"]},
             output=output_data,
             latency=(end_time - start_time) * 1000,
             tokens=response.usage.total_tokens,
-            cost=cost,
             prompt_tokens=response.usage.prompt_tokens,
             completion_tokens=response.usage.completion_tokens,
             model=response.model,
@@ -373,7 +340,7 @@ def trace_openai_assistant_thread_run(client: openai.OpenAI, run: "openai.types.
     """Trace a run from an OpenAI assistant.
 
     Once the run is completed, the thread data is published to Openlayer,
-    along with the latency, cost, and number of tokens used."""
+    along with the latency and number of tokens used."""
     _type_check_run(run)
 
     # Do nothing if the run is not completed
@@ -420,11 +387,6 @@ def _extract_run_vars(run: "openai.types.beta.threads.run.Run") -> Dict[str, any
         "completion_tokens": run.usage.completion_tokens,
         "tokens": run.usage.total_tokens,
         "model": run.model,
-        "cost": estimate_cost(
-            model=run.model,
-            prompt_tokens=run.usage.prompt_tokens,
-            completion_tokens=run.usage.completion_tokens,
-        ),
     }
 
 
diff --git a/src/openlayer/lib/tracing/tracer.py b/src/openlayer/lib/tracing/tracer.py
@@ -306,7 +306,7 @@ def post_process_trace(
     else:
         input_variable_names = []
 
-    processed_steps = bubble_up_costs_and_tokens(trace_obj.to_dict())
+    processed_steps = trace_obj.to_dict()
 
     trace_data = {
         "inferenceTimestamp": root_step.start_time,
@@ -322,34 +322,3 @@ def post_process_trace(
         trace_data.update(input_variables)
 
     return trace_data, input_variable_names
-
-
-def bubble_up_costs_and_tokens(trace_dict: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    """Adds the cost and number of tokens of nested steps to their parent steps."""
-
-    def add_step_costs_and_tokens(step: Dict[str, Any]) -> Tuple[float, int]:
-        step_cost = step_tokens = 0
-
-        if "cost" in step and step["cost"] is not None:
-            step_cost += step["cost"]
-        if "tokens" in step and step["tokens"] is not None:
-            step_tokens += step["tokens"]
-
-        # Recursively add costs and tokens from nested steps
-        for nested_step in step.get("steps", []):
-            nested_cost, nested_tokens = add_step_costs_and_tokens(nested_step)
-            step_cost += nested_cost
-            step_tokens += nested_tokens
-
-        if "steps" in step:
-            if step_cost > 0 and "cost" not in step:
-                step["cost"] = step_cost
-            if step_tokens > 0 and "tokens" not in step:
-                step["tokens"] = step_tokens
-
-        return step_cost, step_tokens
-
-    for root_step_dict in trace_dict:
-        add_step_costs_and_tokens(root_step_dict)
-
-    return trace_dict