88
99import openai
1010
11- from .. import constants
1211from ..tracing import tracer
1312
1413logger = logging .getLogger (__name__ )
@@ -24,7 +23,6 @@ def trace_openai(
2423 - end_time: The time when the completion was received.
2524 - latency: The time it took to generate the completion.
2625 - tokens: The total number of tokens used to generate the completion.
27- - cost: The estimated cost of the completion.
2826 - prompt_tokens: The number of tokens in the prompt.
2927 - completion_tokens: The number of tokens in the completion.
3028 - model: The model used to generate the completion.
@@ -161,20 +159,13 @@ def stream_chunks(
161159 else :
162160 collected_function_call ["arguments" ] = json .loads (collected_function_call ["arguments" ])
163161 output_data = collected_function_call
164- completion_cost = estimate_cost (
165- model = kwargs .get ("model" ),
166- prompt_tokens = 0 ,
167- completion_tokens = (num_of_completion_tokens if num_of_completion_tokens else 0 ),
168- is_azure_openai = is_azure_openai ,
169- )
170162
171163 trace_args = create_trace_args (
172164 end_time = end_time ,
173165 inputs = {"prompt" : kwargs ["messages" ]},
174166 output = output_data ,
175167 latency = latency ,
176168 tokens = num_of_completion_tokens ,
177- cost = completion_cost ,
178169 prompt_tokens = 0 ,
179170 completion_tokens = num_of_completion_tokens ,
180171 model = kwargs .get ("model" ),
@@ -196,21 +187,6 @@ def stream_chunks(
196187 )
197188
198189
199- def estimate_cost (
200- prompt_tokens : int ,
201- completion_tokens : int ,
202- model : str ,
203- is_azure_openai : bool = False ,
204- ) -> float :
205- """Returns the cost estimate for a given OpenAI model and number of tokens."""
206- if is_azure_openai and model in constants .AZURE_OPENAI_COST_PER_TOKEN :
207- cost_per_token = constants .AZURE_OPENAI_COST_PER_TOKEN [model ]
208- elif model in constants .OPENAI_COST_PER_TOKEN :
209- cost_per_token = constants .OPENAI_COST_PER_TOKEN [model ]
210- return cost_per_token ["input" ] * prompt_tokens + cost_per_token ["output" ] * completion_tokens
211- return None
212-
213-
214190def get_model_parameters (kwargs : Dict [str , Any ]) -> Dict [str , Any ]:
215191 """Gets the model parameters from the kwargs."""
216192 return {
@@ -234,7 +210,6 @@ def create_trace_args(
234210 output : str ,
235211 latency : float ,
236212 tokens : int ,
237- cost : float ,
238213 prompt_tokens : int ,
239214 completion_tokens : int ,
240215 model : str ,
@@ -250,7 +225,6 @@ def create_trace_args(
250225 "output" : output ,
251226 "latency" : latency ,
252227 "tokens" : tokens ,
253- "cost" : cost ,
254228 "prompt_tokens" : prompt_tokens ,
255229 "completion_tokens" : completion_tokens ,
256230 "model" : model ,
@@ -300,19 +274,12 @@ def handle_non_streaming_create(
300274 # Try to add step to the trace
301275 try :
302276 output_data = parse_non_streaming_output_data (response )
303- cost = estimate_cost (
304- model = response .model ,
305- prompt_tokens = response .usage .prompt_tokens ,
306- completion_tokens = response .usage .completion_tokens ,
307- is_azure_openai = is_azure_openai ,
308- )
309277 trace_args = create_trace_args (
310278 end_time = end_time ,
311279 inputs = {"prompt" : kwargs ["messages" ]},
312280 output = output_data ,
313281 latency = (end_time - start_time ) * 1000 ,
314282 tokens = response .usage .total_tokens ,
315- cost = cost ,
316283 prompt_tokens = response .usage .prompt_tokens ,
317284 completion_tokens = response .usage .completion_tokens ,
318285 model = response .model ,
@@ -373,7 +340,7 @@ def trace_openai_assistant_thread_run(client: openai.OpenAI, run: "openai.types.
373340 """Trace a run from an OpenAI assistant.
374341
375342 Once the run is completed, the thread data is published to Openlayer,
376- along with the latency, cost, and number of tokens used."""
343+ along with the latency and number of tokens used."""
377344 _type_check_run (run )
378345
379346 # Do nothing if the run is not completed
@@ -420,11 +387,6 @@ def _extract_run_vars(run: "openai.types.beta.threads.run.Run") -> Dict[str, any
420387 "completion_tokens" : run .usage .completion_tokens ,
421388 "tokens" : run .usage .total_tokens ,
422389 "model" : run .model ,
423- "cost" : estimate_cost (
424- model = run .model ,
425- prompt_tokens = run .usage .prompt_tokens ,
426- completion_tokens = run .usage .completion_tokens ,
427- ),
428390 }
429391
430392
0 commit comments