@@ -86,6 +86,66 @@ class OpenAIMonitor:
8686
8787 """
8888
# Last update: 2024-01-05
# Per-token cost table, keyed by model name. Each entry carries one rate for
# "input" (prompt) tokens and one for "output" (completion) tokens.
# NOTE(review): every value is written as `<price>e-3`, which presumably is a
# per-1K-token list price scaled down to a single token; the currency is not
# stated here -- confirm against the provider's pricing page when updating.
COST_PER_TOKEN = {
    "babbage-002": {"input": 0.0004e-3, "output": 0.0004e-3},
    "davinci-002": {"input": 0.002e-3, "output": 0.002e-3},
    "gpt-3.5-turbo": {"input": 0.003e-3, "output": 0.006e-3},
    "gpt-3.5-turbo-0301": {"input": 0.0015e-3, "output": 0.002e-3},
    "gpt-3.5-turbo-0613": {"input": 0.0015e-3, "output": 0.002e-3},
    "gpt-3.5-turbo-1106": {"input": 0.001e-3, "output": 0.002e-3},
    "gpt-3.5-turbo-16k-0613": {"input": 0.003e-3, "output": 0.004e-3},
    "gpt-3.5-turbo-instruct": {"input": 0.0015e-3, "output": 0.002e-3},
    "gpt-4": {"input": 0.03e-3, "output": 0.06e-3},
    "gpt-4-0314": {"input": 0.03e-3, "output": 0.06e-3},
    "gpt-4-1106-preview": {"input": 0.01e-3, "output": 0.03e-3},
    "gpt-4-1106-vision-preview": {"input": 0.01e-3, "output": 0.03e-3},
    "gpt-4-32k": {"input": 0.06e-3, "output": 0.12e-3},
    "gpt-4-32k-0314": {"input": 0.06e-3, "output": 0.12e-3},
}

148+
89149 def __init__ (
90150 self ,
91151 publish : bool = False ,
@@ -207,15 +267,23 @@ def modified_create_chat_completion(*args, **kwargs) -> str:
207267 prompt , input_data = self .format_input (kwargs ["messages" ])
208268 output_data = response .choices [0 ].message .content .strip ()
209269 num_of_tokens = response .usage .total_tokens
270+ cost = self .get_cost_estimate (
271+ model = kwargs .get ("model" ),
272+ num_input_tokens = response .usage .prompt_tokens ,
273+ num_output_tokens = response .usage .completion_tokens ,
274+ )
275+
210276 config = self .data_config .copy ()
211277 config ["prompt" ] = prompt
212278 config .update ({"inputVariableNames" : list (input_data .keys ())})
279+ config ["costColumnName" ] = "cost"
213280
214281 self ._append_row_to_df (
215282 input_data = input_data ,
216283 output_data = output_data ,
217284 num_of_tokens = num_of_tokens ,
218285 latency = latency ,
286+ cost = cost ,
219287 )
220288
221289 self ._handle_data_publishing (config = config )
@@ -243,15 +311,24 @@ def modified_create_completion(*args, **kwargs):
243311 for input_data , choices in zip (prompts , choices_splits ):
244312 output_data = choices [0 ].text .strip ()
245313 num_of_tokens = int (response .usage .total_tokens / len (prompts ))
314+ cost = self .get_cost_estimate (
315+ model = kwargs .get ("model" ),
316+ num_input_tokens = response .usage .prompt_tokens ,
317+ num_output_tokens = response .usage .completion_tokens ,
318+ )
246319
247320 self ._append_row_to_df (
248321 input_data = {"message" : input_data },
249322 output_data = output_data ,
250323 num_of_tokens = num_of_tokens ,
251324 latency = latency ,
325+ cost = cost ,
252326 )
253327
254- self ._handle_data_publishing ()
328+ config = self .data_config .copy ()
329+ config ["costColumnName" ] = "cost"
330+
331+ self ._handle_data_publishing (config = config )
255332 # pylint: disable=broad-except
256333 except Exception as e :
257334 logger .error ("Failed to monitor completion request. %s" , e )
@@ -323,12 +400,25 @@ def _split_list(lst: List, n_parts: int) -> List[List]:
323400 start = end
324401 return result
325402
def get_cost_estimate(
    self, num_input_tokens: int, num_output_tokens: int, model: str
) -> Optional[float]:
    """Returns the cost estimate for a given model and number of tokens.

    Parameters
    ----------
    num_input_tokens : int
        Number of input (prompt) tokens to price.
    num_output_tokens : int
        Number of output (completion) tokens to price.
    model : str
        Model name, used as the lookup key into ``COST_PER_TOKEN``.

    Returns
    -------
    Optional[float]
        ``input_rate * num_input_tokens + output_rate * num_output_tokens``
        for a model listed in ``COST_PER_TOKEN``; ``None`` when the model is
        not listed (including ``model=None``), i.e. no estimate is available.
    """
    # Single lookup: a missing model yields None rather than a KeyError.
    cost_per_token = self.COST_PER_TOKEN.get(model)
    if cost_per_token is None:
        return None
    return (
        cost_per_token["input"] * num_input_tokens
        + cost_per_token["output"] * num_output_tokens
    )
414+
326415 def _append_row_to_df (
327416 self ,
328417 input_data : Dict [str , str ],
329418 output_data : str ,
330419 num_of_tokens : int ,
331420 latency : float ,
421+ cost : float ,
332422 ) -> None :
333423 """Appends a row with input/output, number of tokens, and latency to the
334424 df."""
@@ -340,6 +430,7 @@ def _append_row_to_df(
340430 "output" : output_data ,
341431 "tokens" : num_of_tokens ,
342432 "latency" : latency ,
433+ "cost" : cost ,
343434 },
344435 }
345436 ]
@@ -352,7 +443,9 @@ def _append_row_to_df(
352443 # Perform casting
353444 input_columns = [col for col in self .df .columns if col .startswith ("message" )]
354445 casting_dict = {col : object for col in input_columns }
355- casting_dict .update ({"output" : object , "tokens" : int , "latency" : float })
446+ casting_dict .update (
447+ {"output" : object , "tokens" : int , "latency" : float , "cost" : float }
448+ )
356449 self .df = self .df .astype (casting_dict )
357450
358451 def _handle_data_publishing (self , config : Optional [Dict [str , any ]] = None ) -> None :
0 commit comments