@@ -26,7 +26,7 @@ use crate::{
2626 ChatCompletion , ChatCompletionChoice , ChatCompletionChunk , ChatCompletionComplete ,
2727 ChatCompletionDelta , ChatCompletionLogprob , ChatCompletionLogprobs , ChatCompletionTopLogprob ,
2828 ChatRequest , Chunk , CompatGenerateRequest , Completion , CompletionComplete , CompletionFinal ,
29- CompletionRequest , CompletionType , DeltaToolCall , Function , Prompt , Tool ,
29+ CompletionRequest , CompletionType , DeltaToolCall , Function , Prompt , Tool , EnergyMonitor ,
3030} ;
3131use crate :: { ChatTokenizeResponse , JsonSchemaConfig } ;
3232use crate :: { FunctionDefinition , HubPreprocessorConfig , ToolCall , ToolChoice } ;
@@ -293,6 +293,7 @@ pub(crate) async fn generate_internal(
293293 span : tracing:: Span ,
294294) -> Result < ( HeaderMap , u32 , Json < GenerateResponse > ) , ( StatusCode , Json < ErrorResponse > ) > {
295295 let start_time = Instant :: now ( ) ;
296+ let start_energy = EnergyMonitor :: total_energy_mj ( ) ;
296297 metrics:: counter!( "tgi_request_count" ) . increment ( 1 ) ;
297298
298299 // Do not log ultra long inputs, like image payloads.
@@ -317,6 +318,12 @@ pub(crate) async fn generate_internal(
317318 }
318319 _ => ( infer. generate ( req) . await ?, None ) ,
319320 } ;
321+
322+ let end_energy = EnergyMonitor :: total_energy_mj ( ) ;
323+ let energy_mj = match ( start_energy, end_energy) {
324+ ( Some ( start) , Some ( end) ) => Some ( end. saturating_sub ( start) ) ,
325+ _ => None ,
326+ } ;
320327
321328 // Token details
322329 let input_length = response. _input_length ;
@@ -354,6 +361,7 @@ pub(crate) async fn generate_internal(
354361 seed : response. generated_text . seed ,
355362 best_of_sequences,
356363 top_tokens : response. top_tokens ,
364+ energy_mj,
357365 } )
358366 }
359367 false => None ,
@@ -515,6 +523,7 @@ async fn generate_stream_internal(
515523 impl Stream < Item = Result < StreamResponse , InferError > > ,
516524) {
517525 let start_time = Instant :: now ( ) ;
526+ let start_energy = EnergyMonitor :: total_energy_mj ( ) ;
518527 metrics:: counter!( "tgi_request_count" ) . increment ( 1 ) ;
519528
520529 tracing:: debug!( "Input: {}" , req. inputs) ;
@@ -590,13 +599,19 @@ async fn generate_stream_internal(
590599 queued,
591600 top_tokens,
592601 } => {
602+ let end_energy = EnergyMonitor :: total_energy_mj( ) ;
603+ let energy_mj = match ( start_energy, end_energy) {
604+ ( Some ( start) , Some ( end) ) => Some ( end. saturating_sub( start) ) ,
605+ _ => None ,
606+ } ;
593607 // Token details
594608 let details = match details {
595609 true => Some ( StreamDetails {
596610 finish_reason: generated_text. finish_reason,
597611 generated_tokens: generated_text. generated_tokens,
598612 seed: generated_text. seed,
599613 input_length,
614+ energy_mj,
600615 } ) ,
601616 false => None ,
602617 } ;
0 commit comments