1+ # Copyright (c) 2023 Intel Corporation
2+ #
3+ # Licensed under the Apache License, Version 2.0 (the "License");
4+ # you may not use this file except in compliance with the License.
5+ # You may obtain a copy of the License at
6+ #
7+ # http://www.apache.org/licenses/LICENSE-2.0
8+ #
9+ # Unless required by applicable law or agreed to in writing, software
10+ # distributed under the License is distributed on an "AS IS" BASIS,
11+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+ # See the License for the specific language governing permissions and
13+ # limitations under the License.
14+
115import numpy as np
216import re
317import sys
418import os
19+
20+
def calculate_percentile(data, percentile):
    """Return the given percentile of ``data``.

    Args:
        data: Array-like of numeric samples.
        percentile: Percentile to compute, on NumPy's 0-100 scale.

    Returns:
        The value produced by ``np.percentile`` using
        ``method="closest_observation"``.
    """
    # "closest_observation" is one of NumPy's discontinuous estimators: it
    # selects one of the samples instead of interpolating between neighbours.
    # The ``method`` keyword requires NumPy >= 1.22.
    return np.percentile(data, percentile, method="closest_observation")
723
24+
def calculate_mean(data):
    """Return the arithmetic mean of ``data``.

    Args:
        data: Array-like of numeric samples.

    Returns:
        The result of ``np.mean(data)``. For an empty input NumPy returns
        ``nan`` and emits a ``RuntimeWarning`` rather than raising.
    """
    return np.mean(data)
1027
28+
1129def parse_output_file (file_path ):
1230 predictions = []
1331 with open (file_path , 'r' , encoding = 'UTF-8' , errors = 'ignore' ) as file :
@@ -17,6 +35,8 @@ def parse_output_file(file_path):
1735 prediction_time = float (match .group (1 )) # Assuming the prediction time is in the second column
1836 predictions .append (prediction_time )
1937 return predictions
38+
39+
2040def parse_memory_file (memory_file ):
2141 memory_values = []
2242 if os .path .exists (memory_file ):
@@ -44,14 +64,15 @@ def parse_memory_file(memory_file):
4464 batch_size = sys .argv [5 ]
4565 model_input = sys .argv [6 ]
4666 model_output = sys .argv [7 ]
47- memory_file = os .environ .get ("WORKING_DIR " ) + "/memory.txt"
67+ memory_file = os .environ .get ("WORKSPACE " ) + "/memory.txt"
4868 predictions = parse_output_file (output_file )
69+ assert len (predictions ) > 0 , "Model has no ouput tokens!"
4970 first_token_latency = predictions [0 ]
5071 p90 = calculate_percentile (predictions , 90 )
5172 p99 = calculate_percentile (predictions , 99 )
5273 latency_mean = calculate_mean (predictions [1 :])
5374 total_latency = np .sum (predictions )
54-
75+
5576 print ("P90: {:.2f} ms" .format (p90 ))
5677 print ("P99: {:.2f} ms" .format (p99 ))
5778 print ("average_latency: {:.2f} ms" .format (latency_mean ))
@@ -63,9 +84,10 @@ def parse_memory_file(memory_file):
6384 memory_mean = calculate_mean (top_50_percent )
6485
6586 print ("Memory Mean (Top 50%): {:.2f}" .format (memory_mean ))
66- log_file = os .environ .get ("WORKING_DIR" ) + "/cpp_graph_summary.log"
67- link = os .environ .get ("WORKING_DIR" ) + os .path .basename (output_file )
68- with open (log_file , 'a' ) as f :
87+ log_file = os .environ .get ("WORKSPACE" ) + "/cpp_graph_summary.log"
88+ log_prefix = os .environ .get ("log_prefix" )
89+ link = str (log_prefix ) + os .path .basename (output_file )
90+ with open (log_file , 'a' ) as f :
6991 f .write ("engine," )
7092 f .write ("latency," )
7193 f .write (model + "," )
@@ -81,8 +103,8 @@ def parse_memory_file(memory_file):
81103 f .write (link + "," )
82104 f .write ("{:.2f}," .format (p90 ))
83105 f .write ("{:.2f}," .format (p99 ))
84- #f.write(",latency:")
85- #for latency in predictions:
106+ # f.write(",latency:")
107+ # for latency in predictions:
86108 # f.write(",{:.2f}".format(latency))
87109 f .write ("\n " )
88110 f .close ()
0 commit comments