1- import os
21import datetime
2+ import os
33import re
44
55import numpy as np
# Output locations for the generated report files.
REPORT_NAME = "artifacts/output/rq4.csv"
TEX_REPORT_NAME = "artifacts/output/rq4.tex"

# Names of the two result columns; kept as a list because it is used
# directly as a DataFrame column selector.
CALC_NAMES = ["Vanilla", "Improved"]

# Maps a full property identifier ("<TestClass>#<method>") to the short
# label that is used as the key of the collected timing results.
propertyShortNames = {
    # TestSmart*Serializer properties
    "TestSmartByteSerializer#canRoundTripBytes": "byte",
    "TestSmartIntegerSerializer#canRoundTripIntegers": "int",
    "TestSmartListSerializer#canRoundTripSerializableLists": "list",
    "TestSmartLongSerializer#canRoundTripLongs": "long",
    "TestSmartOptionalSerializer#canRoundTripPresentOptionals": "optionals",
    "TestSmartPairSerializer#canRoundTripPairs": "pair",
    "TestSmartShortSerializer#canRoundTripShort": "short",
    "TestSmartStringSerializer#canRoundTripStrings": "string",
    "TestSmartListSerializer#canRoundTripSerializableListsWithGenerator": "list*",
    # GenTestFormat properties
    "GenTestFormat#dataRoundTrip": "data",
    "GenTestFormat#messageRoundTrip": "message",
    "GenTestFormat#primitiveRoundTrip": "primitive",
    # CharClassesQuickcheck properties
    "CharClassesQuickcheck#addSet": "addSet",
    "CharClassesQuickcheck#addSingle": "addSingle",
    "CharClassesQuickcheck#addSingleSingleton": "addSingleton",
    "CharClassesQuickcheck#addString": "addString",
    # StateSetQuickcheck properties
    "StateSetQuickcheck#addStateDoesNotRemove": "add",
    "StateSetQuickcheck#containsElements": "contains",
    "StateSetQuickcheck#removeAdd": "remove",
}

# First value of the running row number written to the 'N' column.
row_count = 1

38+
def obtain_stats_directories(results_directory: str) -> list[str]:
    """Return the names of the immediate subdirectories of *results_directory*.

    Args:
        results_directory: Path of the directory to inspect.

    Returns:
        The subdirectory names (not full paths) in the order reported by
        ``os.walk``. An empty list is returned when the directory does not
        exist or cannot be listed, because ``os.walk`` yields nothing then.
    """
    # os.walk yields (directory, subdirectories, files) tuples top-down and
    # lazily; the first tuple describes results_directory itself, so only that
    # one is consumed instead of walking the whole tree.
    _, subdirectories, _ = next(os.walk(results_directory), (results_directory, [], []))
    return subdirectories
@@ -45,14 +71,15 @@ def evaluate_directories(project_name: str, results_directory: str, directories:
def retrieve_time_elapsed(directory_path: str, valid_htmls: list[str]) -> dict[str, float]:
    """Collect the reported elapsed time of every HTML report in a directory.

    Args:
        directory_path: Directory that contains the HTML files.
        valid_htmls: File names (relative to *directory_path*) to read. The
            name without its ``.html`` suffix must be a key of
            ``propertyShortNames``.

    Returns:
        A mapping from the property's short name to the elapsed time in
        seconds, rounded to two decimals. Files whose contents do not contain
        a ``Total Time Elapsed: ... seconds`` entry are left out.

    Raises:
        KeyError: If a file name has no entry in ``propertyShortNames``.
        ValueError: If the captured elapsed time is not a valid float.
    """
    times_elapsed_dict = {}
    for html_file in valid_htmls:
        # Strip only the trailing extension; a ".html" elsewhere in the name
        # belongs to the property identifier.
        property_name = html_file.removesuffix(".html")
        property_short_name = propertyShortNames[property_name]
        # os.path.join works whether or not directory_path ends in a separator.
        file_path = os.path.join(directory_path, html_file)
        with open(file_path) as f:
            contents = f.read()
        time_elapsed_match = re.search('Total Time Elapsed: (.+?) seconds', contents)
        if time_elapsed_match:
            time_elapsed = time_elapsed_match.group(1)
            times_elapsed_dict[property_short_name] = round(float(time_elapsed), 2)
    return times_elapsed_dict

5784
5885def generate_report_stats (stat_values : dict [str , dict ]) -> dict [str , str ]:
@@ -72,8 +99,8 @@ def generate_report_stats(stat_values: dict[str, dict]) -> dict[str, str]:
7299 property_stats_dict = {}
73100 for key , val in property_dict .items ():
74101 np_array = np .array (val )
75- mean = round (np_array .mean (), 2 )
76- standard_dev = round (np_array .std (), 2 )
102+ mean = '{:.2f}' . format ( round (np_array .mean (), 2 ) )
103+ standard_dev = '{:.2f}' . format ( round (np_array .std (), 2 ) )
77104 property_stats_dict [key ] = str (mean ) + " \u00B1 " + str (standard_dev )
78105 return property_stats_dict
79106
@@ -83,11 +110,25 @@ def generate_project_report(project_name: str, final_stats: dict[str, str], fina
83110 return final_report_dict
84111
85112
def generate_project_df(final_stats: dict[str, str], final_fixed_stats: dict[str, str]) -> pd.DataFrame:
    """Build the per-project comparison table of vanilla and improved results.

    Args:
        final_stats: Maps a property name to its formatted statistic for the
            vanilla run.
        final_fixed_stats: Maps a property name to its formatted statistic for
            the improved run.

    Returns:
        A DataFrame with the columns ``N``, ``Property``, ``Vanilla`` and
        ``Improved``. Properties present in only one input are kept (outer
        merge) with a missing value in the other column. ``N`` numbers the
        rows consecutively starting at the module-level ``row_count``.
    """
    vanilla_df = pd.DataFrame()
    vanilla_df['Property'] = list(final_stats)
    vanilla_df['Vanilla'] = list(final_stats.values())

    improved_df = pd.DataFrame()
    improved_df['Property'] = list(final_fixed_stats)
    improved_df['Improved'] = list(final_fixed_stats.values())

    merged_df = pd.merge(vanilla_df, improved_df, how='outer', on='Property')
    # Running row number, offset by the module-level starting value.
    merged_df['N'] = pd.RangeIndex(start=row_count, stop=len(merged_df.index) + row_count)

    # Select the columns in the order they are presented in the report.
    return merged_df[['N', 'Property', 'Vanilla', 'Improved']]


127+
128+
86129def main ():
87- final_report = {}
88- df_dict = {}
130+ final_dataset = {}
89131 for project_name in PROJECTS :
90- print ("Starting " + project_name )
91132 fixed_project_name = project_name + "-fixed"
92133 results_directory = BASE_RESULT_DIR + project_name + "/"
93134 fixed_results_directory = BASE_RESULT_DIR + fixed_project_name + "/"
@@ -104,15 +145,63 @@ def main():
104145 # obtain mean/st dev
105146 final_stats = generate_report_stats (stat_values = raw_stats )
106147 final_fixed_stats = generate_report_stats (stat_values = fixed_raw_stats )
107- report = generate_project_report (project_name = project_name , final_stats = final_stats , final_fixed_stats = final_fixed_stats )
108- df_dict [project_name ] = report
109- final_report .update (report )
110- print ("Completed " + project_name )
111-
112- for key , val in df_dict .items ():
113- df = pd .DataFrame (val ).reset_index ()
114- df .to_csv (path_or_buf = "artifacts/output/" + key + "_rq4.csv" )
115- df .style .to_latex (buf = "artifacts/output/" + key + "_rq4.tex" )
148+ project_df = generate_project_df (final_stats = final_stats , final_fixed_stats = final_fixed_stats )
149+ final_dataset [project_name ] = project_df
150+
151+
152+ with open (TEX_REPORT_NAME , 'w' ) as tf :
153+ df = pd .DataFrame ()
154+ for project in PROJECTS :
155+ final_dataset [project ]['_style' ] = ''
156+ proj_mean_and_std = final_dataset [project ][CALC_NAMES ].copy ()
157+ vanilla_mean = pd .DataFrame (proj_mean_and_std ['Vanilla' ].apply (lambda v : float (v .split (" \u00B1 " )[0 ]) if
158+ " \u00B1 " in str (v ) else np .nan )).reset_index ()
159+ improved_mean = pd .DataFrame (proj_mean_and_std ['Improved' ].apply (lambda v : float (v .split (" \u00B1 " )[0 ]) if
160+ " \u00B1 " in str (v ) else np .nan )).reset_index ()
161+
162+ proj_stats = pd .merge (vanilla_mean .copy (), improved_mean .copy (), how = 'outer' , on = 'index' )[CALC_NAMES ]
163+ final_dataset [project ]['Difference' ] = proj_stats [['Vanilla' , 'Improved' ]].pct_change (axis = 'columns' )['Improved' ]
164+ proj_mean = pd .merge (vanilla_mean , improved_mean , how = 'outer' , on = 'index' )[CALC_NAMES ].mean ()
165+ proj_mean ['_style' ] = 'BOLD'
166+ proj_mean ['N' ] = ''
167+ proj_mean ['Property' ] = 'Average'
168+ final_dataset [project ].loc ['mean' ] = proj_mean
169+
170+ header = dict (zip (['N' , 'Property' , 'Vanilla' , 'Improved' , 'Difference' ], ['' , '' , '' , '' , '' ]))
171+ df = pd .concat ([
172+ df ,
173+ pd .DataFrame (header | {'_style' : 'HEADER' , 'Property' : project }, index = [0 ]),
174+ final_dataset [project ]
175+ ], ignore_index = True )
176+ # break
177+ bold_rows = df [ df ['_style' ] == 'BOLD' ].index
178+ header_rows = df [ df ['_style' ] == 'HEADER' ].index
179+
180+ latexTable = df \
181+ .drop (columns = ['_style' ]) \
182+ .style \
183+ .hide (axis = 0 ) \
184+ .format (precision = 2 ) \
185+ .set_properties (subset = pd .IndexSlice [header_rows , :], ** {'HEADER' : '' }) \
186+ .set_properties (subset = pd .IndexSlice [bold_rows , :], ** {'textbf' : '--rwrap' }) \
187+ .to_latex (hrules = False )
188+
189+ outTable = ''
190+
191+ # transform to sub headers
192+ for line in latexTable .splitlines (keepends = True ):
193+ s = line .split ('&' )
194+ c = str (len (s ))
195+
196+ possibleCommand = s [0 ].strip ()
197+
198+ if possibleCommand == '\HEADER' :
199+ outTable += '\\ hline' + "\n " + '\multicolumn{' + c + '}{c}{' + s [1 ].strip ()[7 :].strip () + '}' + " \\ \\ \n " + '\\ hline' + "\n "
200+ else :
201+ outTable += line
202+
203+ tf .write (outTable )
204+
116205
117206
118207if __name__ == "__main__" :
0 commit comments