Skip to content

Commit 4101af0

Browse files
authored
Merge pull request gousiosg#25 from bitslab/alekh
Updated RQ4
2 parents 06159d5 + 0a375e3 commit 4101af0

File tree

1 file changed

+106
-17
lines changed

1 file changed

+106
-17
lines changed

artifacts/experiments/RQ4/generateResults.py

Lines changed: 106 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1-
import os
21
import datetime
2+
import os
33
import re
44

55
import numpy as np
@@ -10,6 +10,32 @@
1010
# Output paths for the RQ4 report; the .tex path is the file main() writes.
REPORT_NAME = "artifacts/output/rq4.csv"
TEX_REPORT_NAME = "artifacts/output/rq4.tex"

# Labels of the two measurement columns in every per-project table.
CALC_NAMES = ["Vanilla", "Improved"]

# Maps "<TestClass>#<property>" (an HTML report file name without its
# ".html" extension) to the short label used for that row in the report.
propertyShortNames = {
    # TestSmart*Serializer properties
    "TestSmartByteSerializer#canRoundTripBytes": "byte",
    "TestSmartIntegerSerializer#canRoundTripIntegers": "int",
    "TestSmartListSerializer#canRoundTripSerializableLists": "list",
    "TestSmartLongSerializer#canRoundTripLongs": "long",
    "TestSmartOptionalSerializer#canRoundTripPresentOptionals": "optionals",
    "TestSmartPairSerializer#canRoundTripPairs": "pair",
    "TestSmartShortSerializer#canRoundTripShort": "short",
    "TestSmartStringSerializer#canRoundTripStrings": "string",
    "TestSmartListSerializer#canRoundTripSerializableListsWithGenerator": "list*",
    # GenTestFormat properties
    "GenTestFormat#dataRoundTrip": "data",
    "GenTestFormat#messageRoundTrip": "message",
    "GenTestFormat#primitiveRoundTrip": "primitive",
    # CharClassesQuickcheck properties
    "CharClassesQuickcheck#addSet": "addSet",
    "CharClassesQuickcheck#addSingle": "addSingle",
    "CharClassesQuickcheck#addSingleSingleton": "addSingleton",
    "CharClassesQuickcheck#addString": "addString",
    # StateSetQuickcheck properties
    "StateSetQuickcheck#addStateDoesNotRemove": "add",
    "StateSetQuickcheck#containsElements": "contains",
    "StateSetQuickcheck#removeAdd": "remove",
}

# First value of the running "N" row-number column in the generated tables.
row_count = 1
38+
1339
def obtain_stats_directories(results_directory: str) -> list[str]:
    """Return the names of the immediate subdirectories of *results_directory*.

    Args:
        results_directory: Path of the directory to inspect.

    Returns:
        The subdirectory names (not full paths) in the order ``os.walk``
        reports them. An empty list is returned when the directory does not
        exist or cannot be read, because ``os.walk`` yields nothing then.
    """
    # os.walk is lazy and yields (directory, subdirectories, files) for the
    # top directory first, so only that first entry is needed; consuming the
    # whole generator would traverse every nested results folder for nothing.
    _, subdirectories, _ = next(os.walk(results_directory), ("", [], []))
    return subdirectories
@@ -45,14 +71,15 @@ def evaluate_directories(project_name: str, results_directory: str, directories:
4571
def retrieve_time_elapsed(directory_path: str, valid_htmls: list[str]) -> dict[str, float]:
    """Extract the reported total elapsed time from each HTML report.

    Args:
        directory_path: Directory that contains the HTML report files.
        valid_htmls: File names of the reports to read, each named
            ``<property>.html``.

    Returns:
        A dict mapping each property's short name (looked up in
        ``propertyShortNames``) to its elapsed time in seconds, rounded to
        two decimals. Reports without a "Total Time Elapsed" line are
        omitted.

    Raises:
        OSError: If a report file cannot be opened.
        ValueError: If the captured elapsed time is not a valid number.
    """
    # Compiled once; the pattern is the same for every report.
    elapsed_pattern = re.compile(r'Total Time Elapsed: (.+?) seconds')

    times_elapsed_dict = {}
    for html_file in valid_htmls:
        # Strip only the trailing extension, not every ".html" occurrence.
        property_name = html_file.removesuffix(".html")
        # Fall back to the full property name so that one unmapped property
        # does not abort the whole report with a KeyError.
        property_short_name = propertyShortNames.get(property_name, property_name)

        # os.path.join works whether or not directory_path ends with a separator.
        with open(os.path.join(directory_path, html_file)) as f:
            contents = f.read()

        elapsed_match = elapsed_pattern.search(contents)
        if elapsed_match:
            times_elapsed_dict[property_short_name] = round(float(elapsed_match.group(1)), 2)
    return times_elapsed_dict
5784

5885
def generate_report_stats(stat_values: dict[str, dict]) -> dict[str, str]:
@@ -72,8 +99,8 @@ def generate_report_stats(stat_values: dict[str, dict]) -> dict[str, str]:
7299
property_stats_dict = {}
73100
for key, val in property_dict.items():
74101
np_array = np.array(val)
75-
mean = round(np_array.mean(), 2)
76-
standard_dev = round(np_array.std(), 2)
102+
mean = '{:.2f}'.format(round(np_array.mean(), 2))
103+
standard_dev = '{:.2f}'.format(round(np_array.std(), 2))
77104
property_stats_dict[key] = str(mean) + " \u00B1 " + str(standard_dev)
78105
return property_stats_dict
79106

@@ -83,11 +110,25 @@ def generate_project_report(project_name: str, final_stats: dict[str, str], fina
83110
return final_report_dict
84111

85112

113+
def generate_project_df(final_stats: dict[str, str], final_fixed_stats: dict[str, str],
                        *, first_row: "int | None" = None) -> pd.DataFrame:
    """Combine vanilla and improved statistics into one table per project.

    Args:
        final_stats: Maps property name to its formatted statistic for the
            vanilla run.
        final_fixed_stats: Maps property name to its formatted statistic for
            the improved ("-fixed") run.
        first_row: Number assigned to the first row in the ``N`` column.
            When omitted, the module-level ``row_count`` is used.

    Returns:
        A DataFrame with the columns ``N``, ``Property``, ``Vanilla`` and
        ``Improved``. Properties present in only one of the inputs are kept
        (outer join) with a missing value in the other column.
    """
    # Only read the module global when the caller did not choose a start.
    start = row_count if first_row is None else first_row

    vanilla_df = pd.DataFrame({
        'Property': list(final_stats),
        'Vanilla': list(final_stats.values()),
    })
    improved_df = pd.DataFrame({
        'Property': list(final_fixed_stats),
        'Improved': list(final_fixed_stats.values()),
    })

    merged_df = pd.merge(vanilla_df, improved_df, how='outer', on='Property')
    # Consecutive row numbers, assigned positionally to the merged rows.
    merged_df['N'] = pd.RangeIndex(start=start, stop=start + len(merged_df.index))

    return merged_df[['N', 'Property', 'Vanilla', 'Improved']]
127+
128+
86129
def main():
87-
final_report = {}
88-
df_dict = {}
130+
final_dataset = {}
89131
for project_name in PROJECTS:
90-
print("Starting " + project_name)
91132
fixed_project_name = project_name + "-fixed"
92133
results_directory = BASE_RESULT_DIR + project_name + "/"
93134
fixed_results_directory = BASE_RESULT_DIR + fixed_project_name + "/"
@@ -104,15 +145,63 @@ def main():
104145
# obtain mean/st dev
105146
final_stats = generate_report_stats(stat_values=raw_stats)
106147
final_fixed_stats = generate_report_stats(stat_values=fixed_raw_stats)
107-
report = generate_project_report(project_name=project_name, final_stats=final_stats, final_fixed_stats=final_fixed_stats)
108-
df_dict[project_name] = report
109-
final_report.update(report)
110-
print("Completed " + project_name)
111-
112-
for key, val in df_dict.items():
113-
df = pd.DataFrame(val).reset_index()
114-
df.to_csv(path_or_buf="artifacts/output/" + key + "_rq4.csv")
115-
df.style.to_latex(buf="artifacts/output/" + key + "_rq4.tex")
148+
project_df = generate_project_df(final_stats=final_stats, final_fixed_stats=final_fixed_stats)
149+
final_dataset[project_name] = project_df
150+
151+
152+
with open(TEX_REPORT_NAME, 'w') as tf:
153+
df = pd.DataFrame()
154+
for project in PROJECTS:
155+
final_dataset[project]['_style'] = ''
156+
proj_mean_and_std = final_dataset[project][CALC_NAMES].copy()
157+
vanilla_mean = pd.DataFrame(proj_mean_and_std['Vanilla'].apply(lambda v: float(v.split(" \u00B1 ")[0]) if
158+
" \u00B1 " in str(v) else np.nan)).reset_index()
159+
improved_mean = pd.DataFrame(proj_mean_and_std['Improved'].apply(lambda v: float(v.split(" \u00B1 ")[0]) if
160+
" \u00B1 " in str(v) else np.nan)).reset_index()
161+
162+
proj_stats = pd.merge(vanilla_mean.copy(), improved_mean.copy(), how='outer', on='index')[CALC_NAMES]
163+
final_dataset[project]['Difference'] = proj_stats[['Vanilla', 'Improved']].pct_change(axis='columns')['Improved']
164+
proj_mean = pd.merge(vanilla_mean, improved_mean, how='outer', on='index')[CALC_NAMES].mean()
165+
proj_mean['_style'] = 'BOLD'
166+
proj_mean['N'] = ''
167+
proj_mean['Property'] = 'Average'
168+
final_dataset[project].loc['mean'] = proj_mean
169+
170+
header = dict(zip(['N', 'Property', 'Vanilla', 'Improved', 'Difference'], ['', '', '', '', '']))
171+
df = pd.concat([
172+
df,
173+
pd.DataFrame(header | {'_style': 'HEADER', 'Property': project}, index=[0]),
174+
final_dataset[project]
175+
], ignore_index=True)
176+
# break
177+
bold_rows = df[ df['_style'] == 'BOLD' ].index
178+
header_rows = df[ df['_style'] == 'HEADER' ].index
179+
180+
latexTable = df \
181+
.drop(columns=['_style']) \
182+
.style \
183+
.hide(axis=0) \
184+
.format(precision=2) \
185+
.set_properties(subset=pd.IndexSlice[header_rows, :], **{'HEADER': ''}) \
186+
.set_properties(subset=pd.IndexSlice[bold_rows, :], **{'textbf': '--rwrap'}) \
187+
.to_latex(hrules=False)
188+
189+
outTable = ''
190+
191+
# transform to sub headers
192+
for line in latexTable.splitlines(keepends=True):
193+
s = line.split('&')
194+
c = str(len(s))
195+
196+
possibleCommand = s[0].strip()
197+
198+
if possibleCommand == '\HEADER':
199+
outTable += '\\hline' + "\n" + '\multicolumn{' + c + '}{c}{' + s[1].strip()[7:].strip() + '}' + " \\\\\n" + '\\hline' + "\n"
200+
else:
201+
outTable += line
202+
203+
tf.write(outTable)
204+
116205

117206

118207
if __name__ == "__main__":

0 commit comments

Comments
 (0)