Skip to content

Commit 9fcbddb

Browse files
authored
Jesse (gousiosg#21)
* Progress on comparison tool for RQ1 * Progress on comparison tool for RQ1 * Progress on comparison tool for RQ1 * Progress on comparison tool for RQ1 * Progress on comparison tool for RQ1 * Fix issue with copying output with subprojects and timestamps * Add rq1 experiment bash and python script * Updated rq1 experiment bash and python script * Updated rq1 experiment bash and python script
1 parent 8d5c67e commit 9fcbddb

File tree

9 files changed

+544
-10
lines changed

9 files changed

+544
-10
lines changed
File renamed without changes.

common.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Short display labels for the property-test entry points, keyed by the
# fully-qualified test method name followed by its JVM method descriptor.
# Entries are grouped by the package prefix of the key (com.indeed.mph,
# convex, jflex).
shortNames = {
    # com.indeed.mph serializer round-trip tests
    "com.indeed.mph.serializers.TestSmartByteSerializer.canRoundTripBytes(B)V": "byte",
    "com.indeed.mph.serializers.TestSmartIntegerSerializer.canRoundTripIntegers(I)V": "int",
    "com.indeed.mph.serializers.TestSmartListSerializer.canRoundTripSerializableLists(Ljava/util/List;Ljava/util/List;Ljava/util/List;)V": "list",
    "com.indeed.mph.serializers.TestSmartLongSerializer.canRoundTripLongs(J)V": "long",
    "com.indeed.mph.serializers.TestSmartOptionalSerializer.canRoundTripPresentOptionals(J)V": "optionals",
    "com.indeed.mph.serializers.TestSmartPairSerializer.canRoundTripPairs(Lcom/indeed/util/core/Pair;)V": "pair",
    "com.indeed.mph.serializers.TestSmartShortSerializer.canRoundTripShort(S)V": "short",
    "com.indeed.mph.serializers.TestSmartStringSerializer.canRoundTripStrings(Ljava/lang/String;)V": "string",
    # convex.comms format tests
    "convex.comms.GenTestFormat.dataRoundTrip(Lconvex/core/data/ACell;)V": "data",
    "convex.comms.GenTestFormat.messageRoundTrip(Ljava/lang/String;)V": "message",
    "convex.comms.GenTestFormat.primitiveRoundTrip(Lconvex/core/data/ACell;)V": "primitive",
    # jflex quickcheck tests
    "jflex.core.unicode.CharClassesQuickcheck.addSet(Ljflex/core/unicode/CharClasses;Ljflex/core/unicode/IntCharSet;I)V": "addSet",
    "jflex.core.unicode.CharClassesQuickcheck.addSingle(Ljflex/core/unicode/CharClasses;II)V": "addSingle",
    "jflex.core.unicode.CharClassesQuickcheck.addSingleSingleton(Ljflex/core/unicode/CharClasses;I)V": "addSingleton",
    "jflex.core.unicode.CharClassesQuickcheck.addString(Ljflex/core/unicode/CharClasses;Ljava/lang/String;I)V": "addString",
    "jflex.state.StateSetQuickcheck.addStateDoesNotRemove(Ljflex/state/StateSet;I)V": "add",
    "jflex.state.StateSetQuickcheck.containsElements(Ljflex/state/StateSet;I)V": "contains",
    "jflex.state.StateSetQuickcheck.removeAdd(Ljflex/state/StateSet;I)V": "remove",
}

rq1_data.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
import pandas as pd
2+
from common import shortNames
3+
4+
# Columns averaged per project in the combined entry-point table.
CALC_NAMES = ['FP', 'FN', 'TP']

# One entry per project: (display name, input CSV, per-project LaTeX output).
projects = [
    ('Convex', 'artifacts/experiment/rq1_convex.csv', 'artifacts/experiment/rq1_table_convex.tex'),
    ('jFlex', 'artifacts/experiment/rq1_jflex.csv', 'artifacts/experiment/rq1_table_jflex.tex'),
    ('MPH Table', 'artifacts/experiment/rq1_mph-table.csv', 'artifacts/experiment/rq1_table_mph-table.tex'),
]

# LaTeX table with one summed row per project.
byProjNameFile = 'artifacts/experiment/rq1_table_projects.tex'

# LaTeX table listing every entry point, grouped under project headings.
byAllEntrypointNameFile = 'artifacts/experiment/rq1_table_all_entrypoints.tex'

# Marker command the Styler emits on project-heading rows; it is rewritten
# into a \multicolumn heading by promote_project_headers().
HEADER_COMMAND = '\\HEADER'


def classify_reachability(data):
    """Add boolean reachability flags and 0/1 FP/FN/TP/TN columns to *data*.

    The 'inJaCoCo' and 'inPrunedGraph' columns are converted in place from
    "Y"/"N" text to booleans; any value other than "Y" becomes False.
    Returns the same DataFrame for convenience.
    """
    data['inJaCoCo'] = data['inJaCoCo'] == "Y"
    data['inPrunedGraph'] = data['inPrunedGraph'] == "Y"

    data['reachableJaCoCo'] = data['inJaCoCo']
    data['reachableProperty'] = data['inPrunedGraph']

    jacoco = data['reachableJaCoCo']
    pruned = data['reachableProperty']

    # FP: flagged in JaCoCo but absent from the pruned graph.
    # FN: absent from JaCoCo but present in the pruned graph.
    # NOTE(review): the original comments described FP as "tool identifies
    # code as reachable, but cannot be reached by a property test". If
    # inPrunedGraph is the tool's output, FP and FN may be swapped here --
    # confirm against the intended definitions before changing the logic.
    data['FP'] = (jacoco & ~pruned).astype('int64')
    data['FN'] = (~jacoco & pruned).astype('int64')

    # TP: both sources agree the code is reachable.
    data['TP'] = (jacoco & pruned).astype('int64')

    # TN: both sources agree the code is NOT reachable.
    data['TN'] = (~jacoco & ~pruned).astype('int64')

    return data


def summarize_by_property(data, first_row):
    """Sum FP/FN/TP per 'Property' and number the rows starting at *first_row*.

    Returns a new DataFrame with columns N, Property, FP, FN, TP, where N is
    a running row number so numbering can continue across projects.
    """
    counts = data[['Property', 'FP', 'FN', 'TP']].groupby(by='Property').sum()
    counts['N'] = pd.RangeIndex(start=first_row, stop=len(counts.index) + first_row)
    counts.reset_index(inplace=True)
    return counts[['N', 'Property', 'FP', 'FN', 'TP']].copy()


def promote_project_headers(latexTable):
    """Rewrite rows marked with the HEADER command into centred sub-headings.

    A row whose first cell is exactly the HEADER marker is replaced by a
    \\multicolumn line (spanning as many columns as the row has cells) that
    carries the text of the second cell, framed by \\hline. All other lines
    are passed through unchanged, including their line endings.
    """
    out = []

    for line in latexTable.splitlines(keepends=True):
        cells = line.split('&')

        if cells[0].strip() == HEADER_COMMAND and len(cells) > 1:
            # the project name sits in the second cell, after the marker
            title = cells[1].strip().removeprefix(HEADER_COMMAND).strip()
            out.append(
                '\\hline\n'
                '\\multicolumn{' + str(len(cells)) + '}{c}{' + title + '} \\\\\n'
                '\\hline\n'
            )
        else:
            out.append(line)

    return ''.join(out)


def render_all_entrypoints(dataSetSum):
    """Build the LaTeX table of every entry point, grouped by project.

    *dataSetSum* maps project name to the frame from summarize_by_property().
    Each frame is modified in place: it gains a '_style' column and a 'mean'
    row holding the per-project average of CALC_NAMES.
    """
    blankRow = dict.fromkeys(['N', 'Property', 'FP', 'FN', 'TP'], '')
    pieces = []

    for projName, _csvFile, _texFile in projects:
        table = dataSetSum[projName]
        table['_style'] = ''

        projMean = table[CALC_NAMES].mean()
        projMean['_style'] = 'BOLD'
        projMean['N'] = ''
        projMean['Property'] = 'Average'
        table.loc['mean'] = projMean

        # project header row, followed by the project data / average
        pieces.append(pd.DataFrame(blankRow | {'_style': 'HEADER', 'Property': projName}, index=[0]))
        pieces.append(table)

    newDF = pd.concat(pieces, ignore_index=True)

    bold_rows = newDF[newDF['_style'] == 'BOLD'].index
    header_rows = newDF[newDF['_style'] == 'HEADER'].index

    latexTable = newDF \
        .drop(columns=['_style']) \
        .style \
        .hide(axis=0) \
        .format(precision=2) \
        .set_properties(subset=pd.IndexSlice[header_rows, :], **{'HEADER': ''}) \
        .set_properties(subset=pd.IndexSlice[bold_rows, :], **{'textbf': '--rwrap'}) \
        .to_latex(hrules=False)

    return promote_project_headers(latexTable)


def main():
    """Read each project's RQ1 CSV and write the three kinds of LaTeX tables."""
    frames = []
    dataSetSum = {}
    rowCount = 1

    for projName, csvFile, texFile in projects:
        data = pd.read_csv(csvFile, sep=',', header=0)
        data['Project'] = projName
        classify_reachability(data)

        # add a friendly name for each entrypoint; an entry point missing
        # from shortNames raises KeyError rather than being silently dropped
        data['Property'] = data['entryPoint'].apply(lambda v: shortNames[v])

        dfSubset = summarize_by_property(data, rowCount)
        rowCount += len(dfSubset.index)
        dataSetSum[projName] = dfSubset

        with open(texFile, 'w', encoding='utf-8') as tf:
            tf.write(dfSubset.to_latex(index=False, header=False))

        frames.append(data)

    # concatenate once, rather than growing a frame that starts out empty
    dataSet = pd.concat(frames)

    # output sum group by projName
    fpfnSum = dataSet[['Project', 'FP', 'FN', 'TP']] \
        .sort_values(by='Project') \
        .groupby(by='Project') \
        .sum()
    fpfnSum['Total'] = dataSet[['Project']].groupby(by='Project').size()

    with open(byProjNameFile, 'w', encoding='utf-8') as tf:
        tf.write(fpfnSum.reset_index().style.hide(axis="index").to_latex())

    # output all projects with project headings; build the text first so a
    # failure does not leave a truncated file behind
    outTable = render_all_entrypoints(dataSetSum)

    with open(byAllEntrypointNameFile, 'w', encoding='utf-8') as tf:
        tf.write(outTable)


if __name__ == '__main__':
    main()

run_experiment_rq1.sh

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#!/bin/bash
#
# Runs the RQ1 comparison for each project, writing one CSV per project into
# artifacts/experiment, then runs rq1_data.py.

# exit if anything throws a bad exit code
set -e

# SET JCG_HOME based on the directory where this script resides.
# Changing into that directory before calling pwd yields an absolute path
# whether the script was invoked through a relative or an absolute path.
JCG_HOME="$(cd -- "$(dirname -- "$0")" && pwd)"

cd "$JCG_HOME" || exit 1

mkdir -p artifacts/experiment

for PROJECT in mph-table convex jflex
do
  FILE="artifacts/experiment/rq1_$PROJECT.csv"

  # run experiment
  echo "Running RQ1 for project $PROJECT with output going to $FILE"
  java -cp target/javacg-0.1-SNAPSHOT-jar-with-dependencies.jar edu.uic.bitslab.callgraph.Comparison -p "$PROJECT" -o "$FILE"

  # check that the expected file exists
  if [ ! -f "$FILE" ]; then
    # report on stderr and fail explicitly: a bare `exit` would return the
    # status of the echo (0) and make the failed run look successful
    echo "Experiment did not produce the expected output file" >&2
    exit 1
  fi
done

python rq1_data.py
29+

runall.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ mkdir -p serializedGraphs
1010

1111
for type in original fixed
1212
do
13-
for project in convex jflex mph-table JQF rpki-commons
13+
for project in convex jflex mph-table # JQF rpki-commons
1414
do
1515
echo $type for $project
1616

0 commit comments

Comments
 (0)