pombredanne
diff --git a/‎output/.gitkeep‎ renamed to ‎artifacts/experiment/.gitkeep‎ b/‎output/.gitkeep‎ renamed to ‎artifacts/experiment/.gitkeep‎
diff --git a/‎common.py‎
Lines changed: 20 additions & 0 deletions b/‎common.py‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎rq1_data.py‎
Lines changed: 129 additions & 0 deletions b/‎rq1_data.py‎
Lines changed: 129 additions & 0 deletions
diff --git a/‎run_experiment_rq1.sh‎
Lines changed: 29 additions & 0 deletions b/‎run_experiment_rq1.sh‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎runall.sh‎
Lines changed: 1 addition & 1 deletion b/‎runall.sh‎
Lines changed: 1 addition & 1 deletion
@@ -0,0 +1,20 @@
+# Maps each property-test entry point (fully qualified method name followed by
+# its JVM method descriptor) to the short label used for it in generated tables.
+# rq1_data.py indexes this dict directly, so an entry point that is missing
+# here raises KeyError there.
+shortNames = {
+    "com.indeed.mph.serializers.TestSmartByteSerializer.canRoundTripBytes(B)V": 'byte',
+    "com.indeed.mph.serializers.TestSmartIntegerSerializer.canRoundTripIntegers(I)V": 'int',
+    "com.indeed.mph.serializers.TestSmartListSerializer.canRoundTripSerializableLists(Ljava/util/List;Ljava/util/List;Ljava/util/List;)V": 'list',
+    "com.indeed.mph.serializers.TestSmartLongSerializer.canRoundTripLongs(J)V": 'long',
+    "com.indeed.mph.serializers.TestSmartOptionalSerializer.canRoundTripPresentOptionals(J)V": 'optionals',
+    "com.indeed.mph.serializers.TestSmartPairSerializer.canRoundTripPairs(Lcom/indeed/util/core/Pair;)V": 'pair',
+    "com.indeed.mph.serializers.TestSmartShortSerializer.canRoundTripShort(S)V": 'short',
+    "com.indeed.mph.serializers.TestSmartStringSerializer.canRoundTripStrings(Ljava/lang/String;)V": 'string',
+    "convex.comms.GenTestFormat.dataRoundTrip(Lconvex/core/data/ACell;)V": 'data',
+    "convex.comms.GenTestFormat.messageRoundTrip(Ljava/lang/String;)V": 'message',
+    "convex.comms.GenTestFormat.primitiveRoundTrip(Lconvex/core/data/ACell;)V": 'primitive',
+    "jflex.core.unicode.CharClassesQuickcheck.addSet(Ljflex/core/unicode/CharClasses;Ljflex/core/unicode/IntCharSet;I)V": 'addSet',
+    "jflex.core.unicode.CharClassesQuickcheck.addSingle(Ljflex/core/unicode/CharClasses;II)V": 'addSingle',
+    "jflex.core.unicode.CharClassesQuickcheck.addSingleSingleton(Ljflex/core/unicode/CharClasses;I)V": 'addSingleton',
+    "jflex.core.unicode.CharClassesQuickcheck.addString(Ljflex/core/unicode/CharClasses;Ljava/lang/String;I)V": 'addString',
+    "jflex.state.StateSetQuickcheck.addStateDoesNotRemove(Ljflex/state/StateSet;I)V": 'add',
+    "jflex.state.StateSetQuickcheck.containsElements(Ljflex/state/StateSet;I)V": 'contains',
+    "jflex.state.StateSetQuickcheck.removeAdd(Ljflex/state/StateSet;I)V": 'remove'
+}
@@ -0,0 +1,129 @@
+import pandas as pd
+from common import shortNames
+
+# metric columns that are averaged per project in the combined entry-point table
+CALC_NAMES = ['FP', 'FN', 'TP']
+
+# one entry per project: (display name, input CSV, per-project LaTeX table output)
+projects = [
+    ('Convex', 'artifacts/experiment/rq1_convex.csv', 'artifacts/experiment/rq1_table_convex.tex'),
+    ('jFlex', 'artifacts/experiment/rq1_jflex.csv', 'artifacts/experiment/rq1_table_jflex.tex'),
+    ('MPH Table', 'artifacts/experiment/rq1_mph-table.csv', 'artifacts/experiment/rq1_table_mph-table.tex'),
+]
+
+# LaTeX output: FP/FN/TP totals and row counts grouped by project
+byProjNameFile = 'artifacts/experiment/rq1_table_projects.tex'
+
+# LaTeX output: every project's entry points in one table with project sub-headings
+byAllEntrypointNameFile = 'artifacts/experiment/rq1_table_all_entrypoints.tex'
+
+# row-level data of all projects, grown by concatenation in the project loop
+dataSet = pd.DataFrame()
+# project display name -> per-property summary frame (N, Property, FP, FN, TP)
+dataSetSum = {}
+# next value of the running 'N' row number; numbering continues across projects
+rowCount = 1
+
+for project in projects:
+    projName = project[0]
+    csvFile = project[1]
+    texFile = project[2]
+
+    data = pd.read_csv(csvFile, sep=',', header=0)
+    data['Project'] = projName
+    data['inJaCoCo'] = data['inJaCoCo'] == "Y"  #convert Y/N to True/False
+    data['inPrunedGraph'] = data['inPrunedGraph'] == "Y"  #convert Y/N to True/False
+
+    data['reachableJaCoCo'] = data['inJaCoCo']
+    data['reachableProperty'] = data['inPrunedGraph']
+
+
+    # false-positives: tool identifies code as reachable,
+    #   but cannot be reached by a property test
+    data['FP'] = (data['reachableJaCoCo'] & ~data['reachableProperty'])
+    data['FP'] = data['FP'].apply(lambda v: 1 if v else 0)
+
+    # false-negatives: code that is reachable from the property
+    #   test but the tool does not identify it as such
+    data['FN'] = (~data['reachableJaCoCo'] & data['reachableProperty'])
+    data['FN'] = data['FN'].apply(lambda v: 1 if v else 0)
+
+    # JaCoCo and our tool agree that is reachability
+    data['TP'] = (data['reachableJaCoCo'] & data['reachableProperty'])
+    data['TP'] = data['TP'].apply(lambda v: 1 if v else 0)
+
+    # JaCoCo and our tool agree that is NOT reachable
+    data['TN'] = (~data['reachableJaCoCo'] & ~data['reachableProperty'])
+    data['TN'] = data['TN'].apply(lambda v: 1 if v else 0)
+
+    # add Name as a friendly name for each entrypoint
+    data['Property'] = data['entryPoint'].apply(lambda v: shortNames[v])
+
+    df = data[['Property', 'FP', 'FN', 'TP']].groupby(by='Property').sum().round(2)
+    df['N'] = pd.RangeIndex(start=rowCount, stop=len(df.index) + rowCount)
+    df.reset_index(inplace=True)
+    dfSubset = df[['N', 'Property', 'FP', 'FN', 'TP']]
+
+    rowCount = len(df.index) + rowCount
+    dataSetSum[projName] = dfSubset.copy()
+
+    with open(texFile, 'w') as tf:
+        tf.write(dfSubset.to_latex(index=False, header=False))
+
+    dataSet = pd.concat([dataSet, data.copy()])
+
+
+# output sum group by projName
+with open(byProjNameFile, 'w') as tf:
+    fpfnSum = dataSet[['Project', 'FP', 'FN', 'TP']]\
+        .sort_values(by='Project')\
+        .groupby(by='Project')\
+        .sum()
+
+    fpfnSum['Total'] = dataSet[['Project']].groupby(by='Project').size()
+    tf.write(fpfnSum.reset_index().style.hide(axis="index").to_latex())
+
+
+# output all projects with project headings
+with open(byAllEntrypointNameFile, 'w') as tf:
+    newDF = pd.DataFrame()
+
+    for project in projects:
+        projName = project[0]
+        dataSetSum[projName]['_style'] = ''
+
+        projMean = dataSetSum[projName][CALC_NAMES].mean()
+        projMean['_style'] = 'BOLD'
+        projMean['N'] = ''
+        projMean['Property'] = 'Average'
+        dataSetSum[projName].loc['mean'] = projMean
+
+        header = dict(zip(['N', 'Property', 'FP', 'FN', 'TP'], ['', '', '', '', '']))
+
+        newDF = pd.concat([
+            newDF,
+            pd.DataFrame(header | {'_style': 'HEADER', 'Property': projName}, index=[0]), # project header
+            dataSetSum[projName] # project data / avg
+        ], ignore_index=True)
+
+    # header_rows = newDF[newDF['N'] == '0HEADER'].index
+    bold_rows = newDF[ newDF['_style'] == 'BOLD' ].index
+    header_rows = newDF[ newDF['_style'] == 'HEADER' ].index
+
+    latexTable = newDF \
+        .drop(columns=['_style']) \
+        .style \
+        .hide(axis=0) \
+        .format(precision=2) \
+        .set_properties(subset=pd.IndexSlice[header_rows, :], **{'HEADER': ''}) \
+        .set_properties(subset=pd.IndexSlice[bold_rows, :], **{'textbf': '--rwrap'}) \
+        .to_latex(hrules=False)
+
+    outTable = ''
+
+    # transform to sub headers
+    for line in latexTable.splitlines(keepends=True):
+        s = line.split('&')
+        c = str(len(s))
+
+        possibleCommand = s[0].strip()
+
+        if possibleCommand == '\HEADER':
+            outTable += '\\hline' + "\n" + '\multicolumn{' + c + '}{c}{' + s[1].strip()[7:].strip() + '}' + " \\\\\n" + '\\hline' + "\n"
+        else:
+            outTable += line
+
+    tf.write(outTable)
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+# exit if anything throws a bad exit code
+set -e
+
+# SET JCG_HOME based on the directory where this script resides
+JCG_HOME="$(pwd)/$( dirname -- "$0"; )";
+
+cd $JCG_HOME || exit
+
+mkdir -p artifacts/experiment
+
+for PROJECT in mph-table convex jflex
+do
+	FILE=artifacts/experiment/rq1_$PROJECT.csv
+
+	# run experiment
+	echo Running RQ1 for project $PROJECT with output going to $FILE
+	java -cp target/javacg-0.1-SNAPSHOT-jar-with-dependencies.jar edu.uic.bitslab.callgraph.Comparison -p $PROJECT -o $FILE
+
+	# check that the expected file exists
+	if [ ! -f $FILE ]; then
+	  echo "Experiment did not produce the expected output file"
+		exit
+	fi
+done
+
+python rq1_data.py
+
@@ -10,7 +10,7 @@ mkdir -p serializedGraphs
 
 for type in original fixed
 do
-  for project in convex jflex mph-table JQF rpki-commons
+  for project in convex jflex mph-table # JQF rpki-commons
   do
     echo $type for $project