Skip to content

Commit fdb574a

Browse files
committed
Add graph visualizations to anomaly detection
1 parent 5d1a5e4 commit fdb574a

File tree

4 files changed

+247
-1
lines changed

4 files changed

+247
-1
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// This is a GraphViz dot template file for the visualization of anomaly archetype graphs with a selected central node.
2+
// The main part of the template is marked by the comments "Begin-Template" and "End-Template".
3+
// It also contains a simple example graph.
4+
//
5+
strict digraph top_central_template {
6+
//Begin-Template
7+
graph [layout = "fdp"; start = "7", fontname = "Helvetica,Arial,sans-serif"; labelloc = "t"; splines = "spline"; beautify = true;];
8+
node [fontsize = 8;];
9+
edge [fontsize = 4;];
10+
node [style = "filled"; color = "0.58 0.75 0.75"; fillcolor = "0.58 0.15 0.99"; margin = "0.00001,0.00001";];
11+
edge [color = "0.58 0.75 0.85"; arrowsize = "0.4";];
12+
13+
central [shape = "doublecircle";];
14+
central [fontsize = 10;];
15+
central [color = "0.52 0.7 0.7"; fillcolor = "0.52 0.4 0.9"; penwidth = 3;];
16+
17+
limit_hint [color = "0.52 0.7 0.7"; fillcolor = "0.52 0.4 0.9";]
18+
limit_hint [shape = "note"; penwidth = 2; fontsize = 10]
19+
limit_hint [label = "limited\nnode count...";]
20+
limit_hint -> central // Signals that the number of edges might have been limited
21+
22+
//End-Template
23+
"A" -> "central" [penwidth = 1.0; label = 1;];
24+
"A" -> "B" [penwidth = 3.0; label = 4;];
25+
"B" -> "central" [penwidth = 2.0; label = 2;];
26+
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// Anomaly Detection Graphs: Find top nodes marked as "central" including their incoming dependencies and output them in Graphviz format.
2+
3+
RETURN "graph [label=\"" + $projection_language + " " + $projection_node_label + " - Top Rank " + $projection_node_rank + " Hub\"];" AS graphVizDotNotationLine
4+
//Debugging
5+
// ,null AS sourceName
6+
// ,null AS targetName
7+
// ,null AS targetEndNodeOrNullName
8+
// ,null AS nonTargetEndNodeOrNullName
9+
// ,null AS centralNode
10+
// ,null AS penWidth
11+
// ,null AS normalizedWeight
12+
// ,null AS weight
13+
// ,null AS minWeight
14+
// ,null AS maxWeight
15+
UNION ALL
16+
// Step 1: Query overall statistics, e.g. min/max weight for later normalization
17+
MATCH (sourceForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetForStatistics)
18+
WHERE $projection_node_label IN labels(sourceForStatistics)
19+
AND $projection_node_label IN labels(targetForStatistics)
20+
WITH min(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight)) AS minWeight
21+
,max(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight)) AS maxWeight
22+
// Step 2: Query direct dependencies to the target
23+
MATCH (target)
24+
WHERE $projection_node_label IN labels(target)
25+
AND target.anomalyHubRank = toInteger($projection_node_rank)
26+
MATCH (source)-[directDependency:DEPENDS_ON]->(target)
27+
WHERE $projection_node_label IN labels(source)
28+
ORDER BY directDependency.weight DESC
29+
WITH minWeight
30+
,maxWeight
31+
,target
32+
// Limit direct dependencies for a cleaner visualization (noted in the Graph)
33+
,collect(source)[0..50] AS sources
34+
,collect(directDependency)[0..50] AS directDependencies
35+
// Step 3: Query dependencies among sources
36+
UNWIND sources AS source1
37+
OPTIONAL MATCH (source1)-[indirectDependency:DEPENDS_ON]->(source2)-[:DEPENDS_ON]->(target)
38+
WHERE $projection_node_label IN labels(source2)
39+
AND source1 <> source2
40+
WITH minWeight
41+
,maxWeight
42+
,target
43+
,directDependencies
44+
,source1
45+
// Limit indirect dependencies to one per direct dependency
46+
,collect(DISTINCT indirectDependency)[0] AS firstSource1Dependency
47+
WITH minWeight
48+
,maxWeight
49+
,target
50+
,directDependencies
51+
,collect(firstSource1Dependency) AS indirectFirstDependencies
52+
WITH *, directDependencies + indirectFirstDependencies AS allDependencies
53+
// Step 4: Prepare results in GraphViz format for all dependencies
54+
UNWIND allDependencies AS dependency
55+
WITH *, (endNode(dependency) = target) AS isTargetEndNode
56+
WITH *, CASE WHEN isTargetEndNode THEN endNode(dependency) ELSE null END AS targetEndNodeOrNull
57+
WITH *, CASE WHEN isTargetEndNode THEN null ELSE endNode(dependency) END AS nonTargetEndNodeOrNull
58+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight) AS weight
59+
// Scale the weight relative to the overall min/max weight. If all weights are equal (maxWeight = minWeight),
// fall back to 0.0 instead of dividing 0.0 by 0.0, which would leak a non-finite pen width into the GraphViz output.
WITH *, CASE WHEN maxWeight = minWeight THEN 0.0 ELSE toFloat(weight - minWeight) / toFloat(maxWeight - minWeight) END AS normalizedWeight
60+
WITH *, round((normalizedWeight * 2) + 0.4, 1) AS penWidth
61+
WITH *, coalesce(target.fqn, target.globalFqn, target.fileName, target.signature, target.name) AS targetName
62+
WITH *, replace(replace(targetName, '.', '.\\n'), '/', '/\\n') AS targetNameSplit
63+
WITH *, "\\n(hub #" + targetEndNodeOrNull.anomalyHubRank + ")" AS centralNodeSubLabel
64+
WITH *, "\"" + targetNameSplit + centralNodeSubLabel + "\"" AS centralNodeLabel
65+
WITH *, coalesce("central [label=" + centralNodeLabel+ ";]; ", "") AS centralNode
66+
WITH *, "\"" + startNode(dependency).name + "\"" AS sourceNode
67+
WITH *, coalesce("\"" + nonTargetEndNodeOrNull.name + "\"", "\"central\"") AS targetNode
68+
WITH *, " -> " + targetNode
69+
+ " [label = " + weight + ";"
70+
+ " penwidth = " + penWidth + ";"
71+
+ " ];" AS graphVizDotNotationEdge
72+
WITH *, centralNode + sourceNode + coalesce(graphVizDotNotationEdge, " [];") AS graphVizDotNotationLine
73+
ORDER BY targetEndNodeOrNull ASC, dependency.weight DESC, nonTargetEndNodeOrNull.name ASC
74+
RETURN DISTINCT graphVizDotNotationLine
75+
//Debugging
76+
// ,startNode(dependency).name AS sourceName
77+
// ,endNode(dependency).name AS targetName
78+
// ,targetEndNodeOrNull.name AS targetEndNodeOrNullName
79+
// ,nonTargetEndNodeOrNull.name AS nonTargetEndNodeOrNullName
80+
// ,centralNode
81+
// ,penWidth
82+
// ,normalizedWeight
83+
// ,dependency.weight AS weight
84+
// ,minWeight
85+
// ,maxWeight
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
#!/usr/bin/env bash
2+
3+
# Executes selected anomaly detection Cypher queries for GraphViz visualization.
4+
# Visualizes top ranked anomaly archetypes.
5+
# Requires an already running Neo4j graph database with already scanned and analyzed artifacts.
6+
# The reports (csv, dot and svg files) will be written into the sub directory reports/anomaly-detection/{language}_{codeUnit}.
7+
8+
# Requires executeQueryFunctions.sh, visualizeQueryResults.sh, cleanupAfterReportGeneration.sh
9+
10+
# Fail on any error ("-o errexit" = exit on first error, "-o pipefail" = exit on errors within piped commands)
11+
set -o errexit -o pipefail
12+
13+
# Overrideable Constants (defaults also defined in sub scripts)
14+
REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"}
15+
16+
## Get this "scripts/reports" directory if not already set
17+
# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
18+
# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
19+
# This way non-standard tools like readlink aren't needed.
20+
ANOMALY_DETECTION_GRAPHS_DIR=${REPORTS_SCRIPT_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )}
21+
#echo "anomalyDetectionGraphVisualization: ANOMALY_DETECTION_GRAPHS_DIR=${ANOMALY_DETECTION_GRAPHS_DIR}"
22+
23+
# Get the "scripts" directory by taking the path of this script, going three directories up and then into "scripts".
24+
SCRIPTS_DIR=${SCRIPTS_DIR:-"${ANOMALY_DETECTION_GRAPHS_DIR}/../../../scripts"} # Repository directory containing the shell scripts
25+
# echo "anomalyDetectionGraphVisualization: SCRIPTS_DIR=${SCRIPTS_DIR}"
26+
27+
# Get the "scripts/visualization" directory.
28+
VISUALIZATION_SCRIPTS_DIR=${VISUALIZATION_SCRIPTS_DIR:-"${SCRIPTS_DIR}/visualization"} # Repository directory containing the shell scripts for visualization
29+
# echo "anomalyDetectionGraphVisualization: VISUALIZATION_SCRIPTS_DIR=${VISUALIZATION_SCRIPTS_DIR}"
30+
31+
# Define functions to execute cypher queries from within a given file
32+
source "${SCRIPTS_DIR}/executeQueryFunctions.sh"
33+
34+
# Runs a parametrized query, converts its results into GraphViz format and creates a Graph visualization.
35+
# Intended to output (at most) 10 indexed files (for report_name="TopHub" then TopHub1, TopHub2,...) with a focused visualization of one selected node and its surroundings. The loop currently only generates the first 2 (see TODO).
36+
#
37+
# Required Parameters:
38+
# - report_name=...
39+
# Name of the query and then also the resulting visualization file.
40+
# - template_name=...
41+
# Name of the GraphViz template gv file.
42+
# - projection_language=...
43+
# Name of the associated programming language. Examples: "Java", "Typescript"
44+
# - projection_node_label=...
45+
# Label of the nodes that will be used for the projection. Example: "Package"
46+
create_graph_visualization() {
47+
local nodeLabel
48+
nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )
49+
50+
local language
51+
language=$( extractQueryParameter "projection_language" "${@}" )
52+
53+
local report_name
54+
report_name=$( extractQueryParameter "report_name" "${@}" )
55+
56+
local template_name
57+
template_name=$( extractQueryParameter "template_name" "${@}" )
58+
59+
echo "anomalyDetectionGraphVisualization: $(date +'%Y-%m-%dT%H:%M:%S%z') Creating ${language} ${nodeLabel} ${report_name} visualizations with template ${template_name}..."
60+
61+
local detail_report_directory_name="${language}_${nodeLabel}"
62+
local detail_report_directory="${FULL_REPORT_DIRECTORY}/${detail_report_directory_name}/GraphVisualizations"
63+
mkdir -p "${detail_report_directory}"
64+
65+
#TODO output all 10 indices
66+
for index in {1..2}; do
67+
# Query Graph data
68+
local resultFileName="${detail_report_directory}/${report_name}${index}"
69+
local queryResultFile="${resultFileName}.csv"
70+
execute_cypher "${ANOMALY_DETECTION_GRAPHS_DIR}/${report_name}.cypher" "${@}" "projection_node_rank=${index}" > "${queryResultFile}" || true
71+
72+
# Remove empty files
73+
# Note: Afterwards, detail_report_directory might be deleted as well.
74+
# In that case the image generation is finished and the loop needs to be terminated.
75+
source "${SCRIPTS_DIR}/cleanupAfterReportGeneration.sh" "${detail_report_directory}"
76+
# Stop generation as soon as the first query result is empty or the directory is deleted.
77+
if [ ! -f "${queryResultFile}" ] ; then
78+
break;
79+
fi
80+
81+
# Generate svg image using GraphViz
82+
source "${VISUALIZATION_SCRIPTS_DIR}/visualizeQueryResults.sh" "${queryResultFile}" --template "${ANOMALY_DETECTION_GRAPHS_DIR}/${template_name}.template.gv"
83+
84+
# Clean up after graph visualization image generation:
85+
rm -rf "${queryResultFile}" # Remove query result # TODO reactive cleanup
86+
# Collect graphviz files in a "graphviz" sub directory
87+
mkdir -p "${detail_report_directory}/graphviz"
88+
mv -f "${resultFileName}.gv" "${detail_report_directory}/graphviz"
89+
90+
# Create visualization reference Markdown file to be embeddable in main Markdown report
91+
if [ "${index}" == "1" ]; then
92+
{
93+
echo ""
94+
echo "##### ${language} ${nodeLabel} - ${report_name} Graph Visualizations"
95+
echo ""
96+
} > "${detail_report_directory}/VisualizationsReference.md"
97+
fi
98+
echo "![${report_name} ${index}](./${detail_report_directory_name}/GraphVisualizations/${report_name}${index}.svg)" >> "${detail_report_directory}/VisualizationsReference.md"
99+
done
100+
}
101+
102+
# Runs queries, outputs their results in GraphViz format and creates Graph visualizations.
103+
#
104+
# Required Parameters:
105+
# - projection_language=...
106+
# Name of the associated programming language. Examples: "Java", "Typescript"
107+
# - projection_node_label=...
108+
# Label of the nodes that will be used for the projection. Example: "Package"
109+
anomaly_detection_graph_visualization() {
110+
111+
create_graph_visualization "report_name=TopHub" "template_name=TopCentral" "${@}"
112+
113+
}
114+
115+
116+
# Create report directory
117+
REPORT_NAME="anomaly-detection"
118+
FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}"
119+
mkdir -p "${FULL_REPORT_DIRECTORY}"
120+
121+
# Query Parameter key pairs for projection and algorithm side
122+
QUERY_NODE="projection_node_label"
123+
QUERY_LANGUAGE="projection_language"
124+
125+
# -- Detail Reports for each code type -------------------------------
126+
127+
#TODO output all types
128+
#anomaly_detection_graph_visualization "${QUERY_NODE}=Artifact" "${QUERY_LANGUAGE}=Java"
129+
anomaly_detection_graph_visualization "${QUERY_NODE}=Package" "${QUERY_LANGUAGE}=Java"
130+
#anomaly_detection_graph_visualization "${QUERY_NODE}=Type" "${QUERY_LANGUAGE}=Java"
131+
#anomaly_detection_graph_visualization "${QUERY_NODE}=Module" "${QUERY_LANGUAGE}=Typescript"
132+
133+
# ---------------------------------------------------------------
134+
135+
echo "anomalyDetectionGraphVisualization: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished."

scripts/visualization/visualizeQueryResults.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ echo "visualizeQueryResults: VISUALIZATION_SCRIPTS_DIR=${VISUALIZATION_SCRIPTS_D
1717
# Read the first unnamed input argument containing the name of the query result CSV file
1818
inputCsvFileName=""
1919
case "${1}" in
20-
"--"*) ;; # Skipping named command line options to forward them later to the "analyze" command
20+
"--"*) ;; # Skipping named command line options to forward them later to the "convertQueryResultCsvToGraphVizDotFile" command
2121
*)
2222
inputCsvFileName="${1}"
2323
shift || true

0 commit comments

Comments
 (0)