Skip to content

Commit 51253fc

Browse files
committed
Introduce anomaly detection pipeline with tuned clustering
1 parent c1d468f commit 51253fc

File tree

3 files changed

+696
-316
lines changed

3 files changed

+696
-316
lines changed
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
#!/usr/bin/env bash
2+
3+
# Pipeline that coordinates anomaly detection using the Graph Data Science Library of Neo4j.
4+
# It requires an already running Neo4j graph database with already scanned and analyzed artifacts.
5+
# The results will be written into the sub directory reports/anomaly-detection.
6+
7+
# Note that "scripts/prepareAnalysis.sh" is required to run prior to this script.
8+
9+
# Requires executeQueryFunctions.sh, projectionFunctions.sh, cleanupAfterReportGeneration.sh
10+
11+
# Fail on any error:
# "errexit" exits on the first failing command,
# "pipefail" lets a pipeline fail if any command within it fails.
set -o errexit -o pipefail

# Overridable constants (defaults also defined in sub scripts)
# Base directory for reports. Defaults to "reports" unless already set (e.g. by the environment).
REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"}
16+
17+
## Resolve the directory that contains this script if it is not already set.
# Even though $BASH_SOURCE is made for Bourne-like shells, it is also supported by others and is therefore the preferred solution here.
# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
# This way non-standard tools like readlink aren't needed.
# The output of "cd" is discarded since it may print the new directory when a CDPATH entry is used,
# which would otherwise end up in the captured value in addition to the output of "pwd".
ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR:-$(CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd -P)}
echo "anomalyDetectionPipeline: ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR}"

# Get the "scripts" directory by taking the path of this script, going two directories up and then into "scripts".
SCRIPTS_DIR=${SCRIPTS_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/../../scripts"} # Repository directory containing the shell scripts

# Get the "cypher" directory by taking the path of this script, going two directories up and then into "cypher".
CYPHER_DIR=${CYPHER_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/../../cypher"}
27+
28+
# Prints the command line usage to stderr and terminates the script with exit code 1.
# COLOR_ERROR and COLOR_DEFAULT are not defined in this script. They expand to empty
# strings when unset (presumably they are provided by sourced scripts or the environment).
usage() {
  {
    echo -e "${COLOR_ERROR}"
    echo "Usage: $0 [--verbose]"
    echo -e "${COLOR_DEFAULT}"
  } >&2
  exit 1
}
35+
36+
# Default values
verboseMode="" # either "" or "--verbose"

# Parse command line arguments.
# Every processed argument is shifted away, so no positional parameters remain after the loop.
# An unknown option is reported on stderr and ends the script through "usage".
while [[ $# -gt 0 ]]; do
  key="$1"

  case "${key}" in
    --verbose)
      verboseMode="--verbose"
      ;;
    *)
      echo -e "${COLOR_ERROR}anomalyDetectionPipeline: Error: Unknown option: ${key}${COLOR_DEFAULT}" >&2
      usage
      ;;
  esac
  shift # cannot fail here since the loop condition guarantees at least one argument
done
55+
56+
# Define functions to execute a cypher query from within the given file (first and only argument)
57+
source "${SCRIPTS_DIR}/executeQueryFunctions.sh"
58+
59+
# Define functions to create and delete Graph Projections like "createUndirectedDependencyProjection"
60+
source "${SCRIPTS_DIR}/projectionFunctions.sh"
61+
62+
# Create the report directory "<REPORTS_DIRECTORY>/anomaly-detection" including missing parent directories.
REPORT_NAME="anomaly-detection"
FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}"
mkdir -p -- "${FULL_REPORT_DIRECTORY}"
66+
67+
# Query parameter keys.
# Each key is combined with a value into a "key=value" argument further below.
# The first key of each pair is passed to the projection functions,
# the second one to the Python scripts that run on the projection.

# Name of the projection
CREATE_PROJECTION="dependencies_projection"
READ_PROJECTION="projection_name"

# Node label of the projection
PROJECTION_NODE="dependencies_projection_node"
ALGORITHM_NODE="projection_node_label"

# Weight property of the projection
PROJECTION_WEIGHT="dependencies_projection_weight_property"
ALGORITHM_WEIGHT="projection_weight_property"

# Code independent parameters (complete "key=value" argument)
LEIDEN_COMMUNITY_WRITE_PROPERTY="projection_write_property=communityLeidenIdTuned"
79+
80+
# Runs both tuned analysis scripts for one projection and reports the duration of each with "time":
# first "tunedLeidenCommunityDetection.py", then "tunedNodeEmbeddingClustering.py".
# Globals read:
#   ANOMALY_DETECTION_SCRIPT_DIR, READ_PROJECTION, ALGORITHM_NODE, ALGORITHM_WEIGHT,
#   LEIDEN_COMMUNITY_WRITE_PROPERTY, verboseMode
# Arguments:
#   $1 - name of the projection
#   $2 - node label
#   $3 - name of the weight property
# Returns:
#   The exit status of the last executed script.
runTunedAnomalyDetection() {
  local projectionName="$1"
  local nodeLabel="$2"
  local weightProperty="$3"

  # "key=value" arguments shared by both scripts
  local -a algorithmArguments=(
    "${READ_PROJECTION}=${projectionName}"
    "${ALGORITHM_NODE}=${nodeLabel}"
    "${ALGORITHM_WEIGHT}=${weightProperty}"
  )

  # ${verboseMode:+...} passes "--verbose" as one argument and no argument at all when verboseMode is empty.
  time "${ANOMALY_DETECTION_SCRIPT_DIR}/tunedLeidenCommunityDetection.py" "${algorithmArguments[@]}" "${LEIDEN_COMMUNITY_WRITE_PROPERTY}" ${verboseMode:+"${verboseMode}"}
  time "${ANOMALY_DETECTION_SCRIPT_DIR}/tunedNodeEmbeddingClustering.py" "${algorithmArguments[@]}" ${verboseMode:+"${verboseMode}"}
}

# Each section below is skipped when its projection function returns a non-zero status.

# -- Java Artifact Node Embeddings -------------------------------

if createUndirectedDependencyProjection "${CREATE_PROJECTION}=artifact-embeddings" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight"; then
  runTunedAnomalyDetection "artifact-embeddings" "Artifact" "weight"
fi

# -- Java Package Node Embeddings --------------------------------

if createUndirectedDependencyProjection "${CREATE_PROJECTION}=package-embeddings" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces"; then
  runTunedAnomalyDetection "package-embeddings" "Package" "weight25PercentInterfaces"
fi

# -- Java Type Node Embeddings -----------------------------------

if createUndirectedJavaTypeDependencyProjection "${CREATE_PROJECTION}=type-embeddings"; then
  runTunedAnomalyDetection "type-embeddings" "Type" "weight"
fi

# -- Typescript Module Node Embeddings ---------------------------

if createUndirectedDependencyProjection "${CREATE_PROJECTION}=typescript-module-embedding" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight"; then
  runTunedAnomalyDetection "typescript-module-embedding" "Module" "lowCouplingElement25PercentWeight"
fi
107+
108+
# ---------------------------------------------------------------

# Clean-up after report generation. Empty reports will be deleted.
# The report directory is passed as the first and only argument to the sourced script.
source "${SCRIPTS_DIR}/cleanupAfterReportGeneration.sh" "${FULL_REPORT_DIRECTORY}"

# Final message including the current local time with UTC offset.
echo "anomalyDetectionPipeline: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished."

0 commit comments

Comments
 (0)