Skip to content

Commit ec36219

Browse files
committed
Add anomaly detection Markdown summary report
1 parent cfa9f70 commit ec36219

13 files changed

+497
-0
lines changed

domains/anomaly-detection/anomalyDetectionCsv.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ anomaly_detection_features() {
7070
# Required Parameters:
7171
# - projection_node_label=...
7272
# Label of the nodes that will be used for the projection. Example: "Package"
73+
# - projection_language=...
74+
# Name of the associated programming language. Default: "Java". Example: "Typescript"
7375
anomaly_detection_queries() {
7476
local nodeLabel
7577
nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )
@@ -99,6 +101,8 @@ anomaly_detection_queries() {
99101
# Required Parameters:
100102
# - projection_node_label=...
101103
# Label of the nodes that will be used for the projection. Example: "Package"
104+
# - projection_language=...
105+
# Name of the associated programming language. Examples: "Java", "Typescript"
102106
anomaly_detection_labels() {
103107
local nodeLabel
104108
nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )
@@ -129,6 +133,8 @@ anomaly_detection_labels() {
129133
# Label of the nodes that will be used for the projection. Example: "Package"
130134
# - projection_weight_property=...
131135
# Name of the node property that contains the dependency weight. Example: "weight"
136+
# - projection_language=...
137+
# Name of the associated programming language. Examples: "Java", "Typescript"
132138
anomaly_detection_csv_reports() {
133139
time anomaly_detection_features "${@}"
134140
time anomaly_detection_queries "${@}"
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Anomaly Detection Summary: Summarizes all labelled archetypes by their anomaly score including examples. Requires all other labels/*.cypher queries to run first. Variables: projection_node_label
//
// Every node property whose name starts with 'anomaly' and ends with 'Rank' is treated as
// one archetype assignment of that node (e.g. 'anomalyHubRank' yields the archetype 'Hub').
// Result: one row per archetype and model status with the number of distinct code units,
// the highest anomaly score (rounded to 4 decimals) and up to three example names.

   MATCH (codeUnit)
   WHERE $projection_node_label IN labels(codeUnit)
  UNWIND keys(codeUnit) AS codeUnitProperty
    WITH *
   WHERE codeUnitProperty STARTS WITH 'anomaly'
     AND codeUnitProperty ENDS WITH 'Rank'
    WITH *
        ,coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
        ,split(split(codeUnitProperty, 'anomaly')[1], 'Rank')[0] AS archetype
        ,codeUnit[codeUnitProperty] AS archetypeRank
        ,codeUnit.anomalyScore AS anomalyScore
// Sort before aggregating: the examples below are intended to be taken from the highest scores first.
   ORDER BY anomalyScore DESC, archetypeRank ASC, codeUnitName ASC, archetype ASC
    WITH archetype
        ,anomalyScore
        ,CASE WHEN codeUnit.anomalyScore <= 0               THEN 'Typical'
              WHEN codeUnit.anomalyTopFeature1 IS NULL      THEN 'Undetermined'
              ELSE 'Anomalous' END AS modelStatus
        ,codeUnitName
  RETURN archetype                                                          AS `Archetype`
        ,count(DISTINCT codeUnitName)                                       AS `Count`
        ,round(max(anomalyScore), 4, 'HALF_UP')                             AS `Max. Score`
        ,modelStatus                                                        AS `Model Status`
        ,apoc.text.join(collect(DISTINCT codeUnitName)[0..3], ', ')         AS `Examples`
   ORDER BY modelStatus, archetype, `Max. Score` DESC
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Anomaly Detection DeepDive: Overview of analyzed code units and the number of anomalies detected. Requires all other labels/*.cypher queries to run first. Variables: projection_node_label
//
// Considers nodes with the given label that have at least one of the properties
// incomingDependencies / outgoingDependencies set, and returns a single row of totals.
// sign(rank) is summed per archetype rank property; nodes without that property are ignored by sum().

   MATCH (unit)
   WHERE $projection_node_label IN labels(unit)
     AND NOT (unit.incomingDependencies IS NULL AND unit.outgoingDependencies IS NULL)
  RETURN sum(unit.anomalyLabel)                   AS `Anomalies`
        ,sum(sign(unit.anomalyAuthorityRank))     AS `Authorities`
        ,sum(sign(unit.anomalyBottleneckRank))    AS `Bottlenecks`
        ,sum(sign(unit.anomalyBridgeRank))        AS `Bridges`
        ,sum(sign(unit.anomalyHubRank))           AS `Hubs`
        ,sum(sign(unit.anomalyOutlierRank))       AS `Outliers`
      //,collect(unit.name)[0..4]                 AS exampleNames
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Anomaly Detection Summary: Overview of all analyzed code units in total. Requires all other labels/*.cypher queries to run first. Variables: none
//
// Considers every node (regardless of its labels) that has at least one of the properties
// incomingDependencies / outgoingDependencies set. The aggregation has no grouping key,
// so the result is always exactly one row and needs no ORDER BY.

   MATCH (codeUnit)
   WHERE (codeUnit.incomingDependencies IS NOT NULL
      OR  codeUnit.outgoingDependencies IS NOT NULL)
  RETURN count(DISTINCT codeUnit)                     AS `Analyzed Units`
        ,sum(codeUnit.anomalyLabel)                   AS `Anomalies`
        ,sum(sign(codeUnit.anomalyAuthorityRank))     AS `Authorities`
        ,sum(sign(codeUnit.anomalyBottleneckRank))    AS `Bottlenecks`
        ,sum(sign(codeUnit.anomalyBridgeRank))        AS `Bridges`
        ,sum(sign(codeUnit.anomalyHubRank))           AS `Hubs`
        ,sum(sign(codeUnit.anomalyOutlierRank))       AS `Outliers`
      //,collect(codeUnit.name)[0..4]                 AS exampleNames
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// Anomaly Detection Summary: Overview of analyzed code units and the number of anomalies detected. Requires all other labels/*.cypher queries to run first. Variables: none
//
// Groups every node that has incomingDependencies or outgoingDependencies set by its
// comma-joined labels, after dropping labels that start with 'Mark4' and the labels
// 'File', 'Directory', 'ByteCode' and 'GenericDeclaration'.
// Nodes that keep no label after this filter are left out.

   MATCH (codeUnit)
   WHERE NOT (codeUnit.incomingDependencies IS NULL AND codeUnit.outgoingDependencies IS NULL)
    WITH codeUnit
        ,[nodeLabel IN labels(codeUnit)
           WHERE NOT (nodeLabel STARTS WITH 'Mark4'
                   OR nodeLabel IN ['File', 'Directory', 'ByteCode', 'GenericDeclaration'])
         ] AS levelLabels
   WHERE size(levelLabels) > 0
    WITH apoc.text.join(levelLabels, ',')             AS abstractionLevel
        ,count(DISTINCT codeUnit)                     AS unitCount
        ,sum(codeUnit.anomalyLabel)                   AS anomalies
        ,sum(sign(codeUnit.anomalyAuthorityRank))     AS authorities
        ,sum(sign(codeUnit.anomalyBottleneckRank))    AS bottlenecks
        ,sum(sign(codeUnit.anomalyBridgeRank))        AS bridges
        ,sum(sign(codeUnit.anomalyHubRank))           AS hubs
        ,sum(sign(codeUnit.anomalyOutlierRank))       AS outliers
      //,collect(codeUnit.name)[0..4]                 AS exampleNames
  RETURN abstractionLevel AS `Abstraction Level`
        ,unitCount        AS `Units`
        ,anomalies        AS `Anomalies`
        ,authorities      AS `Authorities`
        ,bottlenecks      AS `Bottlenecks`
        ,bridges          AS `Bridges`
        ,hubs             AS `Hubs`
        ,outliers         AS `Outliers`
      //,exampleNames
   ORDER BY anomalies DESC, unitCount DESC
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Anomaly Detection Summary: Summarizes all labelled archetypes by their anomaly score including their archetype rank. For code units with more than one archetype, the one with the best (numerically lowest) rank is shown. Requires all other labels/*.cypher queries to run first. Variables: projection_node_label
//
// Every node property whose name starts with 'anomaly' and ends with 'Rank' is treated as
// one archetype assignment of that node (e.g. 'anomalyHubRank' yields the archetype 'Hub').
// "Contained in" is the first available of: Java artifact name, TypeScript project root
// directory name, second-to-last path segment of a containing directory, or "".

   MATCH (codeUnit)
   WHERE $projection_node_label IN labels(codeUnit)
  UNWIND keys(codeUnit) AS codeUnitProperty
    WITH *
   WHERE codeUnitProperty STARTS WITH 'anomaly'
     AND codeUnitProperty ENDS WITH 'Rank'
    WITH *
        ,coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
        ,split(split(codeUnitProperty, 'anomaly')[1], 'Rank')[0] AS archetype
        ,codeUnit[codeUnitProperty] AS archetypeRank
        ,codeUnit.anomalyScore AS anomalyScore
OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit)
    WITH *, artifact.name AS artifactName
OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS]->(codeUnit)
    WITH *, last(split(projectRoot.absoluteFileName, '/')) AS projectName
OPTIONAL MATCH (codeDirectory:File:Directory)-[:CONTAINS]->(codeUnit)
    WITH *, split(replace(codeDirectory.fileName, './', ''), '/')[-2] AS directoryName
    // A distinct name avoids re-declaring "projectName", which is already in scope through "*".
    WITH *, coalesce(artifactName, projectName, directoryName, "") AS containedIn
// Sort directly before the aggregation so that collect(...)[0] picks the best-ranked archetype.
   ORDER BY anomalyScore DESC, archetypeRank ASC, codeUnitName ASC, archetype ASC
  RETURN codeUnitName                                                            AS `Name`
        ,containedIn                                                             AS `Contained in`
        ,round(anomalyScore, 4, 'HALF_UP')                                       AS `Anomaly Score`
        ,collect(archetype)[0]                                                   AS `Archetype`
        ,collect(archetypeRank)[0]                                               AS `Archetype Rank`
        ,nullif(codeUnit.anomalyTopFeature1, "")                                 AS `Top Feature 1`
        ,nullif(round(codeUnit.anomalyTopFeatureSHAPValue1, 4, 'HALF_UP'), 0.0)  AS `Top Feature 1 SHAP`
        ,nullif(codeUnit.anomalyTopFeature2, "")                                 AS `Top Feature 2`
        ,nullif(round(codeUnit.anomalyTopFeatureSHAPValue2, 4, 'HALF_UP'), 0.0)  AS `Top Feature 2 SHAP`
        ,nullif(codeUnit.anomalyTopFeature3, "")                                 AS `Top Feature 3`
        ,nullif(round(codeUnit.anomalyTopFeatureSHAPValue3, 4, 'HALF_UP'), 0.0)  AS `Top Feature 3 SHAP`
        ,CASE WHEN codeUnit.anomalyScore <= 0          THEN 'Typical'
              WHEN codeUnit.anomalyTopFeature1 IS NULL THEN 'Undetermined'
              ELSE 'Anomalous' END                                               AS `Model Status`
      //,collect(archetype)[1]     AS secondaryArchetype
      //,collect(archetypeRank)[1] AS secondaryArchetypeRank
// The result order is only defined by an ORDER BY on the RETURN itself.
   ORDER BY `Anomaly Score` DESC, `Archetype Rank` ASC, `Name` ASC
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// Anomaly Detection Summary: Lists the top anomalies (at most 20), the top 3 features that contributed to the decision and the archetypes (if any) they are assigned to. Requires all other labels/*.cypher queries to run first. Variables: projection_node_label
//
// Only nodes with anomalyScore > 0 are considered. Every node property whose name starts
// with 'anomaly' and ends with 'Rank' contributes one archetype name (e.g. 'anomalyHubRank'
// yields 'Hub'); all other properties yield null and are ignored by collect().
// "Contained in" is the first available of: Java artifact name, TypeScript project root
// directory name, second-to-last path segment of a containing directory, or "".

   MATCH (codeUnit)
   WHERE $projection_node_label IN labels(codeUnit)
     AND codeUnit.anomalyScore > 0
  UNWIND keys(codeUnit) AS codeUnitProperty
    WITH codeUnit
        ,CASE WHEN codeUnitProperty STARTS WITH 'anomaly'
               AND codeUnitProperty ENDS WITH 'Rank'
              THEN split(split(codeUnitProperty, 'anomaly')[1], 'Rank')[0]
          END AS archetype
        ,CASE WHEN codeUnitProperty STARTS WITH 'anomaly'
               AND codeUnitProperty ENDS WITH 'Rank'
              THEN codeUnit[codeUnitProperty]
          END AS archetypeRank
// Sort directly before collecting so that archetypes are listed best rank first.
   ORDER BY codeUnit.anomalyScore DESC, archetypeRank ASC
    WITH codeUnit
        ,coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
        ,apoc.text.join(collect(DISTINCT archetype), ', ') AS archetypes
OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit)
    WITH *, artifact.name AS artifactName
OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS]->(codeUnit)
    WITH *, last(split(projectRoot.absoluteFileName, '/')) AS projectName
OPTIONAL MATCH (codeDirectory:File:Directory)-[:CONTAINS]->(codeUnit)
    WITH *, split(replace(codeDirectory.fileName, './', ''), '/')[-2] AS directoryName
    // A distinct name avoids re-declaring "projectName", which is already in scope through "*".
    WITH *, coalesce(artifactName, projectName, directoryName, "") AS containedIn
  RETURN codeUnitName                                                            AS `Name`
        ,containedIn                                                             AS `Contained in`
        ,round(codeUnit.anomalyScore, 4, 'HALF_UP')                              AS `Anomaly Score`
        ,collect(archetypes)[0]                                                  AS `Archetypes`
        ,nullif(codeUnit.anomalyTopFeature1, "")                                 AS `Top Feature 1`
        ,nullif(round(codeUnit.anomalyTopFeatureSHAPValue1, 4, 'HALF_UP'), 0.0)  AS `Top Feature 1 SHAP`
        ,nullif(codeUnit.anomalyTopFeature2, "")                                 AS `Top Feature 2`
        ,nullif(round(codeUnit.anomalyTopFeatureSHAPValue2, 4, 'HALF_UP'), 0.0)  AS `Top Feature 2 SHAP`
        ,nullif(codeUnit.anomalyTopFeature3, "")                                 AS `Top Feature 3`
        ,nullif(round(codeUnit.anomalyTopFeatureSHAPValue3, 4, 'HALF_UP'), 0.0)  AS `Top Feature 3 SHAP`
        // 'Typical' cannot occur here because of the anomalyScore > 0 filter above;
        // the branch is kept so the column matches the other summary queries.
        ,CASE WHEN codeUnit.anomalyScore <= 0          THEN 'Typical'
              WHEN codeUnit.anomalyTopFeature1 IS NULL THEN 'Undetermined'
              ELSE 'Anomalous' END                                               AS `Model Status`
// Without this ORDER BY the LIMIT would keep 20 arbitrary rows instead of the 20 highest scores.
   ORDER BY `Anomaly Score` DESC, `Name` ASC
   LIMIT 20

0 commit comments

Comments
 (0)