Skip to content

Commit db0d589

Browse files
committed
Introduce anomaly detection pipeline with tuned clustering
1 parent 82c86b8 commit db0d589

13 files changed

+884
-341
lines changed
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// Return the first node with a "centralityArticleRank" property if it exists.
// Only nodes carrying the label given in $projection_node_label are considered.
// Yields at most one row (LIMIT 1) and no row at all if no such node has the property.
2+
3+
MATCH (codeUnit)
4+
WHERE $projection_node_label IN labels(codeUnit)
5+
AND codeUnit.centralityArticleRank IS NOT NULL
6+
RETURN codeUnit.name AS shortCodeUnitName
7+
,elementId(codeUnit) AS nodeElementId
8+
,codeUnit.centralityArticleRank AS articleRank
9+
LIMIT 1
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// Calculates and writes the Article Rank centrality score for anomaly detection.
// Runs on the graph projection named $projection_name + '-cleaned' with at most 50 iterations,
// uses $projection_weight_property as relationship weight and the "MinMax" scaler,
// and writes the result into the node property "centralityArticleRank".
// Returns the write statistics yielded by the procedure (e.g. didConverge, ranIterations).
2+
3+
CALL gds.articleRank.write(
4+
$projection_name + '-cleaned', {
5+
maxIterations: 50
6+
,relationshipWeightProperty: $projection_weight_property
7+
,scaler: "MinMax"
8+
,writeProperty: 'centralityArticleRank'
9+
})
10+
YIELD nodePropertiesWritten, ranIterations, didConverge, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
11+
RETURN nodePropertiesWritten, ranIterations, didConverge, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// Return the first node with a "centralityBetweenness" property if it exists.
// Only nodes carrying the label given in $projection_node_label are considered.
// Yields at most one row (LIMIT 1) and no row at all if no such node has the property.
// The score is returned as "betweenness" (previously mislabelled "pageRank" by copy & paste).
2+
3+
MATCH (codeUnit)
4+
WHERE $projection_node_label IN labels(codeUnit)
5+
AND codeUnit.centralityBetweenness IS NOT NULL
6+
RETURN codeUnit.name AS shortCodeUnitName
7+
,elementId(codeUnit) AS nodeElementId
8+
,codeUnit.centralityBetweenness AS betweenness
9+
LIMIT 1
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// Calculates and writes the Betweenness centrality score for anomaly detection.
// Runs on the graph projection named $dependencies_projection + '-directed-cleaned',
// uses $projection_weight_property as relationship weight
// and writes the result into the node property "centralityBetweenness".
// NOTE(review): this query reads $dependencies_projection, whereas the other anomaly detection
// write queries read $projection_name. Confirm that callers provide both parameters.
2+
3+
CALL gds.betweenness.write(
4+
$dependencies_projection + '-directed-cleaned', {
5+
relationshipWeightProperty: $projection_weight_property
6+
,writeProperty: 'centralityBetweenness'
7+
})
8+
YIELD nodePropertiesWritten, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
9+
RETURN nodePropertiesWritten, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// Return the first node with a "communityLocalClusteringCoefficient" property if it exists.
// Only nodes carrying the label given in $projection_node_label are considered.
// Yields at most one row (LIMIT 1) and no row at all if no such node has the property.
2+
3+
MATCH (codeUnit)
4+
WHERE $projection_node_label IN labels(codeUnit)
5+
AND codeUnit.communityLocalClusteringCoefficient IS NOT NULL
6+
RETURN codeUnit.name AS shortCodeUnitName
7+
,elementId(codeUnit) AS nodeElementId
8+
,codeUnit.communityLocalClusteringCoefficient AS clusteringCoefficient
9+
LIMIT 1
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// Calculates and writes the local clustering coefficient for anomaly detection.
// Runs on the graph projection named $projection_name + '-cleaned'
// and writes the result into the node property "communityLocalClusteringCoefficient".
// NOTE(review): presumably the '-cleaned' projection is undirected as this algorithm expects - confirm.
2+
3+
CALL gds.localClusteringCoefficient.write(
4+
$projection_name + '-cleaned', {
5+
writeProperty: 'communityLocalClusteringCoefficient'
6+
})
7+
YIELD averageClusteringCoefficient, nodeCount, nodePropertiesWritten, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
8+
RETURN averageClusteringCoefficient, nodeCount, nodePropertiesWritten, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// Return the first node with a "centralityPageRank" property if it exists.
// Only nodes carrying the label given in $projection_node_label are considered.
// Yields at most one row (LIMIT 1) and no row at all if no such node has the property.
2+
3+
MATCH (codeUnit)
4+
WHERE $projection_node_label IN labels(codeUnit)
5+
AND codeUnit.centralityPageRank IS NOT NULL
6+
RETURN codeUnit.name AS shortCodeUnitName
7+
,elementId(codeUnit) AS nodeElementId
8+
,codeUnit.centralityPageRank AS pageRank
9+
LIMIT 1
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// Calculates and writes the Page Rank centrality score for anomaly detection.
// Runs on the graph projection named $projection_name + '-cleaned' with at most 50 iterations,
// uses $projection_weight_property as relationship weight and the "MinMax" scaler,
// and writes the result into the node property "centralityPageRank".
// Returns the write statistics yielded by the procedure (e.g. didConverge, ranIterations).
2+
3+
CALL gds.pageRank.write(
4+
$projection_name + '-cleaned', {
5+
maxIterations: 50
6+
,relationshipWeightProperty: $projection_weight_property
7+
,scaler: "MinMax"
8+
,writeProperty: 'centralityPageRank'
9+
})
10+
YIELD nodePropertiesWritten, ranIterations, didConverge, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
11+
RETURN nodePropertiesWritten, ranIterations, didConverge, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// Query code unit nodes together with the properties used as features for anomaly detection.
// Parameters: $projection_node_label (node label to select), $community_property (name of the community id property).
// Only nodes are returned where every listed feature property is present (dependency counts,
// clustering coefficient, Article Rank, Page Rank, Betweenness, HDBSCAN results and 2D visualization coordinates).
// "projectName" is the name of the containing Java artifact if there is one, otherwise the last
// path segment of the root directory of the containing TypeScript project (null if neither matches).
// "centrality" repeats the Page Rank; its fallback value 0.00001 can never apply here because
// the WHERE clause already requires "centralityPageRank" to be non-null.
2+
3+
MATCH (codeUnit)
4+
WHERE $projection_node_label IN labels(codeUnit)
5+
AND codeUnit[$community_property] IS NOT NULL
6+
AND codeUnit.incomingDependencies IS NOT NULL
7+
AND codeUnit.outgoingDependencies IS NOT NULL
8+
AND codeUnit.communityLocalClusteringCoefficient IS NOT NULL
9+
AND codeUnit.centralityArticleRank IS NOT NULL
10+
AND codeUnit.centralityPageRank IS NOT NULL
11+
AND codeUnit.centralityBetweenness IS NOT NULL
12+
AND codeUnit.clusteringHDBSCANLabel IS NOT NULL
13+
AND codeUnit.clusteringHDBSCANProbability IS NOT NULL
14+
AND codeUnit.clusteringHDBSCANNoise IS NOT NULL
15+
AND codeUnit.embeddingFastRandomProjectionVisualizationX IS NOT NULL
16+
AND codeUnit.embeddingFastRandomProjectionVisualizationY IS NOT NULL
17+
OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit)
18+
WITH *, artifact.name AS artifactName
19+
OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS]->(codeUnit)
20+
WITH *, last(split(projectRoot.absoluteFileName, '/')) AS projectName
21+
RETURN DISTINCT
22+
coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
23+
,codeUnit.name AS shortCodeUnitName
24+
,elementId(codeUnit) AS nodeElementId
25+
,coalesce(artifactName, projectName) AS projectName
26+
,codeUnit.incomingDependencies AS incomingDependencies
27+
,codeUnit.outgoingDependencies AS outgoingDependencies
28+
,codeUnit[$community_property] AS communityId
29+
,codeUnit.communityLocalClusteringCoefficient AS clusteringCoefficient
30+
,codeUnit.centralityArticleRank AS articleRank
31+
,codeUnit.centralityPageRank AS pageRank
32+
,codeUnit.centralityBetweenness AS betweenness
33+
,codeUnit.clusteringHDBSCANLabel AS clusteringLabel
34+
,codeUnit.clusteringHDBSCANProbability AS clusteringProbability
35+
,codeUnit.clusteringHDBSCANNoise AS clusteringIsNoise
36+
,codeUnit.embeddingFastRandomProjectionVisualizationX AS visualizationX
37+
,codeUnit.embeddingFastRandomProjectionVisualizationY AS visualizationY
38+
,coalesce(codeUnit.centralityPageRank, 0.00001) AS centrality
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// Example on how to set the parameters for anomaly detection.
// The last map entry must not be followed by a comma, otherwise the map can't be parsed.
// NOTE(review): the Betweenness write query additionally reads $dependencies_projection,
// which is not set in this example - confirm where it is expected to come from.
2+
3+
:params {
4+
"projection_name": "package-anomaly-detection",
5+
"projection_node_label": "Package",
6+
"projection_weight_property": "weight25PercentInterfaces",
7+
"community_property": "communityLeidenIdTuned"
8+
}

0 commit comments

Comments
 (0)