Skip to content

Commit d358e12

Browse files
committed
Document anomaly detection pipeline architecture
1 parent 0646563 commit d358e12

File tree

1 file changed

+173
-0
lines changed

1 file changed

+173
-0
lines changed
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
digraph AnomalyDetectionPipeline {
2+
rankdir=LR;
3+
node [fontname="Helvetica", fontsize=10];
4+
5+
// Leiden community detection
6+
// Cluster grouping the nodes of the Leiden community detection stage.
// Shapes used in this cluster: box (cluster default) for the tuner and the
// algorithm, diamond for gamma/theta, ellipse for the resulting community.
subgraph cluster_leiden {
7+
label="Leiden Community Detection";
8+
style=filled; color=lightblue;
9+
// Default node style for this cluster; individual nodes override the shape.
node [shape=box, style=filled, fillcolor=white];
10+
11+
Tuning_Leiden [label="Tuning\n(Optuna)"];
12+
// NOTE(review): gamma and theta are presumably the tuned hyperparameters;
// the edges after this cluster route them from Tuning_Leiden into Leiden_Algorithm.
Leiden_Gamma [label="gamma", shape=diamond]
13+
Leiden_Theta [label="theta", shape=diamond]
14+
Leiden_Algorithm [label="Leiden Community Detection"];
15+
// Output node; also used later as "reference" input for Tuning_HDBSCAN.
CommunityId [label="Community", shape=ellipse];
16+
}
17+
18+
// --- Leiden Community Detection relationships ---
19+
Tuning_Leiden -> Leiden_Gamma;
20+
Tuning_Leiden -> Leiden_Theta;
21+
Leiden_Gamma -> Leiden_Algorithm
22+
Leiden_Theta -> Leiden_Algorithm
23+
Leiden_Algorithm -> Tuning_Leiden [label="modularity", style="dashed"]
24+
Leiden_Algorithm -> Tuning_Leiden [label="size", style="dashed"]
25+
Leiden_Algorithm -> CommunityId;
26+
27+
// Fast Random Projection (FastRP)
28+
// Cluster grouping the nodes of the FastRP node embedding stage:
// the tuner, three diamond-shaped parameter nodes, the algorithm box and
// the resulting node embeddings (ellipse).
subgraph cluster_fastRP {
29+
label="Fast Random Projection (FastRP)";
30+
style=filled; color=lightpink;
31+
// Default node style for this cluster; individual nodes override the shape.
node [shape=box, style=filled, fillcolor=white];
32+
33+
Tuning_FastRP [label="Tuning\n(Optuna)"];
34+
FastRP_Dimension [label="dimension", shape=diamond];
35+
FastRP_Normalization_Strength [label="normalization strength", shape=diamond];
36+
// The displayed label is corrected to "fourth". The node ID keeps its original
// spelling because edges outside this cluster reference it by that name.
FastRP_Forth_Iteration_Weight [label="fourth iteration weight", shape=diamond];
37+
FastRP_Algorithm [label="FastRP"];
38+
// Output node; consumed by the UMAP, HDBSCAN and Isolation Forest stages.
NodeEmbeddings [label="Node Embeddings", shape=ellipse];
39+
}
40+
41+
// --- FastRP relationships ---
42+
Tuning_FastRP -> FastRP_Dimension;
43+
Tuning_FastRP -> FastRP_Normalization_Strength;
44+
Tuning_FastRP -> FastRP_Forth_Iteration_Weight;
45+
FastRP_Dimension -> FastRP_Algorithm;
46+
FastRP_Normalization_Strength -> FastRP_Algorithm;
47+
FastRP_Forth_Iteration_Weight -> FastRP_Algorithm
48+
FastRP_Algorithm -> Tuning_FastRP [label="adjusted mutual info score\n(incl. preview clustering)", style="dashed"]
49+
FastRP_Algorithm -> NodeEmbeddings;
50+
51+
// Uniform Manifold Approximation and Projection (UMAP)
52+
subgraph cluster_UMAP {
53+
label="Uniform Manifold Approximation and Projection (UMAP)\nDimensionality Reduction for Visualization";
54+
style=filled; color=lightgrey;
55+
node [shape=box, style=filled, fillcolor=white];
56+
57+
UMAP_Algorithm [label="UMAP"];
58+
UMAP_Coordinates [label="2D Coordinates", shape=ellipse];
59+
}
60+
61+
// UMAP relationships
62+
NodeEmbeddings -> UMAP_Algorithm
63+
UMAP_Algorithm -> UMAP_Coordinates
64+
65+
// HDBSCAN clustering and tuning
66+
subgraph cluster_hdbscan {
67+
label="Hierarchical Density-Based Spatial Clustering (HDBSCAN)";
68+
style=filled; color=lightgoldenrod;
69+
node [shape=box, style=filled, fillcolor=white];
70+
71+
Tuning_HDBSCAN [label="Tuning\n(Optuna)"];
72+
HDBSCAN_Node [label="HDBSCAN"];
73+
HDBSCAN_Min_Cluster_Size [label="Min Cluster Size", shape=diamond];
74+
HDBSCAN_Min_Samples [label="Min Samples", shape=diamond];
75+
76+
ClusterLabel [label="Label", shape=ellipse];
77+
ClusterRadius [label="Radius\n(avg,max)", shape=ellipse];
78+
ClusterSize [label="Size", shape=ellipse];
79+
NormDistToMedoid [label="Normalized Distance\nTo Medoid", shape=ellipse];
80+
ClusterNoise [label="Noise\n(label=-1)", shape=ellipse];
81+
ClusterProbability [label="Probability", shape=ellipse];
82+
ClusterApproximationOutlierScore [label="Approximation\nOutlierScore\n(= 1 - Probability)", shape=ellipse];
83+
}
84+
85+
// --- Inputs into HDBSCAN ---
86+
CommunityId -> Tuning_HDBSCAN [label="reference"];
87+
NodeEmbeddings -> HDBSCAN_Node;
88+
89+
Tuning_HDBSCAN -> HDBSCAN_Min_Cluster_Size
90+
Tuning_HDBSCAN -> HDBSCAN_Min_Samples
91+
HDBSCAN_Min_Cluster_Size -> HDBSCAN_Node;
92+
HDBSCAN_Min_Samples -> HDBSCAN_Node;
93+
94+
HDBSCAN_Node -> Tuning_HDBSCAN [label="adjusted mutual info score", style=dashed];
95+
96+
// HDBSCAN outputs (cluster features)
97+
HDBSCAN_Node -> ClusterLabel;
98+
HDBSCAN_Node -> ClusterNoise;
99+
HDBSCAN_Node -> ClusterRadius;
100+
HDBSCAN_Node -> ClusterSize;
101+
HDBSCAN_Node -> NormDistToMedoid;
102+
HDBSCAN_Node -> ClusterProbability;
103+
HDBSCAN_Node -> ClusterApproximationOutlierScore;
104+
105+
// Graph algorithm based features
106+
subgraph cluster_graph_features {
107+
label="Graph (Algorithm) Features";
108+
style=filled; color=lightcyan;
109+
node [shape=ellipse, style=filled, fillcolor=white];
110+
111+
ArticleRank [label="ArticleRank"];
112+
PageRank [label="PageRank"];
113+
PageRank_minus_ArticleRank [label="PageRank -\nArticleRank"];
114+
BetweennessCentrality [label="Betweenness\nCentrality"];
115+
LocalClusteringCoefficient [label="Local Clustering\nCoefficient"];
116+
Degree [label="Degree\n(in, out, sum)"];
117+
}
118+
119+
// Anomaly detection model area
120+
// Cluster grouping the anomaly detection model: the tuner, the Isolation
// Forest detector, the proxy RandomForest and the two detector outputs
// (score and label).
subgraph cluster_anomaly {
121+
label="Anomaly Detection Model";
122+
// Thick green border and a large margin make this cluster stand out.
style=filled; color=lightgreen; penwidth=4; pencolor=green; margin="50,50";
123+
// Default node style for this cluster; output nodes override the shape.
node [shape=box, style=filled, fillcolor=white];
124+
125+
AF_Optuna [label="Tuning\n(Optuna)"];
126+
IsolationForest [label="Isolation Forest\nAnomaly Detector", margin="0.4,0.4"];
127+
ProxyRandomForest [label="RandomForest\n(Proxy)"];
128+
AnomalyScore [label="Score", shape=ellipse];
129+
// Distinct ID for the label output. Declaring AnomalyScore a second time
// only overwrote the "Score" label, so a single node was rendered.
AnomalyLabel [label="Label", shape=ellipse];
// Connect the label output to the detector, like the score output.
IsolationForest -> AnomalyLabel;
130+
}
131+
132+
// Embeddings feed anomaly model
133+
NodeEmbeddings -> IsolationForest;
134+
135+
// HDBSCAN-derived features feed anomaly model
136+
ClusterRadius -> IsolationForest;
137+
NormDistToMedoid -> IsolationForest;
138+
ClusterApproximationOutlierScore -> IsolationForest;
139+
140+
// Graph Algorithm Features feed anomaly model
141+
ArticleRank -> IsolationForest;
142+
PageRank -> IsolationForest;
143+
PageRank_minus_ArticleRank -> IsolationForest;
144+
BetweennessCentrality -> IsolationForest;
145+
LocalClusteringCoefficient -> IsolationForest;
146+
Degree -> IsolationForest;
147+
148+
// Proxy RandomForest used as a backing/tuning model for the Isolation Forest
149+
AF_Optuna -> IsolationForest;
150+
AF_Optuna -> ProxyRandomForest;
151+
IsolationForest -> ProxyRandomForest [label="labels", style="dashed"];
152+
IsolationForest -> AnomalyScore
153+
154+
// Explainable AI / SHAP
155+
subgraph cluster_explainability {
156+
label="Explainable AI (SHAP)";
157+
style=filled; color=lavender;
158+
node [shape=note, style=filled, fillcolor=white];
159+
160+
SHAP [label="SHAP TreeExplainer"];
161+
162+
SHAP_Values [label="Top SHAP Values", shape=ellipse];
163+
SHAP_Features [label="Top Features", shape=ellipse];
164+
SHAP_Embedding_Sum [label="Node Embeddings\nSHAP Sum", shape=ellipse];
165+
}
166+
167+
// Explainability connections (RandomForest -> SHAP)
168+
ProxyRandomForest -> SHAP;
169+
SHAP -> SHAP_Values;
170+
SHAP -> SHAP_Features;
171+
SHAP -> SHAP_Embedding_Sum;
172+
173+
}

0 commit comments

Comments
 (0)