// Overview diagram of the anomaly detection pipeline (Graphviz DOT).
//
// Reading guide, as used consistently throughout this graph:
//   - box nodes      : algorithms and tuning steps
//   - diamond nodes  : parameters passed from a tuning step to an algorithm
//   - ellipse nodes  : produced values (outputs / features)
//   - dashed edges   : values flowing back from an algorithm (e.g. to its
//                      tuning step), labelled with the value's name
//   - each cluster groups the nodes of one pipeline stage
digraph AnomalyDetectionPipeline {
    // Lay the pipeline out from left to right.
    rankdir=LR;
    node [fontname="Helvetica", fontsize=10];

    // Leiden community detection
    subgraph cluster_leiden {
        label="Leiden Community Detection";
        style=filled; color=lightblue;
        node [shape=box, style=filled, fillcolor=white];

        Tuning_Leiden [label="Tuning\n(Optuna)"];
        Leiden_Gamma [label="gamma", shape=diamond];
        Leiden_Theta [label="theta", shape=diamond];
        Leiden_Algorithm [label="Leiden Community Detection"];
        CommunityId [label="Community", shape=ellipse];
    }

    // --- Leiden Community Detection relationships ---
    Tuning_Leiden -> Leiden_Gamma;
    Tuning_Leiden -> Leiden_Theta;
    Leiden_Gamma -> Leiden_Algorithm;
    Leiden_Theta -> Leiden_Algorithm;
    // Two separate feedback edges: one per value reported back to the tuner.
    Leiden_Algorithm -> Tuning_Leiden [label="modularity", style=dashed];
    Leiden_Algorithm -> Tuning_Leiden [label="size", style=dashed];
    Leiden_Algorithm -> CommunityId;

    // Fast Random Projection (FastRP)
    subgraph cluster_fastRP {
        label="Fast Random Projection (FastRP)";
        style=filled; color=lightpink;
        node [shape=box, style=filled, fillcolor=white];

        Tuning_FastRP [label="Tuning\n(Optuna)"];
        FastRP_Dimension [label="dimension", shape=diamond];
        FastRP_Normalization_Strength [label="normalization strength", shape=diamond];
        FastRP_Fourth_Iteration_Weight [label="fourth iteration weight", shape=diamond];
        FastRP_Algorithm [label="FastRP"];
        NodeEmbeddings [label="Node Embeddings", shape=ellipse];
    }

    // --- FastRP relationships ---
    Tuning_FastRP -> FastRP_Dimension;
    Tuning_FastRP -> FastRP_Normalization_Strength;
    Tuning_FastRP -> FastRP_Fourth_Iteration_Weight;
    FastRP_Dimension -> FastRP_Algorithm;
    FastRP_Normalization_Strength -> FastRP_Algorithm;
    FastRP_Fourth_Iteration_Weight -> FastRP_Algorithm;
    FastRP_Algorithm -> Tuning_FastRP [label="adjusted mutual info score\n(incl. preview clustering)", style=dashed];
    FastRP_Algorithm -> NodeEmbeddings;

    // Uniform Manifold Approximation and Projection (UMAP)
    subgraph cluster_UMAP {
        label="Uniform Manifold Approximation and Projection (UMAP)\nDimensionality Reduction for Visualization";
        style=filled; color=lightgrey;
        node [shape=box, style=filled, fillcolor=white];

        UMAP_Algorithm [label="UMAP"];
        UMAP_Coordinates [label="2D Coordinates", shape=ellipse];
    }

    // --- UMAP relationships ---
    NodeEmbeddings -> UMAP_Algorithm;
    UMAP_Algorithm -> UMAP_Coordinates;

    // HDBSCAN clustering and tuning
    subgraph cluster_hdbscan {
        label="Hierarchical Density-Based Spatial Clustering (HDBSCAN)";
        style=filled; color=lightgoldenrod;
        node [shape=box, style=filled, fillcolor=white];

        Tuning_HDBSCAN [label="Tuning\n(Optuna)"];
        HDBSCAN_Node [label="HDBSCAN"];
        HDBSCAN_Min_Cluster_Size [label="Min Cluster Size", shape=diamond];
        HDBSCAN_Min_Samples [label="Min Samples", shape=diamond];

        ClusterLabel [label="Label", shape=ellipse];
        ClusterRadius [label="Radius\n(avg,max)", shape=ellipse];
        ClusterSize [label="Size", shape=ellipse];
        NormDistToMedoid [label="Normalized Distance\nTo Medoid", shape=ellipse];
        ClusterNoise [label="Noise\n(label=-1)", shape=ellipse];
        ClusterProbability [label="Probability", shape=ellipse];
        ClusterApproximationOutlierScore [label="Approximation\nOutlierScore\n(= 1 - Probability)", shape=ellipse];
    }

    // --- Inputs into HDBSCAN ---
    CommunityId -> Tuning_HDBSCAN [label="reference"];
    NodeEmbeddings -> HDBSCAN_Node;

    Tuning_HDBSCAN -> HDBSCAN_Min_Cluster_Size;
    Tuning_HDBSCAN -> HDBSCAN_Min_Samples;
    HDBSCAN_Min_Cluster_Size -> HDBSCAN_Node;
    HDBSCAN_Min_Samples -> HDBSCAN_Node;

    HDBSCAN_Node -> Tuning_HDBSCAN [label="adjusted mutual info score", style=dashed];

    // --- HDBSCAN outputs (cluster features) ---
    HDBSCAN_Node -> ClusterLabel;
    HDBSCAN_Node -> ClusterNoise;
    HDBSCAN_Node -> ClusterRadius;
    HDBSCAN_Node -> ClusterSize;
    HDBSCAN_Node -> NormDistToMedoid;
    HDBSCAN_Node -> ClusterProbability;
    HDBSCAN_Node -> ClusterApproximationOutlierScore;

    // Graph algorithm based features
    subgraph cluster_graph_features {
        label="Graph (Algorithm) Features";
        style=filled; color=lightcyan;
        node [shape=ellipse, style=filled, fillcolor=white];

        ArticleRank [label="ArticleRank"];
        PageRank [label="PageRank"];
        PageRank_minus_ArticleRank [label="PageRank -\nArticleRank"];
        BetweennessCentrality [label="Betweenness\nCentrality"];
        LocalClusteringCoefficient [label="Local Clustering\nCoefficient"];
        Degree [label="Degree\n(in, out, sum)"];
    }

    // Anomaly detection model area (drawn with a thick green border and a
    // large margin so it stands out from the other clusters)
    subgraph cluster_anomaly {
        label="Anomaly Detection Model";
        style=filled; color=lightgreen; penwidth=4; pencolor=green; margin="50,50";
        node [shape=box, style=filled, fillcolor=white];

        AF_Optuna [label="Tuning\n(Optuna)"];
        IsolationForest [label="Isolation Forest\nAnomaly Detector", margin="0.4,0.4"];
        ProxyRandomForest [label="RandomForest\n(Proxy)"];
        // Score and Label are two distinct outputs and need distinct node IDs;
        // re-declaring the same ID would merely overwrite the first label.
        AnomalyScore [label="Score", shape=ellipse];
        AnomalyLabel [label="Label", shape=ellipse];
    }

    // --- Embeddings feed anomaly model ---
    NodeEmbeddings -> IsolationForest;

    // --- HDBSCAN-derived features feed anomaly model ---
    ClusterRadius -> IsolationForest;
    NormDistToMedoid -> IsolationForest;
    ClusterApproximationOutlierScore -> IsolationForest;

    // --- Graph algorithm features feed anomaly model ---
    ArticleRank -> IsolationForest;
    PageRank -> IsolationForest;
    PageRank_minus_ArticleRank -> IsolationForest;
    BetweennessCentrality -> IsolationForest;
    LocalClusteringCoefficient -> IsolationForest;
    Degree -> IsolationForest;

    // --- Anomaly model tuning, proxy model and outputs ---
    // The proxy RandomForest receives the Isolation Forest's labels (dashed edge).
    AF_Optuna -> IsolationForest;
    AF_Optuna -> ProxyRandomForest;
    IsolationForest -> ProxyRandomForest [label="labels", style=dashed];
    IsolationForest -> AnomalyScore;
    // NOTE(review): edge added together with the AnomalyLabel node; confirm
    // that the label is indeed produced by the Isolation Forest.
    IsolationForest -> AnomalyLabel;

    // Explainable AI / SHAP
    subgraph cluster_explainability {
        label="Explainable AI (SHAP)";
        style=filled; color=lavender;
        node [shape=note, style=filled, fillcolor=white];

        SHAP [label="SHAP TreeExplainer"];

        SHAP_Values [label="Top SHAP Values", shape=ellipse];
        SHAP_Features [label="Top Features", shape=ellipse];
        SHAP_Embedding_Sum [label="Node Embeddings\nSHAP Sum", shape=ellipse];
    }

    // --- Explainability connections (RandomForest -> SHAP) ---
    ProxyRandomForest -> SHAP;
    SHAP -> SHAP_Values;
    SHAP -> SHAP_Features;
    SHAP -> SHAP_Embedding_Sum;
}
// 0 commit comments