88import bhc .api as api
99
1010
11- class BayesianHierarchicalClustering (
12- api .AbstractBayesianBasedHierarchicalClustering ):
11+ class BayesianHierarchicalClustering (api .AbstractBayesianBasedHierarchicalClustering ):
1312 """
1413 Reference: HELLER, Katherine A.; GHAHRAMANI, Zoubin.
1514 Bayesian hierarchical clustering.
@@ -26,7 +25,7 @@ def build(self):
2625
2726 weights = []
2827
29- # active nodes
28+ # active nodes (all)
3029 active_nodes = np .arange (n_objects )
3130 # assignments - starting each point in its own cluster
3231 assignments = np .arange (n_objects )
@@ -41,7 +40,8 @@ def build(self):
4140 for i in range (n_objects ):
4241 # compute log(d_k)
4342 log_d [i ] = BayesianHierarchicalClustering .__calc_log_d (
44- self .alpha , n [i ], None )
43+ self .alpha , n [i ], None
44+ )
4545 # compute log(p_i)
4646 log_p [i ] = self .model .calc_log_mlh (self .data [i ])
4747
@@ -54,7 +54,8 @@ def build(self):
5454 n_ch = n [i ] + n [j ]
5555 log_d_ch = log_d [i ] + log_d [j ]
5656 log_dk = BayesianHierarchicalClustering .__calc_log_d (
57- self .alpha , n_ch , log_d_ch )
57+ self .alpha , n_ch , log_d_ch
58+ )
5859 # compute log(pi_k)
5960 log_pik = np .log (self .alpha ) + gammaln (n_ch ) - log_dk
6061 # compute log(p_k)
@@ -67,8 +68,11 @@ def build(self):
6768 log_r = r1 - r2
6869 # store results
6970 merge_info = [i , j , log_r , r1 , r2 ]
70- tmp_merge = merge_info if tmp_merge is None \
71+ tmp_merge = (
72+ merge_info
73+ if tmp_merge is None
7174 else np .vstack ((tmp_merge , merge_info ))
75+ )
7276
7377 # find clusters to merge
7478 arc_list = np .empty (0 , dtype = api .Arc )
@@ -100,7 +104,8 @@ def build(self):
100104 # compute log(d_ij)
101105 log_d_ch = log_d [i ] + log_d [j ]
102106 log_d_ij = BayesianHierarchicalClustering .__calc_log_d (
103- self .alpha , n [ij ], log_d_ch )
107+ self .alpha , n [ij ], log_d_ch
108+ )
104109 log_d = np .append (log_d , log_d_ij )
105110 # update assignments
106111 assignments [np .argwhere (assignments == i )] = ij
@@ -129,14 +134,15 @@ def build(self):
129134 n_ch = n [k ] + n [ij ]
130135 log_d_ch = log_d [k ] + log_d [ij ]
131136 log_dij = BayesianHierarchicalClustering .__calc_log_d (
132- self .alpha , n_ch , log_d_ch )
137+ self .alpha , n_ch , log_d_ch
138+ )
133139 # compute log(pi_k)
134140 log_pik = np .log (self .alpha ) + gammaln (n_ch ) - log_dij
135141 # compute log(p_k)
136- data_merged = self .data [np . argwhere (
137- assignments == active_nodes [k ]).flatten ()]
138- log_p_ij = self . model . calc_log_mlh (
139- np .vstack ((x_mat_ij , data_merged )))
142+ data_merged = self .data [
143+ np . argwhere ( assignments == active_nodes [k ]).flatten ()
144+ ]
145+ log_p_ij = self . model . calc_log_mlh ( np .vstack ((x_mat_ij , data_merged )))
140146 # compute log(r_k)
141147 log_p_ch = log_p [ij ] + log_p [active_nodes [k ]]
142148 r1 = log_pik + log_p_ij
@@ -146,12 +152,14 @@ def build(self):
146152 merge_info = [ij , active_nodes [k ], log_r , r1 , r2 ]
147153 tmp_merge = np .vstack ((tmp_merge , merge_info ))
148154
149- return api .Result (arc_list ,
150- np .arange (0 , ij + 1 ),
151- log_p [- 1 ],
152- np .array (weights ),
153- hierarchy_cut ,
154- len (np .unique (assignments )))
155+ return api .Result (
156+ arc_list ,
157+ np .arange (0 , ij + 1 ),
158+ log_p [- 1 ],
159+ np .array (weights ),
160+ hierarchy_cut ,
161+ len (np .unique (assignments )),
162+ )
155163
156164 @staticmethod
157165 def __calc_log_d (alpha , nk , log_d_ch ):
0 commit comments