#
1111import numpy as np
12+ import scipy
1213from matplotlib import pyplot as plt
13- from sklearn .kernel_ridge import KernelRidge
14+ from sklearn .linear_model import LinearRegression
1415from sklearn .model_selection import train_test_split
1516
1617from skmatter .datasets import load_who_dataset
5758 ]
5859)
5960
# Apply the same permutation to the feature columns and to their display
# names so the two stay aligned (order used by the plots further below).
columns = columns[[8, 4, 2, 6, 1, 7, 0, 5, 3]].tolist()
column_names = column_names[[8, 4, 2, 6, 1, 7, 0, 5, 3]].tolist()
6263
6364# %%
6465#
102103
103104
# Kept for the kernel-based selectors used later in the example.
kernel_params = {"kernel": "rbf", "gamma": 0.08858667904100832}

# Ordinary least squares without an intercept.
# NOTE(review): fit_intercept=False assumes X/y are centered upstream — confirm.
lr = LinearRegression(fit_intercept=False)

# Fit on the training split and predict it back for the parity plot below.
yp_train = lr.fit(X_train, y_train).predict(X_train)
108110
109111# %%
110112#
@@ -171,8 +173,8 @@ def fit(self, X, y):
171173 for n in range (self .n_to_select ):
172174 errors = np .zeros (len (remaining ))
173175 for i , pp in enumerate (remaining ):
174- krr .fit (X [:, [* self .selected_idx_ [:n ], pp ]], y )
175- errors [i ] = krr .score (X [:, [* self .selected_idx_ [:n ], pp ]], y )
176+ lr .fit (X [:, [* self .selected_idx_ [:n ], pp ]], y )
177+ errors [i ] = lr .score (X [:, [* self .selected_idx_ [:n ], pp ]], y )
176178 self .selected_idx_ [n ] = remaining [np .argmax (errors )]
177179 remaining = np .array (np .delete (remaining , np .argmax (errors )), dtype = int )
178180 return self
@@ -212,8 +214,8 @@ def fit(self, X, y):
212214 if label not in all_errors :
213215 errors = np .zeros (len (ns ))
214216 for i , n in enumerate (ns ):
215- krr .fit (X_train [:, selector .selected_idx_ [:n ]], y_train )
216- errors [i ] = krr .score (X_test [:, selector .selected_idx_ [:n ]], y_test )
217+ lr .fit (X_train [:, selector .selected_idx_ [:n ]], y_train )
218+ errors [i ] = lr .score (X_test [:, selector .selected_idx_ [:n ]], y_test )
217219 all_errors [label ] = errors
218220 axes [0 ].plot (ns , all_errors [label ], c = color , label = label , linestyle = linestyle )
219221 axes [1 ].plot (
@@ -230,3 +232,37 @@ def fit(self, X, y):
# Light horizontal grid on the second panel, then render the figure.
axes[1].grid(axis="y", alpha=0.5)
plt.tight_layout()
plt.show()
# %%
#
# Plot correlation between selectors
# ----------------------------------


# Stack the selection orders column-wise: selected_idx[:, k] is the feature
# ordering chosen by the k-th selector.
selected_idx = np.array(
    [selector.selected_idx_ for selector in [cur, fps, pcur, pfps, rfa]]
).T

# Pairwise weighted Kendall's tau between selection orders.
# rank=False uses the supplied orderings directly (see scipy.stats.weightedtau).
n_selectors = selected_idx.shape[1]
similarity = np.zeros((n_selectors, n_selectors))
for i in range(n_selectors):
    for j in range(n_selectors):
        similarity[i, j] = scipy.stats.weightedtau(
            selected_idx[:, i], selected_idx[:, j], rank=False
        )[0]

# Fixed: label was "PCovFPS," (stray comma inside the string).
labels = ["CUR", "FPS", "PCovCUR", "PCovFPS", "RFA"]

plt.imshow(similarity, cmap="Greens")
plt.xticks(np.arange(len(labels)), labels=labels)
plt.yticks(np.arange(len(labels)), labels=labels)

plt.title("Feature selection similarity")
# Annotate each cell; white text on dark (high-similarity) cells for contrast.
for i in range(len(labels)):
    for j in range(len(labels)):
        value = np.round(similarity[i, j], 2)
        color = "white" if value > 0.5 else "black"
        plt.gca().text(j, i, value, ha="center", va="center", color=color)

plt.colorbar()
plt.show()