1515# ==============================================================================
1616
1717from abc import ABCMeta , abstractmethod
18- from numbers import Integral
1918
2019import numpy as np
2120
2827from ..common ._mixin import ClassifierMixin , RegressorMixin
2928from ..datatypes import from_table , to_table
3029from ..utils ._array_api import _get_sycl_namespace
31- from ..utils .validation import (
32- _check_array ,
33- _check_classification_targets ,
34- _check_n_features ,
35- _check_X_y ,
36- _column_or_1d ,
37- _num_samples ,
38- )
30+ from ..utils .validation import _num_samples
3931
4032
4133class NeighborsCommonBase (metaclass = ABCMeta ):
@@ -50,23 +42,6 @@ def __init__(self):
5042 self .effective_metric_params_ = None
5143 self ._onedal_model = None
5244
53- def _parse_auto_method (self , method , n_samples , n_features ):
54- result_method = method
55-
56- if method in ["auto" , "ball_tree" ]:
57- condition = (
58- self .n_neighbors is not None and self .n_neighbors >= n_samples // 2
59- )
60- if self .metric == "precomputed" or n_features > 15 or condition :
61- result_method = "brute"
62- else :
63- if self .metric == "euclidean" :
64- result_method = "kd_tree"
65- else :
66- result_method = "brute"
67-
68- return result_method
69-
7045 @abstractmethod
7146 def train (self , * args , ** kwargs ): ...
7247
@@ -76,66 +51,6 @@ def infer(self, *args, **kwargs): ...
7651 @abstractmethod
7752 def _onedal_fit (self , X , y ): ...
7853
79- def _validate_data (
80- self , X , y = None , reset = True , validate_separately = None , ** check_params
81- ):
82- if y is None :
83- if self .requires_y :
84- raise ValueError (
85- f"This { self .__class__ .__name__ } estimator "
86- f"requires y to be passed, but the target y is None."
87- )
88- X = _check_array (X , ** check_params )
89- out = X , y
90- else :
91- if validate_separately :
92- # We need this because some estimators validate X and y
93- # separately, and in general, separately calling _check_array()
94- # on X and y isn't equivalent to just calling _check_X_y()
95- # :(
96- check_X_params , check_y_params = validate_separately
97- X = _check_array (X , ** check_X_params )
98- y = _check_array (y , ** check_y_params )
99- else :
100- X , y = _check_X_y (X , y , ** check_params )
101- out = X , y
102-
103- if check_params .get ("ensure_2d" , True ):
104- _check_n_features (self , X , reset = reset )
105-
106- return out
107-
108- def _get_weights (self , dist , weights ):
109- if weights in (None , "uniform" ):
110- return None
111- if weights == "distance" :
112- # if user attempts to classify a point that was zero distance from one
113- # or more training points, those training points are weighted as 1.0
114- # and the other points as 0.0
115- if dist .dtype is np .dtype (object ):
116- for point_dist_i , point_dist in enumerate (dist ):
117- # check if point_dist is iterable
118- # (ex: RadiusNeighborClassifier.predict may set an element of
119- # dist to 1e-6 to represent an 'outlier')
120- if hasattr (point_dist , "__contains__" ) and 0.0 in point_dist :
121- dist [point_dist_i ] = point_dist == 0.0
122- else :
123- dist [point_dist_i ] = 1.0 / point_dist
124- else :
125- with np .errstate (divide = "ignore" ):
126- dist = 1.0 / dist
127- inf_mask = np .isinf (dist )
128- inf_row = np .any (inf_mask , axis = 1 )
129- dist [inf_row ] = inf_mask [inf_row ]
130- return dist
131- elif callable (weights ):
132- return weights (dist )
133- else :
134- raise ValueError (
135- "weights not recognized: should be 'uniform', "
136- "'distance', or a callable function"
137- )
138-
13954 def _get_onedal_params (self , X , y = None , n_neighbors = None ):
14055 class_count = 0 if self .classes_ is None else len (self .classes_ )
14156 weights = getattr (self , "weights" , "uniform" )
@@ -145,8 +60,18 @@ def _get_onedal_params(self, X, y=None, n_neighbors=None):
14560 p = 2.0
14661 else :
14762 p = self .p
63+
64+ # Handle different input types for dtype
65+ try :
66+ fptype = X .dtype
67+ except AttributeError :
68+ # For pandas DataFrames or other types without dtype attribute
69+ import numpy as np
70+
71+ fptype = np .float64
72+
14873 return {
149- "fptype" : X . dtype ,
74+ "fptype" : fptype ,
15075 "vote_weights" : "uniform" if weights == "uniform" else "distance" ,
15176 "method" : self ._fit_method ,
15277 "radius" : self .radius ,
@@ -176,21 +101,6 @@ def __init__(
176101 self .p = p
177102 self .metric_params = metric_params
178103
179- def _validate_targets (self , y , dtype ):
180- arr = _column_or_1d (y , warn = True )
181-
182- try :
183- return arr .astype (dtype , copy = False )
184- except ValueError :
185- return arr
186-
187- def _validate_n_classes (self ):
188- length = 0 if self .classes_ is None else len (self .classes_ )
189- if length < 2 :
190- raise ValueError (
191- f"The number of classes has to be greater than one; got { length } "
192- )
193-
194104 def _fit (self , X , y ):
195105 self ._onedal_model = None
196106 self ._tree = None
@@ -202,13 +112,8 @@ def _fit(self, X, y):
202112 )
203113
204114 _ , xp , _ = _get_sycl_namespace (X )
205- use_raw_input = _get_config ().get ("use_raw_input" , False ) is True
206115 if y is not None or self .requires_y :
207116 shape = getattr (y , "shape" , None )
208- if not use_raw_input :
209- X , y = super ()._validate_data (
210- X , y , dtype = [np .float64 , np .float32 ], accept_sparse = "csr"
211- )
212117 self ._shape = shape if shape is not None else y .shape
213118
214119 if _is_classifier (self ):
@@ -218,7 +123,6 @@ def _fit(self, X, y):
218123 else :
219124 self .outputs_2d_ = True
220125
221- _check_classification_targets (y )
222126 self .classes_ = []
223127 self ._y = np .empty (y .shape , dtype = int )
224128 for k in range (self ._y .shape [1 ]):
@@ -228,36 +132,19 @@ def _fit(self, X, y):
228132 if not self .outputs_2d_ :
229133 self .classes_ = self .classes_ [0 ]
230134 self ._y = self ._y .ravel ()
231-
232- self ._validate_n_classes ()
233135 else :
234136 self ._y = y
235- elif not use_raw_input :
236- X , _ = super ()._validate_data (X , dtype = [np .float64 , np .float32 ])
237137
238138 self .n_samples_fit_ = X .shape [0 ]
239139 self .n_features_in_ = X .shape [1 ]
240140 self ._fit_X = X
241141
242- if self .n_neighbors is not None :
243- if self .n_neighbors <= 0 :
244- raise ValueError ("Expected n_neighbors > 0. Got %d" % self .n_neighbors )
245- if not isinstance (self .n_neighbors , Integral ):
246- raise TypeError (
247- "n_neighbors does not take %s value, "
248- "enter integer value" % type (self .n_neighbors )
249- )
250-
251- self ._fit_method = super ()._parse_auto_method (
252- self .algorithm , self .n_samples_fit_ , self .n_features_in_
253- )
254-
255142 _fit_y = None
256143 queue = QM .get_global_queue ()
257144 gpu_device = queue is not None and queue .sycl_device .is_gpu
258145
259146 if _is_classifier (self ) or (_is_regressor (self ) and gpu_device ):
260- _fit_y = self . _validate_targets ( self . _y , X .dtype ).reshape ((- 1 , 1 ))
147+ _fit_y = y . astype ( X .dtype ).reshape ((- 1 , 1 )) if y is not None else None
261148 result = self ._onedal_fit (X , _fit_y )
262149
263150 if y is not None and _is_regressor (self ):
@@ -269,38 +156,22 @@ def _fit(self, X, y):
269156 return result
270157
271158 def _kneighbors (self , X = None , n_neighbors = None , return_distance = True ):
272- n_features = getattr (self , "n_features_in_" , None )
273- shape = getattr (X , "shape" , None )
274- if n_features and shape and len (shape ) > 1 and shape [1 ] != n_features :
275- raise ValueError (
276- (
277- f"X has { X .shape [1 ]} features, "
278- f"but kneighbors is expecting "
279- f"{ n_features } features as input"
280- )
281- )
282-
283159 _check_is_fitted (self )
284160
285161 if n_neighbors is None :
286162 n_neighbors = self .n_neighbors
287- elif n_neighbors <= 0 :
288- raise ValueError ("Expected n_neighbors > 0. Got %d" % n_neighbors )
289- else :
290- if not isinstance (n_neighbors , Integral ):
291- raise TypeError (
292- "n_neighbors does not take %s value, "
293- "enter integer value" % type (n_neighbors )
294- )
295163
296164 if X is not None :
297165 query_is_train = False
166+ < << << << HEAD
298167< << << << HEAD
299168 if not use_raw_input :
300169 X = _check_array (X , accept_sparse = "csr" , dtype = [np .float64 , np .float32 ])
301170== == == =
302171 X = _check_array (X , accept_sparse = "csr" , dtype = [np .float64 , np .float32 ])
303172> >> >> >> e003b37f (fix : try it again )
173+ == == == =
174+ >> >> >> > 8 cd6f2b2 (fix : first round of refactor move preprocssing function to sklearnex )
304175 else :
305176 query_is_train = True
306177 X = self ._fit_X
@@ -309,24 +180,12 @@ def _kneighbors(self, X=None, n_neighbors=None, return_distance=True):
309180 n_neighbors += 1
310181
311182 n_samples_fit = self .n_samples_fit_
312- if n_neighbors > n_samples_fit :
313- if query_is_train :
314- n_neighbors -= 1 # ok to modify inplace because an error is raised
315- inequality_str = "n_neighbors < n_samples_fit"
316- else :
317- inequality_str = "n_neighbors <= n_samples_fit"
318- raise ValueError (
319- f"Expected { inequality_str } , but "
320- f"n_neighbors = { n_neighbors } , n_samples_fit = { n_samples_fit } , "
321- f"n_samples = { X .shape [0 ]} " # include n_samples for common tests
322- )
323183
324184 chunked_results = None
325- method = self ._parse_auto_method (
326- self ._fit_method , self .n_samples_fit_ , n_features
327- )
185+ # Use the fit method determined at sklearnex level
186+ method = getattr (self , "_fit_method ", "brute ")
328187
329- params = super () ._get_onedal_params (X , n_neighbors = n_neighbors )
188+ params = self ._get_onedal_params (X , n_neighbors = n_neighbors )
330189 prediction_results = self ._onedal_predict (self ._onedal_model , X , params )
331190 distances = from_table (prediction_results .distances )
332191 indices = from_table (prediction_results .indices )
@@ -434,30 +293,9 @@ def fit(self, X, y, queue=None):
434293
435294 @supports_queue
436295 def predict (self , X , queue = None ):
437- use_raw_input = _get_config ().get ("use_raw_input" , False ) is True
438- if not use_raw_input :
439- X = _check_array (X , accept_sparse = "csr" , dtype = [np .float64 , np .float32 ])
440296 onedal_model = getattr (self , "_onedal_model ", None )
441- n_features = getattr (self , "n_features_in_" , None )
442- n_samples_fit_ = getattr (self , "n_samples_fit_" , None )
443- shape = getattr (X , "shape" , None )
444- if n_features and shape and len (shape ) > 1 and shape [1 ] != n_features :
445- raise ValueError (
446- (
447- f"X has { X .shape [1 ]} features, "
448- f"but KNNClassifier is expecting "
449- f"{ n_features } features as input"
450- )
451- )
452-
453297 _check_is_fitted (self )
454298
455- self ._fit_method = self ._parse_auto_method (
456- self .algorithm , n_samples_fit_ , n_features
457- )
458-
459- self ._validate_n_classes ()
460-
461299 params = self ._get_onedal_params (X )
462300 prediction_result = self ._onedal_predict (onedal_model , X , params )
463301 responses = from_table (prediction_result .responses )
@@ -477,9 +315,8 @@ def predict_proba(self, X, queue=None):
477315
478316 n_queries = _num_samples (X )
479317
480- weights = self ._get_weights (neigh_dist , self .weights )
481- if weights is None :
482- weights = np .ones_like (neigh_ind )
318+ # Use uniform weights for now - weights calculation should be done at sklearnex level
319+ weights = np .ones_like (neigh_ind )
483320
484321 all_rows = np .arange (n_queries )
485322 probabilities = []
@@ -580,28 +417,9 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None)
580417 return self ._kneighbors (X , n_neighbors , return_distance )
581418
582419 def _predict_gpu (self , X ):
583- use_raw_input = _get_config ().get ("use_raw_input" , False ) is True
584- if not use_raw_input :
585- X = _check_array (X , accept_sparse = "csr" , dtype = [np .float64 , np .float32 ])
586420 onedal_model = getattr (self , "_onedal_model ", None )
587- n_features = getattr (self , "n_features_in_" , None )
588- n_samples_fit_ = getattr (self , "n_samples_fit_" , None )
589- shape = getattr (X , "shape" , None )
590- if n_features and shape and len (shape ) > 1 and shape [1 ] != n_features :
591- raise ValueError (
592- (
593- f"X has { X .shape [1 ]} features, "
594- f"but KNNClassifier is expecting "
595- f"{ n_features } features as input"
596- )
597- )
598-
599421 _check_is_fitted (self )
600422
601- self ._fit_method = self ._parse_auto_method (
602- self .algorithm , n_samples_fit_ , n_features
603- )
604-
605423 params = self ._get_onedal_params (X )
606424
607425 prediction_result = self ._onedal_predict (onedal_model , X , params )
@@ -613,7 +431,8 @@ def _predict_gpu(self, X):
613431 def _predict_skl (self , X ):
614432 neigh_dist , neigh_ind = self .kneighbors (X )
615433
616- weights = self ._get_weights (neigh_dist , self .weights )
434+ # Use uniform weights for now - weights calculation should be done at sklearnex level
435+ weights = None
617436
618437 _y = self ._y
619438 if _y .ndim == 1 :
0 commit comments