1515# ==============================================================================
1616
1717from abc import ABCMeta , abstractmethod
18- from numbers import Integral
1918
2019import numpy as np
2120
2827from ..common ._mixin import ClassifierMixin , RegressorMixin
2928from ..datatypes import from_table , to_table
3029from ..utils ._array_api import _get_sycl_namespace
31- from ..utils .validation import (
32- _check_array ,
33- _check_classification_targets ,
34- _check_n_features ,
35- _check_X_y ,
36- _column_or_1d ,
37- _num_samples ,
38- )
30+ from ..utils .validation import _num_samples
3931
4032
4133class NeighborsCommonBase (metaclass = ABCMeta ):
@@ -50,23 +42,6 @@ def __init__(self):
5042 self .effective_metric_params_ = None
5143 self ._onedal_model = None
5244
53- def _parse_auto_method (self , method , n_samples , n_features ):
54- result_method = method
55-
56- if method in ["auto" , "ball_tree" ]:
57- condition = (
58- self .n_neighbors is not None and self .n_neighbors >= n_samples // 2
59- )
60- if self .metric == "precomputed" or n_features > 15 or condition :
61- result_method = "brute"
62- else :
63- if self .metric == "euclidean" :
64- result_method = "kd_tree"
65- else :
66- result_method = "brute"
67-
68- return result_method
69-
7045 @abstractmethod
7146 def train (self , * args , ** kwargs ): ...
7247
@@ -76,66 +51,6 @@ def infer(self, *args, **kwargs): ...
7651 @abstractmethod
7752 def _onedal_fit (self , X , y ): ...
7853
79- def _validate_data (
80- self , X , y = None , reset = True , validate_separately = None , ** check_params
81- ):
82- if y is None :
83- if self .requires_y :
84- raise ValueError (
85- f"This { self .__class__ .__name__ } estimator "
86- f"requires y to be passed, but the target y is None."
87- )
88- X = _check_array (X , ** check_params )
89- out = X , y
90- else :
91- if validate_separately :
92- # We need this because some estimators validate X and y
93- # separately, and in general, separately calling _check_array()
94- # on X and y isn't equivalent to just calling _check_X_y()
95- # :(
96- check_X_params , check_y_params = validate_separately
97- X = _check_array (X , ** check_X_params )
98- y = _check_array (y , ** check_y_params )
99- else :
100- X , y = _check_X_y (X , y , ** check_params )
101- out = X , y
102-
103- if check_params .get ("ensure_2d" , True ):
104- _check_n_features (self , X , reset = reset )
105-
106- return out
107-
108- def _get_weights (self , dist , weights ):
109- if weights in (None , "uniform" ):
110- return None
111- if weights == "distance" :
112- # if user attempts to classify a point that was zero distance from one
113- # or more training points, those training points are weighted as 1.0
114- # and the other points as 0.0
115- if dist .dtype is np .dtype (object ):
116- for point_dist_i , point_dist in enumerate (dist ):
117- # check if point_dist is iterable
118- # (ex: RadiusNeighborClassifier.predict may set an element of
119- # dist to 1e-6 to represent an 'outlier')
120- if hasattr (point_dist , "__contains__" ) and 0.0 in point_dist :
121- dist [point_dist_i ] = point_dist == 0.0
122- else :
123- dist [point_dist_i ] = 1.0 / point_dist
124- else :
125- with np .errstate (divide = "ignore" ):
126- dist = 1.0 / dist
127- inf_mask = np .isinf (dist )
128- inf_row = np .any (inf_mask , axis = 1 )
129- dist [inf_row ] = inf_mask [inf_row ]
130- return dist
131- elif callable (weights ):
132- return weights (dist )
133- else :
134- raise ValueError (
135- "weights not recognized: should be 'uniform', "
136- "'distance', or a callable function"
137- )
138-
13954 def _get_onedal_params (self , X , y = None , n_neighbors = None ):
14055 class_count = 0 if self .classes_ is None else len (self .classes_ )
14156 weights = getattr (self , "weights" , "uniform" )
@@ -145,8 +60,18 @@ def _get_onedal_params(self, X, y=None, n_neighbors=None):
14560 p = 2.0
14661 else :
14762 p = self .p
63+
64+ # Handle different input types for dtype
65+ try :
66+ fptype = X .dtype
67+ except AttributeError :
68+ # For pandas DataFrames or other types without dtype attribute
69+ import numpy as np
70+
71+ fptype = np .float64
72+
14873 return {
149- "fptype" : X . dtype ,
74+ "fptype" : fptype ,
15075 "vote_weights" : "uniform" if weights == "uniform" else "distance" ,
15176 "method" : self ._fit_method ,
15277 "radius" : self .radius ,
@@ -176,21 +101,6 @@ def __init__(
176101 self .p = p
177102 self .metric_params = metric_params
178103
179- def _validate_targets (self , y , dtype ):
180- arr = _column_or_1d (y , warn = True )
181-
182- try :
183- return arr .astype (dtype , copy = False )
184- except ValueError :
185- return arr
186-
187- def _validate_n_classes (self ):
188- length = 0 if self .classes_ is None else len (self .classes_ )
189- if length < 2 :
190- raise ValueError (
191- f"The number of classes has to be greater than one; got { length } "
192- )
193-
194104 def _fit (self , X , y ):
195105 self ._onedal_model = None
196106 self ._tree = None
@@ -202,13 +112,8 @@ def _fit(self, X, y):
202112 )
203113
204114 _ , xp , _ = _get_sycl_namespace (X )
205- use_raw_input = _get_config ().get ("use_raw_input" , False ) is True
206115 if y is not None or self .requires_y :
207116 shape = getattr (y , "shape" , None )
208- if not use_raw_input :
209- X , y = super ()._validate_data (
210- X , y , dtype = [np .float64 , np .float32 ], accept_sparse = "csr"
211- )
212117 self ._shape = shape if shape is not None else y .shape
213118
214119 if _is_classifier (self ):
@@ -218,7 +123,6 @@ def _fit(self, X, y):
218123 else :
219124 self .outputs_2d_ = True
220125
221- _check_classification_targets (y )
222126 self .classes_ = []
223127 self ._y = np .empty (y .shape , dtype = int )
224128 for k in range (self ._y .shape [1 ]):
@@ -228,36 +132,19 @@ def _fit(self, X, y):
228132 if not self .outputs_2d_ :
229133 self .classes_ = self .classes_ [0 ]
230134 self ._y = self ._y .ravel ()
231-
232- self ._validate_n_classes ()
233135 else :
234136 self ._y = y
235- elif not use_raw_input :
236- X , _ = super ()._validate_data (X , dtype = [np .float64 , np .float32 ])
237137
238138 self .n_samples_fit_ = X .shape [0 ]
239139 self .n_features_in_ = X .shape [1 ]
240140 self ._fit_X = X
241141
242- if self .n_neighbors is not None :
243- if self .n_neighbors <= 0 :
244- raise ValueError ("Expected n_neighbors > 0. Got %d" % self .n_neighbors )
245- if not isinstance (self .n_neighbors , Integral ):
246- raise TypeError (
247- "n_neighbors does not take %s value, "
248- "enter integer value" % type (self .n_neighbors )
249- )
250-
251- self ._fit_method = super ()._parse_auto_method (
252- self .algorithm , self .n_samples_fit_ , self .n_features_in_
253- )
254-
255142 _fit_y = None
256143 queue = QM .get_global_queue ()
257144 gpu_device = queue is not None and queue .sycl_device .is_gpu
258145
259146 if _is_classifier (self ) or (_is_regressor (self ) and gpu_device ):
260- _fit_y = self . _validate_targets ( self . _y , X .dtype ).reshape ((- 1 , 1 ))
147+ _fit_y = y . astype ( X .dtype ).reshape ((- 1 , 1 )) if y is not None else None
261148 result = self ._onedal_fit (X , _fit_y )
262149
263150 if y is not None and _is_regressor (self ):
@@ -269,33 +156,13 @@ def _fit(self, X, y):
269156 return result
270157
271158 def _kneighbors (self , X = None , n_neighbors = None , return_distance = True ):
272- n_features = getattr (self , "n_features_in_" , None )
273- shape = getattr (X , "shape" , None )
274- if n_features and shape and len (shape ) > 1 and shape [1 ] != n_features :
275- raise ValueError (
276- (
277- f"X has { X .shape [1 ]} features, "
278- f"but kneighbors is expecting "
279- f"{ n_features } features as input"
280- )
281- )
282-
283159 _check_is_fitted (self )
284160
285161 if n_neighbors is None :
286162 n_neighbors = self .n_neighbors
287- elif n_neighbors <= 0 :
288- raise ValueError ("Expected n_neighbors > 0. Got %d" % n_neighbors )
289- else :
290- if not isinstance (n_neighbors , Integral ):
291- raise TypeError (
292- "n_neighbors does not take %s value, "
293- "enter integer value" % type (n_neighbors )
294- )
295163
296164 if X is not None :
297165 query_is_train = False
298- X = _check_array (X , accept_sparse = "csr" , dtype = [np .float64 , np .float32 ])
299166 else :
300167 query_is_train = True
301168 X = self ._fit_X
@@ -304,24 +171,12 @@ def _kneighbors(self, X=None, n_neighbors=None, return_distance=True):
304171 n_neighbors += 1
305172
306173 n_samples_fit = self .n_samples_fit_
307- if n_neighbors > n_samples_fit :
308- if query_is_train :
309- n_neighbors -= 1 # ok to modify inplace because an error is raised
310- inequality_str = "n_neighbors < n_samples_fit"
311- else :
312- inequality_str = "n_neighbors <= n_samples_fit"
313- raise ValueError (
314- f"Expected { inequality_str } , but "
315- f"n_neighbors = { n_neighbors } , n_samples_fit = { n_samples_fit } , "
316- f"n_samples = { X .shape [0 ]} " # include n_samples for common tests
317- )
318174
319175 chunked_results = None
320- method = self ._parse_auto_method (
321- self ._fit_method , self .n_samples_fit_ , n_features
322- )
176+ # Use the fit method determined at sklearnex level
177+ method = getattr (self , "_fit_method" , "brute" )
323178
324- params = super () ._get_onedal_params (X , n_neighbors = n_neighbors )
179+ params = self ._get_onedal_params (X , n_neighbors = n_neighbors )
325180 prediction_results = self ._onedal_predict (self ._onedal_model , X , params )
326181 distances = from_table (prediction_results .distances )
327182 indices = from_table (prediction_results .indices )
@@ -429,30 +284,9 @@ def fit(self, X, y, queue=None):
429284
430285 @supports_queue
431286 def predict (self , X , queue = None ):
432- use_raw_input = _get_config ().get ("use_raw_input" , False ) is True
433- if not use_raw_input :
434- X = _check_array (X , accept_sparse = "csr" , dtype = [np .float64 , np .float32 ])
435287 onedal_model = getattr (self , "_onedal_model" , None )
436- n_features = getattr (self , "n_features_in_" , None )
437- n_samples_fit_ = getattr (self , "n_samples_fit_" , None )
438- shape = getattr (X , "shape" , None )
439- if n_features and shape and len (shape ) > 1 and shape [1 ] != n_features :
440- raise ValueError (
441- (
442- f"X has { X .shape [1 ]} features, "
443- f"but KNNClassifier is expecting "
444- f"{ n_features } features as input"
445- )
446- )
447-
448288 _check_is_fitted (self )
449289
450- self ._fit_method = self ._parse_auto_method (
451- self .algorithm , n_samples_fit_ , n_features
452- )
453-
454- self ._validate_n_classes ()
455-
456290 params = self ._get_onedal_params (X )
457291 prediction_result = self ._onedal_predict (onedal_model , X , params )
458292 responses = from_table (prediction_result .responses )
@@ -472,9 +306,8 @@ def predict_proba(self, X, queue=None):
472306
473307 n_queries = _num_samples (X )
474308
475- weights = self ._get_weights (neigh_dist , self .weights )
476- if weights is None :
477- weights = np .ones_like (neigh_ind )
309+ # Use uniform weights for now - weights calculation should be done at sklearnex level
310+ weights = np .ones_like (neigh_ind )
478311
479312 all_rows = np .arange (n_queries )
480313 probabilities = []
@@ -575,28 +408,9 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None)
575408 return self ._kneighbors (X , n_neighbors , return_distance )
576409
577410 def _predict_gpu (self , X ):
578- use_raw_input = _get_config ().get ("use_raw_input" , False ) is True
579- if not use_raw_input :
580- X = _check_array (X , accept_sparse = "csr" , dtype = [np .float64 , np .float32 ])
581411 onedal_model = getattr (self , "_onedal_model" , None )
582- n_features = getattr (self , "n_features_in_" , None )
583- n_samples_fit_ = getattr (self , "n_samples_fit_" , None )
584- shape = getattr (X , "shape" , None )
585- if n_features and shape and len (shape ) > 1 and shape [1 ] != n_features :
586- raise ValueError (
587- (
588- f"X has { X .shape [1 ]} features, "
589- f"but KNNClassifier is expecting "
590- f"{ n_features } features as input"
591- )
592- )
593-
594412 _check_is_fitted (self )
595413
596- self ._fit_method = self ._parse_auto_method (
597- self .algorithm , n_samples_fit_ , n_features
598- )
599-
600414 params = self ._get_onedal_params (X )
601415
602416 prediction_result = self ._onedal_predict (onedal_model , X , params )
@@ -608,7 +422,8 @@ def _predict_gpu(self, X):
608422 def _predict_skl (self , X ):
609423 neigh_dist , neigh_ind = self .kneighbors (X )
610424
611- weights = self ._get_weights (neigh_dist , self .weights )
425+ # Use uniform weights for now - weights calculation should be done at sklearnex level
426+ weights = None
612427
613428 _y = self ._y
614429 if _y .ndim == 1 :
0 commit comments