uxlfoundation
diff --git a/‎onedal/neighbors/neighbors.py‎
Lines changed: 23 additions & 204 deletions b/‎onedal/neighbors/neighbors.py‎
Lines changed: 23 additions & 204 deletions
diff --git a/‎sklearnex/neighbors/_lof.py‎
Lines changed: 13 additions & 1 deletion b/‎sklearnex/neighbors/_lof.py‎
Lines changed: 13 additions & 1 deletion
@@ -15,7 +15,6 @@
 # ==============================================================================
 
 from abc import ABCMeta, abstractmethod
-from numbers import Integral
 
 import numpy as np
 
@@ -28,14 +27,7 @@
 from ..common._mixin import ClassifierMixin, RegressorMixin
 from ..datatypes import from_table, to_table
 from ..utils._array_api import _get_sycl_namespace
-from ..utils.validation import (
-    _check_array,
-    _check_classification_targets,
-    _check_n_features,
-    _check_X_y,
-    _column_or_1d,
-    _num_samples,
-)
+from ..utils.validation import _num_samples
 
 
 class NeighborsCommonBase(metaclass=ABCMeta):
@@ -50,23 +42,6 @@ def __init__(self):
         self.effective_metric_params_ = None
         self._onedal_model = None
 
-    def _parse_auto_method(self, method, n_samples, n_features):
-        result_method = method
-
-        if method in ["auto", "ball_tree"]:
-            condition = (
-                self.n_neighbors is not None and self.n_neighbors >= n_samples // 2
-            )
-            if self.metric == "precomputed" or n_features > 15 or condition:
-                result_method = "brute"
-            else:
-                if self.metric == "euclidean":
-                    result_method = "kd_tree"
-                else:
-                    result_method = "brute"
-
-        return result_method
-
     @abstractmethod
     def train(self, *args, **kwargs): ...
 
@@ -76,66 +51,6 @@ def infer(self, *args, **kwargs): ...
     @abstractmethod
     def _onedal_fit(self, X, y): ...
 
-    def _validate_data(
-        self, X, y=None, reset=True, validate_separately=None, **check_params
-    ):
-        if y is None:
-            if self.requires_y:
-                raise ValueError(
-                    f"This {self.__class__.__name__} estimator "
-                    f"requires y to be passed, but the target y is None."
-                )
-            X = _check_array(X, **check_params)
-            out = X, y
-        else:
-            if validate_separately:
-                # We need this because some estimators validate X and y
-                # separately, and in general, separately calling _check_array()
-                # on X and y isn't equivalent to just calling _check_X_y()
-                # :(
-                check_X_params, check_y_params = validate_separately
-                X = _check_array(X, **check_X_params)
-                y = _check_array(y, **check_y_params)
-            else:
-                X, y = _check_X_y(X, y, **check_params)
-            out = X, y
-
-        if check_params.get("ensure_2d", True):
-            _check_n_features(self, X, reset=reset)
-
-        return out
-
-    def _get_weights(self, dist, weights):
-        if weights in (None, "uniform"):
-            return None
-        if weights == "distance":
-            # if user attempts to classify a point that was zero distance from one
-            # or more training points, those training points are weighted as 1.0
-            # and the other points as 0.0
-            if dist.dtype is np.dtype(object):
-                for point_dist_i, point_dist in enumerate(dist):
-                    # check if point_dist is iterable
-                    # (ex: RadiusNeighborClassifier.predict may set an element of
-                    # dist to 1e-6 to represent an 'outlier')
-                    if hasattr(point_dist, "__contains__") and 0.0 in point_dist:
-                        dist[point_dist_i] = point_dist == 0.0
-                    else:
-                        dist[point_dist_i] = 1.0 / point_dist
-            else:
-                with np.errstate(divide="ignore"):
-                    dist = 1.0 / dist
-                inf_mask = np.isinf(dist)
-                inf_row = np.any(inf_mask, axis=1)
-                dist[inf_row] = inf_mask[inf_row]
-            return dist
-        elif callable(weights):
-            return weights(dist)
-        else:
-            raise ValueError(
-                "weights not recognized: should be 'uniform', "
-                "'distance', or a callable function"
-            )
-
     def _get_onedal_params(self, X, y=None, n_neighbors=None):
         class_count = 0 if self.classes_ is None else len(self.classes_)
         weights = getattr(self, "weights", "uniform")
@@ -145,8 +60,18 @@ def _get_onedal_params(self, X, y=None, n_neighbors=None):
             p = 2.0
         else:
             p = self.p
+
+        # Handle different input types for dtype
+        try:
+            fptype = X.dtype
+        except AttributeError:
+            # For pandas DataFrames or other types without dtype attribute
+            import numpy as np
+
+            fptype = np.float64
+
         return {
-            "fptype": X.dtype,
+            "fptype": fptype,
             "vote_weights": "uniform" if weights == "uniform" else "distance",
             "method": self._fit_method,
             "radius": self.radius,
@@ -176,21 +101,6 @@ def __init__(
         self.p = p
         self.metric_params = metric_params
 
-    def _validate_targets(self, y, dtype):
-        arr = _column_or_1d(y, warn=True)
-
-        try:
-            return arr.astype(dtype, copy=False)
-        except ValueError:
-            return arr
-
-    def _validate_n_classes(self):
-        length = 0 if self.classes_ is None else len(self.classes_)
-        if length < 2:
-            raise ValueError(
-                f"The number of classes has to be greater than one; got {length}"
-            )
-
     def _fit(self, X, y):
         self._onedal_model = None
         self._tree = None
@@ -202,13 +112,8 @@ def _fit(self, X, y):
         )
 
         _, xp, _ = _get_sycl_namespace(X)
-        use_raw_input = _get_config().get("use_raw_input", False) is True
         if y is not None or self.requires_y:
             shape = getattr(y, "shape", None)
-            if not use_raw_input:
-                X, y = super()._validate_data(
-                    X, y, dtype=[np.float64, np.float32], accept_sparse="csr"
-                )
             self._shape = shape if shape is not None else y.shape
 
             if _is_classifier(self):
@@ -218,7 +123,6 @@ def _fit(self, X, y):
                 else:
                     self.outputs_2d_ = True
 
-                _check_classification_targets(y)
                 self.classes_ = []
                 self._y = np.empty(y.shape, dtype=int)
                 for k in range(self._y.shape[1]):
@@ -228,36 +132,19 @@ def _fit(self, X, y):
                 if not self.outputs_2d_:
                     self.classes_ = self.classes_[0]
                     self._y = self._y.ravel()
-
-                self._validate_n_classes()
             else:
                 self._y = y
-        elif not use_raw_input:
-            X, _ = super()._validate_data(X, dtype=[np.float64, np.float32])
 
         self.n_samples_fit_ = X.shape[0]
         self.n_features_in_ = X.shape[1]
         self._fit_X = X
 
-        if self.n_neighbors is not None:
-            if self.n_neighbors <= 0:
-                raise ValueError("Expected n_neighbors > 0. Got %d" % self.n_neighbors)
-            if not isinstance(self.n_neighbors, Integral):
-                raise TypeError(
-                    "n_neighbors does not take %s value, "
-                    "enter integer value" % type(self.n_neighbors)
-                )
-
-        self._fit_method = super()._parse_auto_method(
-            self.algorithm, self.n_samples_fit_, self.n_features_in_
-        )
-
         _fit_y = None
         queue = QM.get_global_queue()
         gpu_device = queue is not None and queue.sycl_device.is_gpu
 
         if _is_classifier(self) or (_is_regressor(self) and gpu_device):
-            _fit_y = self._validate_targets(self._y, X.dtype).reshape((-1, 1))
+            _fit_y = y.astype(X.dtype).reshape((-1, 1)) if y is not None else None
         result = self._onedal_fit(X, _fit_y)
 
         if y is not None and _is_regressor(self):
@@ -269,38 +156,22 @@ def _fit(self, X, y):
         return result
 
     def _kneighbors(self, X=None, n_neighbors=None, return_distance=True):
-        n_features = getattr(self, "n_features_in_", None)
-        shape = getattr(X, "shape", None)
-        if n_features and shape and len(shape) > 1 and shape[1] != n_features:
-            raise ValueError(
-                (
-                    f"X has {X.shape[1]} features, "
-                    f"but kneighbors is expecting "
-                    f"{n_features} features as input"
-                )
-            )
-
         _check_is_fitted(self)
 
         if n_neighbors is None:
             n_neighbors = self.n_neighbors
-        elif n_neighbors <= 0:
-            raise ValueError("Expected n_neighbors > 0. Got %d" % n_neighbors)
-        else:
-            if not isinstance(n_neighbors, Integral):
-                raise TypeError(
-                    "n_neighbors does not take %s value, "
-                    "enter integer value" % type(n_neighbors)
-                )
 
         if X is not None:
             query_is_train = False
+<<<<<<< HEAD
 <<<<<<< HEAD
             if not use_raw_input:
                 X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])
 =======
             X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])
 >>>>>>> e003b37f (fix: try it again)
+=======
+>>>>>>> 8cd6f2b2 (fix: first round of refactor move preprocssing function to sklearnex)
         else:
             query_is_train = True
             X = self._fit_X
@@ -309,24 +180,12 @@ def _kneighbors(self, X=None, n_neighbors=None, return_distance=True):
             n_neighbors += 1
 
         n_samples_fit = self.n_samples_fit_
-        if n_neighbors > n_samples_fit:
-            if query_is_train:
-                n_neighbors -= 1  # ok to modify inplace because an error is raised
-                inequality_str = "n_neighbors < n_samples_fit"
-            else:
-                inequality_str = "n_neighbors <= n_samples_fit"
-            raise ValueError(
-                f"Expected {inequality_str}, but "
-                f"n_neighbors = {n_neighbors}, n_samples_fit = {n_samples_fit}, "
-                f"n_samples = {X.shape[0]}"  # include n_samples for common tests
-            )
 
         chunked_results = None
-        method = self._parse_auto_method(
-            self._fit_method, self.n_samples_fit_, n_features
-        )
+        # Use the fit method determined at sklearnex level
+        method = getattr(self, "_fit_method", "brute")
 
-        params = super()._get_onedal_params(X, n_neighbors=n_neighbors)
+        params = self._get_onedal_params(X, n_neighbors=n_neighbors)
         prediction_results = self._onedal_predict(self._onedal_model, X, params)
         distances = from_table(prediction_results.distances)
         indices = from_table(prediction_results.indices)
@@ -434,30 +293,9 @@ def fit(self, X, y, queue=None):
 
     @supports_queue
     def predict(self, X, queue=None):
-        use_raw_input = _get_config().get("use_raw_input", False) is True
-        if not use_raw_input:
-            X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])
         onedal_model = getattr(self, "_onedal_model", None)
-        n_features = getattr(self, "n_features_in_", None)
-        n_samples_fit_ = getattr(self, "n_samples_fit_", None)
-        shape = getattr(X, "shape", None)
-        if n_features and shape and len(shape) > 1 and shape[1] != n_features:
-            raise ValueError(
-                (
-                    f"X has {X.shape[1]} features, "
-                    f"but KNNClassifier is expecting "
-                    f"{n_features} features as input"
-                )
-            )
-
         _check_is_fitted(self)
 
-        self._fit_method = self._parse_auto_method(
-            self.algorithm, n_samples_fit_, n_features
-        )
-
-        self._validate_n_classes()
-
         params = self._get_onedal_params(X)
         prediction_result = self._onedal_predict(onedal_model, X, params)
         responses = from_table(prediction_result.responses)
@@ -477,9 +315,8 @@ def predict_proba(self, X, queue=None):
 
         n_queries = _num_samples(X)
 
-        weights = self._get_weights(neigh_dist, self.weights)
-        if weights is None:
-            weights = np.ones_like(neigh_ind)
+        # Use uniform weights for now - weights calculation should be done at sklearnex level
+        weights = np.ones_like(neigh_ind)
 
         all_rows = np.arange(n_queries)
         probabilities = []
@@ -580,28 +417,9 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None)
         return self._kneighbors(X, n_neighbors, return_distance)
 
     def _predict_gpu(self, X):
-        use_raw_input = _get_config().get("use_raw_input", False) is True
-        if not use_raw_input:
-            X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])
         onedal_model = getattr(self, "_onedal_model", None)
-        n_features = getattr(self, "n_features_in_", None)
-        n_samples_fit_ = getattr(self, "n_samples_fit_", None)
-        shape = getattr(X, "shape", None)
-        if n_features and shape and len(shape) > 1 and shape[1] != n_features:
-            raise ValueError(
-                (
-                    f"X has {X.shape[1]} features, "
-                    f"but KNNClassifier is expecting "
-                    f"{n_features} features as input"
-                )
-            )
-
         _check_is_fitted(self)
 
-        self._fit_method = self._parse_auto_method(
-            self.algorithm, n_samples_fit_, n_features
-        )
-
         params = self._get_onedal_params(X)
 
         prediction_result = self._onedal_predict(onedal_model, X, params)
@@ -613,7 +431,8 @@ def _predict_gpu(self, X):
     def _predict_skl(self, X):
         neigh_dist, neigh_ind = self.kneighbors(X)
 
-        weights = self._get_weights(neigh_dist, self.weights)
+        # Use uniform weights for now - weights calculation should be done at sklearnex level
+        weights = None
 
         _y = self._y
         if _y.ndim == 1:
 
@@ -152,6 +152,18 @@ def _kneighbors(self, X=None, n_neighbors=None, return_distance=True):
         check_is_fitted(self)
         if X is not None:
             check_feature_names(self, X, reset=False)
+            # Perform preprocessing at sklearnex level
+            import numpy as np
+
+            from onedal.utils.validation import _check_array
+
+            X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])
+            self._validate_feature_count(X, "kneighbors")
+
+        # Validate n_neighbors
+        if n_neighbors is not None:
+            self._validate_n_neighbors(n_neighbors)
+
         return dispatch(
             self,
             "kneighbors",
@@ -186,4 +198,4 @@ def score_samples(self, X):
         return -np.mean(lrd_ratios_array, axis=1)
 
     fit.__doc__ = _sklearn_LocalOutlierFactor.fit.__doc__
-    kneighbors.__doc__ = _sklearn_LocalOutlierFactor.kneighbors.__doc__
+    kneighbors.__doc__ = _sklearn_LocalOutlierFactor.kneighbors.__doc__