FIX: Fix error when calling forest classifiers on single-class data (#2723)

david-cortes-intel · web-flow · commit 91885a302e75 · 2025-10-17T12:10:31.000+02:00
* fix random forest on single-class data
* fix tests
* fix wrong method
* missing import
* rename test
* remove unreachable branch
* further simplifications
* use 'get_namespace'
* missing line
* remove unused import
* use more specialized function when possible
* Revert "use more specialized function when possible"
  This reverts commit 81cd037.
diff --git a/doc/sources/algorithms.rst b/doc/sources/algorithms.rst
@@ -51,14 +51,14 @@ Classification
        - ``warm_start`` = `True`
        - ``ccp_alpha`` != `0`
        - ``criterion`` != `'gini'`
-     - Multi-output and sparse data are not supported
+     - Multi-output and sparse data are not supported. Number of classes must be at least 2.
    * - :obj:`sklearn.ensemble.ExtraTreesClassifier`
      - All parameters are supported except:
 
        - ``warm_start`` = `True`
        - ``ccp_alpha`` != `0`
        - ``criterion`` != `'gini'`
-     - Multi-output and sparse data are not supported
+     - Multi-output and sparse data are not supported. Number of classes must be at least 2.
    * - :obj:`sklearn.neighbors.KNeighborsClassifier`
      -
        - For ``algorithm`` == `'kd_tree'`:
@@ -293,7 +293,7 @@ Classification
        - ``criterion`` != `'gini'`
        - ``oob_score`` = `True`
        - ``sample_weight`` != `None`
-     - Multi-output and sparse data are not supported
+     - Multi-output and sparse data are not supported. Number of classes must be at least 2.
    * - :obj:`sklearn.ensemble.ExtraTreesClassifier`
      - All parameters are supported except:
 
@@ -302,7 +302,7 @@ Classification
        - ``criterion`` != `'gini'`
        - ``oob_score`` = `True`
        - ``sample_weight`` != `None`
-     - Multi-output and sparse data are not supported
+     - Multi-output and sparse data are not supported. Number of classes must be at least 2.
    * - :obj:`sklearn.neighbors.KNeighborsClassifier`
      - All parameters are supported except:
 
@@ -488,7 +488,7 @@ Classification
        - ``criterion`` != `'gini'`
        - ``oob_score`` = `True`
        - ``sample_weight`` != `None`
-     - Multi-output and sparse data are not supported
+     - Multi-output and sparse data are not supported. Number of classes must be at least 2.
    * - :obj:`sklearn.ensemble.ExtraTreesClassifier`
      - All parameters are supported except:
 
@@ -497,7 +497,7 @@ Classification
        - ``criterion`` != `'gini'`
        - ``oob_score`` = `True`
        - ``sample_weight`` != `None`
-     - Multi-output and sparse data are not supported
+     - Multi-output and sparse data are not supported. Number of classes must be at least 2.
    * - :obj:`sklearn.neighbors.KNeighborsClassifier`
      - All parameters are supported except:
 
diff --git a/onedal/tests/utils/_dataframes_support.py b/onedal/tests/utils/_dataframes_support.py
@@ -134,7 +134,7 @@ def _as_numpy(obj, *args, **kwargs):
     if dpctl_available and isinstance(obj, dpt.usm_ndarray):
         return dpt.to_numpy(obj, *args, **kwargs)
     if isinstance(obj, pd.DataFrame) or isinstance(obj, pd.Series):
-        return obj.to_array(*args, **kwargs)
+        return obj.to_numpy(*args, **kwargs)
     if sp.issparse(obj):
         return obj.toarray(*args, **kwargs)
     return np.asarray(obj, *args, **kwargs)
diff --git a/sklearnex/ensemble/_forest.py b/sklearnex/ensemble/_forest.py
@@ -592,6 +592,19 @@ def _onedal_fit_ready(self, patching_status, X, y, sample_weight):
             )
             # TODO: Fix to support integers as input
 
+            if self.n_outputs_ == 1:
+                xp, is_array_api_compliant = get_namespace(y)
+                sety = xp.unique_values(y) if is_array_api_compliant else np.unique(y)
+                num_classes = sety.shape[0]
+                patching_status.and_conditions(
+                    [
+                        (
+                            num_classes >= 2,
+                            "Number of classes must be at least 2.",
+                        ),
+                    ]
+                )
+
             _get_n_samples_bootstrap(n_samples=X.shape[0], max_samples=self.max_samples)
 
             if not self.bootstrap and self.max_samples is not None:
diff --git a/sklearnex/ensemble/tests/test_forest.py b/sklearnex/ensemble/tests/test_forest.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 # ===============================================================================
 
+import numpy as np
+import pandas as pd
 import pytest
 from numpy.testing import assert_allclose
 from sklearn.datasets import make_classification, make_regression
@@ -153,3 +155,42 @@ def test_sklearnex_import_et_regression(dataframe, queue):
     # Check that the trees aren't just empty nodes predicting the mean
     for estimator in rf.estimators_:
         assert estimator.tree_.children_left.shape[0] > 1
+
+
+@pytest.mark.allow_sklearn_fallback
+@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues())
+def test_classifiers_work_on_single_class(dataframe, queue):
+    from sklearnex.ensemble import ExtraTreesClassifier, RandomForestClassifier
+
+    rng = np.random.default_rng(seed=123)
+    X = rng.standard_normal(size=(20, 10))
+    y = np.zeros(X.shape[0])
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
+
+    np.testing.assert_array_equal(
+        _as_numpy(RandomForestClassifier(n_estimators=1).fit(X, y).predict(X)),
+        _as_numpy(y),
+    )
+    np.testing.assert_array_equal(
+        _as_numpy(ExtraTreesClassifier(n_estimators=1).fit(X, y).predict(X)),
+        _as_numpy(y),
+    )
+
+
+@pytest.mark.allow_sklearn_fallback
+def test_classifiers_work_on_single_class_non_numeric():
+    from sklearnex.ensemble import ExtraTreesClassifier, RandomForestClassifier
+
+    rng = np.random.default_rng(seed=123)
+    X = rng.standard_normal(size=(20, 10))
+    y = pd.Series(np.repeat("qwerty", X.shape[0]))
+
+    np.testing.assert_array_equal(
+        RandomForestClassifier(n_estimators=1).fit(X, y).predict(X),
+        y,
+    )
+    np.testing.assert_array_equal(
+        ExtraTreesClassifier(n_estimators=1).fit(X, y).predict(X),
+        y,
+    )
diff --git a/sklearnex/utils/class_weight.py b/sklearnex/utils/class_weight.py
@@ -44,9 +44,6 @@ def _compute_class_weight(class_weight, *, classes, y, sample_weight=None):
         return compute_class_weight(class_weight, classes, y, sample_weight=sample_weight)
 
     sety = xp.unique_values(y)
-    setclasses = xp.unique_values(classes)
-    if sety.shape[0] != xp.unique_values(xp.concat((sety, setclasses))).shape[0]:
-        raise ValueError("classes should include all valid labels that can be in y")
     if class_weight is None or len(class_weight) == 0:
         # uniform class weights
         weight = xp.ones((classes.shape[0],), dtype=xp.float64, device=classes.device)