
Commit bb1b9a6

other updates
1 parent 1fcea5f commit bb1b9a6

File tree

imblearn/pipeline.py
imblearn/tests/test_pipeline.py
setup.cfg

3 files changed: 147 additions & 27 deletions

imblearn/pipeline.py

Lines changed: 90 additions & 25 deletions
@@ -58,6 +58,9 @@ class Pipeline(pipeline.Pipeline):
         inspect estimators within the pipeline. Caching the
         transformers is advantageous when fitting is time consuming.
 
+    verbose : boolean, optional (default=False)
+        If True, the time elapsed while fitting each step will be printed as it
+        is completed.
+
     Attributes
     ----------
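For context, a minimal sketch (not part of this commit) of the new flag in use;
the estimator choices are illustrative and the dot padding and timings in the
printed output will vary:

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from imblearn.pipeline import Pipeline
    from imblearn.under_sampling import RandomUnderSampler

    X, y = make_classification(weights=[0.9, 0.1], random_state=0)
    pipe = Pipeline([('sampler', RandomUnderSampler(random_state=0)),
                     ('clf', LogisticRegression(solver='lbfgs'))],
                    verbose=True)
    pipe.fit(X, y)
    # [Pipeline] ........... (step 1 of 2) Processing sampler, total=   0.0s
    # [Pipeline] ............... (step 2 of 2) Processing clf, total=   0.0s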
@@ -188,7 +191,14 @@ def _fit(self, X, y=None, **fit_params):
                     "=sample_weight)`.".format(pname))
             step, param = pname.split("__", 1)
             fit_params_steps[step][param] = pval
-        for step_idx, name, transformer in self._iter(with_final=False):
+        for (step_idx,
+             name,
+             transformer) in self._iter(with_final=False,
+                                        filter_passthrough=False):
+            if (transformer is None or transformer == 'passthrough'):
+                with _print_elapsed_time('Pipeline',
+                                         self._log_message(step_idx)):
+                    continue
             if hasattr(memory, "location"):
                 # joblib >= 0.12
                 if memory.location is None:
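The timing wrapper used throughout is sklearn's private helper
_print_elapsed_time (presumably imported from sklearn.utils). As a rough sketch
of its contract only, not the actual implementation: a message of None, which is
what _log_message returns when verbose=False, silences it entirely.

    import time
    from contextlib import contextmanager

    @contextmanager
    def _print_elapsed_time(source, message=None):
        # message is None when verbose=False: time nothing, print nothing.
        if message is None:
            yield
        else:
            start = time.time()
            yield
            print("[%s] %s, total=%5.1fs"
                  % (source, message, time.time() - start))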
@@ -210,11 +220,17 @@ def _fit(self, X, y=None, **fit_params):
                 cloned_transformer, "fit_transform"
             ):
                 X, fitted_transformer = fit_transform_one_cached(
-                    cloned_transformer, None, X, y, **fit_params_steps[name]
+                    cloned_transformer, X, y, None,
+                    message_clsname='Pipeline',
+                    message=self._log_message(step_idx),
+                    **fit_params_steps[name]
                 )
             elif hasattr(cloned_transformer, "fit_resample"):
                 X, y, fitted_transformer = fit_resample_one_cached(
-                    cloned_transformer, X, y, **fit_params_steps[name]
+                    cloned_transformer, X, y,
+                    message_clsname='Pipeline',
+                    message=self._log_message(step_idx),
+                    **fit_params_steps[name]
                 )
             # Replace the transformer of the step with the fitted
             # transformer. This is necessary when loading the transformer
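A side effect worth noting, assuming joblib's documented caching behavior: the
timing call now lives inside the memoized _fit_transform_one /
_fit_resample_one, so when a step's fit is served from the cache the wrapped
body never runs and no timing line is printed for that step. A hypothetical
setup to observe this:

    from tempfile import mkdtemp
    from joblib import Memory
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import LogisticRegression
    from imblearn.pipeline import Pipeline

    pipe = Pipeline([('scaler', StandardScaler()),
                     ('clf', LogisticRegression(solver='lbfgs'))],
                    memory=Memory(mkdtemp(), verbose=0), verbose=True)
    X, y = [[0.], [1.], [2.], [3.]], [0, 0, 1, 1]
    pipe.fit(X, y)   # prints a timing line for both steps
    pipe.fit(X, y)   # scaler's line may be absent: its fit comes from the cache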
@@ -253,8 +269,10 @@ def fit(self, X, y=None, **fit_params):
 
         """
         Xt, yt, fit_params = self._fit(X, y, **fit_params)
-        if self._final_estimator != "passthrough":
-            self._final_estimator.fit(Xt, yt, **fit_params)
+        with _print_elapsed_time('Pipeline',
+                                 self._log_message(len(self.steps) - 1)):
+            if self._final_estimator != "passthrough":
+                self._final_estimator.fit(Xt, yt, **fit_params)
         return self
 
     def fit_transform(self, X, y=None, **fit_params):
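fit, fit_transform, fit_resample, and fit_predict all time the final step via
self._log_message(len(self.steps) - 1). _log_message is not added in this diff;
it comes from the sklearn.pipeline.Pipeline base class. Roughly (a simplified
sketch, not the exact sklearn source), it builds the "(step i of n) Processing
name" fragment, or returns None when verbose is off:

    def _log_message(self, step_idx):
        # None when not verbose, which keeps _print_elapsed_time silent.
        if not self.verbose:
            return None
        name, _ = self.steps[step_idx]
        return '(step %d of %d) Processing %s' % (step_idx + 1,
                                                  len(self.steps), name)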
@@ -287,12 +305,14 @@ def fit_transform(self, X, y=None, **fit_params):
         """
         last_step = self._final_estimator
         Xt, yt, fit_params = self._fit(X, y, **fit_params)
-        if last_step == "passthrough":
-            return Xt
-        elif hasattr(last_step, "fit_transform"):
-            return last_step.fit_transform(Xt, yt, **fit_params)
-        else:
-            return last_step.fit(Xt, yt, **fit_params).transform(Xt)
+        with _print_elapsed_time('Pipeline',
+                                 self._log_message(len(self.steps) - 1)):
+            if last_step == "passthrough":
+                return Xt
+            elif hasattr(last_step, "fit_transform"):
+                return last_step.fit_transform(Xt, yt, **fit_params)
+            else:
+                return last_step.fit(Xt, yt, **fit_params).transform(Xt)
 
     def fit_resample(self, X, y=None, **fit_params):
         """Fit the model and sample with the final estimator
@@ -327,10 +347,12 @@
         """
         last_step = self._final_estimator
         Xt, yt, fit_params = self._fit(X, y, **fit_params)
-        if last_step == "passthrough":
-            return Xt
-        elif hasattr(last_step, "fit_resample"):
-            return last_step.fit_resample(Xt, yt, **fit_params)
+        with _print_elapsed_time('Pipeline',
+                                 self._log_message(len(self.steps) - 1)):
+            if last_step == "passthrough":
+                return Xt
+            elif hasattr(last_step, "fit_resample"):
+                return last_step.fit_resample(Xt, yt, **fit_params)
 
     @if_delegate_has_method(delegate="_final_estimator")
     def predict(self, X, **predict_params):
@@ -392,7 +414,10 @@ def fit_predict(self, X, y=None, **fit_params):
         y_pred : array-like
         """
         Xt, yt, fit_params = self._fit(X, y, **fit_params)
-        return self.steps[-1][-1].fit_predict(Xt, yt, **fit_params)
+        with _print_elapsed_time('Pipeline',
+                                 self._log_message(len(self.steps) - 1)):
+            y_pred = self.steps[-1][-1].fit_predict(Xt, yt, **fit_params)
+        return y_pred
 
     @if_delegate_has_method(delegate="_final_estimator")
     def predict_proba(self, X):
@@ -583,22 +608,55 @@ def score(self, X, y=None, sample_weight=None):
             score_params["sample_weight"] = sample_weight
         return self.steps[-1][-1].score(Xt, y, **score_params)
 
+    @if_delegate_has_method(delegate='_final_estimator')
+    def score_samples(self, X):
+        """Apply transforms, and score_samples of the final estimator.
+        Parameters
+        ----------
+        X : iterable
+            Data to predict on. Must fulfill input requirements of first step
+            of the pipeline.
+        Returns
+        -------
+        y_score : ndarray, shape (n_samples,)
+        """
+        Xt = X
+        for _, _, transformer in self._iter(with_final=False):
+            if hasattr(transformer, "fit_resample"):
+                pass
+            else:
+                Xt = transformer.transform(Xt)
+        return self.steps[-1][-1].score_samples(Xt)
+
 
-def _fit_transform_one(transformer, weight, X, y, **fit_params):
-    if hasattr(transformer, "fit_transform"):
-        res = transformer.fit_transform(X, y, **fit_params)
-    else:
-        res = transformer.fit(X, y, **fit_params).transform(X)
+def _fit_transform_one(transformer,
+                       X,
+                       y,
+                       weight,
+                       message_clsname='',
+                       message=None,
+                       **fit_params):
+    with _print_elapsed_time(message_clsname, message):
+        if hasattr(transformer, "fit_transform"):
+            res = transformer.fit_transform(X, y, **fit_params)
+        else:
+            res = transformer.fit(X, y, **fit_params).transform(X)
     # if we have a weight for this transformer, multiply output
     if weight is None:
        return res, transformer
     return res * weight, transformer
 
 
-def _fit_resample_one(sampler, X, y, **fit_params):
-    X_res, y_res = sampler.fit_resample(X, y, **fit_params)
+def _fit_resample_one(sampler,
+                      X,
+                      y,
+                      message_clsname='',
+                      message=None,
+                      **fit_params):
+    with _print_elapsed_time(message_clsname, message):
+        X_res, y_res = sampler.fit_resample(X, y, **fit_params)
 
-    return X_res, y_res, sampler
+    return X_res, y_res, sampler
 
 
 def make_pipeline(*steps, **kwargs):
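A hypothetical use of the newly delegated score_samples, assuming a final
estimator that itself exposes score_samples (e.g. sklearn's IsolationForest);
as the loop above shows, samplers are skipped at scoring time and only
transformers are applied:

    import numpy as np
    from sklearn.ensemble import IsolationForest
    from sklearn.preprocessing import StandardScaler
    from imblearn.pipeline import Pipeline

    rng = np.random.RandomState(0)
    X = rng.randn(100, 2)
    pipe = Pipeline([('scaler', StandardScaler()),
                     ('iforest', IsolationForest(random_state=0))])
    pipe.fit(X)
    scores = pipe.score_samples(X)   # one anomaly score per sample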
@@ -622,6 +680,10 @@ def make_pipeline(*steps, **kwargs):
         inspect estimators within the pipeline. Caching the
         transformers is advantageous when fitting is time consuming.
 
+    verbose : boolean, optional (default=False)
+        If True, the time elapsed while fitting each step will be printed as it
+        is completed.
+
     Returns
     -------
     p : Pipeline
@@ -645,8 +707,11 @@ def make_pipeline(*steps, **kwargs):
              verbose=False)
     """
     memory = kwargs.pop("memory", None)
+    verbose = kwargs.pop('verbose', False)
     if kwargs:
         raise TypeError(
             'Unknown keyword arguments: "{}"'.format(list(kwargs.keys())[0])
         )
-    return Pipeline(pipeline._name_estimators(steps), memory=memory)
+    return Pipeline(
+        pipeline._name_estimators(steps), memory=memory, verbose=verbose
+    )
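Since make_pipeline consumes **kwargs by popping known names, the flag is
forwarded the same way as memory (a sketch; the sampler and estimator choices
are illustrative):

    from sklearn.linear_model import LogisticRegression
    from imblearn.pipeline import make_pipeline
    from imblearn.under_sampling import RandomUnderSampler

    pipe = make_pipeline(RandomUnderSampler(random_state=0),
                         LogisticRegression(solver='lbfgs'),
                         verbose=True)
    # Any other keyword still raises:
    # make_pipeline(LogisticRegression(), compose=True)  -> TypeError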

imblearn/tests/test_pipeline.py

Lines changed: 57 additions & 1 deletion
@@ -5,9 +5,11 @@
 # Christos Aridas
 # License: MIT
 
-from tempfile import mkdtemp
+import itertools
+import re
 import shutil
 import time
+from tempfile import mkdtemp
 
 import numpy as np
 import pytest
@@ -29,6 +31,7 @@
 from sklearn.feature_selection import SelectKBest, f_classif
 from sklearn.datasets import load_iris, make_classification
 from sklearn.preprocessing import StandardScaler
+from sklearn.pipeline import FeatureUnion
 
 from imblearn.pipeline import Pipeline, make_pipeline
 from imblearn.under_sampling import (
@@ -1268,3 +1271,56 @@ def test_pipeline_param_error():
     with pytest.raises(ValueError, match="Pipeline.fit does not accept "
                                          "the sample_weight parameter"):
         clf.fit([[0], [0]], [0, 1], sample_weight=[1, 1])
+
+
+parameter_grid_test_verbose = ((est, pattern, method) for
+                               (est, pattern), method in itertools.product(
+    [
+     (Pipeline([('transf', Transf()), ('clf', FitParamT())]),
+      r'\[Pipeline\].*\(step 1 of 2\) Processing transf.* total=.*\n'
+      r'\[Pipeline\].*\(step 2 of 2\) Processing clf.* total=.*\n$'),
+     (Pipeline([('transf', Transf()), ('noop', None),
+                ('clf', FitParamT())]),
+      r'\[Pipeline\].*\(step 1 of 3\) Processing transf.* total=.*\n'
+      r'\[Pipeline\].*\(step 2 of 3\) Processing noop.* total=.*\n'
+      r'\[Pipeline\].*\(step 3 of 3\) Processing clf.* total=.*\n$'),
+     (Pipeline([('transf', Transf()), ('noop', 'passthrough'),
+                ('clf', FitParamT())]),
+      r'\[Pipeline\].*\(step 1 of 3\) Processing transf.* total=.*\n'
+      r'\[Pipeline\].*\(step 2 of 3\) Processing noop.* total=.*\n'
+      r'\[Pipeline\].*\(step 3 of 3\) Processing clf.* total=.*\n$'),
+     (Pipeline([('transf', Transf()), ('clf', None)]),
+      r'\[Pipeline\].*\(step 1 of 2\) Processing transf.* total=.*\n'
+      r'\[Pipeline\].*\(step 2 of 2\) Processing clf.* total=.*\n$'),
+     (Pipeline([('transf', None), ('mult', Mult())]),
+      r'\[Pipeline\].*\(step 1 of 2\) Processing transf.* total=.*\n'
+      r'\[Pipeline\].*\(step 2 of 2\) Processing mult.* total=.*\n$'),
+     (Pipeline([('transf', 'passthrough'), ('mult', Mult())]),
+      r'\[Pipeline\].*\(step 1 of 2\) Processing transf.* total=.*\n'
+      r'\[Pipeline\].*\(step 2 of 2\) Processing mult.* total=.*\n$'),
+     (FeatureUnion([('mult1', Mult()), ('mult2', Mult())]),
+      r'\[FeatureUnion\].*\(step 1 of 2\) Processing mult1.* total=.*\n'
+      r'\[FeatureUnion\].*\(step 2 of 2\) Processing mult2.* total=.*\n$'),
+     (FeatureUnion([('mult1', 'drop'), ('mult2', Mult()), ('mult3', 'drop')]),
+      r'\[FeatureUnion\].*\(step 1 of 1\) Processing mult2.* total=.*\n$')
+    ], ['fit', 'fit_transform', 'fit_predict'])
+    if hasattr(est, method) and not (
+        method == 'fit_transform' and hasattr(est, 'steps') and
+        isinstance(est.steps[-1][1], FitParamT))
+)
+
+
+@pytest.mark.parametrize('est, pattern, method', parameter_grid_test_verbose)
+def test_verbose(est, method, pattern, capsys):
+    func = getattr(est, method)
+
+    X = [[1, 2, 3], [4, 5, 6]]
+    y = [[7], [8]]
+
+    est.set_params(verbose=False)
+    func(X, y)
+    assert not capsys.readouterr().out, 'Got output for verbose=False'
+
+    est.set_params(verbose=True)
+    func(X, y)
+    assert re.match(pattern, capsys.readouterr().out)
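To make the asserted output format concrete, a standalone check of one of the
patterns above (the sample output is hand-written for illustration, not
captured from a real run):

    import re

    sample = ("[Pipeline] ... (step 1 of 2) Processing transf, total=   0.0s\n"
              "[Pipeline] ...... (step 2 of 2) Processing clf, total=   0.0s\n")
    pattern = (r'\[Pipeline\].*\(step 1 of 2\) Processing transf.* total=.*\n'
               r'\[Pipeline\].*\(step 2 of 2\) Processing clf.* total=.*\n$')
    assert re.match(pattern, sample)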

setup.cfg

Lines changed: 0 additions & 1 deletion
@@ -28,7 +28,6 @@ addopts =
     --ignore examples
     --ignore maint_tools
     --doctest-modules
-    --disable-pytest-warnings
     -rs
 
 filterwarnings =
