Skip to content

Commit 1092774

Browse files
dummy classifiers and sklearn lower bound change (#3)
* actually change python version * dummy classifiers and sklearn lower bound change * test fix * test fix
1 parent 63a0514 commit 1092774

File tree

22 files changed

+360
-28
lines changed

22 files changed

+360
-28
lines changed

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ jobs:
4040

4141
- uses: actions/setup-python@v4
4242
with:
43-
python-version: "3.10"
43+
python-version: ${{ matrix.python-version }}
4444

4545
- name: Install
4646
run: python -m pip install .[dev,optional_dependencies]

pyproject.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "tsml"
7-
version = "0.0.2"
7+
version = "0.0.3"
88
description = "A toolkit for time series machine learning algorithms."
99
authors = [
1010
{name = "Matthew Middlehurst", email = "m.middlehurst@uea.ac.uk"},
@@ -37,7 +37,7 @@ classifiers = [
3737
dependencies = [
3838
"numba>=0.55",
3939
"numpy>=1.21.0",
40-
"scikit-learn>=1.2.1",
40+
"scikit-learn>=1.0.2",
4141
]
4242

4343
[project.optional-dependencies]
@@ -76,7 +76,6 @@ include = ["tsml"]
7676
ignore = [
7777
"examples/**",
7878
"docs/**",
79-
"requirements.txt",
8079
"*.yaml",
8180
"*.yml",
8281
".coveragerc",
@@ -88,6 +87,8 @@ extend-ignore = ["E203"]
8887

8988
[tool.pytest.ini_options]
9089
addopts = '''
90+
--ignore examples
91+
--ignore docs
9192
--durations 10
9293
--timeout 600
9394
--showlocals

tsml/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# -*- coding: utf-8 -*-
22
"""tsml."""
33

4-
__version__ = "0.0.1"
4+
__version__ = "0.0.3"

tsml/dummy/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,10 @@
11
# -*- coding: utf-8 -*-
22
"""Dummy estimators."""
3+
4+
__all__ = [
5+
"DummyClassifier",
6+
"DummyRegressor",
7+
"DummyClusterer",
8+
]
9+
10+
from tsml.dummy._dummy import DummyClassifier, DummyClusterer, DummyRegressor

tsml/dummy/_dummy.py

Lines changed: 312 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,312 @@
1+
# -*- coding: utf-8 -*-
2+
"""Dummy time series estimators."""
3+
4+
__author__ = ["MatthewMiddlehurst"]
5+
__all__ = ["DummyClassifier", "DummyRegressor", "DummyClusterer"]
6+
7+
import numpy as np
8+
from sklearn.base import ClassifierMixin, ClusterMixin, RegressorMixin
9+
from sklearn.dummy import DummyClassifier as SklearnDummyClassifier
10+
from sklearn.dummy import DummyRegressor as SklearnDummyRegressor
11+
from sklearn.utils import check_random_state
12+
from sklearn.utils.multiclass import check_classification_targets
13+
from sklearn.utils.validation import check_is_fitted
14+
15+
from tsml.base import BaseTimeSeriesEstimator
16+
17+
18+
class DummyClassifier(ClassifierMixin, BaseTimeSeriesEstimator):
    """DummyClassifier makes predictions that ignore the input features.

    This classifier serves as a simple baseline to compare against other more
    complex classifiers. Do not use it for real problems.

    The specific behavior of the baseline is selected with the `strategy`
    parameter.

    All strategies make predictions that ignore the input feature values passed
    as the `X` argument to `fit` and `predict`. The predictions, however,
    typically depend on values observed in the `y` parameter passed to `fit`.

    A wrapper for `sklearn.dummy.DummyClassifier` using the tsml interface. Functionally
    identical.

    Parameters
    ----------
    strategy : {"most_frequent", "prior", "stratified", "uniform", \
            "constant"}, default="prior"
        Strategy to use to generate predictions.

        * "most_frequent": the `predict` method always returns the most
          frequent class label in the observed `y` argument passed to `fit`.
          The `predict_proba` method returns the matching one-hot encoded
          vector.
        * "prior": the `predict` method always returns the most frequent
          class label in the observed `y` argument passed to `fit` (like
          "most_frequent"). ``predict_proba`` always returns the empirical
          class distribution of `y` also known as the empirical class prior
          distribution.
        * "stratified": the `predict_proba` method randomly samples one-hot
          vectors from a multinomial distribution parametrized by the empirical
          class prior probabilities.
          The `predict` method returns the class label which got probability
          one in the one-hot vector of `predict_proba`.
          Each sampled row of both methods is therefore independent and
          identically distributed.
        * "uniform": generates predictions uniformly at random from the list
          of unique classes observed in `y`, i.e. each class has equal
          probability.
        * "constant": always predicts a constant label that is provided by
          the user. This is useful for metrics that evaluate a non-majority
          class.
    random_state : int, RandomState instance or None, default=None
        Controls the randomness to generate the predictions when
        ``strategy='stratified'`` or ``strategy='uniform'``.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.
    constant : int or str or array-like of shape (n_outputs,), default=None
        The explicit constant as predicted by the "constant" strategy. This
        parameter is useful only for the "constant" strategy.

    Attributes
    ----------
    n_instances_ : int
        Number of training series seen in `fit`.
    n_dims_ : int
        Number of dimensions per training series seen in `fit`.
    series_length_ : int
        Length of the training series seen in `fit`.
    classes_ : np.ndarray
        Sorted unique class labels observed in `y`.
    n_classes_ : int
        Number of unique class labels observed in `y`.
    class_dictionary_ : dict
        Mapping from each class label to its index in `classes_`.

    See Also
    --------
    DummyRegressor : Regressor that makes predictions using simple rules.

    Examples
    --------
    >>> from tsml.dummy import DummyClassifier
    >>> from tsml.datasets import load_minimal_chinatown
    >>> X_train, y_train = load_minimal_chinatown(split="train")
    >>> X_test, y_test = load_minimal_chinatown(split="test")
    >>> clf = DummyClassifier(strategy="most_frequent")
    >>> clf.fit(X_train, y_train)
    DummyClassifier(strategy='most_frequent')
    >>> clf.score(X_test, y_test)
    0.5
    """

    def __init__(self, strategy="prior", random_state=None, constant=None):
        self.strategy = strategy
        self.random_state = random_state
        self.constant = constant

        super().__init__()

    def fit(self, X, y):
        """Fit the dummy classifier on the training labels.

        Parameters
        ----------
        X : array-like
            Training time series. After validation it is a 3D array of shape
            (n_instances, n_dims, series_length); the values are never used.
        y : array-like of shape (n_instances,)
            Class labels for the training series.

        Returns
        -------
        self : DummyClassifier
            The fitted estimator.
        """
        X, y = self._validate_data(X=X, y=y)

        check_classification_targets(y)

        self.n_instances_, self.n_dims_, self.series_length_ = X.shape
        self.classes_ = np.unique(y)
        self.n_classes_ = self.classes_.shape[0]
        self.class_dictionary_ = {
            label: index for index, label in enumerate(self.classes_)
        }

        # Only one label was observed: predict/predict_proba answer directly,
        # so the wrapped sklearn estimator is not built.
        if self.n_classes_ == 1:
            return self

        self._clf = SklearnDummyClassifier(
            strategy=self.strategy,
            random_state=self.random_state,
            constant=self.constant,
        )
        # The wrapped estimator ignores feature values, so it is given zeros of
        # the same shape as the validated input.
        self._clf.fit(np.zeros(X.shape), y)

        return self

    def predict(self, X) -> np.ndarray:
        """Predict a class label for each series in X.

        Parameters
        ----------
        X : array-like
            Time series to predict labels for; the values are never used.

        Returns
        -------
        y : np.ndarray
            Predicted class labels, one per series in X.
        """
        check_is_fitted(self)

        # Validate before the single-class shortcut so that the shortcut never
        # reads `.shape` from raw, unvalidated input.
        X = self._validate_data(X=X, reset=False)

        # treat case of single class seen in fit
        if self.n_classes_ == 1:
            return np.repeat(list(self.class_dictionary_.keys()), X.shape[0], axis=0)

        return self._clf.predict(np.zeros(X.shape))

    def predict_proba(self, X) -> np.ndarray:
        """Predict class probabilities for each series in X.

        Parameters
        ----------
        X : array-like
            Time series to predict probabilities for; the values are never used.

        Returns
        -------
        proba : np.ndarray of shape (n_instances, n_classes_)
            Class probabilities, one row per series in X. When a single class
            was seen in `fit` this is a single column of ones.
        """
        check_is_fitted(self)

        # Validate before the single-class shortcut so that the shortcut never
        # reads `.shape` from raw, unvalidated input.
        X = self._validate_data(X=X, reset=False)

        # treat case of single class seen in fit
        if self.n_classes_ == 1:
            return np.repeat([[1]], X.shape[0], axis=0)

        return self._clf.predict_proba(np.zeros(X.shape))
143+
144+
145+
class DummyRegressor(RegressorMixin, BaseTimeSeriesEstimator):
    """DummyRegressor makes predictions that ignore the input features.

    A trivial baseline to compare (real) regressors against. Do not use it for
    real problems.

    The `strategy` parameter selects which baseline rule is applied.

    Whatever the strategy, the feature values passed as `X` to `fit` and
    `predict` are ignored. The predictions, however, typically depend on the
    values observed in the `y` argument passed to `fit`.

    A wrapper for `sklearn.dummy.DummyRegressor` using the tsml interface. Functionally
    identical.

    Parameters
    ----------
    strategy : {"mean", "median", "quantile", "constant"}, default="mean"
        Strategy to use to generate predictions.

        * "mean": always predicts the mean of the training set
        * "median": always predicts the median of the training set
        * "quantile": always predicts a specified quantile of the training set,
          provided with the quantile parameter.
        * "constant": always predicts a constant value that is provided by
          the user.
    constant : int or float or array-like of shape (n_outputs,), default=None
        The explicit constant as predicted by the "constant" strategy. This
        parameter is useful only for the "constant" strategy.
    quantile : float in [0.0, 1.0], default=None
        The quantile to predict using the "quantile" strategy. A quantile of
        0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the
        maximum.

    See Also
    --------
    DummyClassifier : Classifier that makes predictions using simple rules.

    Examples
    --------
    >>> from tsml.dummy import DummyRegressor
    >>> from tsml.datasets import load_minimal_gas_prices
    >>> X_train, y_train = load_minimal_gas_prices(split="train")
    >>> X_test, y_test = load_minimal_gas_prices(split="test")
    >>> reg = DummyRegressor()
    >>> reg.fit(X_train, y_train)
    DummyRegressor()
    >>> reg.score(X_test, y_test)
    -0.07184048625633688
    """

    def __init__(self, strategy="mean", constant=None, quantile=None):
        self.strategy = strategy
        self.constant = constant
        self.quantile = quantile

        super().__init__()

    def fit(self, X, y):
        """Fit the wrapped sklearn dummy regressor on the training targets.

        Parameters
        ----------
        X : array-like
            Training time series; validated, but the values are never used.
        y : array-like
            Target values for the training series.

        Returns
        -------
        self : DummyRegressor
            The fitted estimator.
        """
        X, y = self._validate_data(X=X, y=y)

        wrapped = SklearnDummyRegressor(
            strategy=self.strategy,
            constant=self.constant,
            quantile=self.quantile,
        )
        self._reg = wrapped

        # The wrapped estimator ignores feature values, so a zero array with
        # the validated input's shape stands in for X.
        placeholder = np.zeros(X.shape)
        wrapped.fit(placeholder, y)

        return self

    def predict(self, X):
        """Predict a target value for each series in X.

        Parameters
        ----------
        X : array-like
            Time series to predict for; validated, but the values are never
            used.

        Returns
        -------
        y : np.ndarray
            Output of the wrapped sklearn dummy regressor's `predict`.
        """
        check_is_fitted(self)

        X = self._validate_data(X=X, reset=False)

        placeholder = np.zeros(X.shape)
        return self._reg.predict(placeholder)
222+
223+
224+
class DummyClusterer(ClusterMixin, BaseTimeSeriesEstimator):
    """DummyClusterer assigns cluster labels that ignore the input features.

    This clusterer makes no effort to form reasonable clusters, and is primarily
    used for interface testing. Do not use it for real problems.

    All strategies assign labels that ignore the input feature values passed
    as the `X` argument to `fit` and `predict`; only the number of series in `X`
    is used.

    Parameters
    ----------
    strategy : {"single", "unique", "random"}, default="single"
        Strategy to use to generate cluster labels.

        * "single": every series is assigned to cluster 0.
        * "unique": each series is assigned its own cluster, labelled by its
          position in `X`.
        * "random": each series is assigned a label drawn at random from
          ``[0, n_clusters)``.
    n_clusters : int, default=2
        Number of clusters to draw labels from. Only used when
        ``strategy="random"``.
    random_state : int, RandomState instance or None, default=None
        Controls the randomness of the labels when ``strategy="random"``. The
        generator is obtained afresh on every call to `fit` and `predict`, so
        with an int seed both methods draw the same label sequence.

    Attributes
    ----------
    labels_ : np.ndarray of shape (n_instances,)
        Cluster labels (dtype int32) assigned to the series passed to `fit`.

    Examples
    --------
    >>> from tsml.dummy import DummyClusterer
    >>> from tsml.datasets import load_minimal_chinatown
    >>> from sklearn.metrics import adjusted_rand_score
    >>> X_train, y_train = load_minimal_chinatown(split="train")
    >>> X_test, y_test = load_minimal_chinatown(split="test")
    >>> clu = DummyClusterer(strategy="random", random_state=0)
    >>> clu.fit(X_train)
    DummyClusterer(random_state=0, strategy='random')
    >>> adjusted_rand_score(clu.labels_, y_train)
    0.2087729039422543
    >>> adjusted_rand_score(clu.predict(X_test), y_test)
    0.2087729039422543
    """

    def __init__(self, strategy="single", n_clusters=2, random_state=None):
        self.strategy = strategy
        self.n_clusters = n_clusters
        self.random_state = random_state

        super().__init__()

    def fit(self, X, y=None):
        """Assign dummy cluster labels to the training series.

        Parameters
        ----------
        X : array-like
            Training time series; validated, but only its length is used.
        y : None
            Ignored; present for interface compatibility.

        Returns
        -------
        self : DummyClusterer
            The fitted estimator, with `labels_` set.

        Raises
        ------
        ValueError
            If `strategy` is not one of "single", "unique" or "random".
        """
        X = self._validate_data(X=X)

        self.labels_ = self._generate_labels(len(X))

        return self

    def predict(self, X):
        """Assign dummy cluster labels to the series in X.

        Parameters
        ----------
        X : array-like
            Time series to label; validated, but only its length is used.

        Returns
        -------
        labels : np.ndarray of shape (n_instances,)
            Cluster labels (dtype int32), one per series in X.

        Raises
        ------
        ValueError
            If `strategy` is not one of "single", "unique" or "random".
        """
        check_is_fitted(self)

        X = self._validate_data(X=X, reset=False)

        return self._generate_labels(len(X))

    def _generate_labels(self, n_instances):
        """Return int32 labels for `n_instances` series using `strategy`."""
        if self.strategy == "single":
            return np.zeros(n_instances, dtype=np.int32)
        if self.strategy == "unique":
            return np.arange(n_instances, dtype=np.int32)
        if self.strategy == "random":
            rng = check_random_state(self.random_state)
            return rng.randint(self.n_clusters, size=n_instances, dtype=np.int32)
        raise ValueError(f"Unknown strategy {self.strategy}")

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. This
            estimator defines no special sets, so the value is not inspected and
            the same parameters are returned for every name.

        Returns
        -------
        params : dict or list of dict, default={}
            Parameters to create testing instances of the class.
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`.
        """
        return {}

tsml/feature_based/_catch22_classifier.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import numpy as np
1111
from sklearn.base import ClassifierMixin, RegressorMixin
1212
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
13+
from sklearn.utils.multiclass import check_classification_targets
1314
from sklearn.utils.validation import check_is_fitted
1415

1516
from tsml.base import BaseTimeSeriesEstimator, _clone_estimator
@@ -113,6 +114,8 @@ def fit(self, X, y):
113114
X=X, y=y, ensure_min_samples=2, ensure_min_series_length=3
114115
)
115116

117+
check_classification_targets(y)
118+
116119
self.n_instances_, self.n_dims_, self.series_length_ = X.shape
117120
self.classes_ = np.unique(y)
118121
self.n_classes_ = self.classes_.shape[0]

0 commit comments

Comments
 (0)