diff --git a/tsml_eval/_wip/condensing/__init__.py b/tsml_eval/_wip/condensing/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tsml_eval/_wip/condensing/clustering_condenser.py b/tsml_eval/_wip/condensing/clustering_condenser.py
new file mode 100644
index 00000000..f8de1bc7
--- /dev/null
+++ b/tsml_eval/_wip/condensing/clustering_condenser.py
@@ -0,0 +1,125 @@
+import numpy as np
+from aeon.transformations.collection.base import BaseCollectionTransformer
+
+
+class ClusteringCondenser(BaseCollectionTransformer):
+    """
+    Condensing transformer that replaces each class with clustering-based prototypes.
+
+    Parameters
+    ----------
+    clustering_approach : str or None, default=None
+        Clustering approach used to obtain the prototypes: "pam" for
+        k-medoids, or "kmeans" (also the default when None) for k-means
+        with barycentre averaging.
+    distance : str, default="dtw"
+        Distance measure used for clustering and averaging.
+    distance_params : dict or None, default=None
+        Parameters of the distance measure.
+    num_instances_per_class : int, default=1
+        Number of prototypes to generate per class.
+    random_state : int or None, default=None
+        Random seed passed to the clusterer.
+    """
+
+    _tags = {
+        "univariate-only": True,
+        "fit_is_empty": True,
+        "X_inner_mtype": ["np-list", "numpy3D"],
+        "requires_y": True,
+        "y_inner_mtype": ["numpy1D"],
+    }
+
+    def __init__(
+        self,
+        clustering_approach=None,
+        distance="dtw",
+        distance_params=None,
+        num_instances_per_class=1,
+        random_state=None,
+    ):
+        self.distance = distance
+
+        self.distance_params = distance_params
+        if self.distance_params is None:
+            self.distance_params = {}
+
+        self.num_instances_per_class = num_instances_per_class
+
+        self.selected_series = np.array([])
+        self.y_selected_series = []
+
+        self.random_state = random_state
+
+        self.clustering_approach = clustering_approach
+        if self.clustering_approach == "pam":
+            from aeon.clustering import TimeSeriesKMedoids
+
+            self.clusterer = TimeSeriesKMedoids(
+                n_clusters=self.num_instances_per_class,
+                method="pam",
+                init_algorithm="random",
+                distance=self.distance,
+                distance_params=self.distance_params,
+                random_state=self.random_state,
+            )
+
+        elif self.clustering_approach == "kmeans" or self.clustering_approach is None:
+            from aeon.clustering import TimeSeriesKMeans
+
+            self.average_params = {
+                "distance": self.distance,
+                **self.distance_params.copy(),
+            }
+
+            self.clusterer = TimeSeriesKMeans(
+                n_clusters=self.num_instances_per_class,
+                distance=self.distance,
+                distance_params=self.distance_params,
+                averaging_method="ba",
+                average_params=self.average_params,
+                random_state=self.random_state,
+            )
+
+        super().__init__()
+
+    def _transform(self, X, y):
+        self.selected_series = self.selected_series.reshape(0, *X.shape[1:])
+
+        for i in np.unique(y):
+            idxs_class = np.where(y == i)
+            X_i = X[idxs_class]
+
+            # When num_instances_per_class == 1 there is no need to run the
+            # full clustering approaches: compute the single prototype directly.
+            if self.num_instances_per_class == 1:
+                if self.clustering_approach == "pam":
+                    from aeon.clustering.averaging._ba_utils import _medoids
+
+                    averaged_series_class_i = [
+                        _medoids(
+                            X_i,
+                            distance=self.distance,
+                            **self.distance_params,
+                        )
+                    ]
+                # Also cover the default (None) case, which maps to k-means,
+                # so averaged_series_class_i is always assigned.
+                elif (
+                    self.clustering_approach == "kmeans"
+                    or self.clustering_approach is None
+                ):
+                    from aeon.clustering.averaging import elastic_barycenter_average
+
+                    averaged_series_class_i = [
+                        elastic_barycenter_average(
+                            X_i,
+                            distance=self.distance,
+                            **self.distance_params,
+                        )
+                    ]
+            # For num_instances_per_class > 1, fit the configured clusterer.
+            else:
+                self.clusterer.fit(X_i)
+                averaged_series_class_i = self.clusterer.cluster_centers_
+
+            self.selected_series = np.concatenate(
+                (self.selected_series, averaged_series_class_i), axis=0
+            )
+
+            self.y_selected_series.extend([i] * self.num_instances_per_class)
+
+        return np.array(self.selected_series), np.array(self.y_selected_series)
+
+    def _fit_transform(self, X, y):
+        return self._transform(X, y)
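A minimal usage sketch for the condenser above (not part of the diff; it assumes aeon's GunPoint loader and that the WIP base-class wiring dispatches fit_transform to _fit_transform as intended):

    import numpy as np
    from aeon.datasets import load_gunpoint

    from tsml_eval._wip.condensing.clustering_condenser import ClusteringCondenser

    X_train, y_train = load_gunpoint(split="train")

    # Build two k-means/barycentre prototypes per class.
    condenser = ClusteringCondenser(
        clustering_approach="kmeans",
        distance="dtw",
        distance_params={"window": 0.2},
        num_instances_per_class=2,
        random_state=0,
    )
    X_cond, y_cond = condenser.fit_transform(X_train, y_train)
    print(X_cond.shape, np.unique(y_cond))  # (2 * n_classes, 1, n_timepoints)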
diff --git a/tsml_eval/_wip/condensing/condensing_classifier.py b/tsml_eval/_wip/condensing/condensing_classifier.py
new file mode 100644
index 00000000..eafef96f
--- /dev/null
+++ b/tsml_eval/_wip/condensing/condensing_classifier.py
@@ -0,0 +1,78 @@
+from aeon.classification.base import BaseClassifier
+
+
+class CondenserClassifier(BaseClassifier):
+    """
+    Classifier wrapper that condenses the training set with any condensing approach.
+
+    Parameters
+    ----------
+    condenser : BaseCollectionTransformer or None, default=None
+        Condensing transformer applied to the training set. Defaults to a
+        k-means ClusteringCondenser.
+    distance : str, default="dtw"
+        Distance measure used by the default condenser and classifier.
+    distance_params : dict or None, default=None
+        Parameters of the distance measure.
+    classifier : BaseClassifier or None, default=None
+        Classifier fitted on the condensed set. Defaults to 1-NN with the
+        given distance.
+    num_instances : int, default=1
+        Number of prototypes per class for the default condenser.
+    random_state : int or None, default=None
+        Random seed.
+    """
+
+    _tags = {
+        "univariate-only": True,
+        "fit_is_empty": False,
+        "X_inner_mtype": ["np-list", "numpy3D"],
+    }
+
+    def __init__(
+        self,
+        condenser=None,
+        distance="dtw",
+        distance_params=None,
+        classifier=None,
+        num_instances=1,
+        random_state=None,
+    ):
+        self.distance = distance
+
+        self.distance_params = distance_params
+        if self.distance_params is None:
+            self.distance_params = {}
+
+        self.num_instances = num_instances
+
+        self.random_state = random_state
+
+        self.condenser = condenser
+        if self.condenser is None:
+            from tsml_eval._wip.condensing.clustering_condenser import (
+                ClusteringCondenser,
+            )
+
+            self.condenser = ClusteringCondenser(
+                clustering_approach="kmeans",
+                distance=self.distance,
+                distance_params=self.distance_params,
+                num_instances_per_class=self.num_instances,
+                random_state=self.random_state,
+            )
+
+        self.classifier = classifier
+        if self.classifier is None:
+            from aeon.classification.distance_based import (
+                KNeighborsTimeSeriesClassifier,
+            )
+
+            self.classifier = KNeighborsTimeSeriesClassifier(
+                distance=self.distance,
+                weights="distance",
+                distance_params=self.distance_params,
+                n_neighbors=1,
+            )
+        super().__init__()
+
+    def _fit(self, X, y):
+        condensed_X, condensed_y = self.condenser.fit_transform(X, y)
+        self.classifier.fit(condensed_X, condensed_y)
+        return self
+
+    def _predict(self, X):
+        return self.classifier.predict(X)
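The wrapper can then be used like any aeon classifier; a hedged sketch with the defaults (k-means condenser plus distance-weighted 1-NN), reusing the GunPoint split from the previous snippet:

    from aeon.datasets import load_gunpoint

    from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier

    X_test, y_test = load_gunpoint(split="test")

    clf = CondenserClassifier(distance="dtw", num_instances=3, random_state=0)
    clf.fit(X_train, y_train)  # condenses to 3 prototypes per class, then fits 1-NN
    print((clf.predict(X_test) == y_test).mean())  # accuracy on the test split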
diff --git a/tsml_eval/_wip/condensing/draw_average_and_barycentres.py b/tsml_eval/_wip/condensing/draw_average_and_barycentres.py
new file mode 100644
index 00000000..41ce4364
--- /dev/null
+++ b/tsml_eval/_wip/condensing/draw_average_and_barycentres.py
@@ -0,0 +1,66 @@
+import os
+
+import matplotlib.pyplot as plt
+import numpy as np
+from aeon.clustering.metrics.averaging import elastic_barycenter_average
+from aeon.datasets import load_from_tsfile
+
+dataset = "GunPoint"
+c = "1"
+
+distances = ["msm", "dtw", "twe"]
+distance_params = {
+    "msm": {"c": 1},
+    "dtw": {"window": 0.2},
+    "twe": {"nu": 0.05, "lmbda": 1},
+}
+names = ["MBA", "DBA", "TBA"]
+colours = ["blue", "purple", "green"]
+n_methods = len(distances) + 1
+
+fig = plt.figure(figsize=(13, 13))
+
+gs0 = fig.add_gridspec(1, 2)
+
+gs00 = gs0[0].subgridspec(n_methods * 2, 1)
+gs01 = gs0[1].subgridspec(n_methods, 1)
+
+# Original set of time series.
+start = n_methods - 1
+end = n_methods + 1
+ax00_gs00 = fig.add_subplot(gs00[start:end, 0])
+
+x_train, y_train = load_from_tsfile(
+    os.path.join(f"../../../../TSC_datasets/{dataset}/{dataset}_TRAIN.ts")
+)
+
+x = range(0, x_train.shape[2])
+idxs = np.where(y_train == c)
+
+for i in x_train[idxs]:
+    ax00_gs00.plot(x, i[0], lw=0.2)
+
+ax00_gs00.set_title("Original time series", size=14)
+
+# Arithmetic average of the time series.
+ax01_gs01 = fig.add_subplot(gs01[0])
+series_avg = np.mean(np.array(x_train[idxs]), axis=0)[0]
+ax01_gs01.plot(x, series_avg, color="red")
+ax01_gs01.set_title("Averaging", size=14)
+
+# Plot the barycentre-averaged series for each elastic distance (msm, dtw, twe).
+for idx, i in enumerate(distances):
+    series_BA = elastic_barycenter_average(
+        x_train[idxs],
+        metric=i,
+        **distance_params[i],
+    )
+    ax = fig.add_subplot(gs01[idx + 1])
+    ax.plot(x, series_BA[0, :], color=colours[idx])
+    ax.set_title(names[idx], size=14)
+
+fig.suptitle(f"{dataset} - Class {c}", size=16)
+
+fig.tight_layout()
+
+plt.savefig("barycentres_example.png")
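The averaging step this script visualises can also be called directly; a sketch on toy data, under the same aeon version the script assumes (the import path and the metric keyword differ in other aeon releases, as the condenser module itself shows):

    import numpy as np
    from aeon.clustering.metrics.averaging import elastic_barycenter_average

    rng = np.random.default_rng(0)
    X_class = rng.standard_normal((10, 1, 150))  # stand-in for one class of series
    dba = elastic_barycenter_average(X_class, metric="dtw", window=0.2)
    print(dba.shape)  # one (1, 150) prototype summarising the ten series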
diff --git a/tsml_eval/_wip/condensing/drop1.py b/tsml_eval/_wip/condensing/drop1.py
new file mode 100644
index 00000000..3ed9a75a
--- /dev/null
+++ b/tsml_eval/_wip/condensing/drop1.py
@@ -0,0 +1,155 @@
+import numpy as np
+from aeon.distances import get_distance_function
+from aeon.transformations.collection.base import BaseCollectionTransformer
+
+
+class Drop1Condenser(BaseCollectionTransformer):
+    """
+    Class for the DROP1 condensing approach.
+
+    Parameters
+    ----------
+    distance : str, default="dtw"
+        Distance measure used to find neighbours.
+    distance_params : dict or None, default=None
+        Parameters of the distance measure.
+    num_instances : int, default=1
+        Per-class neighbourhood size of the k-NN rule; scaled by the number
+        of classes in ``_fit``.
+
+    References
+    ----------
+    .. [1] Wilson, D. R., & Martinez, T. R. (2000). Reduction techniques for
+    instance-based learning algorithms. Machine learning, 38, 257-286.
+    """
+
+    _tags = {
+        "univariate-only": True,
+        "fit_is_empty": False,
+        "X_inner_mtype": ["np-list", "numpy3D"],
+        "requires_y": True,
+        "y_inner_mtype": ["numpy1D"],
+    }
+
+    def __init__(
+        self,
+        distance="dtw",
+        distance_params=None,
+        num_instances=1,
+    ):
+        self.distance = distance
+        self.distance_params = distance_params
+        if self.distance_params is None:
+            self.distance_params = {}
+
+        self.num_instances = num_instances
+
+        if isinstance(self.distance, str):
+            self.metric = get_distance_function(metric=self.distance)
+
+        self.selected_indices = []
+
+        super().__init__()
+
+    def _fit(self, X, y):
+        # num_instances is the per-class neighbourhood size; scale it by the
+        # number of classes.
+        n_classes = len(np.unique(y))
+        self.num_instances = self.num_instances * n_classes
+
+    def _transform(self, X, y):
+        """
+        Implementation of the DROP1 prototype selection approach.
+
+        Parameters
+        ----------
+        X : np.ndarray of shape (n_samples, n_channels, n_timepoints)
+            Collection of time series.
+        y : np.ndarray of shape (n_samples,)
+            Class labels of the instances.
+
+        Returns
+        -------
+        np.ndarray, np.ndarray
+            The selected instances and their labels.
+        """
+        n_samples = X.shape[0]
+
+        associates = [[] for _ in range(n_samples)]
+        kneighbors = [[] for _ in range(n_samples)]
+        weights = [[] for _ in range(n_samples)]
+        distances = np.zeros((n_samples, n_samples))
+
+        # Compute the pairwise distance matrix.
+        for p in range(n_samples):
+            for p2 in range(p + 1, n_samples):
+                distances[p, p2] = self.metric(X[p], X[p2], **self.distance_params)
+                distances[p2, p] = distances[p, p2]
+
+        # Get the neighbours (ordered by distance) and associates of each instance.
+        for p in range(n_samples):
+            weights[p], kneighbors[p] = zip(
+                *sorted(zip(distances[p], range(n_samples)))
+            )
+
+            # Drop the first neighbour: it is the instance itself, at distance 0.
+            weights[p], kneighbors[p] = weights[p][1:], kneighbors[p][1:]
+
+            for j in kneighbors[p][: self.num_instances]:
+                associates[j].append(p)
+
+        # Apply the with/without classification rule to each instance p.
+        for p in range(n_samples):
+            without_P = 0
+            with_P = 0
+
+            for a in associates[p]:
+                # With p in the set.
+                y_pred_w_P = self._predict_KNN(
+                    kneighbors[a],
+                    weights[a],
+                    y,
+                    self.num_instances,
+                )
+
+                if y_pred_w_P == y[a]:
+                    with_P += 1
+                # Without p in the set. Filter the weights by the neighbour
+                # index, keeping them aligned with the filtered neighbours.
+                y_pred_wo_P = self._predict_KNN(
+                    [k for k in kneighbors[a] if k != p],
+                    [w for idx, w in enumerate(weights[a]) if kneighbors[a][idx] != p],
+                    y,
+                    self.num_instances,
+                )
+
+                if y_pred_wo_P == y[a]:
+                    without_P += 1
+
+            if without_P < with_P:  # the instance is worth keeping.
+                print(f"Keeping instance {p}.")
+                self.selected_indices.append(p)
+            else:  # the instance is not worth keeping.
+                print(f"Removing instance {p}.")
+                for a in associates[p]:
+                    kneighbors[a] = [kn for kn in kneighbors[a] if kn != p]
+                    for j in kneighbors[a][: self.num_instances]:
+                        if a not in associates[j]:
+                            associates[j].append(a)
+
+                for k in kneighbors[p]:
+                    associates[k] = [a for a in associates[k] if a != p]
+
+        print(self.selected_indices)
+        return X[self.selected_indices], y[self.selected_indices]
+
+    def _fit_transform(self, X, y):
+        self.fit(X, y)
+        return self._transform(X, y)
+
+    def _predict_KNN(self, neighbors, weights, y, num_neighbors):
+        neighbors = neighbors[:num_neighbors]
+        weights = weights[:num_neighbors]
+        classes_, y_ = np.unique(y, return_inverse=True)
+        scores = np.zeros(len(classes_))
+        for idx, w in zip(neighbors, weights):
+            predicted_class = y_[idx]
+            scores[predicted_class] += 1 / (w + np.finfo(float).eps)
+        return classes_[np.argmax(scores)]
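A hedged usage sketch for the selector above (same GunPoint data as earlier; unlike the clustering condenser it returns a subset of the original series, and num_instances acts as the per-class k of the k-NN rule rather than a fixed output size):

    from tsml_eval._wip.condensing.drop1 import Drop1Condenser

    condenser = Drop1Condenser(
        distance="dtw", distance_params={"window": 0.2}, num_instances=1
    )
    X_kept, y_kept = condenser.fit_transform(X_train, y_train)
    print(len(y_kept), "of", len(y_train), "instances kept")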
diff --git a/tsml_eval/_wip/condensing/drop2.py b/tsml_eval/_wip/condensing/drop2.py
new file mode 100644
index 00000000..599558f2
--- /dev/null
+++ b/tsml_eval/_wip/condensing/drop2.py
@@ -0,0 +1,163 @@
+import numpy as np
+from aeon.distances import get_distance_function
+from aeon.transformations.collection.base import BaseCollectionTransformer
+
+
+class Drop2Condenser(BaseCollectionTransformer):
+    """
+    Class for the DROP2 condensing approach.
+
+    Parameters
+    ----------
+    distance : str, default="dtw"
+        Distance measure used to find neighbours.
+    distance_params : dict or None, default=None
+        Parameters of the distance measure.
+    num_instances : int, default=1
+        Per-class neighbourhood size of the k-NN rule; scaled by the number
+        of classes in ``_fit``.
+
+    References
+    ----------
+    .. [1] Wilson, D. R., & Martinez, T. R. (2000). Reduction techniques for
+    instance-based learning algorithms. Machine learning, 38, 257-286.
+    """
+
+    _tags = {
+        "univariate-only": True,
+        "fit_is_empty": False,
+        "X_inner_mtype": ["np-list", "numpy3D"],
+        "requires_y": True,
+        "y_inner_mtype": ["numpy1D"],
+    }
+
+    def __init__(
+        self,
+        distance="dtw",
+        distance_params=None,
+        num_instances=1,
+    ):
+        self.distance = distance
+        self.distance_params = distance_params
+        if self.distance_params is None:
+            self.distance_params = {}
+
+        self.num_instances = num_instances
+
+        if isinstance(self.distance, str):
+            self.metric = get_distance_function(metric=self.distance)
+
+        self.selected_indices = []
+
+        super().__init__()
+
+    def _fit(self, X, y):
+        # num_instances is the per-class neighbourhood size; scale it by the
+        # number of classes.
+        n_classes = len(np.unique(y))
+        self.num_instances = self.num_instances * n_classes
+
+    def _transform(self, X, y):
+        """
+        Implementation of the DROP2 prototype selection approach.
+
+        Parameters
+        ----------
+        X : np.ndarray of shape (n_samples, n_channels, n_timepoints)
+            Collection of time series.
+        y : np.ndarray of shape (n_samples,)
+            Class labels of the instances.
+
+        Returns
+        -------
+        np.ndarray, np.ndarray
+            The selected instances and their labels.
+        """
+        n_samples = X.shape[0]
+
+        associates = [[] for _ in range(n_samples)]
+        kneighbors = [[] for _ in range(n_samples)]
+        weights = [[] for _ in range(n_samples)]
+        distance_nearest_enemy = []
+        distances = np.zeros((n_samples, n_samples))
+
+        # Compute the pairwise distance matrix.
+        for p in range(n_samples):
+            for p2 in range(p + 1, n_samples):
+                distances[p, p2] = self.metric(X[p], X[p2], **self.distance_params)
+                distances[p2, p] = distances[p, p2]
+
+        # Get the neighbours (ordered by distance) and associates of each instance.
+        for p in range(n_samples):
+            weights[p], kneighbors[p], y_ordered = zip(
+                *sorted(zip(distances[p], range(n_samples), y))
+            )
+
+            # Drop the first element of each: it is the instance itself, so the
+            # labels stay aligned with the trimmed neighbour list.
+            weights[p], kneighbors[p], y_ordered = (
+                weights[p][1:],
+                kneighbors[p][1:],
+                y_ordered[1:],
+            )
+
+            for j in kneighbors[p][: self.num_instances]:
+                associates[j].append(p)
+
+            # DROP2 orders instances by the distance to their nearest enemy,
+            # i.e. the nearest neighbour with a different class label.
+            for kdx, _ in enumerate(kneighbors[p]):
+                if y_ordered[kdx] != y[p]:
+                    distance_nearest_enemy.append(weights[p][kdx])
+                    break
+
+        _, n_samples_ordered = zip(
+            *sorted(zip(distance_nearest_enemy, range(n_samples)))
+        )
+
+        # Apply the with/without classification rule to each instance p.
+        for p in n_samples_ordered:
+            without_P = 0
+            with_P = 0
+
+            for a in associates[p]:
+                # With p in the set.
+                y_pred_w_P = self._predict_KNN(
+                    kneighbors[a],
+                    weights[a],
+                    y,
+                    self.num_instances,
+                )
+
+                if y_pred_w_P == y[a]:
+                    with_P += 1
+                # Without p in the set. Filter the weights by the neighbour
+                # index, keeping them aligned with the filtered neighbours.
+                y_pred_wo_P = self._predict_KNN(
+                    [k for k in kneighbors[a] if k != p],
+                    [w for idx, w in enumerate(weights[a]) if kneighbors[a][idx] != p],
+                    y,
+                    self.num_instances,
+                )
+
+                if y_pred_wo_P == y[a]:
+                    without_P += 1
+
+            if without_P < with_P:  # the instance is worth keeping.
+                print(f"Keeping instance {p}.")
+                self.selected_indices.append(p)
+            else:  # the instance is not worth keeping.
+                print(f"Removing instance {p}.")
+                for a in associates[p]:
+                    kneighbors[a] = [kn for kn in kneighbors[a] if kn != p]
+                    for j in kneighbors[a][: self.num_instances]:
+                        if a not in associates[j]:
+                            associates[j].append(a)
+
+        print(self.selected_indices)
+        return X[self.selected_indices], y[self.selected_indices]
+
+    def _fit_transform(self, X, y):
+        self.fit(X, y)
+        return self._transform(X, y)
+
+    def _predict_KNN(self, neighbors, weights, y, num_neighbors):
+        neighbors = neighbors[:num_neighbors]
+        weights = weights[:num_neighbors]
+        classes_, y_ = np.unique(y, return_inverse=True)
+        scores = np.zeros(len(classes_))
+        for idx, w in zip(neighbors, weights):
+            predicted_class = y_[idx]
+            scores[predicted_class] += 1 / (w + np.finfo(float).eps)
+        return classes_[np.argmax(scores)]
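DROP2 differs from DROP1 mainly in the visiting order: instances are sorted by distance to their nearest enemy, the closest instance with a different label. A toy sketch of that ordering on scalars, with absolute difference standing in for the elastic distances used above:

    import numpy as np

    X = np.array([0.0, 0.4, 1.0, 1.1])
    y = np.array([0, 0, 1, 1])

    d_enemy = [
        min(abs(X[i] - X[j]) for j in range(len(X)) if y[j] != y[i])
        for i in range(len(X))
    ]
    order = np.argsort(d_enemy)  # ascending, matching the sorted() call above
    print(order)  # the two border points (0.4 and 1.0) are visited first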
diff --git a/tsml_eval/_wip/condensing/drop3.py b/tsml_eval/_wip/condensing/drop3.py
new file mode 100644
index 00000000..e83d9bdf
--- /dev/null
+++ b/tsml_eval/_wip/condensing/drop3.py
@@ -0,0 +1,178 @@
+import numpy as np
+from aeon.distances import get_distance_function
+from aeon.transformations.collection.base import BaseCollectionTransformer
+
+
+class Drop3Condenser(BaseCollectionTransformer):
+    """
+    Class for the DROP3 condensing approach.
+
+    Parameters
+    ----------
+    distance : str, default="dtw"
+        Distance measure used to find neighbours.
+    distance_params : dict or None, default=None
+        Parameters of the distance measure.
+    num_instances : int, default=1
+        Per-class neighbourhood size of the k-NN rule; scaled by the number
+        of classes in ``_fit``.
+
+    References
+    ----------
+    .. [1] Wilson, D. R., & Martinez, T. R. (2000). Reduction techniques for
+    instance-based learning algorithms. Machine learning, 38, 257-286.
+    """
+
+    _tags = {
+        "univariate-only": True,
+        "fit_is_empty": False,
+        "X_inner_mtype": ["np-list", "numpy3D"],
+        "requires_y": True,
+        "y_inner_mtype": ["numpy1D"],
+    }
+
+    def __init__(
+        self,
+        distance="dtw",
+        distance_params=None,
+        num_instances=1,
+    ):
+        self.distance = distance
+        self.distance_params = distance_params
+        if self.distance_params is None:
+            self.distance_params = {}
+
+        self.num_instances = num_instances
+
+        if isinstance(self.distance, str):
+            self.metric = get_distance_function(metric=self.distance)
+
+        self.selected_indices = []
+
+        super().__init__()
+
+    def _fit(self, X, y):
+        # num_instances is the per-class neighbourhood size; scale it by the
+        # number of classes.
+        n_classes = len(np.unique(y))
+        self.num_instances = self.num_instances * n_classes
+
+    def _transform(self, X, y):
+        """
+        Implementation of the DROP3 prototype selection approach.
+
+        Parameters
+        ----------
+        X : np.ndarray of shape (n_samples, n_channels, n_timepoints)
+            Collection of time series.
+        y : np.ndarray of shape (n_samples,)
+            Class labels of the instances.
+
+        Returns
+        -------
+        np.ndarray, np.ndarray
+            The selected instances and their labels.
+        """
+        n_samples = X.shape[0]
+        id_instances = []
+
+        associates = [[] for _ in range(n_samples)]
+        kneighbors = [[] for _ in range(n_samples)]
+        weights = [[] for _ in range(n_samples)]
+        distance_nearest_enemy = []
+        distances = np.zeros((n_samples, n_samples))
+
+        # Compute the pairwise distance matrix.
+        for p in range(n_samples):
+            for p2 in range(p + 1, n_samples):
+                distances[p, p2] = self.metric(X[p], X[p2], **self.distance_params)
+                distances[p2, p] = distances[p, p2]
+
+        # Get the neighbours (ordered by distance) and associates of each instance.
+        for p in range(n_samples):
+            weights[p], kneighbors[p] = zip(
+                *sorted(zip(distances[p], range(n_samples)))
+            )
+            # Drop the first neighbour: it is the instance itself, at distance 0.
+            weights[p], kneighbors[p] = weights[p][1:], kneighbors[p][1:]
+
+            for j in kneighbors[p][: self.num_instances]:
+                associates[j].append(p)
+
+        # Noise filter: keep an instance only if its k nearest neighbours
+        # classify it correctly.
+        for p in range(n_samples):
+            y_pred = self._predict_KNN(
+                kneighbors[p],
+                weights[p],
+                y,
+                self.num_instances,
+            )
+
+            if y_pred == y[p]:
+                id_instances.append(p)
+        print(f"Noise filter kept {len(id_instances)} of {n_samples} instances.")
+
+        # As in DROP2, order the surviving instances by the distance to their
+        # nearest enemy, i.e. the nearest neighbour with a different label.
+        for p in id_instances:
+            for kdx, _ in enumerate(kneighbors[p]):
+                if y[kneighbors[p][kdx]] != y[p]:
+                    distance_nearest_enemy.append(weights[p][kdx])
+                    break
+
+        _, n_samples_ordered = zip(*sorted(zip(distance_nearest_enemy, id_instances)))
+
+        # Apply the with/without classification rule to each instance p.
+        for p in n_samples_ordered:
+            without_P = 0
+            with_P = 0
+
+            for a in associates[p]:
+                # With p in the set.
+                y_pred_w_P = self._predict_KNN(
+                    kneighbors[a],
+                    weights[a],
+                    y,
+                    self.num_instances,
+                )
+
+                if y_pred_w_P == y[a]:
+                    with_P += 1
+                # Without p in the set. Filter the weights by the neighbour
+                # index, keeping them aligned with the filtered neighbours.
+                y_pred_wo_P = self._predict_KNN(
+                    [k for k in kneighbors[a] if k != p],
+                    [w for idx, w in enumerate(weights[a]) if kneighbors[a][idx] != p],
+                    y,
+                    self.num_instances,
+                )
+
+                if y_pred_wo_P == y[a]:
+                    without_P += 1
+
+            if without_P < with_P:  # the instance is worth keeping.
+                print(f"Keeping instance {p}.")
+                self.selected_indices.append(p)
+            else:  # the instance is not worth keeping.
+                print(f"Removing instance {p}.")
+                for a in associates[p]:
+                    kneighbors[a] = [kn for kn in kneighbors[a] if kn != p]
+                    for j in kneighbors[a][: self.num_instances]:
+                        if a not in associates[j]:
+                            associates[j].append(a)
+
+        print(self.selected_indices)
+        return X[self.selected_indices], y[self.selected_indices]
+
+    def _fit_transform(self, X, y):
+        self.fit(X, y)
+        return self._transform(X, y)
+
+    def _predict_KNN(self, neighbors, weights, y, num_neighbors):
+        neighbors = neighbors[:num_neighbors]
+        weights = weights[:num_neighbors]
+        classes_, y_ = np.unique(y, return_inverse=True)
+        scores = np.zeros(len(classes_))
+        for idx, w in zip(neighbors, weights):
+            predicted_class = y_[idx]
+            scores[predicted_class] += 1 / (w + np.finfo(float).eps)
+        return classes_[np.argmax(scores)]
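The first pass above is an ENN-style noise filter: an instance survives only if its k nearest neighbours classify it correctly, so mislabelled points are removed before the DROP2-style pass runs. A toy sketch on scalars (hypothetical data, k = 3, plain majority vote in place of the distance-weighted vote used above):

    from collections import Counter

    import numpy as np

    X = np.array([0.0, 0.1, 0.2, 0.15, 0.9, 1.0, 1.1])
    y = np.array([0, 0, 0, 1, 1, 1, 1])  # the point at 0.15 is label noise
    k = 3

    keep = []
    for i in range(len(X)):
        nn = np.argsort(np.abs(X - X[i]))[1 : k + 1]  # skip the instance itself
        if Counter(y[nn]).most_common(1)[0][0] == y[i]:
            keep.append(i)
    print(keep)  # [0, 1, 2, 4, 5, 6]: the noisy point is filtered out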
diff --git a/tsml_eval/_wip/condensing/simple_rank.py b/tsml_eval/_wip/condensing/simple_rank.py
new file mode 100644
index 00000000..25e8e521
--- /dev/null
+++ b/tsml_eval/_wip/condensing/simple_rank.py
@@ -0,0 +1,114 @@
+import numpy as np
+from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier
+from aeon.distances import get_distance_function
+from aeon.transformations.collection.base import BaseCollectionTransformer
+
+
+class SimpleRankCondenser(BaseCollectionTransformer):
+    """
+    Class for the SimpleRank condensing approach.
+
+    Parameters
+    ----------
+    distance : str, default="dtw"
+        Distance measure used for ranking and classification.
+    distance_params : dict or None, default=None
+        Parameters of the distance measure.
+    num_instances : int, default=1
+        Number of instances to keep per class; scaled by the number of
+        classes in ``_fit``.
+
+    References
+    ----------
+    .. [1] Ueno, K., Xi, X., Keogh, E., & Lee, D. J. (2006, December). Anytime
+    classification using the nearest neighbor algorithm with applications to stream
+    mining. In Sixth International Conference on Data Mining (ICDM'06) (pp. 623-632).
+    IEEE.
+    """
+
+    _tags = {
+        "univariate-only": True,
+        "fit_is_empty": False,
+        "X_inner_mtype": ["np-list", "numpy3D"],
+        "requires_y": True,
+        "y_inner_mtype": ["numpy1D"],
+    }
+
+    def __init__(
+        self,
+        distance="dtw",
+        distance_params=None,
+        num_instances=1,
+    ):
+        self.distance = distance
+        self.distance_params = distance_params
+        if self.distance_params is None:
+            self.distance_params = {}
+
+        self.num_instances = num_instances
+
+        if isinstance(self.distance, str):
+            self.metric_ = get_distance_function(metric=self.distance)
+
+        self.selected_indices = []
+
+        super().__init__()
+
+    def _fit(self, X, y):
+        # As SimpleRank does not select prototypes per class, num_instances is
+        # scaled by the number of classes to match the per-class budget of the
+        # other methods.
+        num_classes = len(np.unique(y))
+        self.num_instances = self.num_instances * num_classes
+
+    def _transform(self, X, y):
+        n_samples = X.shape[0]
+        rank = np.zeros(n_samples)
+        distance = np.zeros(n_samples)
+        num_classes = len(np.unique(y))
+
+        for i in range(n_samples):
+            X_train = np.delete(X, i, axis=0)
+            y_train = np.delete(y, i)
+            X_pattern_loo = X[i]
+            y_pattern_loo = y[i]
+
+            # Consider moving this classifier construction to __init__.
+            classifier = KNeighborsTimeSeriesClassifier(
+                distance=self.distance,
+                distance_params=self.distance_params,
+                n_neighbors=1,
+            )
+
+            classifier.fit(X_train, y_train)
+            # Predict expects a collection, so wrap the single series.
+            prediction = classifier.predict(X_pattern_loo[np.newaxis, ...])[0]
+
+            # A leave-one-out hit scores +1; a miss scores -2 / (num_classes - 1).
+            if y_pattern_loo == prediction:
+                rank[i] = 1
+            else:
+                rank[i] = -2 / (num_classes - 1)
+
+            # Compute the distance to the nearest neighbour of the same class.
+            distance[i] = np.min(
+                np.array(
+                    [
+                        self.metric_(
+                            X_pattern_loo,
+                            j,
+                            **self.distance_params,
+                        )
+                        for j in X_train[np.where(y_train == y_pattern_loo)[0]]
+                    ]
+                )
+            )
+        # Sort by rank, descending, breaking ties in favour of instances closer
+        # to their nearest same-class neighbour, and keep the top num_instances.
+        order = sorted(zip(rank, -np.array(distance), range(n_samples)))[::-1]
+
+        self.selected_indices = [x[2] for x in order][: self.num_instances]
+
+        condensed_X, condensed_y = X[self.selected_indices], y[self.selected_indices]
+
+        return condensed_X, condensed_y
+
+    def _fit_transform(self, X, y):
+        self._fit(X, y)
+        return self._transform(X, y)
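A toy trace (hypothetical values) of the ordering rule at the end of _transform: hits rank above misses, and among equal ranks the instance closer to its nearest same-class neighbour comes first:

    import numpy as np

    rank = np.array([1.0, -1.0, 1.0])     # LOO 1-NN: hit, miss, hit
    distance = np.array([0.3, 0.1, 0.5])  # to the nearest same-class series
    order = sorted(zip(rank, -distance, range(3)))[::-1]
    print([i for _, _, i in order])       # [0, 2, 1]: both hits before the miss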
diff --git a/tsml_eval/_wip/condensing/wrapper.py b/tsml_eval/_wip/condensing/wrapper.py
new file mode 100644
index 00000000..7b5381dc
--- /dev/null
+++ b/tsml_eval/_wip/condensing/wrapper.py
@@ -0,0 +1,84 @@
+import numpy as np
+from aeon.classification.base import BaseClassifier
+from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier
+from aeon.clustering import TimeSeriesKMeans
+
+
+class WrapperBA(BaseClassifier):
+    """
+    Classifier wrapper that condenses each class to barycentre-averaged prototypes.
+
+    Parameters
+    ----------
+    metric : str, default="dtw"
+        Distance measure used for clustering, averaging and classification.
+    metric_params : dict or None, default=None
+        Parameters of the distance measure.
+    classifier : BaseClassifier or None, default=None
+        Classifier fitted on the prototypes. Defaults to 1-NN with the given
+        metric.
+    num_instances_per_class : int, default=1
+        Number of prototypes to build per class.
+    """
+
+    _tags = {
+        "univariate-only": True,
+        "fit_is_empty": True,
+    }
+
+    def __init__(
+        self,
+        metric="dtw",
+        metric_params=None,
+        classifier=None,
+        num_instances_per_class=1,
+    ):
+        self.metric = metric
+
+        self.metric_params = metric_params
+        if self.metric_params is None:
+            self.metric_params = {}
+
+        self.num_instances_per_class = num_instances_per_class
+
+        self.classifier = classifier
+        if self.classifier is None:
+            self.classifier = KNeighborsTimeSeriesClassifier(
+                distance=self.metric,
+                weights="distance",
+                distance_params=self.metric_params,
+                n_neighbors=1,
+            )
+
+        self.selected_series = []
+        self.y_selected_series = []
+
+        self.clusterer = TimeSeriesKMeans(
+            n_clusters=self.num_instances_per_class,
+            metric=self.metric,
+            distance_params=self.metric_params,
+            averaging_method="ba",
+            average_params=self.metric_params,
+        )
+
+        super().__init__()
+
+    def _fit(self, X, y):
+        for i in np.unique(y):
+            idxs_class = np.where(y == i)
+
+            self.clusterer.fit(X[idxs_class])
+            averaged_series_class_i = self.clusterer.cluster_centers_
+
+            # cluster_centers_ has shape (n_clusters, n_channels, n_timepoints);
+            # store one entry per prototype so the series stay aligned with
+            # their labels when n_clusters > 1.
+            self.selected_series.extend(averaged_series_class_i)
+            self.y_selected_series.extend([i] * self.num_instances_per_class)
+
+        self.classifier.fit(
+            np.array(self.selected_series), np.array(self.y_selected_series)
+        )
+
+        return self
+
+    def _predict(self, X):
+        return self.classifier.predict(X)
diff --git a/tsml_eval/experiments/set_classifier.py b/tsml_eval/experiments/set_classifier.py
index 9f213e84..9c4cf785 100644
--- a/tsml_eval/experiments/set_classifier.py
+++ b/tsml_eval/experiments/set_classifier.py
@@ -48,6 +48,29 @@
     ["msm", "1nn-msm"],
     ["twe", "1nn-twe"],
     "1nn-dtw-cv",
+    "condenserclassifier",
+    "kMeansCondenser-dtw-1",
+    "kMeansCondenser-dtw-2",
+    "kMeansCondenser-dtw-3",
+    "kMeansCondenser-twe-1",
+    "kMeansCondenser-twe-2",
+    "kMeansCondenser-twe-3",
+    "kMeansCondenser-msm-1",
+    "kMeansCondenser-msm-2",
+    "kMeansCondenser-msm-3",
+    "SimpleRankCondenser",
"PAMCondenser-dtw-1", + "PAMCondenser-dtw-2", + "PAMCondenser-dtw-3", + "PAMCondenser-twe-1", + "PAMCondenser-twe-2", + "PAMCondenser-twe-3", + "PAMCondenser-msm-1", + "PAMCondenser-msm-2", + "PAMCondenser-msm-3", + "Drop1Condenser", + "Drop2Condenser", + "Drop3Condenser", ["elasticensemble", "ee"], ["grailclassifier", "grail"], ["proximitytree", "proximitytreeclassifier"], @@ -415,7 +438,13 @@ def _set_classifier_distance_based( if c == "kneighborstimeseriesclassifier" or c == "dtw" or c == "1nn-dtw": from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - return KNeighborsTimeSeriesClassifier(distance="dtw", n_jobs=n_jobs, **kwargs) + return KNeighborsTimeSeriesClassifier( + distance="dtw", + distance_params={"window": 0.2}, + weights="distance", + n_jobs=n_jobs, + **kwargs, + ) elif c == "ed" or c == "1nn-euclidean" or c == "1nn-ed": from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier @@ -425,11 +454,171 @@ def _set_classifier_distance_based( elif c == "msm" or c == "1nn-msm": from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - return KNeighborsTimeSeriesClassifier(distance="msm", n_jobs=n_jobs, **kwargs) + return KNeighborsTimeSeriesClassifier( + distance="msm", n_jobs=n_jobs, weights="distance", **kwargs + ) elif c == "twe" or c == "1nn-twe": from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - return KNeighborsTimeSeriesClassifier(distance="twe", n_jobs=n_jobs, **kwargs) + return KNeighborsTimeSeriesClassifier( + distance="twe", distance_params={"nu": 0.001}, n_jobs=n_jobs, **kwargs + ) + elif c == "condenserclassifier": + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + + return CondenserClassifier(**kwargs) + elif c == "simplerankcondenser": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + from tsml_eval._wip.condensing.simple_rank import SimpleRankCondenser + + return CondenserClassifier( + condenser=SimpleRankCondenser( + distance="dtw", + distance_params={"window": 0.2}, + num_instances=5, + ), + classifier=KNeighborsTimeSeriesClassifier( + distance="dtw", + weights="distance", + distance_params={"window": 0.2}, + n_neighbors=1, + ), + **kwargs, + ) + + elif c.startswith("kmeanscondenser"): + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + + from tsml_eval._wip.condensing.clustering_condenser import ClusteringCondenser + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + + distance = c.split("-")[1] + num_instances_per_class = int(c.split("-")[2]) + + if distance == "dtw": + distance_params = {"window": 0.2} + elif distance == "msm": + distance_params = {} + elif distance == "twe": + distance_params = {"nu": 0.001} + + return CondenserClassifier( + condenser=ClusteringCondenser( + clustering_approach="kmeans", + distance=distance, + distance_params=distance_params, + num_instances_per_class=num_instances_per_class, + random_state=random_state, + ), + classifier=KNeighborsTimeSeriesClassifier( + distance=distance, + distance_params=distance_params, + weights="distance", + n_neighbors=1, + ), + random_state=random_state, + **kwargs, + ) + + elif c.startswith("pamcondenser"): + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + + from tsml_eval._wip.condensing.clustering_condenser import ClusteringCondenser + from 
+
+        distance = c.split("-")[1]
+        num_instances_per_class = int(c.split("-")[2])
+
+        if distance == "dtw":
+            distance_params = {"window": 0.2}
+        elif distance == "msm":
+            distance_params = {}
+        elif distance == "twe":
+            distance_params = {"nu": 0.001}
+
+        return CondenserClassifier(
+            condenser=ClusteringCondenser(
+                clustering_approach="pam",
+                distance=distance,
+                distance_params=distance_params,
+                num_instances_per_class=num_instances_per_class,
+                random_state=random_state,
+            ),
+            classifier=KNeighborsTimeSeriesClassifier(
+                distance=distance,
+                distance_params=distance_params,
+                weights="distance",
+                n_neighbors=1,
+            ),
+            random_state=random_state,
+            **kwargs,
+        )
+
+    elif c == "drop1condenser":
+        from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier
+
+        from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier
+        from tsml_eval._wip.condensing.drop1 import Drop1Condenser
+
+        return CondenserClassifier(
+            condenser=Drop1Condenser(
+                distance="dtw",
+                distance_params={"window": 0.2},
+                num_instances=15,
+            ),
+            classifier=KNeighborsTimeSeriesClassifier(
+                distance="dtw",
+                weights="distance",
+                distance_params={"window": 0.2},
+                n_neighbors=1,
+            ),
+            random_state=random_state,
+            **kwargs,
+        )
+    elif c == "drop2condenser":
+        from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier
+
+        from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier
+        from tsml_eval._wip.condensing.drop2 import Drop2Condenser
+
+        return CondenserClassifier(
+            condenser=Drop2Condenser(
+                distance="dtw",
+                distance_params={"window": 0.2},
+                num_instances=15,
+            ),
+            classifier=KNeighborsTimeSeriesClassifier(
+                distance="dtw",
+                weights="distance",
+                distance_params={"window": 0.2},
+                n_neighbors=1,
+            ),
+            random_state=random_state,
+            **kwargs,
+        )
+    elif c == "drop3condenser":
+        from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier
+
+        from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier
+        from tsml_eval._wip.condensing.drop3 import Drop3Condenser
+
+        return CondenserClassifier(
+            condenser=Drop3Condenser(
+                distance="dtw",
+                distance_params={"window": 0.2},
+                num_instances=15,
+            ),
+            classifier=KNeighborsTimeSeriesClassifier(
+                distance="dtw",
+                weights="distance",
+                distance_params={"window": 0.2},
+                n_neighbors=1,
+            ),
+            random_state=random_state,
+            **kwargs,
+        )
     elif c == "elasticensemble" or c == "ee":
         from aeon.classification.distance_based import ElasticEnsemble