From 582bf1dff0bc961b83788f715c7245fccdb59283 Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio Date: Wed, 24 May 2023 14:00:47 +0200 Subject: [PATCH 01/21] first approach --- tsml_eval/_wip/condensing/drop1.py | 193 +++++++++++++++++++++++ tsml_eval/_wip/condensing/figures.ipynb | 135 ++++++++++++++++ tsml_eval/_wip/condensing/simple_rank.py | 131 +++++++++++++++ tsml_eval/_wip/condensing/wrapper.py | 80 ++++++++++ 4 files changed, 539 insertions(+) create mode 100644 tsml_eval/_wip/condensing/drop1.py create mode 100644 tsml_eval/_wip/condensing/figures.ipynb create mode 100644 tsml_eval/_wip/condensing/simple_rank.py create mode 100644 tsml_eval/_wip/condensing/wrapper.py diff --git a/tsml_eval/_wip/condensing/drop1.py b/tsml_eval/_wip/condensing/drop1.py new file mode 100644 index 00000000..20e8a73a --- /dev/null +++ b/tsml_eval/_wip/condensing/drop1.py @@ -0,0 +1,193 @@ +# -*- coding: utf-8 -*- +import numpy as np +from aeon.distances import get_distance_function +from aeon.transformations.base import BaseTransformer + +from tsml_eval.estimators.classification.distance_based import ( + KNeighborsTimeSeriesClassifier, +) + + +class Drop1(BaseTransformer): + """ + Class for the simple_rank condensing approach. + + Parameters + ---------- + distance + distance_params + n_neighbors + + References + ---------- + .. [1] Ueno, K., Xi, X., Keogh, E., & Lee, D. J. (2006, December). Anytime + classification using the nearest neighbor algorithm with applications to stream + mining. In Sixth International Conference on Data Mining (ICDM'06) (pp. 623-632). + IEEE. + + Examples + -------- + >>> from ... + >>> from ... + """ + + _tags = { + "univariate-only": True, + "fit_is_empty": False, + } + + def __init__( + self, + distance="dtw", + distance_params=None, + n_neighbors=1, + ): + self.distance = distance + self._distance_params = distance_params + if self._distance_params is None: + self._distance_params = {} + + self.n_neighbors = n_neighbors + + if isinstance(self.distance, str): + self.metric_ = get_distance_function(metric=self.distance) + + self.selected_indices = [] + + super(Drop1, self).__init__() + + def _fit(self, X, y): + """ + Implement of the SimpleRank prototype selection approach. + + Parameters + ---------- + X -- numpy array of shape (n_samples, n_features) representing the feature + vectors of the instances. + y -- numpy array of shape (n_samples,) representing the corresponding class + labels. + k -- int, the desired number of prototypes to be selected. + + Returns + ------- + self + """ + n_samples = X.shape[0] + + associates = [[] for _ in range(n_samples)] + kneighbors = [[] for _ in range(n_samples)] + y_pred = [] + + classifier = KNeighborsTimeSeriesClassifier( + distance=self.distance, + distance_params=self._distance_params, + n_neighbors=self.n_neighbors + 1, + ) + + # Predicting class with the instance in the set. + # Also getting the kneighbors and the associates of the instance. + for i in range(n_samples): + classifier.fit(X, y) + y_pred.append(classifier.predict(X[i])) + i_kneighbors, i_distances = classifier._kneighbors(X[i]) + + i_kneighbors = [x[1] for x in sorted(zip(i_distances, i_kneighbors))] + + for j in i_kneighbors: + associates[j].append(i) + + kneighbors[i] = i_kneighbors + + # Predicting class without the instance in the set. + y_pred_wo_P = [] + for i in range(n_samples): + X_wo_P = np.delete(X, i, axis=0) + y_wo_P = np.delete(y, i) + classifier.fit(X_wo_P, y_wo_P) + y_pred_wo_P.append(classifier.predict(X[i])) + + X_S = X.copy() + y_S = y.copy() + + for i in range(n_samples): + # Num of associates correctly classified with i (or P) as neighbor. + with_list = [ + j + for j in associates[i] + if ((i in kneighbors[j]) and (y[j] == y_pred[j])) + ] + + # Num of associates correctly classified without i (or P) as neighbor. + without_list = [j for j in associates[i] if (y[j] == y_pred_wo_P[j])] + + # Check if removing i (or P) is better. + if len(without_list) >= len(with_list): + # Remove P from S. + i_S = self._find_index(i, X, X_S) + X_S = np.delete(X_S, i_S, axis=0) + y_S = np.delete(y_S, i_S) + + # Remove P from the kneighbors of the associates. + for j in associates[i]: + kneighbors[j].remove(i) + + # if self.n_neighbors + 1 >= len(X_S): + # classifier = KNeighborsTimeSeriesClassifier( + # distance=self.distance, + # distance_params=self._distance_params, + # n_neighbors=len(X_S), + # ) + + # Find the next nearest neighbor for the j-th associate. + classifier.fit(X_S, y_S) + y_pred[j] = classifier.predict(X[j]) + j_kneighbors, _ = classifier._kneighbors(X[j]) + j_kneighbors = self._find_index(j_kneighbors, X_S, X) + + j_neighbor = list( + set(j_kneighbors).symmetric_difference(set(kneighbors[j])) + )[0] + + kneighbors[j].append(j_neighbor) + associates[j_neighbor].append(j) + + # Remove P from the associates of the neighbors. + for j in kneighbors[i]: + associates[j].remove(i) + + associates[i] = [] + kneighbors[i] = [] + + # The instance worth staying. + else: + self.selected_indices.append(i) + return self + + def _transform(self, X, y): + return X[self.selected_indices], y[self.selected_indices] + + def _fit_transform(self, X, y): + self._fit(X, y) + condensed_X, condensed_y = self._transform(X, y) + + return condensed_X, condensed_y + + def _get_selected_indices(self): + # todo: check that fit has already been called. + return self.selected_indices + + def _find_index(self, values, training_set_instance, training_set_to_find): + if isinstance(values, int): + values = [values] + + index = [ + xdx + for xdx, x in enumerate(training_set_to_find) + for k in values + if np.array_equal(x, training_set_instance[k]) + ] + + if len(index) == 1: + return index[0] + else: + return index diff --git a/tsml_eval/_wip/condensing/figures.ipynb b/tsml_eval/_wip/condensing/figures.ipynb new file mode 100644 index 00000000..23873e4d --- /dev/null +++ b/tsml_eval/_wip/condensing/figures.ipynb @@ -0,0 +1,135 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "\n", + "def plot_return_condensed(CM, x_train, y_train, verbose=1):\n", + "\n", + " plt.figure()\n", + "\n", + " x_train_condensed, y_train_condensed = CM._fit_transform(x_train, y_train)\n", + " if verbose >= 1:\n", + " print(\n", + " f\"Selected indices ({len(x_train_condensed)}): {CM._get_selected_indices()}\"\n", + " )\n", + " print(y_train_condensed)\n", + " print(f\"Labels: {len(np.unique(y_train_condensed))/len(np.unique(y_train))}\")\n", + "\n", + " _, _, len_ts = x_train_condensed.shape\n", + "\n", + " color = {0: \"black\", 1: \"blue\", 2: \"red\", 3: \"green\", 4: \"pink\", 5: \"orange\"}\n", + "\n", + " x = range(0, len_ts)\n", + "\n", + " for idx, (series, label) in enumerate(zip(x_train, y_train)):\n", + " alpha = 0.2\n", + " if idx in CM._get_selected_indices():\n", + " alpha = 1\n", + "\n", + " plt.plot(x, series[0, :], color=color[int(label)], alpha=alpha)\n", + "\n", + " return x_train_condensed, y_train_condensed" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\tFull: 0.9545454545454546\n", + "\tCondensed: 0.8636363636363636\n", + "\n", + "\tFull: 0.7028571428571428\n", + "\tCondensed: 0.42857142857142855\n", + "\n", + "\tFull: 0.9066666666666666\n", + "\tCondensed: 0.6533333333333333\n" + ] + } + ], + "source": [ + "from aeon.datasets._single_problem_loaders import (\n", + " load_arrow_head,\n", + " load_gunpoint,\n", + " load_unit_test,\n", + ")\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "from tsml_eval._wip.condensing.drop1 import Drop1\n", + "from tsml_eval._wip.condensing.simple_rank import SimpleRank\n", + "from tsml_eval._wip.condensing.wrapper import WrapperBA\n", + "from tsml_eval.estimators.classification.distance_based import (\n", + " KNeighborsTimeSeriesClassifier,\n", + ")\n", + "\n", + "for dataset in [load_unit_test, load_arrow_head, load_gunpoint]:\n", + "\n", + " print(dataset)\n", + "\n", + " x_train, y_train = dataset(\"TRAIN\")\n", + "\n", + " CM = SimpleRank(distance=\"msm\", n_neighbors=2) # Drop1\n", + " CM = Drop1(distance=\"msm\", n_neighbors=2) # Drop1\n", + " CM = WrapperBA() # Drop1\n", + "\n", + " # x_train_condensed, y_train_condensed = plot_return_condensed(\n", + " # CM, x_train, y_train, 0\n", + " # )\n", + " x_train_condensed, y_train_condensed = CM._fit_transform(x_train, y_train)\n", + "\n", + " x_test, y_test = dataset(\"TEST\")\n", + "\n", + " knn = KNeighborsTimeSeriesClassifier(n_neighbors=1)\n", + " knn.fit(x_train, y_train)\n", + " y_pred_full = knn.predict(x_test)\n", + "\n", + " knn.fit(x_train_condensed, y_train_condensed)\n", + " y_pred_cond = knn.predict(x_test)\n", + "\n", + " print(f\"\\tFull: {accuracy_score(y_test, y_pred_full)}\")\n", + " print(f\"\\tCondensed: {accuracy_score(y_test, y_pred_cond)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TSR_original", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tsml_eval/_wip/condensing/simple_rank.py b/tsml_eval/_wip/condensing/simple_rank.py new file mode 100644 index 00000000..7a93c3bf --- /dev/null +++ b/tsml_eval/_wip/condensing/simple_rank.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- +import numpy as np +from aeon.distances import get_distance_function +from aeon.transformations.base import BaseTransformer + +from tsml_eval.estimators.classification.distance_based import ( + KNeighborsTimeSeriesClassifier, +) + + +class SimpleRank(BaseTransformer): + """ + Class for the simple_rank condensing approach. + + Parameters + ---------- + distance + distance_params + n_neighbors + + References + ---------- + .. [1] Ueno, K., Xi, X., Keogh, E., & Lee, D. J. (2006, December). Anytime + classification using the nearest neighbor algorithm with applications to stream + mining. In Sixth International Conference on Data Mining (ICDM'06) (pp. 623-632). + IEEE. + + Examples + -------- + >>> from ... + >>> from ... + """ + + _tags = { + "univariate-only": True, + "fit_is_empty": False, + } + + def __init__( + self, + distance="dtw", + distance_params=None, + n_neighbors=1, + ): + self.distance = distance + self._distance_params = distance_params + if self._distance_params is None: + self._distance_params = {} + + self.n_neighbors = n_neighbors + + if isinstance(self.distance, str): + self.metric_ = get_distance_function(metric=self.distance) + + self.selected_indices = [] + + super(SimpleRank, self).__init__() + + def _fit(self, X, y): + """ + Implement of the SimpleRank prototype selection approach. + + Parameters + ---------- + X -- numpy array of shape (n_samples, n_features) representing the feature + vectors of the instances. + y -- numpy array of shape (n_samples,) representing the corresponding class + labels. + + Returns + ------- + self + """ + n_samples = X.shape[0] + rank = np.zeros(n_samples) + distance = np.zeros(n_samples) + num_classes = len(np.unique(y)) + + for i in range(n_samples): + X_train = np.delete(X, i, axis=0) + y_train = np.delete(y, i) + X_pattern_loo = X[i] + + classifier = KNeighborsTimeSeriesClassifier( + distance=self.distance, + distance_params=self._distance_params, + n_neighbors=self.n_neighbors, + ) + + classifier.fit(X_train, y_train) + prediction = classifier.predict(X_pattern_loo) + + if y[i] == prediction: + rank[i] = 1 + else: + rank[i] = -2 / (num_classes - 1) + + # compute distance to nearest neigh in class + distance[i] = np.min( + np.array( + [ + self.metric_( + X_pattern_loo, + X_train[np.where(y_train == y[i])[0]][j], + **self._distance_params, + ) + for j in range(len([np.where(y_train == y[i])[0]])) + ] + ) + ) + + samples_ordered = sorted(zip(rank, -np.array(distance), range(n_samples))) + + self.selected_indices = [x[2] for x in samples_ordered][::-1][ + : self.n_neighbors + ] + + return self + + def _transform(self, X, y): + return X[self.selected_indices], y[self.selected_indices] + + def _fit_transform(self, X, y): + self._fit(X, y) + condensed_X, condensed_y = self._transform(X, y) + + return condensed_X, condensed_y + + def _get_selected_indices(self): + # todo: check that fit has already been called. + return self.selected_indices diff --git a/tsml_eval/_wip/condensing/wrapper.py b/tsml_eval/_wip/condensing/wrapper.py new file mode 100644 index 00000000..32d0f638 --- /dev/null +++ b/tsml_eval/_wip/condensing/wrapper.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +import numpy as np +from aeon.clustering.metrics.averaging._dba import dba +from aeon.distances import get_distance_function +from aeon.transformations.base import BaseTransformer + + +class WrapperBA(BaseTransformer): + """ + Wrapper for BA methods using condensing approach. + + Parameters + ---------- + distance + distance_params + Examples + -------- + >>> from ... + >>> from ... + """ + + _tags = { + "univariate-only": True, + "fit_is_empty": True, + } + + def __init__( + self, + distance="dtw", + distance_params=None, + ): + self.distance = distance + self._distance_params = distance_params + if self._distance_params is None: + self._distance_params = {} + + if isinstance(self.distance, str): + self.metric_ = get_distance_function(metric=self.distance) + + self.selected_series = [] + self.y_selected_series = [] + + super(WrapperBA, self).__init__() + + def _fit(self): + """ + Implement the Wrapper for BA. + + Returns + ------- + self + """ + return self + + def _transform(self, X, y): + for i in np.unique(y): + idxs = np.where(y == i) + + self.selected_series.append( + dba(X[idxs], metric=self.distance, kwargs=self._distance_params) + ) + + self.y_selected_series.append(i) + return np.array(self.selected_series), np.array(self.y_selected_series) + + def _fit_transform(self, X, y): + self._fit() + condensed_X, condensed_y = self._transform(X, y) + + return condensed_X, condensed_y + + +# from aeon.datasets._single_problem_loaders import load_unit_test +# x_train, y_train = load_unit_test("TRAIN") + +# wa = WrapperBA() +# x_condensed, y_condensed = wa._fit_transform(x_train, y_train) + +# print(x_condensed) +# print(y_condensed) From a35a7286e2d3943570cb981fa403d529f82f1b44 Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio Date: Thu, 25 May 2023 17:30:11 +0200 Subject: [PATCH 02/21] minimal changes --- tsml_eval/_wip/condensing/figures.ipynb | 24 ++++++++++++------------ tsml_eval/_wip/condensing/wrapper.py | 20 ++------------------ tsml_eval/experiments/set_classifier.py | 21 +++++++++++++++++++++ 3 files changed, 35 insertions(+), 30 deletions(-) diff --git a/tsml_eval/_wip/condensing/figures.ipynb b/tsml_eval/_wip/condensing/figures.ipynb index 23873e4d..fc8d9ede 100644 --- a/tsml_eval/_wip/condensing/figures.ipynb +++ b/tsml_eval/_wip/condensing/figures.ipynb @@ -47,15 +47,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "\tFull: 0.9545454545454546\n", - "\tCondensed: 0.8636363636363636\n", - "\n", - "\tFull: 0.7028571428571428\n", - "\tCondensed: 0.42857142857142855\n", - "\n", - "\tFull: 0.9066666666666666\n", - "\tCondensed: 0.6533333333333333\n" + "\n", + "\tFull: 0.9090909090909091\n", + "\tCondensed: 0.9090909090909091\n", + "\n", + "\tFull: 0.7942857142857143\n", + "\tCondensed: 0.5371428571428571\n", + "\n", + "\tFull: 0.9666666666666667\n", + "\tCondensed: 0.7066666666666667\n" ] } ], @@ -80,9 +80,9 @@ "\n", " x_train, y_train = dataset(\"TRAIN\")\n", "\n", - " CM = SimpleRank(distance=\"msm\", n_neighbors=2) # Drop1\n", + " CM = SimpleRank(distance=\"msm\", n_neighbors=2) # SR\n", " CM = Drop1(distance=\"msm\", n_neighbors=2) # Drop1\n", - " CM = WrapperBA() # Drop1\n", + " CM = WrapperBA(distance=\"msm\") # MBA/DBA\n", "\n", " # x_train_condensed, y_train_condensed = plot_return_condensed(\n", " # CM, x_train, y_train, 0\n", @@ -91,7 +91,7 @@ "\n", " x_test, y_test = dataset(\"TEST\")\n", "\n", - " knn = KNeighborsTimeSeriesClassifier(n_neighbors=1)\n", + " knn = KNeighborsTimeSeriesClassifier(distance=\"msm\", n_neighbors=1)\n", " knn.fit(x_train, y_train)\n", " y_pred_full = knn.predict(x_test)\n", "\n", diff --git a/tsml_eval/_wip/condensing/wrapper.py b/tsml_eval/_wip/condensing/wrapper.py index 32d0f638..0a60ab53 100644 --- a/tsml_eval/_wip/condensing/wrapper.py +++ b/tsml_eval/_wip/condensing/wrapper.py @@ -13,6 +13,7 @@ class WrapperBA(BaseTransformer): ---------- distance distance_params + Examples -------- >>> from ... @@ -26,7 +27,7 @@ class WrapperBA(BaseTransformer): def __init__( self, - distance="dtw", + distance="msm", distance_params=None, ): self.distance = distance @@ -43,13 +44,6 @@ def __init__( super(WrapperBA, self).__init__() def _fit(self): - """ - Implement the Wrapper for BA. - - Returns - ------- - self - """ return self def _transform(self, X, y): @@ -68,13 +62,3 @@ def _fit_transform(self, X, y): condensed_X, condensed_y = self._transform(X, y) return condensed_X, condensed_y - - -# from aeon.datasets._single_problem_loaders import load_unit_test -# x_train, y_train = load_unit_test("TRAIN") - -# wa = WrapperBA() -# x_condensed, y_condensed = wa._fit_transform(x_train, y_train) - -# print(x_condensed) -# print(y_condensed) diff --git a/tsml_eval/experiments/set_classifier.py b/tsml_eval/experiments/set_classifier.py index 88ad433c..dc3dbf54 100644 --- a/tsml_eval/experiments/set_classifier.py +++ b/tsml_eval/experiments/set_classifier.py @@ -3,6 +3,8 @@ __author__ = ["TonyBagnall", "MatthewMiddlehurst"] +from sklearn.pipeline import make_pipeline + from tsml_eval.utils.functions import str_in_nested_list convolution_based_classifiers = [ @@ -40,6 +42,7 @@ ["KNeighborsTimeSeriesClassifier", "dtw", "1nn-dtw"], ["ed", "1nn-euclidean", "1nn-ed"], ["msm", "1nn-msm"], + ["condensed-1nn-msm", "condensed-1nn-dtw"], ["ElasticEnsemble", "ee"], "ShapeDTW", ["MatrixProfileClassifier", "matrixprofile"], @@ -381,6 +384,24 @@ def _set_classifier_distance_based( from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier return KNeighborsTimeSeriesClassifier(distance="msm", n_jobs=n_jobs, **kwargs) + elif c == "condensed-1nn-dtw": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + + from tsml_eval._wip.condensing.wrapper import WrapperBA + + return make_pipeline( + WrapperBA(distance="dtw", **kwargs), + KNeighborsTimeSeriesClassifier(distance="dtw", n_jobs=n_jobs, **kwargs), + ) + elif c == "condensed-1nn-msm": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + + from tsml_eval._wip.condensing.wrapper import WrapperBA + + return make_pipeline( + WrapperBA(distance="msm", **kwargs), + KNeighborsTimeSeriesClassifier(distance="msm", n_jobs=n_jobs, **kwargs), + ) elif c == "elasticensemble" or c == "ee": from aeon.classification.distance_based import ElasticEnsemble From 1cf3b82969102cc30c8bf48259a518adaa95cf6d Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio Date: Fri, 9 Jun 2023 16:40:08 +0100 Subject: [PATCH 03/21] Condensing with new version of ba and graphs for drawing averages --- tsml_eval/_wip/condensing/draw_MBA_DBA.py | 59 ++++++++++ tsml_eval/_wip/condensing/figures.ipynb | 135 ---------------------- tsml_eval/_wip/condensing/wrapper.py | 50 ++++---- tsml_eval/experiments/set_classifier.py | 16 +-- 4 files changed, 89 insertions(+), 171 deletions(-) create mode 100644 tsml_eval/_wip/condensing/draw_MBA_DBA.py delete mode 100644 tsml_eval/_wip/condensing/figures.ipynb diff --git a/tsml_eval/_wip/condensing/draw_MBA_DBA.py b/tsml_eval/_wip/condensing/draw_MBA_DBA.py new file mode 100644 index 00000000..8593ed73 --- /dev/null +++ b/tsml_eval/_wip/condensing/draw_MBA_DBA.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +import os + +import matplotlib.pyplot as plt +import numpy as np +from aeon.clustering.metrics.averaging import elastic_barycenter_average +from aeon.datasets import load_from_tsfile + +dataset = "GunPoint" +c = "1" +plt.figure() + +fig, axs = plt.subplots(3, 2, sharey=True, sharex=True, figsize=(8, 6)) + +x_train, y_train = load_from_tsfile( + os.path.join(f"../../../../../ajb/Data/{dataset}/{dataset}_TRAIN.ts") +) + +_, _, len_ts = x_train.shape + +x = range(0, len_ts) + +idxs = np.where(y_train == c) + +for i in x_train[idxs]: + axs[0, 0].plot(x, i[0], lw=0.2) + axs[1, 0].plot(x, i[0], lw=0.2) + axs[2, 0].plot(x, i[0], lw=0.2) + + +series_avg = np.mean(np.array(x_train[idxs]), axis=0)[0] + +axs[0, 1].plot(x, series_avg, color="red") + +series_mba = elastic_barycenter_average( + x_train[idxs], + metric="msm", +) + + +axs[1, 1].plot(x, series_mba[0, :]) + +series_dba = elastic_barycenter_average( + x_train[idxs], + metric="dtw", +) + +axs[2, 1].plot(x, series_dba[0, :], color="green") + +fig.suptitle(f"{dataset} - Class {c}") +axs[0, 0].set_title("Original time series") +axs[0, 1].set_title("Averaging") +axs[1, 0].set_title("Original time series") +axs[1, 1].set_title("MBA") +axs[2, 0].set_title("Original time series") +axs[2, 1].set_title("DBA") +fig.tight_layout() + +plt.savefig("test.png") diff --git a/tsml_eval/_wip/condensing/figures.ipynb b/tsml_eval/_wip/condensing/figures.ipynb deleted file mode 100644 index fc8d9ede..00000000 --- a/tsml_eval/_wip/condensing/figures.ipynb +++ /dev/null @@ -1,135 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "\n", - "def plot_return_condensed(CM, x_train, y_train, verbose=1):\n", - "\n", - " plt.figure()\n", - "\n", - " x_train_condensed, y_train_condensed = CM._fit_transform(x_train, y_train)\n", - " if verbose >= 1:\n", - " print(\n", - " f\"Selected indices ({len(x_train_condensed)}): {CM._get_selected_indices()}\"\n", - " )\n", - " print(y_train_condensed)\n", - " print(f\"Labels: {len(np.unique(y_train_condensed))/len(np.unique(y_train))}\")\n", - "\n", - " _, _, len_ts = x_train_condensed.shape\n", - "\n", - " color = {0: \"black\", 1: \"blue\", 2: \"red\", 3: \"green\", 4: \"pink\", 5: \"orange\"}\n", - "\n", - " x = range(0, len_ts)\n", - "\n", - " for idx, (series, label) in enumerate(zip(x_train, y_train)):\n", - " alpha = 0.2\n", - " if idx in CM._get_selected_indices():\n", - " alpha = 1\n", - "\n", - " plt.plot(x, series[0, :], color=color[int(label)], alpha=alpha)\n", - "\n", - " return x_train_condensed, y_train_condensed" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\tFull: 0.9090909090909091\n", - "\tCondensed: 0.9090909090909091\n", - "\n", - "\tFull: 0.7942857142857143\n", - "\tCondensed: 0.5371428571428571\n", - "\n", - "\tFull: 0.9666666666666667\n", - "\tCondensed: 0.7066666666666667\n" - ] - } - ], - "source": [ - "from aeon.datasets._single_problem_loaders import (\n", - " load_arrow_head,\n", - " load_gunpoint,\n", - " load_unit_test,\n", - ")\n", - "from sklearn.metrics import accuracy_score\n", - "\n", - "from tsml_eval._wip.condensing.drop1 import Drop1\n", - "from tsml_eval._wip.condensing.simple_rank import SimpleRank\n", - "from tsml_eval._wip.condensing.wrapper import WrapperBA\n", - "from tsml_eval.estimators.classification.distance_based import (\n", - " KNeighborsTimeSeriesClassifier,\n", - ")\n", - "\n", - "for dataset in [load_unit_test, load_arrow_head, load_gunpoint]:\n", - "\n", - " print(dataset)\n", - "\n", - " x_train, y_train = dataset(\"TRAIN\")\n", - "\n", - " CM = SimpleRank(distance=\"msm\", n_neighbors=2) # SR\n", - " CM = Drop1(distance=\"msm\", n_neighbors=2) # Drop1\n", - " CM = WrapperBA(distance=\"msm\") # MBA/DBA\n", - "\n", - " # x_train_condensed, y_train_condensed = plot_return_condensed(\n", - " # CM, x_train, y_train, 0\n", - " # )\n", - " x_train_condensed, y_train_condensed = CM._fit_transform(x_train, y_train)\n", - "\n", - " x_test, y_test = dataset(\"TEST\")\n", - "\n", - " knn = KNeighborsTimeSeriesClassifier(distance=\"msm\", n_neighbors=1)\n", - " knn.fit(x_train, y_train)\n", - " y_pred_full = knn.predict(x_test)\n", - "\n", - " knn.fit(x_train_condensed, y_train_condensed)\n", - " y_pred_cond = knn.predict(x_test)\n", - "\n", - " print(f\"\\tFull: {accuracy_score(y_test, y_pred_full)}\")\n", - " print(f\"\\tCondensed: {accuracy_score(y_test, y_pred_cond)}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "TSR_original", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.4" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/tsml_eval/_wip/condensing/wrapper.py b/tsml_eval/_wip/condensing/wrapper.py index 0a60ab53..b0a9df7c 100644 --- a/tsml_eval/_wip/condensing/wrapper.py +++ b/tsml_eval/_wip/condensing/wrapper.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- import numpy as np -from aeon.clustering.metrics.averaging._dba import dba -from aeon.distances import get_distance_function -from aeon.transformations.base import BaseTransformer +from aeon.classification.base import BaseClassifier +from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier +from aeon.clustering.metrics.averaging import elastic_barycenter_average -class WrapperBA(BaseTransformer): +class WrapperBA(BaseClassifier): """ Wrapper for BA methods using condensing approach. @@ -27,38 +27,44 @@ class WrapperBA(BaseTransformer): def __init__( self, - distance="msm", + distance="dtw", distance_params=None, ): self.distance = distance - self._distance_params = distance_params - if self._distance_params is None: - self._distance_params = {} - - if isinstance(self.distance, str): - self.metric_ = get_distance_function(metric=self.distance) + self.distance_params = distance_params + if self.distance_params is None: + self.distance_params = {} self.selected_series = [] self.y_selected_series = [] - super(WrapperBA, self).__init__() + self.classifier = KNeighborsTimeSeriesClassifier( + distance=self.distance, distance_params=self.distance_params + ) - def _fit(self): - return self + super(WrapperBA, self).__init__() - def _transform(self, X, y): + def _fit(self, X, y): for i in np.unique(y): idxs = np.where(y == i) - self.selected_series.append( - dba(X[idxs], metric=self.distance, kwargs=self._distance_params) + series = elastic_barycenter_average( + X[idxs], + metric=self.distance, + **self.distance_params, ) + if len(series.shape) == 3: + series = np.squeeze(series, axis=0) + + self.selected_series.append(series) + self.y_selected_series.append(i) - return np.array(self.selected_series), np.array(self.y_selected_series) - def _fit_transform(self, X, y): - self._fit() - condensed_X, condensed_y = self._transform(X, y) + self.classifier.fit( + np.array(self.selected_series), np.array(self.y_selected_series) + ) + return self - return condensed_X, condensed_y + def _predict(self, X): + return self.classifier.predict(X) diff --git a/tsml_eval/experiments/set_classifier.py b/tsml_eval/experiments/set_classifier.py index dc3dbf54..89b354ee 100644 --- a/tsml_eval/experiments/set_classifier.py +++ b/tsml_eval/experiments/set_classifier.py @@ -3,8 +3,6 @@ __author__ = ["TonyBagnall", "MatthewMiddlehurst"] -from sklearn.pipeline import make_pipeline - from tsml_eval.utils.functions import str_in_nested_list convolution_based_classifiers = [ @@ -385,23 +383,13 @@ def _set_classifier_distance_based( return KNeighborsTimeSeriesClassifier(distance="msm", n_jobs=n_jobs, **kwargs) elif c == "condensed-1nn-dtw": - from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - from tsml_eval._wip.condensing.wrapper import WrapperBA - return make_pipeline( - WrapperBA(distance="dtw", **kwargs), - KNeighborsTimeSeriesClassifier(distance="dtw", n_jobs=n_jobs, **kwargs), - ) + return WrapperBA(distance="dtw", **kwargs) elif c == "condensed-1nn-msm": - from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - from tsml_eval._wip.condensing.wrapper import WrapperBA - return make_pipeline( - WrapperBA(distance="msm", **kwargs), - KNeighborsTimeSeriesClassifier(distance="msm", n_jobs=n_jobs, **kwargs), - ) + return WrapperBA(distance="msm", **kwargs) elif c == "elasticensemble" or c == "ee": from aeon.classification.distance_based import ElasticEnsemble From bfdf0a65a41e7ce42ae32e14c0025ce80ea464b3 Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio Date: Sat, 10 Jun 2023 08:52:03 +0100 Subject: [PATCH 04/21] k=2 to 5 with kmeans clustering --- tsml_eval/_wip/condensing/wrapper.py | 57 +++++++++++++++++-------- tsml_eval/experiments/set_classifier.py | 43 ++++++++++++++++--- 2 files changed, 78 insertions(+), 22 deletions(-) diff --git a/tsml_eval/_wip/condensing/wrapper.py b/tsml_eval/_wip/condensing/wrapper.py index b0a9df7c..74743ab0 100644 --- a/tsml_eval/_wip/condensing/wrapper.py +++ b/tsml_eval/_wip/condensing/wrapper.py @@ -2,6 +2,7 @@ import numpy as np from aeon.classification.base import BaseClassifier from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier +from aeon.clustering.k_means import TimeSeriesKMeans from aeon.clustering.metrics.averaging import elastic_barycenter_average @@ -27,35 +28,56 @@ class WrapperBA(BaseClassifier): def __init__( self, - distance="dtw", - distance_params=None, + metric="dtw", + metric_params=None, + classifier=None, + num_instances_per_class=1, ): - self.distance = distance - self.distance_params = distance_params - if self.distance_params is None: - self.distance_params = {} + self.metric = metric + self.metric_params = metric_params + if self.metric_params is None: + self.metric_params = {} self.selected_series = [] self.y_selected_series = [] - self.classifier = KNeighborsTimeSeriesClassifier( - distance=self.distance, distance_params=self.distance_params - ) + self.num_instances_per_class = num_instances_per_class + + self.classifier = classifier + if self.classifier is None: + self.classifier = KNeighborsTimeSeriesClassifier( + distance=self.metric, + distance_params=self.metric_params, + n_neighbors=1, + ) + + if self.num_instances_per_class > 1: + self.clusterer = TimeSeriesKMeans( + n_clusters=self.num_instances_per_class, + metric=self.metric, + distance_params=self.metric_params, + averaging_method="ba", + average_params=self.metric_params, + ) super(WrapperBA, self).__init__() def _fit(self, X, y): for i in np.unique(y): - idxs = np.where(y == i) - - series = elastic_barycenter_average( - X[idxs], - metric=self.distance, - **self.distance_params, - ) + idxs_class = np.where(y == i) + + if self.num_instances_per_class > 1: + self.clusterer.fit(X[idxs_class]) + series = self.clusterer.cluster_centers_ + else: + series = elastic_barycenter_average( + X[idxs_class], + metric=self.metric, + **self.metric_params, + ) if len(series.shape) == 3: - series = np.squeeze(series, axis=0) + series = np.squeeze(series, axis=1) self.selected_series.append(series) @@ -64,6 +86,7 @@ def _fit(self, X, y): self.classifier.fit( np.array(self.selected_series), np.array(self.y_selected_series) ) + return self def _predict(self, X): diff --git a/tsml_eval/experiments/set_classifier.py b/tsml_eval/experiments/set_classifier.py index 89b354ee..46439d4a 100644 --- a/tsml_eval/experiments/set_classifier.py +++ b/tsml_eval/experiments/set_classifier.py @@ -40,7 +40,9 @@ ["KNeighborsTimeSeriesClassifier", "dtw", "1nn-dtw"], ["ed", "1nn-euclidean", "1nn-ed"], ["msm", "1nn-msm"], - ["condensed-1nn-msm", "condensed-1nn-dtw"], + ["1-condensed-1nn-msm", "1-condensed-1nn-dtw"], + ["2-condensed-1nn-msm", "2-condensed-1nn-dtw"], + ["3-condensed-1nn-msm", "3-condensed-1nn-dtw"], ["ElasticEnsemble", "ee"], "ShapeDTW", ["MatrixProfileClassifier", "matrixprofile"], @@ -382,14 +384,45 @@ def _set_classifier_distance_based( from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier return KNeighborsTimeSeriesClassifier(distance="msm", n_jobs=n_jobs, **kwargs) - elif c == "condensed-1nn-dtw": + elif c == "1-condensed-1nn-dtw": from tsml_eval._wip.condensing.wrapper import WrapperBA - return WrapperBA(distance="dtw", **kwargs) - elif c == "condensed-1nn-msm": + return WrapperBA( + metric="dtw", + num_instances_per_class=1, + metric_params={"window": 0.2}, + **kwargs, + ) + elif c == "1-condensed-1nn-msm": + from tsml_eval._wip.condensing.wrapper import WrapperBA + + return WrapperBA(metric="msm", num_instances_per_class=1, **kwargs) + elif c == "2-condensed-1nn-dtw": + from tsml_eval._wip.condensing.wrapper import WrapperBA + + return WrapperBA( + metric="dtw", + num_instances_per_class=2, + metric_params={"window": 0.2}, + **kwargs, + ) + elif c == "2-condensed-1nn-msm": + from tsml_eval._wip.condensing.wrapper import WrapperBA + + return WrapperBA(metric="msm", num_instances_per_class=2, **kwargs) + elif c == "3-condensed-1nn-dtw": + from tsml_eval._wip.condensing.wrapper import WrapperBA + + return WrapperBA( + metric="dtw", + num_instances_per_class=3, + metric_params={"window": 0.2}, + **kwargs, + ) + elif c == "3-condensed-1nn-msm": from tsml_eval._wip.condensing.wrapper import WrapperBA - return WrapperBA(distance="msm", **kwargs) + return WrapperBA(metric="msm", num_instances_per_class=3, **kwargs) elif c == "elasticensemble" or c == "ee": from aeon.classification.distance_based import ElasticEnsemble From 4fda5e222e335dd6cb125f6d6197fc8743102863 Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio Date: Mon, 12 Jun 2023 16:25:36 +0100 Subject: [PATCH 05/21] kmeans for condensing with 2 to 5 instances per class --- tsml_eval/_wip/condensing/wrapper.py | 36 +++++++++++----------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/tsml_eval/_wip/condensing/wrapper.py b/tsml_eval/_wip/condensing/wrapper.py index 74743ab0..bfbf9a30 100644 --- a/tsml_eval/_wip/condensing/wrapper.py +++ b/tsml_eval/_wip/condensing/wrapper.py @@ -3,7 +3,6 @@ from aeon.classification.base import BaseClassifier from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier from aeon.clustering.k_means import TimeSeriesKMeans -from aeon.clustering.metrics.averaging import elastic_barycenter_average class WrapperBA(BaseClassifier): @@ -34,6 +33,7 @@ def __init__( num_instances_per_class=1, ): self.metric = metric + self.metric_params = metric_params if self.metric_params is None: self.metric_params = {} @@ -47,18 +47,18 @@ def __init__( if self.classifier is None: self.classifier = KNeighborsTimeSeriesClassifier( distance=self.metric, + weights="distance", distance_params=self.metric_params, n_neighbors=1, ) - if self.num_instances_per_class > 1: - self.clusterer = TimeSeriesKMeans( - n_clusters=self.num_instances_per_class, - metric=self.metric, - distance_params=self.metric_params, - averaging_method="ba", - average_params=self.metric_params, - ) + self.clusterer = TimeSeriesKMeans( + n_clusters=self.num_instances_per_class, + metric=self.metric, + distance_params=self.metric_params, + averaging_method="ba", + average_params=self.metric_params, + ) super(WrapperBA, self).__init__() @@ -66,21 +66,13 @@ def _fit(self, X, y): for i in np.unique(y): idxs_class = np.where(y == i) - if self.num_instances_per_class > 1: - self.clusterer.fit(X[idxs_class]) - series = self.clusterer.cluster_centers_ - else: - series = elastic_barycenter_average( - X[idxs_class], - metric=self.metric, - **self.metric_params, - ) - - if len(series.shape) == 3: - series = np.squeeze(series, axis=1) + self.clusterer.fit(X[idxs_class]) + averaged_series_class_i = self.clusterer.cluster_centers_ - self.selected_series.append(series) + if len(averaged_series_class_i.shape) == 3: + averaged_series_class_i = np.squeeze(averaged_series_class_i, axis=1) + self.selected_series.append(averaged_series_class_i) self.y_selected_series.append(i) self.classifier.fit( From ed105dff8fe15150b37965ab59d0a6b2ac2fd281 Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio Date: Wed, 2 Aug 2023 16:03:50 +0200 Subject: [PATCH 06/21] changes in the way fig is done --- tsml_eval/_wip/condensing/draw_MBA_DBA.py | 59 ---------------- .../draw_average_and_barycentres.py | 67 +++++++++++++++++++ 2 files changed, 67 insertions(+), 59 deletions(-) delete mode 100644 tsml_eval/_wip/condensing/draw_MBA_DBA.py create mode 100644 tsml_eval/_wip/condensing/draw_average_and_barycentres.py diff --git a/tsml_eval/_wip/condensing/draw_MBA_DBA.py b/tsml_eval/_wip/condensing/draw_MBA_DBA.py deleted file mode 100644 index 8593ed73..00000000 --- a/tsml_eval/_wip/condensing/draw_MBA_DBA.py +++ /dev/null @@ -1,59 +0,0 @@ -# -*- coding: utf-8 -*- -import os - -import matplotlib.pyplot as plt -import numpy as np -from aeon.clustering.metrics.averaging import elastic_barycenter_average -from aeon.datasets import load_from_tsfile - -dataset = "GunPoint" -c = "1" -plt.figure() - -fig, axs = plt.subplots(3, 2, sharey=True, sharex=True, figsize=(8, 6)) - -x_train, y_train = load_from_tsfile( - os.path.join(f"../../../../../ajb/Data/{dataset}/{dataset}_TRAIN.ts") -) - -_, _, len_ts = x_train.shape - -x = range(0, len_ts) - -idxs = np.where(y_train == c) - -for i in x_train[idxs]: - axs[0, 0].plot(x, i[0], lw=0.2) - axs[1, 0].plot(x, i[0], lw=0.2) - axs[2, 0].plot(x, i[0], lw=0.2) - - -series_avg = np.mean(np.array(x_train[idxs]), axis=0)[0] - -axs[0, 1].plot(x, series_avg, color="red") - -series_mba = elastic_barycenter_average( - x_train[idxs], - metric="msm", -) - - -axs[1, 1].plot(x, series_mba[0, :]) - -series_dba = elastic_barycenter_average( - x_train[idxs], - metric="dtw", -) - -axs[2, 1].plot(x, series_dba[0, :], color="green") - -fig.suptitle(f"{dataset} - Class {c}") -axs[0, 0].set_title("Original time series") -axs[0, 1].set_title("Averaging") -axs[1, 0].set_title("Original time series") -axs[1, 1].set_title("MBA") -axs[2, 0].set_title("Original time series") -axs[2, 1].set_title("DBA") -fig.tight_layout() - -plt.savefig("test.png") diff --git a/tsml_eval/_wip/condensing/draw_average_and_barycentres.py b/tsml_eval/_wip/condensing/draw_average_and_barycentres.py new file mode 100644 index 00000000..3eff539f --- /dev/null +++ b/tsml_eval/_wip/condensing/draw_average_and_barycentres.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +import os + +import matplotlib.pyplot as plt +import numpy as np +from aeon.clustering.metrics.averaging import elastic_barycenter_average +from aeon.datasets import load_from_tsfile + +dataset = "GunPoint" +c = "1" + +distances = ["msm", "dtw", "twe"] +distance_params = { + "msm": {"c": 1}, + "dtw": {"window": 0.2}, + "twe": {"nu": 0.05, "lmbda": 1}, +} +names = ["MBA", "DBA", "TBA"] +colours = ["blue", "purple", "green"] +n_methods = len(distances) + 1 + +fig = plt.figure(figsize=(13, 13)) + +gs0 = fig.add_gridspec(1, 2) + +gs00 = gs0[0].subgridspec(n_methods * 2, 1) +gs01 = gs0[1].subgridspec(n_methods, 1) + +# original set of time series +start = n_methods - 1 +end = n_methods + 1 +ax00_gs00 = fig.add_subplot(gs00[start:end, 0]) + +x_train, y_train = load_from_tsfile( + os.path.join(f"../../../../TSC_datasets/{dataset}/{dataset}_TRAIN.ts") +) + +x = range(0, x_train.shape[2]) +idxs = np.where(y_train == c) + +for i in x_train[idxs]: + ax00_gs00.plot(x, i[0], lw=0.2) + +ax00_gs00.set_title("Original time series", size=14) + +# average time series +ax01_gs01 = fig.add_subplot(gs01[0]) +series_avg = np.mean(np.array(x_train[idxs]), axis=0)[0] +ax01_gs01.plot(x, series_avg, color="red") +ax01_gs01.set_title("Averaging", size=14) + +# plots BA time series (msm, dtw, twe). +for idx, i in enumerate(distances): + series_BA = elastic_barycenter_average( + x_train[idxs], + metric=i, + **distance_params[i], + ) + ax = fig.add_subplot(gs01[idx + 1]) + ax.plot(x, series_BA[0, :], color=colours[idx]) + ax.set_title(names[idx], size=14) + +fig.suptitle(f"{dataset} - Class {c}", size=16) + +fig.tight_layout() + +plt.savefig("barycentres_example.png") From 462bc500abd4379d2d9bb96c30eba3e8c94f87c1 Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio Date: Fri, 11 Aug 2023 12:39:09 +0200 Subject: [PATCH 07/21] Drop1 done and changes in the condensing wrapper --- .../_wip/condensing/condensing_classifier.py | 73 ++++++ tsml_eval/_wip/condensing/drop1.py | 234 ++++++++---------- tsml_eval/_wip/condensing/kMeans.py | 73 ++++++ tsml_eval/_wip/condensing/simple_rank.py | 75 +++--- tsml_eval/_wip/condensing/wrapper.py | 6 +- 5 files changed, 281 insertions(+), 180 deletions(-) create mode 100644 tsml_eval/_wip/condensing/condensing_classifier.py create mode 100644 tsml_eval/_wip/condensing/kMeans.py diff --git a/tsml_eval/_wip/condensing/condensing_classifier.py b/tsml_eval/_wip/condensing/condensing_classifier.py new file mode 100644 index 00000000..3c68a8d1 --- /dev/null +++ b/tsml_eval/_wip/condensing/condensing_classifier.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +from aeon.classification.base import BaseClassifier + + +class CondenserClassifier(BaseClassifier): + """ + Classifier wrapper for its use with any condensing approach. + + Parameters + ---------- + distance + distance_params + + Examples + -------- + >>> from ... + >>> from ... + """ + + _tags = { + "univariate-only": True, + "fit_is_empty": False, + "X_inner_mtype": ["np-list", "numpy3D"], + } + + def __init__( + self, + condenser=None, + distance="dtw", + distance_params=None, + classifier=None, + num_instances=1, + ): + if condenser is None or classifier is None: + self.distance = distance + + self.distance_params = distance_params + if self.distance_params is None: + self.distance_params = {} + + self.num_instances = num_instances + + self.condenser = condenser + if self.condenser is None: + from tsml_eval._wip.condensing.kMeans import kMeansCondenser + + self.condenser = kMeansCondenser( + distance=self.distance, + distance_params=self.distance_params, + num_instances=self.num_instances, + ) + + self.classifier = classifier + if self.classifier is None: + from aeon.classification.distance_based import ( + KNeighborsTimeSeriesClassifier, + ) + + self.classifier = KNeighborsTimeSeriesClassifier( + distance=self.distance, + weights="distance", + distance_params=self.distance_params, + n_neighbors=1, + ) + super(CondenserClassifier, self).__init__() + + def _fit(self, X, y): + condensed_X, condensed_y = self.condenser.fit_transform(X, y) + self.classifier.fit(condensed_X, condensed_y) + return self + + def _predict(self, X): + return self.classifier.predict(X) diff --git a/tsml_eval/_wip/condensing/drop1.py b/tsml_eval/_wip/condensing/drop1.py index 20e8a73a..cc1b53ce 100644 --- a/tsml_eval/_wip/condensing/drop1.py +++ b/tsml_eval/_wip/condensing/drop1.py @@ -1,14 +1,10 @@ # -*- coding: utf-8 -*- import numpy as np from aeon.distances import get_distance_function -from aeon.transformations.base import BaseTransformer +from aeon.transformations.collection.base import BaseCollectionTransformer -from tsml_eval.estimators.classification.distance_based import ( - KNeighborsTimeSeriesClassifier, -) - -class Drop1(BaseTransformer): +class Drop1Condenser(BaseCollectionTransformer): """ Class for the simple_rank condensing approach. @@ -16,14 +12,12 @@ class Drop1(BaseTransformer): ---------- distance distance_params - n_neighbors + num_instances_per_class References ---------- - .. [1] Ueno, K., Xi, X., Keogh, E., & Lee, D. J. (2006, December). Anytime - classification using the nearest neighbor algorithm with applications to stream - mining. In Sixth International Conference on Data Mining (ICDM'06) (pp. 623-632). - IEEE. + .. [1] Wilson, D. R., & Martinez, T. R. (2000). Reduction techniques for + instance-based learning algorithms. Machine learning, 38, 257-286. Examples -------- @@ -34,31 +28,38 @@ class Drop1(BaseTransformer): _tags = { "univariate-only": True, "fit_is_empty": False, + "X_inner_mtype": ["np-list", "numpy3D"], + "requires_y": True, + "y_inner_mtype": ["numpy1D"], } def __init__( self, distance="dtw", distance_params=None, - n_neighbors=1, + num_instances=1, ): self.distance = distance - self._distance_params = distance_params - if self._distance_params is None: - self._distance_params = {} + self.distance_params = distance_params + if self.distance_params is None: + self.distance_params = {} - self.n_neighbors = n_neighbors + self.num_instances = num_instances if isinstance(self.distance, str): - self.metric_ = get_distance_function(metric=self.distance) + self.metric = get_distance_function(metric=self.distance) self.selected_indices = [] - super(Drop1, self).__init__() + super(Drop1Condenser, self).__init__() def _fit(self, X, y): + n_classes = len(np.unique(y)) + self.num_instances = self.num_instances * n_classes + + def _transform(self, X, y): """ - Implement of the SimpleRank prototype selection approach. + Implement of the Drop1 prototype selection approach. Parameters ---------- @@ -76,118 +77,89 @@ def _fit(self, X, y): associates = [[] for _ in range(n_samples)] kneighbors = [[] for _ in range(n_samples)] - y_pred = [] - - classifier = KNeighborsTimeSeriesClassifier( - distance=self.distance, - distance_params=self._distance_params, - n_neighbors=self.n_neighbors + 1, - ) - - # Predicting class with the instance in the set. - # Also getting the kneighbors and the associates of the instance. - for i in range(n_samples): - classifier.fit(X, y) - y_pred.append(classifier.predict(X[i])) - i_kneighbors, i_distances = classifier._kneighbors(X[i]) - - i_kneighbors = [x[1] for x in sorted(zip(i_distances, i_kneighbors))] - - for j in i_kneighbors: - associates[j].append(i) - - kneighbors[i] = i_kneighbors - - # Predicting class without the instance in the set. - y_pred_wo_P = [] - for i in range(n_samples): - X_wo_P = np.delete(X, i, axis=0) - y_wo_P = np.delete(y, i) - classifier.fit(X_wo_P, y_wo_P) - y_pred_wo_P.append(classifier.predict(X[i])) - - X_S = X.copy() - y_S = y.copy() - - for i in range(n_samples): - # Num of associates correctly classified with i (or P) as neighbor. - with_list = [ - j - for j in associates[i] - if ((i in kneighbors[j]) and (y[j] == y_pred[j])) - ] - - # Num of associates correctly classified without i (or P) as neighbor. - without_list = [j for j in associates[i] if (y[j] == y_pred_wo_P[j])] - - # Check if removing i (or P) is better. - if len(without_list) >= len(with_list): - # Remove P from S. - i_S = self._find_index(i, X, X_S) - X_S = np.delete(X_S, i_S, axis=0) - y_S = np.delete(y_S, i_S) - - # Remove P from the kneighbors of the associates. - for j in associates[i]: - kneighbors[j].remove(i) - - # if self.n_neighbors + 1 >= len(X_S): - # classifier = KNeighborsTimeSeriesClassifier( - # distance=self.distance, - # distance_params=self._distance_params, - # n_neighbors=len(X_S), - # ) - - # Find the next nearest neighbor for the j-th associate. - classifier.fit(X_S, y_S) - y_pred[j] = classifier.predict(X[j]) - j_kneighbors, _ = classifier._kneighbors(X[j]) - j_kneighbors = self._find_index(j_kneighbors, X_S, X) - - j_neighbor = list( - set(j_kneighbors).symmetric_difference(set(kneighbors[j])) - )[0] - - kneighbors[j].append(j_neighbor) - associates[j_neighbor].append(j) - - # Remove P from the associates of the neighbors. - for j in kneighbors[i]: - associates[j].remove(i) - - associates[i] = [] - kneighbors[i] = [] - - # The instance worth staying. - else: - self.selected_indices.append(i) - return self - - def _transform(self, X, y): + weights = [[] for _ in range(n_samples)] + distances = np.zeros((n_samples, n_samples)) + + # Getting the kneighbors and the associates of the instance. + for p in range(n_samples): + for p2 in range(p + 1, n_samples): + distances[p, p2] = self.metric(X[p], X[p2], **self.distance_params) + distances[p2, p] = distances[p, p2] + + for p in range(n_samples): + weights[p], kneighbors[p] = zip( + *sorted(zip(distances[p], range(n_samples))) + ) + + # todo: comprobar que tenemos que quitar el 1er elemento por ser él mismo. + # weights[p], kneighbors[p] = weights[p][1:], kneighbors[p][1:] + + for j in kneighbors[p][: self.num_instances]: + associates[j].append(p) + + # print(kneighbors) + # print(associates) + + # Predicting with/without rule for each instance p in the set. + for p in range(n_samples): + without_P = 0 + with_P = 0 + + for a in associates[p]: + # print(f"{a=}") + # WITH + y_pred_w_P = self._predict_KNN( + kneighbors[a], + weights[a], + y, + self.num_instances, + ) + + if y_pred_w_P == y[a]: + with_P += 1 + # WITHOUT + y_pred_wo_P = self._predict_KNN( + [k for k in kneighbors[a] if k != p], + [w for idx, w in enumerate(weights[a]) if idx != p], + y, + self.num_instances, + ) + + if y_pred_wo_P == y[a]: + without_P += 1 + # print(without_P, with_P) + if without_P < with_P: # the instance is worth keeping. + print(f"Keeping instance {p}.") + self.selected_indices.append(p) + else: # the instance is not worth keeping. + print(f"Removing instance {p}.") + for a in associates[p]: + kneighbors[a] = [kn for kn in kneighbors[a] if kn != p] + for j in kneighbors[a][: self.num_instances]: + if a not in associates[j]: + associates[j].append(a) + + for k in kneighbors[p]: + associates[k] = [a for a in associates[k] if a != p] + + # print(associates) + # for k in kneighbors: + # print(k[: self.num_instances], end=", ") + # # print(kneighbors) + print(self.selected_indices) return X[self.selected_indices], y[self.selected_indices] def _fit_transform(self, X, y): - self._fit(X, y) - condensed_X, condensed_y = self._transform(X, y) - - return condensed_X, condensed_y - - def _get_selected_indices(self): - # todo: check that fit has already been called. - return self.selected_indices - - def _find_index(self, values, training_set_instance, training_set_to_find): - if isinstance(values, int): - values = [values] - - index = [ - xdx - for xdx, x in enumerate(training_set_to_find) - for k in values - if np.array_equal(x, training_set_instance[k]) - ] - - if len(index) == 1: - return index[0] - else: - return index + self.fit(X, y) + return self._transform(X, y) + + def _predict_KNN(self, neighbors, weights, y, num_neighbors): + neighbors = neighbors[:(num_neighbors)] + weights = weights[:(num_neighbors)] + classes_, y_ = np.unique(y, return_inverse=True) + scores = np.zeros(len(classes_)) + for id, w in zip(neighbors, weights): + predicted_class = y_[id] + scores[predicted_class] += 1 / (w + np.finfo(float).eps) + # print(f"{neighbors=}\n{weights=}\n{classes_[np.argmax(scores)]=}") + return classes_[np.argmax(scores)] diff --git a/tsml_eval/_wip/condensing/kMeans.py b/tsml_eval/_wip/condensing/kMeans.py new file mode 100644 index 00000000..b5cc6fa8 --- /dev/null +++ b/tsml_eval/_wip/condensing/kMeans.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +import numpy as np +from aeon.clustering.k_means import TimeSeriesKMeans +from aeon.transformations.collection.base import BaseCollectionTransformer + + +class kMeansCondenser(BaseCollectionTransformer): + """ + Classifier wrapper for its use with any condensing approach. + + Parameters + ---------- + distance + distance_params + + Examples + -------- + >>> from ... + >>> from ... + """ + + _tags = { + "univariate-only": True, + "fit_is_empty": False, + "X_inner_mtype": ["np-list", "numpy3D"], + "requires_y": True, + "y_inner_mtype": ["numpy1D"], + } + + def __init__( + self, + distance="dtw", + distance_params=None, + num_instances_per_class=1, + ): + self.distance = distance + + self.distance_params = distance_params + if self.distance_params is None: + self.distance_params = {} + + self.num_instances_per_class = num_instances_per_class + + self.selected_series = [] + self.y_selected_series = [] + + self.clusterer = TimeSeriesKMeans( + n_clusters=self.num_instances_per_class, + metric=self.distance, + distance_params=self.distance_params, + averaging_method="ba", + average_params=self.distance_params, + ) + + super(kMeansCondenser, self).__init__() + + def _transform(self, X, y): + for i in np.unique(y): + idxs_class = np.where(y == i) + + self.clusterer.fit(X[idxs_class]) + averaged_series_class_i = self.clusterer.cluster_centers_ + + if len(averaged_series_class_i.shape) == 3: + averaged_series_class_i = np.squeeze(averaged_series_class_i, axis=1) + + self.selected_series.append(averaged_series_class_i) + self.y_selected_series.append(i) + + return np.array(self.selected_series), np.array(self.y_selected_series) + + def _fit_transform(self, X, y): + return self._transform(X, y) diff --git a/tsml_eval/_wip/condensing/simple_rank.py b/tsml_eval/_wip/condensing/simple_rank.py index 7a93c3bf..13de0024 100644 --- a/tsml_eval/_wip/condensing/simple_rank.py +++ b/tsml_eval/_wip/condensing/simple_rank.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- import numpy as np from aeon.distances import get_distance_function -from aeon.transformations.base import BaseTransformer +from aeon.transformations.collection.base import BaseCollectionTransformer from tsml_eval.estimators.classification.distance_based import ( KNeighborsTimeSeriesClassifier, ) -class SimpleRank(BaseTransformer): +class SimpleRankCondenser(BaseCollectionTransformer): """ Class for the simple_rank condensing approach. @@ -34,98 +34,81 @@ class SimpleRank(BaseTransformer): _tags = { "univariate-only": True, "fit_is_empty": False, + "X_inner_mtype": ["np-list", "numpy3D"], + "requires_y": True, + "y_inner_mtype": ["numpy1D"], } def __init__( self, distance="dtw", distance_params=None, - n_neighbors=1, + num_instances_per_class=1, ): self.distance = distance - self._distance_params = distance_params - if self._distance_params is None: - self._distance_params = {} + self.distance_params = distance_params + if self.distance_params is None: + self.distance_params = {} - self.n_neighbors = n_neighbors + self.num_instances_per_class = num_instances_per_class if isinstance(self.distance, str): self.metric_ = get_distance_function(metric=self.distance) self.selected_indices = [] - super(SimpleRank, self).__init__() + super(SimpleRankCondenser, self).__init__() - def _fit(self, X, y): - """ - Implement of the SimpleRank prototype selection approach. - - Parameters - ---------- - X -- numpy array of shape (n_samples, n_features) representing the feature - vectors of the instances. - y -- numpy array of shape (n_samples,) representing the corresponding class - labels. - - Returns - ------- - self - """ + def _transform(self, X, y): n_samples = X.shape[0] rank = np.zeros(n_samples) distance = np.zeros(n_samples) num_classes = len(np.unique(y)) + # As SR do not separate prototypes per class, the number should be multiplied by + # the number of instances per class of other methods. + self.num_instances_per_class = self.num_instances_per_class * num_classes for i in range(n_samples): X_train = np.delete(X, i, axis=0) y_train = np.delete(y, i) X_pattern_loo = X[i] + y_pattern_loo = y[i] + # Consider moving this to the init method. classifier = KNeighborsTimeSeriesClassifier( distance=self.distance, - distance_params=self._distance_params, - n_neighbors=self.n_neighbors, + distance_params=self.distance_params, + n_neighbors=1, ) classifier.fit(X_train, y_train) prediction = classifier.predict(X_pattern_loo) - if y[i] == prediction: + if y_pattern_loo == prediction: rank[i] = 1 else: rank[i] = -2 / (num_classes - 1) - # compute distance to nearest neigh in class + # compute distance to nearest neighbour in class distance[i] = np.min( np.array( [ self.metric_( X_pattern_loo, - X_train[np.where(y_train == y[i])[0]][j], - **self._distance_params, + j, + **self.distance_params, ) - for j in range(len([np.where(y_train == y[i])[0]])) + for j in X_train[np.where(y_train == y_pattern_loo)[0]] ] ) ) + order = sorted(zip(rank, -np.array(distance), range(n_samples)))[::-1] - samples_ordered = sorted(zip(rank, -np.array(distance), range(n_samples))) + self.selected_indices = [x[2] for x in order][: self.num_instances_per_class] - self.selected_indices = [x[2] for x in samples_ordered][::-1][ - : self.n_neighbors - ] - - return self - - def _transform(self, X, y): - return X[self.selected_indices], y[self.selected_indices] - - def _fit_transform(self, X, y): - self._fit(X, y) - condensed_X, condensed_y = self._transform(X, y) + condensed_X, condensed_y = X[self.selected_indices], y[self.selected_indices] return condensed_X, condensed_y - def _get_selected_indices(self): - # todo: check that fit has already been called. - return self.selected_indices + def _fit_transform(self, X, y): + return self._transform(X, y) diff --git a/tsml_eval/_wip/condensing/wrapper.py b/tsml_eval/_wip/condensing/wrapper.py index bfbf9a30..f2b6ca3c 100644 --- a/tsml_eval/_wip/condensing/wrapper.py +++ b/tsml_eval/_wip/condensing/wrapper.py @@ -38,9 +38,6 @@ def __init__( if self.metric_params is None: self.metric_params = {} - self.selected_series = [] - self.y_selected_series = [] - self.num_instances_per_class = num_instances_per_class self.classifier = classifier @@ -52,6 +49,9 @@ def __init__( n_neighbors=1, ) + self.selected_series = [] + self.y_selected_series = [] + self.clusterer = TimeSeriesKMeans( n_clusters=self.num_instances_per_class, metric=self.metric, From 66e3d48d138d1abbf724ec53d759080628e294db Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio Date: Fri, 11 Aug 2023 12:44:41 +0200 Subject: [PATCH 08/21] Minimal change to the wrapper --- tsml_eval/_wip/condensing/condensing_classifier.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tsml_eval/_wip/condensing/condensing_classifier.py b/tsml_eval/_wip/condensing/condensing_classifier.py index 3c68a8d1..1382bd93 100644 --- a/tsml_eval/_wip/condensing/condensing_classifier.py +++ b/tsml_eval/_wip/condensing/condensing_classifier.py @@ -31,14 +31,13 @@ def __init__( classifier=None, num_instances=1, ): - if condenser is None or classifier is None: - self.distance = distance + self.distance = distance - self.distance_params = distance_params - if self.distance_params is None: - self.distance_params = {} + self.distance_params = distance_params + if self.distance_params is None: + self.distance_params = {} - self.num_instances = num_instances + self.num_instances = num_instances self.condenser = condenser if self.condenser is None: From 1b1181646e81e5e57858702bbe8b2b970aea4318 Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio Date: Fri, 11 Aug 2023 13:00:04 +0200 Subject: [PATCH 09/21] Drop 2 done. Still some prints for sanity check --- tsml_eval/_wip/condensing/drop2.py | 165 +++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 tsml_eval/_wip/condensing/drop2.py diff --git a/tsml_eval/_wip/condensing/drop2.py b/tsml_eval/_wip/condensing/drop2.py new file mode 100644 index 00000000..29598452 --- /dev/null +++ b/tsml_eval/_wip/condensing/drop2.py @@ -0,0 +1,165 @@ +# -*- coding: utf-8 -*- +import numpy as np +from aeon.distances import get_distance_function +from aeon.transformations.collection.base import BaseCollectionTransformer + + +class Drop2Condenser(BaseCollectionTransformer): + """ + Class for the simple_rank condensing approach. + + Parameters + ---------- + distance + distance_params + num_instances_per_class + + References + ---------- + .. [1] Wilson, D. R., & Martinez, T. R. (2000). Reduction techniques for + instance-based learning algorithms. Machine learning, 38, 257-286. + + Examples + -------- + >>> from ... + >>> from ... + """ + + _tags = { + "univariate-only": True, + "fit_is_empty": False, + "X_inner_mtype": ["np-list", "numpy3D"], + "requires_y": True, + "y_inner_mtype": ["numpy1D"], + } + + def __init__( + self, + distance="dtw", + distance_params=None, + num_instances=1, + ): + self.distance = distance + self.distance_params = distance_params + if self.distance_params is None: + self.distance_params = {} + + self.num_instances = num_instances + + if isinstance(self.distance, str): + self.metric = get_distance_function(metric=self.distance) + + self.selected_indices = [] + + super(Drop2Condenser, self).__init__() + + def _fit(self, X, y): + n_classes = len(np.unique(y)) + self.num_instances = self.num_instances * n_classes + + def _transform(self, X, y): + """ + Implement of the Drop1 prototype selection approach. + + Parameters + ---------- + X -- numpy array of shape (n_samples, n_features) representing the feature + vectors of the instances. + y -- numpy array of shape (n_samples,) representing the corresponding class + labels. + k -- int, the desired number of prototypes to be selected. + + Returns + ------- + self + """ + n_samples = X.shape[0] + + associates = [[] for _ in range(n_samples)] + kneighbors = [[] for _ in range(n_samples)] + weights = [[] for _ in range(n_samples)] + distances = np.zeros((n_samples, n_samples)) + + # Getting the kneighbors and the associates of the instance. + for p in range(n_samples): + for p2 in range(p + 1, n_samples): + distances[p, p2] = self.metric(X[p], X[p2], **self.distance_params) + distances[p2, p] = distances[p, p2] + + for p in range(n_samples): + weights[p], kneighbors[p] = zip( + *sorted(zip(distances[p], range(n_samples))) + ) + + # todo: comprobar que tenemos que quitar el 1er elemento por ser él mismo. + # weights[p], kneighbors[p] = weights[p][1:], kneighbors[p][1:] + + for j in kneighbors[p][: self.num_instances]: + associates[j].append(p) + + # print(kneighbors) + # print(associates) + + # Predicting with/without rule for each instance p in the set. + for p in range(n_samples): + without_P = 0 + with_P = 0 + + for a in associates[p]: + # print(f"{a=}") + # WITH + y_pred_w_P = self._predict_KNN( + kneighbors[a], + weights[a], + y, + self.num_instances, + ) + + if y_pred_w_P == y[a]: + with_P += 1 + # WITHOUT + y_pred_wo_P = self._predict_KNN( + [k for k in kneighbors[a] if k != p], + [w for idx, w in enumerate(weights[a]) if idx != p], + y, + self.num_instances, + ) + + if y_pred_wo_P == y[a]: + without_P += 1 + # print(without_P, with_P) + if without_P < with_P: # the instance is worth keeping. + print(f"Keeping instance {p}.") + self.selected_indices.append(p) + else: # the instance is not worth keeping. + print(f"Removing instance {p}.") + for a in associates[p]: + kneighbors[a] = [kn for kn in kneighbors[a] if kn != p] + for j in kneighbors[a][: self.num_instances]: + if a not in associates[j]: + associates[j].append(a) + + # for k in kneighbors[p]: + # associates[k] = [a for a in associates[k] if a != p] + + # print(associates) + # for k in kneighbors: + # print(k[: self.num_instances], end=", ") + # # print(kneighbors) + print(self.selected_indices) + return X[self.selected_indices], y[self.selected_indices] + + def _fit_transform(self, X, y): + self.fit(X, y) + return self._transform(X, y) + + def _predict_KNN(self, neighbors, weights, y, num_neighbors): + neighbors = neighbors[:(num_neighbors)] + weights = weights[:(num_neighbors)] + classes_, y_ = np.unique(y, return_inverse=True) + scores = np.zeros(len(classes_)) + for id, w in zip(neighbors, weights): + predicted_class = y_[id] + scores[predicted_class] += 1 / (w + np.finfo(float).eps) + # print(f"{neighbors=}\n{weights=}\n{classes_[np.argmax(scores)]=}") + return classes_[np.argmax(scores)] From dbd2e260ee9f1212b742e669fc55e59047544d67 Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio Date: Fri, 11 Aug 2023 13:51:24 +0200 Subject: [PATCH 10/21] Drop 1-2 reworked. Still some prints for sanity check --- tsml_eval/_wip/condensing/drop1.py | 13 ++----------- tsml_eval/_wip/condensing/drop2.py | 31 ++++++++++++++---------------- 2 files changed, 16 insertions(+), 28 deletions(-) diff --git a/tsml_eval/_wip/condensing/drop1.py b/tsml_eval/_wip/condensing/drop1.py index cc1b53ce..8d159aaf 100644 --- a/tsml_eval/_wip/condensing/drop1.py +++ b/tsml_eval/_wip/condensing/drop1.py @@ -91,22 +91,18 @@ def _transform(self, X, y): *sorted(zip(distances[p], range(n_samples))) ) - # todo: comprobar que tenemos que quitar el 1er elemento por ser él mismo. + # todo: maybe removing first element as is itself? # weights[p], kneighbors[p] = weights[p][1:], kneighbors[p][1:] for j in kneighbors[p][: self.num_instances]: associates[j].append(p) - # print(kneighbors) - # print(associates) - # Predicting with/without rule for each instance p in the set. for p in range(n_samples): without_P = 0 with_P = 0 for a in associates[p]: - # print(f"{a=}") # WITH y_pred_w_P = self._predict_KNN( kneighbors[a], @@ -127,7 +123,7 @@ def _transform(self, X, y): if y_pred_wo_P == y[a]: without_P += 1 - # print(without_P, with_P) + if without_P < with_P: # the instance is worth keeping. print(f"Keeping instance {p}.") self.selected_indices.append(p) @@ -142,10 +138,6 @@ def _transform(self, X, y): for k in kneighbors[p]: associates[k] = [a for a in associates[k] if a != p] - # print(associates) - # for k in kneighbors: - # print(k[: self.num_instances], end=", ") - # # print(kneighbors) print(self.selected_indices) return X[self.selected_indices], y[self.selected_indices] @@ -161,5 +153,4 @@ def _predict_KNN(self, neighbors, weights, y, num_neighbors): for id, w in zip(neighbors, weights): predicted_class = y_[id] scores[predicted_class] += 1 / (w + np.finfo(float).eps) - # print(f"{neighbors=}\n{weights=}\n{classes_[np.argmax(scores)]=}") return classes_[np.argmax(scores)] diff --git a/tsml_eval/_wip/condensing/drop2.py b/tsml_eval/_wip/condensing/drop2.py index 29598452..6b93581e 100644 --- a/tsml_eval/_wip/condensing/drop2.py +++ b/tsml_eval/_wip/condensing/drop2.py @@ -78,6 +78,7 @@ def _transform(self, X, y): associates = [[] for _ in range(n_samples)] kneighbors = [[] for _ in range(n_samples)] weights = [[] for _ in range(n_samples)] + distance_nearest_enemy = [] distances = np.zeros((n_samples, n_samples)) # Getting the kneighbors and the associates of the instance. @@ -87,26 +88,29 @@ def _transform(self, X, y): distances[p2, p] = distances[p, p2] for p in range(n_samples): - weights[p], kneighbors[p] = zip( - *sorted(zip(distances[p], range(n_samples))) + weights[p], kneighbors[p], y_ordered = zip( + *sorted(zip(distances[p], range(n_samples), y)) ) - # todo: comprobar que tenemos que quitar el 1er elemento por ser él mismo. - # weights[p], kneighbors[p] = weights[p][1:], kneighbors[p][1:] - for j in kneighbors[p][: self.num_instances]: associates[j].append(p) - # print(kneighbors) - # print(associates) + # Drop2 order instances by their distance to the nearest enemy. + for k in kneighbors[p]: + if y_ordered[k] != y[p]: + distance_nearest_enemy.append(weights[p][k]) + break + + _, n_samples_ordered = zip( + *sorted(zip(distance_nearest_enemy, range(n_samples))) + ) # Predicting with/without rule for each instance p in the set. - for p in range(n_samples): + for p in n_samples_ordered: without_P = 0 with_P = 0 for a in associates[p]: - # print(f"{a=}") # WITH y_pred_w_P = self._predict_KNN( kneighbors[a], @@ -127,7 +131,7 @@ def _transform(self, X, y): if y_pred_wo_P == y[a]: without_P += 1 - # print(without_P, with_P) + if without_P < with_P: # the instance is worth keeping. print(f"Keeping instance {p}.") self.selected_indices.append(p) @@ -139,13 +143,6 @@ def _transform(self, X, y): if a not in associates[j]: associates[j].append(a) - # for k in kneighbors[p]: - # associates[k] = [a for a in associates[k] if a != p] - - # print(associates) - # for k in kneighbors: - # print(k[: self.num_instances], end=", ") - # # print(kneighbors) print(self.selected_indices) return X[self.selected_indices], y[self.selected_indices] From cc82fc5bb6519c8e6f0f0c7619057bf3ff387de2 Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio Date: Fri, 11 Aug 2023 14:51:57 +0200 Subject: [PATCH 11/21] DropX versions ready and some experiments in set_classifier --- _uea_experiments/uco/task_gpu_regression.sub | 18 ++ tsml_eval/_wip/condensing/drop1.py | 2 +- tsml_eval/_wip/condensing/drop2.py | 10 +- tsml_eval/_wip/condensing/drop3.py | 179 +++++++++++++++++++ tsml_eval/experiments/set_classifier.py | 150 +++++++++++++++- 5 files changed, 351 insertions(+), 8 deletions(-) create mode 100644 _uea_experiments/uco/task_gpu_regression.sub create mode 100644 tsml_eval/_wip/condensing/drop3.py diff --git a/_uea_experiments/uco/task_gpu_regression.sub b/_uea_experiments/uco/task_gpu_regression.sub new file mode 100644 index 00000000..c0c1942f --- /dev/null +++ b/_uea_experiments/uco/task_gpu_regression.sub @@ -0,0 +1,18 @@ +executable = ../../tsml_eval/experiments/regression_experiments_condor.py +arguments = $(data_dir) $(results_dir) $(regressor) $(dataset) $(random_state) $(generate_train_files) $(predefined_folds) +getenv = True +output = ../../../TSER_condor_output/$(regressor)/$(dataset)/output_$(random_state).out +error = ../../../TSER_condor_output/$(regressor)/$(dataset)/error_$(random_state).err +log = ../../../TSER_condor_output/$(regressor)/$(dataset)/logs_$(random_state).log +should_transfer_files = NO + +request_memory = 12G +request_CPUs = 2 +request_GPUs = 1 + +#require_gpus = (GlobalMemoryMb >= 15000) +#requirements = (Machine == "srvrrycarn01.priv.uco.es") || (Machine == "srvrrycarn02.priv.uco.es") + +batch_name = $(batchname) + +queue data_dir,results_dir,regressor,dataset,random_state,generate_train_files,predefined_folds,checkpoint,batchname from ./task_params.txt diff --git a/tsml_eval/_wip/condensing/drop1.py b/tsml_eval/_wip/condensing/drop1.py index 8d159aaf..20a28a95 100644 --- a/tsml_eval/_wip/condensing/drop1.py +++ b/tsml_eval/_wip/condensing/drop1.py @@ -92,7 +92,7 @@ def _transform(self, X, y): ) # todo: maybe removing first element as is itself? - # weights[p], kneighbors[p] = weights[p][1:], kneighbors[p][1:] + weights[p], kneighbors[p] = weights[p][1:], kneighbors[p][1:] for j in kneighbors[p][: self.num_instances]: associates[j].append(p) diff --git a/tsml_eval/_wip/condensing/drop2.py b/tsml_eval/_wip/condensing/drop2.py index 6b93581e..69707892 100644 --- a/tsml_eval/_wip/condensing/drop2.py +++ b/tsml_eval/_wip/condensing/drop2.py @@ -92,13 +92,16 @@ def _transform(self, X, y): *sorted(zip(distances[p], range(n_samples), y)) ) + # todo: maybe removing first element as is itself? + weights[p], kneighbors[p] = weights[p][1:], kneighbors[p][1:] + for j in kneighbors[p][: self.num_instances]: associates[j].append(p) # Drop2 order instances by their distance to the nearest enemy. - for k in kneighbors[p]: - if y_ordered[k] != y[p]: - distance_nearest_enemy.append(weights[p][k]) + for kdx, _ in enumerate(kneighbors[p]): + if y_ordered[kdx] != y[p]: + distance_nearest_enemy.append(weights[p][kdx]) break _, n_samples_ordered = zip( @@ -158,5 +161,4 @@ def _predict_KNN(self, neighbors, weights, y, num_neighbors): for id, w in zip(neighbors, weights): predicted_class = y_[id] scores[predicted_class] += 1 / (w + np.finfo(float).eps) - # print(f"{neighbors=}\n{weights=}\n{classes_[np.argmax(scores)]=}") return classes_[np.argmax(scores)] diff --git a/tsml_eval/_wip/condensing/drop3.py b/tsml_eval/_wip/condensing/drop3.py new file mode 100644 index 00000000..0aabecba --- /dev/null +++ b/tsml_eval/_wip/condensing/drop3.py @@ -0,0 +1,179 @@ +# -*- coding: utf-8 -*- +import numpy as np +from aeon.distances import get_distance_function +from aeon.transformations.collection.base import BaseCollectionTransformer + + +class Drop3Condenser(BaseCollectionTransformer): + """ + Class for the simple_rank condensing approach. + + Parameters + ---------- + distance + distance_params + num_instances_per_class + + References + ---------- + .. [1] Wilson, D. R., & Martinez, T. R. (2000). Reduction techniques for + instance-based learning algorithms. Machine learning, 38, 257-286. + + Examples + -------- + >>> from ... + >>> from ... + """ + + _tags = { + "univariate-only": True, + "fit_is_empty": False, + "X_inner_mtype": ["np-list", "numpy3D"], + "requires_y": True, + "y_inner_mtype": ["numpy1D"], + } + + def __init__( + self, + distance="dtw", + distance_params=None, + num_instances=1, + ): + self.distance = distance + self.distance_params = distance_params + if self.distance_params is None: + self.distance_params = {} + + self.num_instances = num_instances + + if isinstance(self.distance, str): + self.metric = get_distance_function(metric=self.distance) + + self.selected_indices = [] + + super(Drop3Condenser, self).__init__() + + def _fit(self, X, y): + n_classes = len(np.unique(y)) + self.num_instances = self.num_instances * n_classes + + def _transform(self, X, y): + """ + Implement of the Drop1 prototype selection approach. + + Parameters + ---------- + X -- numpy array of shape (n_samples, n_features) representing the feature + vectors of the instances. + y -- numpy array of shape (n_samples,) representing the corresponding class + labels. + k -- int, the desired number of prototypes to be selected. + + Returns + ------- + self + """ + n_samples = X.shape[0] + id_instances = [] + + associates = [[] for _ in range(n_samples)] + kneighbors = [[] for _ in range(n_samples)] + weights = [[] for _ in range(n_samples)] + distance_nearest_enemy = [] + distances = np.zeros((n_samples, n_samples)) + + # Getting the kneighbors and the associates of the instance. + for p in range(n_samples): + for p2 in range(p + 1, n_samples): + distances[p, p2] = self.metric(X[p], X[p2], **self.distance_params) + distances[p2, p] = distances[p, p2] + + for p in range(n_samples): + weights[p], kneighbors[p], y_ordered = zip( + *sorted(zip(distances[p], range(n_samples), y)) + ) + # todo: maybe removing first element as is itself? + weights[p], kneighbors[p] = weights[p][1:], kneighbors[p][1:] + + for j in kneighbors[p][: self.num_instances]: + associates[j].append(p) + + # compute knn for each instance. + for p in range(n_samples): + y_pred = self._predict_KNN( + kneighbors[p], + weights[p], + y, + self.num_instances, + ) + + if y_pred == y[p]: + id_instances.append(p) + print(id_instances) + print(y[id_instances]) + print(len(id_instances) == n_samples, len(id_instances), "of", n_samples) + + for p in id_instances: + # Drop2 order instances by their distance to the nearest enemy. + for kdx, _ in enumerate(kneighbors[p]): + if y_ordered[kdx] != y[p]: + # todo: maybe removing first element as is itself? if so, k-1 + distance_nearest_enemy.append(weights[p][kdx]) + break + + _, n_samples_ordered = zip(*sorted(zip(distance_nearest_enemy, id_instances))) + + # Predicting with/without rule for each instance p in the set. + for p in n_samples_ordered: + without_P = 0 + with_P = 0 + + for a in associates[p]: + # WITH + y_pred_w_P = self._predict_KNN( + kneighbors[a], + weights[a], + y, + self.num_instances, + ) + + if y_pred_w_P == y[a]: + with_P += 1 + # WITHOUT + y_pred_wo_P = self._predict_KNN( + [k for k in kneighbors[a] if k != p], + [w for idx, w in enumerate(weights[a]) if idx != p], + y, + self.num_instances, + ) + + if y_pred_wo_P == y[a]: + without_P += 1 + + if without_P < with_P: # the instance is worth keeping. + print(f"Keeping instance {p}.") + self.selected_indices.append(p) + else: # the instance is not worth keeping. + print(f"Removing instance {p}.") + for a in associates[p]: + kneighbors[a] = [kn for kn in kneighbors[a] if kn != p] + for j in kneighbors[a][: self.num_instances]: + if a not in associates[j]: + associates[j].append(a) + + print(self.selected_indices) + return X[self.selected_indices], y[self.selected_indices] + + def _fit_transform(self, X, y): + self.fit(X, y) + return self._transform(X, y) + + def _predict_KNN(self, neighbors, weights, y, num_neighbors): + neighbors = neighbors[:(num_neighbors)] + weights = weights[:(num_neighbors)] + classes_, y_ = np.unique(y, return_inverse=True) + scores = np.zeros(len(classes_)) + for id, w in zip(neighbors, weights): + predicted_class = y_[id] + scores[predicted_class] += 1 / (w + np.finfo(float).eps) + return classes_[np.argmax(scores)] diff --git a/tsml_eval/experiments/set_classifier.py b/tsml_eval/experiments/set_classifier.py index 46439d4a..4e8c19b2 100644 --- a/tsml_eval/experiments/set_classifier.py +++ b/tsml_eval/experiments/set_classifier.py @@ -40,9 +40,20 @@ ["KNeighborsTimeSeriesClassifier", "dtw", "1nn-dtw"], ["ed", "1nn-euclidean", "1nn-ed"], ["msm", "1nn-msm"], - ["1-condensed-1nn-msm", "1-condensed-1nn-dtw"], - ["2-condensed-1nn-msm", "2-condensed-1nn-dtw"], - ["3-condensed-1nn-msm", "3-condensed-1nn-dtw"], + "1-condensed-1nn-msm", + "1-condensed-1nn-dtw", + "1-condensed-1nn-twe", + "2-condensed-1nn-msm", + "2-condensed-1nn-dtw", + "2-condensed-1nn-twe", + "3-condensed-1nn-msm", + "3-condensed-1nn-dtw", + "3-condensed-1nn-twe", + "SimpleRankCondenser", + "kMeansCondenser", + "Drop1Condenser", + "Drop2Condenser", + "Drop3Condenser", ["ElasticEnsemble", "ee"], "ShapeDTW", ["MatrixProfileClassifier", "matrixprofile"], @@ -397,6 +408,15 @@ def _set_classifier_distance_based( from tsml_eval._wip.condensing.wrapper import WrapperBA return WrapperBA(metric="msm", num_instances_per_class=1, **kwargs) + elif c == "1-condensed-1nn-twe": + from tsml_eval._wip.condensing.wrapper import WrapperBA + + return WrapperBA( + metric="twe", + num_instances_per_class=1, + metric_params={"nu": 0.05}, + **kwargs, + ) elif c == "2-condensed-1nn-dtw": from tsml_eval._wip.condensing.wrapper import WrapperBA @@ -410,6 +430,15 @@ def _set_classifier_distance_based( from tsml_eval._wip.condensing.wrapper import WrapperBA return WrapperBA(metric="msm", num_instances_per_class=2, **kwargs) + elif c == "2-condensed-1nn-twe": + from tsml_eval._wip.condensing.wrapper import WrapperBA + + return WrapperBA( + metric="twe", + num_instances_per_class=2, + metric_params={"nu": 0.05}, + **kwargs, + ) elif c == "3-condensed-1nn-dtw": from tsml_eval._wip.condensing.wrapper import WrapperBA @@ -423,6 +452,121 @@ def _set_classifier_distance_based( from tsml_eval._wip.condensing.wrapper import WrapperBA return WrapperBA(metric="msm", num_instances_per_class=3, **kwargs) + elif c == "3-condensed-1nn-twe": + from tsml_eval._wip.condensing.wrapper import WrapperBA + + return WrapperBA( + metric="twe", + num_instances_per_class=3, + metric_params={"nu": 0.05}, + **kwargs, + ) + elif c == "simplerankcondenser": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + from tsml_eval._wip.condensing.simple_rank import SimpleRankCondenser + + return CondenserClassifier( + condenser=SimpleRankCondenser( + distance="dtw", + distance_params={"window": 0.2}, + num_instances_per_class=5, + ), + distance="dtw", + distance_params={"window": 0.2}, + classifier=KNeighborsTimeSeriesClassifier( + distance="dtw", + weights="distance", + distance_params={"window": 0.2}, + n_neighbors=1, + ), + num_instances_per_class=1, + **kwargs, + ) + elif c == "kmeanscondenser": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + from tsml_eval._wip.condensing.kMeans import kMeansCondenser + + return CondenserClassifier( + condenser=kMeansCondenser( + distance="dtw", + distance_params={"window": 0.2}, + num_instances_per_class=1, + ), + distance="dtw", + distance_params={"window": 0.2}, + classifier=KNeighborsTimeSeriesClassifier( + distance="dtw", + weights="distance", + distance_params={"window": 0.2}, + n_neighbors=1, + ), + num_instances_per_class=1, + **kwargs, + ) + elif c == "drop1condenser": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + from tsml_eval._wip.condensing.drop1 import Drop1Condenser + + return CondenserClassifier( + condenser=Drop1Condenser( + distance="dtw", + distance_params={"window": 0.2}, + num_instances=15, + ), + classifier=KNeighborsTimeSeriesClassifier( + distance="dtw", + weights="distance", + distance_params={"window": 0.2}, + n_neighbors=1, + ), + **kwargs, + ) + elif c == "drop2condenser": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + from tsml_eval._wip.condensing.drop2 import Drop2Condenser + + return CondenserClassifier( + condenser=Drop2Condenser( + distance="dtw", + distance_params={"window": 0.2}, + num_instances=15, + ), + classifier=KNeighborsTimeSeriesClassifier( + distance="dtw", + weights="distance", + distance_params={"window": 0.2}, + n_neighbors=1, + ), + **kwargs, + ) + elif c == "drop3condenser": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + from tsml_eval._wip.condensing.drop3 import Drop3Condenser + + return CondenserClassifier( + condenser=Drop3Condenser( + distance="dtw", + distance_params={"window": 0.2}, + num_instances=15, + ), + classifier=KNeighborsTimeSeriesClassifier( + distance="dtw", + weights="distance", + distance_params={"window": 0.2}, + n_neighbors=1, + ), + **kwargs, + ) elif c == "elasticensemble" or c == "ee": from aeon.classification.distance_based import ElasticEnsemble From e30edbe5ad2420e2810d63d49017d7ab4a658819 Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio Date: Fri, 11 Aug 2023 14:56:54 +0200 Subject: [PATCH 12/21] Removes config file UCO --- _uea_experiments/uco/task_gpu_regression.sub | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 _uea_experiments/uco/task_gpu_regression.sub diff --git a/_uea_experiments/uco/task_gpu_regression.sub b/_uea_experiments/uco/task_gpu_regression.sub deleted file mode 100644 index c0c1942f..00000000 --- a/_uea_experiments/uco/task_gpu_regression.sub +++ /dev/null @@ -1,18 +0,0 @@ -executable = ../../tsml_eval/experiments/regression_experiments_condor.py -arguments = $(data_dir) $(results_dir) $(regressor) $(dataset) $(random_state) $(generate_train_files) $(predefined_folds) -getenv = True -output = ../../../TSER_condor_output/$(regressor)/$(dataset)/output_$(random_state).out -error = ../../../TSER_condor_output/$(regressor)/$(dataset)/error_$(random_state).err -log = ../../../TSER_condor_output/$(regressor)/$(dataset)/logs_$(random_state).log -should_transfer_files = NO - -request_memory = 12G -request_CPUs = 2 -request_GPUs = 1 - -#require_gpus = (GlobalMemoryMb >= 15000) -#requirements = (Machine == "srvrrycarn01.priv.uco.es") || (Machine == "srvrrycarn02.priv.uco.es") - -batch_name = $(batchname) - -queue data_dir,results_dir,regressor,dataset,random_state,generate_train_files,predefined_folds,checkpoint,batchname from ./task_params.txt From dc5cc7363fbb92ef4f332769636b1efd123e6677 Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio Date: Sat, 12 Aug 2023 13:21:38 +0200 Subject: [PATCH 13/21] kMedoids condenser and several changes to others --- tsml_eval/_wip/condensing/kMeans.py | 12 +++- tsml_eval/_wip/condensing/kMedoids.py | 77 ++++++++++++++++++++++++ tsml_eval/_wip/condensing/simple_rank.py | 19 +++--- tsml_eval/experiments/set_classifier.py | 30 ++++++--- 4 files changed, 120 insertions(+), 18 deletions(-) create mode 100644 tsml_eval/_wip/condensing/kMedoids.py diff --git a/tsml_eval/_wip/condensing/kMeans.py b/tsml_eval/_wip/condensing/kMeans.py index b5cc6fa8..c11cfd18 100644 --- a/tsml_eval/_wip/condensing/kMeans.py +++ b/tsml_eval/_wip/condensing/kMeans.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import numpy as np from aeon.clustering.k_means import TimeSeriesKMeans from aeon.transformations.collection.base import BaseCollectionTransformer @@ -32,6 +31,7 @@ def __init__( distance="dtw", distance_params=None, num_instances_per_class=1, + random_state=None, ): self.distance = distance @@ -44,16 +44,21 @@ def __init__( self.selected_series = [] self.y_selected_series = [] + self.random_state = random_state + + super(kMeansCondenser, self).__init__() + + def _fit(self, X, y): + self.num_instances_per_class = self.num_instances_per_class * len(np.unique(y)) self.clusterer = TimeSeriesKMeans( n_clusters=self.num_instances_per_class, metric=self.distance, distance_params=self.distance_params, averaging_method="ba", average_params=self.distance_params, + random_state=self.random_state, ) - super(kMeansCondenser, self).__init__() - def _transform(self, X, y): for i in np.unique(y): idxs_class = np.where(y == i) @@ -70,4 +75,5 @@ def _transform(self, X, y): return np.array(self.selected_series), np.array(self.y_selected_series) def _fit_transform(self, X, y): + self._fit(X, y) return self._transform(X, y) diff --git a/tsml_eval/_wip/condensing/kMedoids.py b/tsml_eval/_wip/condensing/kMedoids.py new file mode 100644 index 00000000..12be58d7 --- /dev/null +++ b/tsml_eval/_wip/condensing/kMedoids.py @@ -0,0 +1,77 @@ +import numpy as np +from aeon.clustering.k_medoids import TimeSeriesKMedoids +from aeon.transformations.collection.base import BaseCollectionTransformer + + +class kMedoidsCondenser(BaseCollectionTransformer): + """ + Classifier wrapper for its use with any condensing approach. + + Parameters + ---------- + distance + distance_params + + Examples + -------- + >>> from ... + >>> from ... + """ + + _tags = { + "univariate-only": True, + "fit_is_empty": False, + "X_inner_mtype": ["np-list", "numpy3D"], + "requires_y": True, + "y_inner_mtype": ["numpy1D"], + } + + def __init__( + self, + distance="dtw", + distance_params=None, + num_instances_per_class=1, + random_state=None, + ): + self.distance = distance + + self.distance_params = distance_params + if self.distance_params is None: + self.distance_params = {} + + self.num_instances_per_class = num_instances_per_class + + self.selected_series = [] + self.y_selected_series = [] + + self.random_state = random_state + + super(kMedoidsCondenser, self).__init__() + + def _fit(self, X, y): + self.num_instances_per_class = self.num_instances_per_class * len(np.unique(y)) + self.clusterer = TimeSeriesKMedoids( + n_clusters=self.num_instances_per_class, + distance=self.distance, + distance_params=self.distance_params, + method="pam", + random_state=self.random_state, + ) + + def _transform(self, X, y): + for i in np.unique(y): + idxs_class = np.where(y == i) + self.clusterer.fit(X[idxs_class]) + averaged_series_class_i = self.clusterer.cluster_centers_ + + if len(averaged_series_class_i.shape) == 3: + averaged_series_class_i = np.squeeze(averaged_series_class_i, axis=1) + + self.selected_series.append(averaged_series_class_i) + self.y_selected_series.append(i) + + return np.array(self.selected_series), np.array(self.y_selected_series) + + def _fit_transform(self, X, y): + self._fit(X, y) + return self._transform(X, y) diff --git a/tsml_eval/_wip/condensing/simple_rank.py b/tsml_eval/_wip/condensing/simple_rank.py index 13de0024..db13c16e 100644 --- a/tsml_eval/_wip/condensing/simple_rank.py +++ b/tsml_eval/_wip/condensing/simple_rank.py @@ -1,9 +1,8 @@ -# -*- coding: utf-8 -*- import numpy as np from aeon.distances import get_distance_function from aeon.transformations.collection.base import BaseCollectionTransformer -from tsml_eval.estimators.classification.distance_based import ( +from aeon.classification.distance_based import ( KNeighborsTimeSeriesClassifier, ) @@ -43,14 +42,14 @@ def __init__( self, distance="dtw", distance_params=None, - num_instances_per_class=1, + num_instances=1, ): self.distance = distance self.distance_params = distance_params if self.distance_params is None: self.distance_params = {} - self.num_instances_per_class = num_instances_per_class + self.num_instances = num_instances if isinstance(self.distance, str): self.metric_ = get_distance_function(metric=self.distance) @@ -59,14 +58,17 @@ def __init__( super(SimpleRankCondenser, self).__init__() + def _fit(self, X, y): + # As SR do not separate prototypes per class, the number should be multiplied by + # the number of instances per class of other methods. + num_classes = len(np.unique(y)) + self.num_instances = self.num_instances * num_classes + def _transform(self, X, y): n_samples = X.shape[0] rank = np.zeros(n_samples) distance = np.zeros(n_samples) num_classes = len(np.unique(y)) - # As SR do not separate prototypes per class, the number should be multiplied by - # the number of instances per class of other methods. - self.num_instances_per_class = self.num_instances_per_class * num_classes for i in range(n_samples): X_train = np.delete(X, i, axis=0) @@ -104,11 +106,12 @@ def _transform(self, X, y): ) order = sorted(zip(rank, -np.array(distance), range(n_samples)))[::-1] - self.selected_indices = [x[2] for x in order][: self.num_instances_per_class] + self.selected_indices = [x[2] for x in order][: self.num_instances] condensed_X, condensed_y = X[self.selected_indices], y[self.selected_indices] return condensed_X, condensed_y def _fit_transform(self, X, y): + self._fit(X, y) return self._transform(X, y) diff --git a/tsml_eval/experiments/set_classifier.py b/tsml_eval/experiments/set_classifier.py index 1353b0f2..2ba1acfe 100644 --- a/tsml_eval/experiments/set_classifier.py +++ b/tsml_eval/experiments/set_classifier.py @@ -53,6 +53,7 @@ "3-condensed-1nn-twe", "SimpleRankCondenser", "kMeansCondenser", + "kMedoidsCondenser", "Drop1Condenser", "Drop2Condenser", "Drop3Condenser", @@ -484,17 +485,14 @@ def _set_classifier_distance_based( condenser=SimpleRankCondenser( distance="dtw", distance_params={"window": 0.2}, - num_instances_per_class=5, + num_instances=5, ), - distance="dtw", - distance_params={"window": 0.2}, classifier=KNeighborsTimeSeriesClassifier( distance="dtw", weights="distance", distance_params={"window": 0.2}, n_neighbors=1, ), - num_instances_per_class=1, **kwargs, ) elif c == "kmeanscondenser": @@ -509,15 +507,33 @@ def _set_classifier_distance_based( distance_params={"window": 0.2}, num_instances_per_class=1, ), - distance="dtw", - distance_params={"window": 0.2}, classifier=KNeighborsTimeSeriesClassifier( distance="dtw", weights="distance", distance_params={"window": 0.2}, n_neighbors=1, ), - num_instances_per_class=1, + **kwargs, + ) + elif c == "kmedoidscondenser": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + from tsml_eval._wip.condensing.kMedoids import kMedoidsCondenser + + return CondenserClassifier( + condenser=kMedoidsCondenser( + distance="dtw", + distance_params={"window": 0.2}, + num_instances_per_class=1, + random_state=random_state, + ), + classifier=KNeighborsTimeSeriesClassifier( + distance="dtw", + weights="distance", + distance_params={"window": 0.2}, + n_neighbors=1, + ), **kwargs, ) elif c == "drop1condenser": From 8a797933dfa927173d2c2aa2970d437aeeab170e Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio Date: Wed, 30 Aug 2023 16:18:05 +0200 Subject: [PATCH 14/21] kmeans improvements --- .../_wip/condensing/condensing_classifier.py | 5 +- tsml_eval/_wip/condensing/kMeans.py | 21 +- tsml_eval/experiments/set_classifier.py | 266 +++++++++++++----- 3 files changed, 213 insertions(+), 79 deletions(-) diff --git a/tsml_eval/_wip/condensing/condensing_classifier.py b/tsml_eval/_wip/condensing/condensing_classifier.py index 1382bd93..db1d1826 100644 --- a/tsml_eval/_wip/condensing/condensing_classifier.py +++ b/tsml_eval/_wip/condensing/condensing_classifier.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from aeon.classification.base import BaseClassifier @@ -30,6 +29,7 @@ def __init__( distance_params=None, classifier=None, num_instances=1, + random_state=None, ): self.distance = distance @@ -39,6 +39,8 @@ def __init__( self.num_instances = num_instances + self.random_state = random_state + self.condenser = condenser if self.condenser is None: from tsml_eval._wip.condensing.kMeans import kMeansCondenser @@ -47,6 +49,7 @@ def __init__( distance=self.distance, distance_params=self.distance_params, num_instances=self.num_instances, + random_state=self.random_state, ) self.classifier = classifier diff --git a/tsml_eval/_wip/condensing/kMeans.py b/tsml_eval/_wip/condensing/kMeans.py index c11cfd18..aab2c46f 100644 --- a/tsml_eval/_wip/condensing/kMeans.py +++ b/tsml_eval/_wip/condensing/kMeans.py @@ -20,7 +20,7 @@ class kMeansCondenser(BaseCollectionTransformer): _tags = { "univariate-only": True, - "fit_is_empty": False, + "fit_is_empty": True, "X_inner_mtype": ["np-list", "numpy3D"], "requires_y": True, "y_inner_mtype": ["numpy1D"], @@ -41,15 +41,11 @@ def __init__( self.num_instances_per_class = num_instances_per_class - self.selected_series = [] + self.selected_series = np.array([]) self.y_selected_series = [] self.random_state = random_state - super(kMeansCondenser, self).__init__() - - def _fit(self, X, y): - self.num_instances_per_class = self.num_instances_per_class * len(np.unique(y)) self.clusterer = TimeSeriesKMeans( n_clusters=self.num_instances_per_class, metric=self.distance, @@ -59,21 +55,24 @@ def _fit(self, X, y): random_state=self.random_state, ) + super(kMeansCondenser, self).__init__() + def _transform(self, X, y): + self.selected_series = self.selected_series.reshape(0, *X.shape[1:]) + for i in np.unique(y): idxs_class = np.where(y == i) self.clusterer.fit(X[idxs_class]) averaged_series_class_i = self.clusterer.cluster_centers_ - if len(averaged_series_class_i.shape) == 3: - averaged_series_class_i = np.squeeze(averaged_series_class_i, axis=1) + self.selected_series = np.concatenate( + (self.selected_series, averaged_series_class_i), axis=0 + ) - self.selected_series.append(averaged_series_class_i) - self.y_selected_series.append(i) + self.y_selected_series.extend([i] * self.num_instances_per_class) return np.array(self.selected_series), np.array(self.y_selected_series) def _fit_transform(self, X, y): - self._fit(X, y) return self._transform(X, y) diff --git a/tsml_eval/experiments/set_classifier.py b/tsml_eval/experiments/set_classifier.py index 2ba1acfe..cb37ee48 100644 --- a/tsml_eval/experiments/set_classifier.py +++ b/tsml_eval/experiments/set_classifier.py @@ -42,15 +42,15 @@ ["msm", "1nn-msm"], ["twe", "1nn-twe"], "1nn-dtw-cv", - "1-condensed-1nn-msm", - "1-condensed-1nn-dtw", - "1-condensed-1nn-twe", - "2-condensed-1nn-msm", - "2-condensed-1nn-dtw", - "2-condensed-1nn-twe", - "3-condensed-1nn-msm", - "3-condensed-1nn-dtw", - "3-condensed-1nn-twe", + "kMeansCondenser-dtw-1", + "kMeansCondenser-dtw-2", + "kMeansCondenser-dtw-3", + "kMeansCondenser-twe-1", + "kMeansCondenser-twe-2", + "kMeansCondenser-twe-3", + "kMeansCondenser-msm-1", + "kMeansCondenser-msm-2", + "kMeansCondenser-msm-3", "SimpleRankCondenser", "kMeansCondenser", "kMedoidsCondenser", @@ -409,93 +409,155 @@ def _set_classifier_distance_based( from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier return KNeighborsTimeSeriesClassifier(distance="twe", n_jobs=n_jobs, **kwargs) - elif c == "1-condensed-1nn-dtw": - from tsml_eval._wip.condensing.wrapper import WrapperBA + elif c == "simplerankcondenser": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + from tsml_eval._wip.condensing.simple_rank import SimpleRankCondenser - return WrapperBA( - metric="dtw", - num_instances_per_class=1, - metric_params={"window": 0.2}, + return CondenserClassifier( + condenser=SimpleRankCondenser( + distance="dtw", + distance_params={"window": 0.2}, + num_instances=5, + ), + classifier=KNeighborsTimeSeriesClassifier( + distance="dtw", + weights="distance", + distance_params={"window": 0.2}, + n_neighbors=1, + ), **kwargs, ) - elif c == "1-condensed-1nn-msm": - from tsml_eval._wip.condensing.wrapper import WrapperBA + elif c == "kmeanscondenser": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - return WrapperBA(metric="msm", num_instances_per_class=1, **kwargs) - elif c == "1-condensed-1nn-twe": - from tsml_eval._wip.condensing.wrapper import WrapperBA + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + from tsml_eval._wip.condensing.kMeans import kMeansCondenser - return WrapperBA( - metric="twe", - num_instances_per_class=1, - metric_params={"nu": 0.05}, + return CondenserClassifier( + condenser=kMeansCondenser( + distance="dtw", + distance_params={"window": 0.2}, + num_instances_per_class=3, + random_state=random_state, + ), + classifier=KNeighborsTimeSeriesClassifier( + distance="dtw", + weights="distance", + distance_params={"window": 0.2}, + n_neighbors=1, + ), + random_state=random_state, **kwargs, ) - elif c == "2-condensed-1nn-dtw": - from tsml_eval._wip.condensing.wrapper import WrapperBA + elif c == "kmeanscondenser-dtw-1": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + from tsml_eval._wip.condensing.kMeans import kMeansCondenser - return WrapperBA( - metric="dtw", - num_instances_per_class=2, - metric_params={"window": 0.2}, + return CondenserClassifier( + condenser=kMeansCondenser( + distance="dtw", + distance_params={"window": 0.2}, + num_instances_per_class=1, + random_state=random_state, + ), + classifier=KNeighborsTimeSeriesClassifier( + distance="dtw", + distance_params={"window": 0.2}, + weights="distance", + n_neighbors=1, + ), + random_state=random_state, **kwargs, ) - elif c == "2-condensed-1nn-msm": - from tsml_eval._wip.condensing.wrapper import WrapperBA + elif c == "kmeanscondenser-dtw-2": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - return WrapperBA(metric="msm", num_instances_per_class=2, **kwargs) - elif c == "2-condensed-1nn-twe": - from tsml_eval._wip.condensing.wrapper import WrapperBA + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + from tsml_eval._wip.condensing.kMeans import kMeansCondenser - return WrapperBA( - metric="twe", - num_instances_per_class=2, - metric_params={"nu": 0.05}, + return CondenserClassifier( + condenser=kMeansCondenser( + distance="dtw", + distance_params={"window": 0.2}, + num_instances_per_class=2, + random_state=random_state, + ), + classifier=KNeighborsTimeSeriesClassifier( + distance="dtw", + distance_params={"window": 0.2}, + weights="distance", + n_neighbors=1, + ), + random_state=random_state, **kwargs, ) - elif c == "3-condensed-1nn-dtw": - from tsml_eval._wip.condensing.wrapper import WrapperBA + elif c == "kmeanscondenser-dtw-3": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - return WrapperBA( - metric="dtw", - num_instances_per_class=3, - metric_params={"window": 0.2}, + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + from tsml_eval._wip.condensing.kMeans import kMeansCondenser + + return CondenserClassifier( + condenser=kMeansCondenser( + distance="dtw", + distance_params={"window": 0.2}, + num_instances_per_class=3, + random_state=random_state, + ), + classifier=KNeighborsTimeSeriesClassifier( + distance="dtw", + distance_params={"window": 0.2}, + weights="distance", + n_neighbors=1, + ), + random_state=random_state, **kwargs, ) - elif c == "3-condensed-1nn-msm": - from tsml_eval._wip.condensing.wrapper import WrapperBA + elif c == "kmeanscondenser-msm-1": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - return WrapperBA(metric="msm", num_instances_per_class=3, **kwargs) - elif c == "3-condensed-1nn-twe": - from tsml_eval._wip.condensing.wrapper import WrapperBA + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + from tsml_eval._wip.condensing.kMeans import kMeansCondenser - return WrapperBA( - metric="twe", - num_instances_per_class=3, - metric_params={"nu": 0.05}, + return CondenserClassifier( + condenser=kMeansCondenser( + distance="msm", + num_instances_per_class=1, + random_state=random_state, + ), + classifier=KNeighborsTimeSeriesClassifier( + distance="msm", + weights="distance", + n_neighbors=1, + ), + random_state=random_state, **kwargs, ) - elif c == "simplerankcondenser": + elif c == "kmeanscondenser-msm-2": from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier - from tsml_eval._wip.condensing.simple_rank import SimpleRankCondenser + from tsml_eval._wip.condensing.kMeans import kMeansCondenser return CondenserClassifier( - condenser=SimpleRankCondenser( - distance="dtw", - distance_params={"window": 0.2}, - num_instances=5, + condenser=kMeansCondenser( + distance="msm", + num_instances_per_class=2, + random_state=random_state, ), classifier=KNeighborsTimeSeriesClassifier( - distance="dtw", + distance="msm", weights="distance", - distance_params={"window": 0.2}, n_neighbors=1, ), + random_state=random_state, **kwargs, ) - elif c == "kmeanscondenser": + elif c == "kmeanscondenser-msm-3": from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier @@ -503,16 +565,82 @@ def _set_classifier_distance_based( return CondenserClassifier( condenser=kMeansCondenser( - distance="dtw", - distance_params={"window": 0.2}, + distance="msm", + num_instances_per_class=3, + random_state=random_state, + ), + classifier=KNeighborsTimeSeriesClassifier( + distance="msm", + weights="distance", + n_neighbors=1, + ), + random_state=random_state, + **kwargs, + ) + elif c == "kmeanscondenser-twe-1": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + from tsml_eval._wip.condensing.kMeans import kMeansCondenser + + return CondenserClassifier( + condenser=kMeansCondenser( + distance="twe", + distance_params={"nu": 0.001}, num_instances_per_class=1, + random_state=random_state, ), classifier=KNeighborsTimeSeriesClassifier( - distance="dtw", + distance="twe", + distance_params={"nu": 0.001}, + weights="distance", + n_neighbors=1, + ), + random_state=random_state, + **kwargs, + ) + elif c == "kmeanscondenser-twe-2": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + from tsml_eval._wip.condensing.kMeans import kMeansCondenser + + return CondenserClassifier( + condenser=kMeansCondenser( + distance="twe", + distance_params={"nu": 0.001}, + num_instances_per_class=2, + random_state=random_state, + ), + classifier=KNeighborsTimeSeriesClassifier( + distance="twe", + distance_params={"nu": 0.001}, weights="distance", - distance_params={"window": 0.2}, n_neighbors=1, ), + random_state=random_state, + **kwargs, + ) + elif c == "kmeanscondenser-twe-3": + from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + from tsml_eval._wip.condensing.kMeans import kMeansCondenser + + return CondenserClassifier( + condenser=kMeansCondenser( + distance="twe", + distance_params={"nu": 0.001}, + num_instances_per_class=3, + random_state=random_state, + ), + classifier=KNeighborsTimeSeriesClassifier( + distance="twe", + distance_params={"nu": 0.001}, + weights="distance", + n_neighbors=1, + ), + random_state=random_state, **kwargs, ) elif c == "kmedoidscondenser": @@ -534,6 +662,7 @@ def _set_classifier_distance_based( distance_params={"window": 0.2}, n_neighbors=1, ), + random_state=random_state, **kwargs, ) elif c == "drop1condenser": @@ -554,6 +683,7 @@ def _set_classifier_distance_based( distance_params={"window": 0.2}, n_neighbors=1, ), + random_state=random_state, **kwargs, ) elif c == "drop2condenser": @@ -574,6 +704,7 @@ def _set_classifier_distance_based( distance_params={"window": 0.2}, n_neighbors=1, ), + random_state=random_state, **kwargs, ) elif c == "drop3condenser": @@ -594,6 +725,7 @@ def _set_classifier_distance_based( distance_params={"window": 0.2}, n_neighbors=1, ), + random_state=random_state, **kwargs, ) elif c == "elasticensemble" or c == "ee": From 8a62a3f87a1fbff28b65424730325f6de34d7ac4 Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio Date: Fri, 1 Sep 2023 20:14:13 +0200 Subject: [PATCH 15/21] Removing duplicated code --- .../{kMeans.py => clustering_condenser.py} | 51 +++- .../_wip/condensing/condensing_classifier.py | 17 +- tsml_eval/_wip/condensing/kMedoids.py | 77 ----- tsml_eval/experiments/set_classifier.py | 262 ++++-------------- 4 files changed, 106 insertions(+), 301 deletions(-) rename tsml_eval/_wip/condensing/{kMeans.py => clustering_condenser.py} (52%) delete mode 100644 tsml_eval/_wip/condensing/kMedoids.py diff --git a/tsml_eval/_wip/condensing/kMeans.py b/tsml_eval/_wip/condensing/clustering_condenser.py similarity index 52% rename from tsml_eval/_wip/condensing/kMeans.py rename to tsml_eval/_wip/condensing/clustering_condenser.py index aab2c46f..f15c71a4 100644 --- a/tsml_eval/_wip/condensing/kMeans.py +++ b/tsml_eval/_wip/condensing/clustering_condenser.py @@ -1,9 +1,8 @@ import numpy as np -from aeon.clustering.k_means import TimeSeriesKMeans from aeon.transformations.collection.base import BaseCollectionTransformer -class kMeansCondenser(BaseCollectionTransformer): +class ClusteringCondenser(BaseCollectionTransformer): """ Classifier wrapper for its use with any condensing approach. @@ -20,7 +19,7 @@ class kMeansCondenser(BaseCollectionTransformer): _tags = { "univariate-only": True, - "fit_is_empty": True, + "fit_is_empty": False, "X_inner_mtype": ["np-list", "numpy3D"], "requires_y": True, "y_inner_mtype": ["numpy1D"], @@ -28,6 +27,7 @@ class kMeansCondenser(BaseCollectionTransformer): def __init__( self, + clustering_approach=None, distance="dtw", distance_params=None, num_instances_per_class=1, @@ -46,16 +46,42 @@ def __init__( self.random_state = random_state - self.clusterer = TimeSeriesKMeans( - n_clusters=self.num_instances_per_class, - metric=self.distance, - distance_params=self.distance_params, - averaging_method="ba", - average_params=self.distance_params, - random_state=self.random_state, - ) + self.clustering_approach = clustering_approach + self.clusterer = None - super(kMeansCondenser, self).__init__() + super(ClusteringCondenser, self).__init__() + + def _fit(self, X, y): + self.num_instances_per_class = len(np.unique(y)) * self.num_instances_per_class + + if self.clustering_approach == "pam": + from aeon.clustering.k_medoids import TimeSeriesKMedoids + + self.clusterer = TimeSeriesKMedoids( + n_clusters=self.num_instances_per_class, + method="pam", + init_algorithm="random", + distance=self.distance, + distance_params=self.distance_params, + random_state=self.random_state, + ) + + elif self.clustering_approach == "kmeans" or self.clustering_approach is None: + from aeon.clustering.k_means import TimeSeriesKMeans + + self.average_params = { + "metric": self.distance, + **self.distance_params.copy(), + } + + self.clusterer = TimeSeriesKMeans( + n_clusters=self.num_instances_per_class, + metric=self.distance, + distance_params=self.distance_params, + averaging_method="ba", + average_params=self.average_params, + random_state=self.random_state, + ) def _transform(self, X, y): self.selected_series = self.selected_series.reshape(0, *X.shape[1:]) @@ -75,4 +101,5 @@ def _transform(self, X, y): return np.array(self.selected_series), np.array(self.y_selected_series) def _fit_transform(self, X, y): + self._fit(X, y) return self._transform(X, y) diff --git a/tsml_eval/_wip/condensing/condensing_classifier.py b/tsml_eval/_wip/condensing/condensing_classifier.py index db1d1826..32d47b6b 100644 --- a/tsml_eval/_wip/condensing/condensing_classifier.py +++ b/tsml_eval/_wip/condensing/condensing_classifier.py @@ -43,13 +43,18 @@ def __init__( self.condenser = condenser if self.condenser is None: - from tsml_eval._wip.condensing.kMeans import kMeansCondenser + from tsml_eval._wip.condensing.clustering_condenser import ( + ClusteringCondenser, + ) - self.condenser = kMeansCondenser( - distance=self.distance, - distance_params=self.distance_params, - num_instances=self.num_instances, - random_state=self.random_state, + self.condenser = ( + ClusteringCondenser( + clustering_approach="kmeans", + distance=self.distance, + distance_params=self.distance_params, + num_instances_per_class=self.num_instances, + random_state=self.random_state, + ), ) self.classifier = classifier diff --git a/tsml_eval/_wip/condensing/kMedoids.py b/tsml_eval/_wip/condensing/kMedoids.py deleted file mode 100644 index 12be58d7..00000000 --- a/tsml_eval/_wip/condensing/kMedoids.py +++ /dev/null @@ -1,77 +0,0 @@ -import numpy as np -from aeon.clustering.k_medoids import TimeSeriesKMedoids -from aeon.transformations.collection.base import BaseCollectionTransformer - - -class kMedoidsCondenser(BaseCollectionTransformer): - """ - Classifier wrapper for its use with any condensing approach. - - Parameters - ---------- - distance - distance_params - - Examples - -------- - >>> from ... - >>> from ... - """ - - _tags = { - "univariate-only": True, - "fit_is_empty": False, - "X_inner_mtype": ["np-list", "numpy3D"], - "requires_y": True, - "y_inner_mtype": ["numpy1D"], - } - - def __init__( - self, - distance="dtw", - distance_params=None, - num_instances_per_class=1, - random_state=None, - ): - self.distance = distance - - self.distance_params = distance_params - if self.distance_params is None: - self.distance_params = {} - - self.num_instances_per_class = num_instances_per_class - - self.selected_series = [] - self.y_selected_series = [] - - self.random_state = random_state - - super(kMedoidsCondenser, self).__init__() - - def _fit(self, X, y): - self.num_instances_per_class = self.num_instances_per_class * len(np.unique(y)) - self.clusterer = TimeSeriesKMedoids( - n_clusters=self.num_instances_per_class, - distance=self.distance, - distance_params=self.distance_params, - method="pam", - random_state=self.random_state, - ) - - def _transform(self, X, y): - for i in np.unique(y): - idxs_class = np.where(y == i) - self.clusterer.fit(X[idxs_class]) - averaged_series_class_i = self.clusterer.cluster_centers_ - - if len(averaged_series_class_i.shape) == 3: - averaged_series_class_i = np.squeeze(averaged_series_class_i, axis=1) - - self.selected_series.append(averaged_series_class_i) - self.y_selected_series.append(i) - - return np.array(self.selected_series), np.array(self.y_selected_series) - - def _fit_transform(self, X, y): - self._fit(X, y) - return self._transform(X, y) diff --git a/tsml_eval/experiments/set_classifier.py b/tsml_eval/experiments/set_classifier.py index cb37ee48..49097348 100644 --- a/tsml_eval/experiments/set_classifier.py +++ b/tsml_eval/experiments/set_classifier.py @@ -52,8 +52,15 @@ "kMeansCondenser-msm-2", "kMeansCondenser-msm-3", "SimpleRankCondenser", - "kMeansCondenser", - "kMedoidsCondenser", + "PAMCondenser-dtw-1", + "PAMCondenser-dtw-2", + "PAMCondenser-dtw-3", + "PAMCondenser-twe-1", + "PAMCondenser-twe-2", + "PAMCondenser-twe-3", + "PAMCondenser-msm-1", + "PAMCondenser-msm-2", + "PAMCondenser-msm-3", "Drop1Condenser", "Drop2Condenser", "Drop3Condenser", @@ -394,7 +401,13 @@ def _set_classifier_distance_based( if c == "kneighborstimeseriesclassifier" or c == "dtw" or c == "1nn-dtw": from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - return KNeighborsTimeSeriesClassifier(distance="dtw", n_jobs=n_jobs, **kwargs) + return KNeighborsTimeSeriesClassifier( + distance="dtw", + distance_params={"window": 0.2}, + weights="distance", + n_jobs=n_jobs, + **kwargs, + ) elif c == "ed" or c == "1nn-euclidean" or c == "1nn-ed": from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier @@ -404,11 +417,15 @@ def _set_classifier_distance_based( elif c == "msm" or c == "1nn-msm": from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - return KNeighborsTimeSeriesClassifier(distance="msm", n_jobs=n_jobs, **kwargs) + return KNeighborsTimeSeriesClassifier( + distance="msm", n_jobs=n_jobs, weights="distance", **kwargs + ) elif c == "twe" or c == "1nn-twe": from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - return KNeighborsTimeSeriesClassifier(distance="twe", n_jobs=n_jobs, **kwargs) + return KNeighborsTimeSeriesClassifier( + distance="twe", distance_params={"nu": 0.001}, n_jobs=n_jobs, **kwargs + ) elif c == "simplerankcondenser": from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier @@ -429,242 +446,75 @@ def _set_classifier_distance_based( ), **kwargs, ) - elif c == "kmeanscondenser": - from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier - from tsml_eval._wip.condensing.kMeans import kMeansCondenser - - return CondenserClassifier( - condenser=kMeansCondenser( - distance="dtw", - distance_params={"window": 0.2}, - num_instances_per_class=3, - random_state=random_state, - ), - classifier=KNeighborsTimeSeriesClassifier( - distance="dtw", - weights="distance", - distance_params={"window": 0.2}, - n_neighbors=1, - ), - random_state=random_state, - **kwargs, - ) - elif c == "kmeanscondenser-dtw-1": + elif c.startswith("kmeanscondenser"): from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + from tsml_eval._wip.condensing.clustering_condenser import ClusteringCondenser from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier - from tsml_eval._wip.condensing.kMeans import kMeansCondenser - return CondenserClassifier( - condenser=kMeansCondenser( - distance="dtw", - distance_params={"window": 0.2}, - num_instances_per_class=1, - random_state=random_state, - ), - classifier=KNeighborsTimeSeriesClassifier( - distance="dtw", - distance_params={"window": 0.2}, - weights="distance", - n_neighbors=1, - ), - random_state=random_state, - **kwargs, - ) - elif c == "kmeanscondenser-dtw-2": - from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + distance = c.split("-")[1] + num_instances_per_class = int(c.split("-")[2]) - from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier - from tsml_eval._wip.condensing.kMeans import kMeansCondenser + if distance == "dtw": + distance_params = {"window": 0.2} + elif distance == "msm": + distance_params = {} + elif distance == "twe": + distance_params = {"nu": 0.001} return CondenserClassifier( - condenser=kMeansCondenser( - distance="dtw", - distance_params={"window": 0.2}, - num_instances_per_class=2, + condenser=ClusteringCondenser( + clustering_approach="kmeans", + distance=distance, + distance_params=distance_params, + num_instances_per_class=num_instances_per_class, random_state=random_state, ), classifier=KNeighborsTimeSeriesClassifier( - distance="dtw", - distance_params={"window": 0.2}, + distance=distance, + distance_params=distance_params, weights="distance", n_neighbors=1, ), random_state=random_state, **kwargs, ) - elif c == "kmeanscondenser-dtw-3": - from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier - from tsml_eval._wip.condensing.kMeans import kMeansCondenser - - return CondenserClassifier( - condenser=kMeansCondenser( - distance="dtw", - distance_params={"window": 0.2}, - num_instances_per_class=3, - random_state=random_state, - ), - classifier=KNeighborsTimeSeriesClassifier( - distance="dtw", - distance_params={"window": 0.2}, - weights="distance", - n_neighbors=1, - ), - random_state=random_state, - **kwargs, - ) - elif c == "kmeanscondenser-msm-1": + elif c.startswith("pamcondenser"): from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + from tsml_eval._wip.condensing.clustering_condenser import ClusteringCondenser from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier - from tsml_eval._wip.condensing.kMeans import kMeansCondenser - return CondenserClassifier( - condenser=kMeansCondenser( - distance="msm", - num_instances_per_class=1, - random_state=random_state, - ), - classifier=KNeighborsTimeSeriesClassifier( - distance="msm", - weights="distance", - n_neighbors=1, - ), - random_state=random_state, - **kwargs, - ) - elif c == "kmeanscondenser-msm-2": - from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier + distance = c.split("-")[1] + num_instances_per_class = int(c.split("-")[2]) - from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier - from tsml_eval._wip.condensing.kMeans import kMeansCondenser + if distance == "dtw": + distance_params = {"window": 0.2} + elif distance == "msm": + distance_params = {} + elif distance == "twe": + distance_params = {"nu": 0.001} return CondenserClassifier( - condenser=kMeansCondenser( - distance="msm", - num_instances_per_class=2, + condenser=ClusteringCondenser( + clustering_approach="pam", + distance=distance, + distance_params=distance_params, + num_instances_per_class=num_instances_per_class, random_state=random_state, ), classifier=KNeighborsTimeSeriesClassifier( - distance="msm", + distance=distance, + distance_params=distance_params, weights="distance", n_neighbors=1, ), random_state=random_state, **kwargs, ) - elif c == "kmeanscondenser-msm-3": - from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - - from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier - from tsml_eval._wip.condensing.kMeans import kMeansCondenser - return CondenserClassifier( - condenser=kMeansCondenser( - distance="msm", - num_instances_per_class=3, - random_state=random_state, - ), - classifier=KNeighborsTimeSeriesClassifier( - distance="msm", - weights="distance", - n_neighbors=1, - ), - random_state=random_state, - **kwargs, - ) - elif c == "kmeanscondenser-twe-1": - from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - - from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier - from tsml_eval._wip.condensing.kMeans import kMeansCondenser - - return CondenserClassifier( - condenser=kMeansCondenser( - distance="twe", - distance_params={"nu": 0.001}, - num_instances_per_class=1, - random_state=random_state, - ), - classifier=KNeighborsTimeSeriesClassifier( - distance="twe", - distance_params={"nu": 0.001}, - weights="distance", - n_neighbors=1, - ), - random_state=random_state, - **kwargs, - ) - elif c == "kmeanscondenser-twe-2": - from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - - from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier - from tsml_eval._wip.condensing.kMeans import kMeansCondenser - - return CondenserClassifier( - condenser=kMeansCondenser( - distance="twe", - distance_params={"nu": 0.001}, - num_instances_per_class=2, - random_state=random_state, - ), - classifier=KNeighborsTimeSeriesClassifier( - distance="twe", - distance_params={"nu": 0.001}, - weights="distance", - n_neighbors=1, - ), - random_state=random_state, - **kwargs, - ) - elif c == "kmeanscondenser-twe-3": - from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - - from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier - from tsml_eval._wip.condensing.kMeans import kMeansCondenser - - return CondenserClassifier( - condenser=kMeansCondenser( - distance="twe", - distance_params={"nu": 0.001}, - num_instances_per_class=3, - random_state=random_state, - ), - classifier=KNeighborsTimeSeriesClassifier( - distance="twe", - distance_params={"nu": 0.001}, - weights="distance", - n_neighbors=1, - ), - random_state=random_state, - **kwargs, - ) - elif c == "kmedoidscondenser": - from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier - - from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier - from tsml_eval._wip.condensing.kMedoids import kMedoidsCondenser - - return CondenserClassifier( - condenser=kMedoidsCondenser( - distance="dtw", - distance_params={"window": 0.2}, - num_instances_per_class=1, - random_state=random_state, - ), - classifier=KNeighborsTimeSeriesClassifier( - distance="dtw", - weights="distance", - distance_params={"window": 0.2}, - n_neighbors=1, - ), - random_state=random_state, - **kwargs, - ) elif c == "drop1condenser": from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier From 89f8dabd4776bde6acff7bea5e58d11f02e180e5 Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio Date: Tue, 5 Sep 2023 18:44:26 +0200 Subject: [PATCH 16/21] Improvements to clustering condenser method --- .../_wip/condensing/clustering_condenser.py | 46 ++++++++++++++----- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/tsml_eval/_wip/condensing/clustering_condenser.py b/tsml_eval/_wip/condensing/clustering_condenser.py index f15c71a4..65a8e68b 100644 --- a/tsml_eval/_wip/condensing/clustering_condenser.py +++ b/tsml_eval/_wip/condensing/clustering_condenser.py @@ -19,7 +19,7 @@ class ClusteringCondenser(BaseCollectionTransformer): _tags = { "univariate-only": True, - "fit_is_empty": False, + "fit_is_empty": True, "X_inner_mtype": ["np-list", "numpy3D"], "requires_y": True, "y_inner_mtype": ["numpy1D"], @@ -47,13 +47,6 @@ def __init__( self.random_state = random_state self.clustering_approach = clustering_approach - self.clusterer = None - - super(ClusteringCondenser, self).__init__() - - def _fit(self, X, y): - self.num_instances_per_class = len(np.unique(y)) * self.num_instances_per_class - if self.clustering_approach == "pam": from aeon.clustering.k_medoids import TimeSeriesKMedoids @@ -83,14 +76,44 @@ def _fit(self, X, y): random_state=self.random_state, ) + super(ClusteringCondenser, self).__init__() + def _transform(self, X, y): self.selected_series = self.selected_series.reshape(0, *X.shape[1:]) for i in np.unique(y): idxs_class = np.where(y == i) - - self.clusterer.fit(X[idxs_class]) - averaged_series_class_i = self.clusterer.cluster_centers_ + X_i = X[idxs_class] + + # in case of self.num_instances_per_class == 1, does not make sense to run + # the approaches. + if self.num_instances_per_class == 1: + if self.clustering_approach == "pam": + from aeon.clustering.metrics.medoids import medoids + + averaged_series_class_i = [ + medoids( + X_i, + distance=self.distance, + **self.distance_params, + ) + ] + elif self.clustering_approach == "kmeans": + from aeon.clustering.metrics.averaging import ( + elastic_barycenter_average, + ) + + averaged_series_class_i = [ + elastic_barycenter_average( + X_i, + metric=self.distance, + **self.distance_params, + ) + ] + # for self.num_instances_per_class > 1. + else: + self.clusterer.fit(X_i) + averaged_series_class_i = self.clusterer.cluster_centers_ self.selected_series = np.concatenate( (self.selected_series, averaged_series_class_i), axis=0 @@ -101,5 +124,4 @@ def _transform(self, X, y): return np.array(self.selected_series), np.array(self.y_selected_series) def _fit_transform(self, X, y): - self._fit(X, y) return self._transform(X, y) From fda86af5eac0a5a4a5a5c7eff237bc7332942eb4 Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio <47889499+dguijo@users.noreply.github.com> Date: Wed, 9 Oct 2024 13:32:33 +0200 Subject: [PATCH 17/21] Modifies paths --- tsml_eval/_wip/condensing/clustering_condenser.py | 6 +++--- tsml_eval/_wip/condensing/wrapper.py | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/tsml_eval/_wip/condensing/clustering_condenser.py b/tsml_eval/_wip/condensing/clustering_condenser.py index 65a8e68b..34ca0969 100644 --- a/tsml_eval/_wip/condensing/clustering_condenser.py +++ b/tsml_eval/_wip/condensing/clustering_condenser.py @@ -48,7 +48,7 @@ def __init__( self.clustering_approach = clustering_approach if self.clustering_approach == "pam": - from aeon.clustering.k_medoids import TimeSeriesKMedoids + from aeon.clustering import TimeSeriesKMedoids self.clusterer = TimeSeriesKMedoids( n_clusters=self.num_instances_per_class, @@ -60,7 +60,7 @@ def __init__( ) elif self.clustering_approach == "kmeans" or self.clustering_approach is None: - from aeon.clustering.k_means import TimeSeriesKMeans + from aeon.clustering import TimeSeriesKMeans self.average_params = { "metric": self.distance, @@ -76,7 +76,7 @@ def __init__( random_state=self.random_state, ) - super(ClusteringCondenser, self).__init__() + super().__init__() def _transform(self, X, y): self.selected_series = self.selected_series.reshape(0, *X.shape[1:]) diff --git a/tsml_eval/_wip/condensing/wrapper.py b/tsml_eval/_wip/condensing/wrapper.py index f2b6ca3c..7b5381dc 100644 --- a/tsml_eval/_wip/condensing/wrapper.py +++ b/tsml_eval/_wip/condensing/wrapper.py @@ -1,8 +1,7 @@ -# -*- coding: utf-8 -*- import numpy as np from aeon.classification.base import BaseClassifier from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier -from aeon.clustering.k_means import TimeSeriesKMeans +from aeon.clustering import TimeSeriesKMeans class WrapperBA(BaseClassifier): @@ -60,7 +59,7 @@ def __init__( average_params=self.metric_params, ) - super(WrapperBA, self).__init__() + super().__init__() def _fit(self, X, y): for i in np.unique(y): From ef16f93beff9bf473345a0936346204e25745b2d Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio <47889499+dguijo@users.noreply.github.com> Date: Wed, 9 Oct 2024 13:57:32 +0200 Subject: [PATCH 18/21] modify to use aeon new clustering module --- tsml_eval/_wip/condensing/clustering_condenser.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/tsml_eval/_wip/condensing/clustering_condenser.py b/tsml_eval/_wip/condensing/clustering_condenser.py index 34ca0969..f8de1bc7 100644 --- a/tsml_eval/_wip/condensing/clustering_condenser.py +++ b/tsml_eval/_wip/condensing/clustering_condenser.py @@ -63,13 +63,13 @@ def __init__( from aeon.clustering import TimeSeriesKMeans self.average_params = { - "metric": self.distance, + "distance": self.distance, **self.distance_params.copy(), } self.clusterer = TimeSeriesKMeans( n_clusters=self.num_instances_per_class, - metric=self.distance, + distance=self.distance, distance_params=self.distance_params, averaging_method="ba", average_params=self.average_params, @@ -89,24 +89,22 @@ def _transform(self, X, y): # the approaches. if self.num_instances_per_class == 1: if self.clustering_approach == "pam": - from aeon.clustering.metrics.medoids import medoids + from aeon.clustering.averaging._ba_utils import _medoids averaged_series_class_i = [ - medoids( + _medoids( X_i, distance=self.distance, **self.distance_params, ) ] elif self.clustering_approach == "kmeans": - from aeon.clustering.metrics.averaging import ( - elastic_barycenter_average, - ) + from aeon.clustering.averaging import elastic_barycenter_average averaged_series_class_i = [ elastic_barycenter_average( X_i, - metric=self.distance, + distance=self.distance, **self.distance_params, ) ] From 680cf313f5bdc26e668330e13e419eb3b7255dbb Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio <47889499+dguijo@users.noreply.github.com> Date: Wed, 9 Oct 2024 15:10:17 +0200 Subject: [PATCH 19/21] Adding BaseCondenser case --- tsml_eval/_wip/condensing/__init__.py | 0 .../_wip/condensing/condensing_classifier.py | 16 +++++++--------- tsml_eval/experiments/set_classifier.py | 5 +++++ 3 files changed, 12 insertions(+), 9 deletions(-) create mode 100644 tsml_eval/_wip/condensing/__init__.py diff --git a/tsml_eval/_wip/condensing/__init__.py b/tsml_eval/_wip/condensing/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tsml_eval/_wip/condensing/condensing_classifier.py b/tsml_eval/_wip/condensing/condensing_classifier.py index 32d47b6b..eafef96f 100644 --- a/tsml_eval/_wip/condensing/condensing_classifier.py +++ b/tsml_eval/_wip/condensing/condensing_classifier.py @@ -47,14 +47,12 @@ def __init__( ClusteringCondenser, ) - self.condenser = ( - ClusteringCondenser( - clustering_approach="kmeans", - distance=self.distance, - distance_params=self.distance_params, - num_instances_per_class=self.num_instances, - random_state=self.random_state, - ), + self.condenser = ClusteringCondenser( + clustering_approach="kmeans", + distance=self.distance, + distance_params=self.distance_params, + num_instances_per_class=self.num_instances, + random_state=self.random_state, ) self.classifier = classifier @@ -69,7 +67,7 @@ def __init__( distance_params=self.distance_params, n_neighbors=1, ) - super(CondenserClassifier, self).__init__() + super().__init__() def _fit(self, X, y): condensed_X, condensed_y = self.condenser.fit_transform(X, y) diff --git a/tsml_eval/experiments/set_classifier.py b/tsml_eval/experiments/set_classifier.py index 819a2ef8..9c4cf785 100644 --- a/tsml_eval/experiments/set_classifier.py +++ b/tsml_eval/experiments/set_classifier.py @@ -48,6 +48,7 @@ ["msm", "1nn-msm"], ["twe", "1nn-twe"], "1nn-dtw-cv", + "condenserclassifier", "kMeansCondenser-dtw-1", "kMeansCondenser-dtw-2", "kMeansCondenser-dtw-3", @@ -462,6 +463,10 @@ def _set_classifier_distance_based( return KNeighborsTimeSeriesClassifier( distance="twe", distance_params={"nu": 0.001}, n_jobs=n_jobs, **kwargs ) + elif c == "condenserclassifier": + from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier + + return CondenserClassifier(**kwargs) elif c == "simplerankcondenser": from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier From 94a3094d3fc37f96f5cf4cb626651c99b3f0c238 Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio <47889499+dguijo@users.noreply.github.com> Date: Mon, 21 Oct 2024 18:22:39 +0200 Subject: [PATCH 20/21] Minor change to super --- tsml_eval/_wip/condensing/drop1.py | 3 +-- tsml_eval/_wip/condensing/drop2.py | 3 +-- tsml_eval/_wip/condensing/drop3.py | 3 +-- tsml_eval/_wip/condensing/simple_rank.py | 7 ++----- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/tsml_eval/_wip/condensing/drop1.py b/tsml_eval/_wip/condensing/drop1.py index 20a28a95..3ed9a75a 100644 --- a/tsml_eval/_wip/condensing/drop1.py +++ b/tsml_eval/_wip/condensing/drop1.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import numpy as np from aeon.distances import get_distance_function from aeon.transformations.collection.base import BaseCollectionTransformer @@ -51,7 +50,7 @@ def __init__( self.selected_indices = [] - super(Drop1Condenser, self).__init__() + super().__init__() def _fit(self, X, y): n_classes = len(np.unique(y)) diff --git a/tsml_eval/_wip/condensing/drop2.py b/tsml_eval/_wip/condensing/drop2.py index 69707892..599558f2 100644 --- a/tsml_eval/_wip/condensing/drop2.py +++ b/tsml_eval/_wip/condensing/drop2.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import numpy as np from aeon.distances import get_distance_function from aeon.transformations.collection.base import BaseCollectionTransformer @@ -51,7 +50,7 @@ def __init__( self.selected_indices = [] - super(Drop2Condenser, self).__init__() + super().__init__() def _fit(self, X, y): n_classes = len(np.unique(y)) diff --git a/tsml_eval/_wip/condensing/drop3.py b/tsml_eval/_wip/condensing/drop3.py index 0aabecba..e83d9bdf 100644 --- a/tsml_eval/_wip/condensing/drop3.py +++ b/tsml_eval/_wip/condensing/drop3.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import numpy as np from aeon.distances import get_distance_function from aeon.transformations.collection.base import BaseCollectionTransformer @@ -51,7 +50,7 @@ def __init__( self.selected_indices = [] - super(Drop3Condenser, self).__init__() + super().__init__() def _fit(self, X, y): n_classes = len(np.unique(y)) diff --git a/tsml_eval/_wip/condensing/simple_rank.py b/tsml_eval/_wip/condensing/simple_rank.py index db13c16e..25e8e521 100644 --- a/tsml_eval/_wip/condensing/simple_rank.py +++ b/tsml_eval/_wip/condensing/simple_rank.py @@ -1,11 +1,8 @@ import numpy as np +from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier from aeon.distances import get_distance_function from aeon.transformations.collection.base import BaseCollectionTransformer -from aeon.classification.distance_based import ( - KNeighborsTimeSeriesClassifier, -) - class SimpleRankCondenser(BaseCollectionTransformer): """ @@ -56,7 +53,7 @@ def __init__( self.selected_indices = [] - super(SimpleRankCondenser, self).__init__() + super().__init__() def _fit(self, X, y): # As SR do not separate prototypes per class, the number should be multiplied by From deaab361d0974b0deeab64068b7135d92cb2977b Mon Sep 17 00:00:00 2001 From: David Guijo-Rubio <47889499+dguijo@users.noreply.github.com> Date: Mon, 21 Oct 2024 18:27:28 +0200 Subject: [PATCH 21/21] Remove utf8 first line --- tsml_eval/_wip/condensing/draw_average_and_barycentres.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tsml_eval/_wip/condensing/draw_average_and_barycentres.py b/tsml_eval/_wip/condensing/draw_average_and_barycentres.py index 3eff539f..41ce4364 100644 --- a/tsml_eval/_wip/condensing/draw_average_and_barycentres.py +++ b/tsml_eval/_wip/condensing/draw_average_and_barycentres.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import os import matplotlib.pyplot as plt