Draft
28 commits
582bf1d
first approach
dguijo May 24, 2023
a35a728
minimal changes
dguijo May 25, 2023
1cf3b82
Condensing with new version of ba and graphs for drawing averages
Jun 9, 2023
bfdf0a6
k=2 to 5 with kmeans clustering
Jun 10, 2023
4fda5e2
kmeans for condensing with 2 to 5 instances per class
Jun 12, 2023
ed105df
changes in the way fig is done
dguijo Aug 2, 2023
462bc50
Drop1 done and changes in the condensing wrapper
dguijo Aug 11, 2023
66e3d48
Minimal change to the wrapper
dguijo Aug 11, 2023
1b11816
Drop 2 done. Still some prints for sanity check
dguijo Aug 11, 2023
dbd2e26
Drop 1-2 reworked. Still some prints for sanity check
dguijo Aug 11, 2023
cc82fc5
DropX versions ready and some experiments in set_classifier
dguijo Aug 11, 2023
be0d7d7
merged main
dguijo Aug 11, 2023
e30edbe
Removes config file UCO
dguijo Aug 11, 2023
dc5cc73
kMedoids condenser and several changes to others
dguijo Aug 12, 2023
8a79793
kmeans improvements
dguijo Aug 30, 2023
562df94
Merge remote-tracking branch 'origin/main' into condensing
dguijo Aug 30, 2023
8a62a3f
Removing duplicated code
dguijo Sep 1, 2023
89f8dab
Improvements to clustering condenser method
dguijo Sep 5, 2023
0dfcca0
Merge main
dguijo Oct 9, 2024
573f649
Merge remote-tracking branch 'origin' into condensing
dguijo Oct 9, 2024
fda86af
Modifies paths
dguijo Oct 9, 2024
ef16f93
modify to use aeon new clustering module
dguijo Oct 9, 2024
680cf31
Adding BaseCondenser case
dguijo Oct 9, 2024
531adcb
Merge remote-tracking branch 'origin' into condensing
dguijo Oct 9, 2024
b78053f
Merge remote-tracking branch 'origin' into condensing
dguijo Oct 11, 2024
ccd6063
Merge remote-tracking branch 'origin' into condensing
dguijo Oct 21, 2024
94a3094
Minor change to super
dguijo Oct 21, 2024
deaab36
Remove utf8 first line
dguijo Oct 21, 2024
125 changes: 125 additions & 0 deletions tsml_eval/_wip/condensing/clustering_condenser.py
@@ -0,0 +1,125 @@
import numpy as np
from aeon.transformations.collection.base import BaseCollectionTransformer


class ClusteringCondenser(BaseCollectionTransformer):
"""
Classifier wrapper for its use with any condensing approach.

Parameters
----------
distance
distance_params

Examples
--------
>>> from ...
>>> from ...
"""

_tags = {
"univariate-only": True,
"fit_is_empty": True,
"X_inner_mtype": ["np-list", "numpy3D"],
"requires_y": True,
"y_inner_mtype": ["numpy1D"],
}

def __init__(
self,
clustering_approach=None,
distance="dtw",
distance_params=None,
num_instances_per_class=1,
random_state=None,
):
self.distance = distance

self.distance_params = distance_params
if self.distance_params is None:
self.distance_params = {}

self.num_instances_per_class = num_instances_per_class

self.selected_series = np.array([])
self.y_selected_series = []

self.random_state = random_state

self.clustering_approach = clustering_approach
if self.clustering_approach == "pam":
from aeon.clustering import TimeSeriesKMedoids

self.clusterer = TimeSeriesKMedoids(
n_clusters=self.num_instances_per_class,
method="pam",
init_algorithm="random",
distance=self.distance,
distance_params=self.distance_params,
random_state=self.random_state,
)

elif self.clustering_approach == "kmeans" or self.clustering_approach is None:
from aeon.clustering import TimeSeriesKMeans

self.average_params = {
"distance": self.distance,
**self.distance_params.copy(),
}

self.clusterer = TimeSeriesKMeans(
n_clusters=self.num_instances_per_class,
distance=self.distance,
distance_params=self.distance_params,
averaging_method="ba",
average_params=self.average_params,
random_state=self.random_state,
)

super().__init__()

    def _transform(self, X, y):
        # reset stored prototypes and labels so repeated calls do not accumulate
        self.selected_series = self.selected_series.reshape(0, *X.shape[1:])
        self.y_selected_series = []

for i in np.unique(y):
idxs_class = np.where(y == i)
X_i = X[idxs_class]

            # with a single instance per class there is no need to run the clusterer:
            # take the medoid (pam) or the barycentre (kmeans) of the class directly.
if self.num_instances_per_class == 1:
if self.clustering_approach == "pam":
from aeon.clustering.averaging._ba_utils import _medoids

averaged_series_class_i = [
_medoids(
X_i,
distance=self.distance,
**self.distance_params,
)
]
elif self.clustering_approach == "kmeans":
from aeon.clustering.averaging import elastic_barycenter_average

averaged_series_class_i = [
elastic_barycenter_average(
X_i,
distance=self.distance,
**self.distance_params,
)
]
# for self.num_instances_per_class > 1.
else:
self.clusterer.fit(X_i)
averaged_series_class_i = self.clusterer.cluster_centers_

self.selected_series = np.concatenate(
(self.selected_series, averaged_series_class_i), axis=0
)

self.y_selected_series.extend([i] * self.num_instances_per_class)

return np.array(self.selected_series), np.array(self.y_selected_series)

def _fit_transform(self, X, y):
return self._transform(X, y)
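A minimal usage sketch for the new condenser (not part of the diff; the toy data and parameter values are illustrative, and it assumes fit_transform returns the (X, y) pair, as CondenserClassifier below relies on):

import numpy as np

from tsml_eval._wip.condensing.clustering_condenser import ClusteringCondenser

# toy univariate collection: 20 series of length 50 in two classes
rng = np.random.default_rng(0)
X = rng.normal(size=(20, 1, 50))
y = np.array([0] * 10 + [1] * 10)

condenser = ClusteringCondenser(
    clustering_approach="kmeans",
    distance="dtw",
    distance_params={"window": 0.2},
    num_instances_per_class=3,
    random_state=0,
)
X_cond, y_cond = condenser.fit_transform(X, y)
print(X_cond.shape, y_cond.shape)  # expected (6, 1, 50) and (6,): 3 prototypes per class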
78 changes: 78 additions & 0 deletions tsml_eval/_wip/condensing/condensing_classifier.py
@@ -0,0 +1,78 @@
from aeon.classification.base import BaseClassifier


class CondenserClassifier(BaseClassifier):
"""
Classifier wrapper for its use with any condensing approach.

Parameters
----------
distance
distance_params

Examples
--------
>>> from ...
>>> from ...
"""

_tags = {
"univariate-only": True,
"fit_is_empty": False,
"X_inner_mtype": ["np-list", "numpy3D"],
}

def __init__(
self,
condenser=None,
distance="dtw",
distance_params=None,
classifier=None,
num_instances=1,
random_state=None,
):
self.distance = distance

self.distance_params = distance_params
if self.distance_params is None:
self.distance_params = {}

self.num_instances = num_instances

self.random_state = random_state

self.condenser = condenser
if self.condenser is None:
from tsml_eval._wip.condensing.clustering_condenser import (
ClusteringCondenser,
)

self.condenser = ClusteringCondenser(
clustering_approach="kmeans",
distance=self.distance,
distance_params=self.distance_params,
num_instances_per_class=self.num_instances,
random_state=self.random_state,
)

self.classifier = classifier
if self.classifier is None:
from aeon.classification.distance_based import (
KNeighborsTimeSeriesClassifier,
)

self.classifier = KNeighborsTimeSeriesClassifier(
distance=self.distance,
weights="distance",
distance_params=self.distance_params,
n_neighbors=1,
)
super().__init__()

def _fit(self, X, y):
condensed_X, condensed_y = self.condenser.fit_transform(X, y)
self.classifier.fit(condensed_X, condensed_y)
return self

def _predict(self, X):
return self.classifier.predict(X)
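An end-to-end sketch of the wrapper with its defaults (illustrative only; not part of the diff): the default ClusteringCondenser reduces each class to num_instances prototypes, and a 1-NN classifier is fitted on the prototypes.

import numpy as np

from tsml_eval._wip.condensing.condensing_classifier import CondenserClassifier

rng = np.random.default_rng(42)
X_train = rng.normal(size=(20, 1, 50))
y_train = np.array([0] * 10 + [1] * 10)
X_test = rng.normal(size=(4, 1, 50))

# condense to 2 prototypes per class, then classify with 1-NN over the 4 prototypes
clf = CondenserClassifier(num_instances=2, distance="dtw", random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(y_pred)  # 4 predicted labels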
66 changes: 66 additions & 0 deletions tsml_eval/_wip/condensing/draw_average_and_barycentres.py
@@ -0,0 +1,66 @@
import os

import matplotlib.pyplot as plt
import numpy as np
from aeon.clustering.averaging import elastic_barycenter_average  # module path used in clustering_condenser.py
from aeon.datasets import load_from_tsfile

dataset = "GunPoint"
c = "1"

distances = ["msm", "dtw", "twe"]
distance_params = {
"msm": {"c": 1},
"dtw": {"window": 0.2},
"twe": {"nu": 0.05, "lmbda": 1},
}
names = ["MBA", "DBA", "TBA"]
colours = ["blue", "purple", "green"]
n_methods = len(distances) + 1

fig = plt.figure(figsize=(13, 13))

gs0 = fig.add_gridspec(1, 2)

gs00 = gs0[0].subgridspec(n_methods * 2, 1)
gs01 = gs0[1].subgridspec(n_methods, 1)

# original set of time series
start = n_methods - 1
end = n_methods + 1
ax00_gs00 = fig.add_subplot(gs00[start:end, 0])

x_train, y_train = load_from_tsfile(
os.path.join(f"../../../../TSC_datasets/{dataset}/{dataset}_TRAIN.ts")
)

x = range(0, x_train.shape[2])
idxs = np.where(y_train == c)

for i in x_train[idxs]:
ax00_gs00.plot(x, i[0], lw=0.2)

ax00_gs00.set_title("Original time series", size=14)

# average time series
ax01_gs01 = fig.add_subplot(gs01[0])
series_avg = np.mean(np.array(x_train[idxs]), axis=0)[0]
ax01_gs01.plot(x, series_avg, color="red")
ax01_gs01.set_title("Averaging", size=14)

# plots BA time series (msm, dtw, twe).
for idx, i in enumerate(distances):
series_BA = elastic_barycenter_average(
x_train[idxs],
        distance=i,
**distance_params[i],
)
ax = fig.add_subplot(gs01[idx + 1])
ax.plot(x, series_BA[0, :], color=colours[idx])
ax.set_title(names[idx], size=14)

fig.suptitle(f"{dataset} - Class {c}", size=16)

fig.tight_layout()

plt.savefig("barycentres_example.png")
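A quick standalone check (illustrative; assumes the aeon.clustering.averaging path used elsewhere in this PR and that per-distance keyword arguments are forwarded as in clustering_condenser.py) to confirm the barycentre averaging call works before running the full plotting script:

import numpy as np
from aeon.clustering.averaging import elastic_barycenter_average

# five random univariate series of length 30
X_toy = np.random.default_rng(0).normal(size=(5, 1, 30))
centre = elastic_barycenter_average(X_toy, distance="msm", c=1.0)
print(centre.shape)  # expected (1, 30), matching the series_BA[0, :] indexing above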