From 85efc47f32ebf72be00a04cf4565d77fbdc0f028 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Wed, 6 Aug 2025 19:40:45 +0100 Subject: [PATCH 1/9] add mixins --- aeon/forecasting/machine_learning/_setar.py | 4 ++-- aeon/forecasting/machine_learning/_setarforest.py | 4 ++-- aeon/forecasting/machine_learning/_setartree.py | 4 ++-- .../machine_learning/tests/test_setartree.py | 14 -------------- 4 files changed, 6 insertions(+), 20 deletions(-) diff --git a/aeon/forecasting/machine_learning/_setar.py b/aeon/forecasting/machine_learning/_setar.py index 9bb1517625..98d5f205c8 100644 --- a/aeon/forecasting/machine_learning/_setar.py +++ b/aeon/forecasting/machine_learning/_setar.py @@ -5,7 +5,7 @@ import numpy as np -from aeon.forecasting.base import BaseForecaster +from aeon.forecasting.base import BaseForecaster, IterativeForecastingMixin def _lagmat_1d(y: np.ndarray, maxlag: int) -> np.ndarray: @@ -38,7 +38,7 @@ def _ols_fit(X: np.ndarray, y: np.ndarray): return intercept, coefs, sse -class SETAR(BaseForecaster): +class SETAR(BaseForecaster, IterativeForecastingMixin): """ Self-Exciting Threshold AutoRegressive (SETAR) forecaster with 2 regimes. diff --git a/aeon/forecasting/machine_learning/_setarforest.py b/aeon/forecasting/machine_learning/_setarforest.py index 89ae2fe2bf..b37c048e05 100644 --- a/aeon/forecasting/machine_learning/_setarforest.py +++ b/aeon/forecasting/machine_learning/_setarforest.py @@ -4,7 +4,7 @@ import numpy as np -from aeon.forecasting.base import BaseForecaster +from aeon.forecasting.base import BaseForecaster, IterativeForecastingMixin from ._setartree import SETARTree @@ -12,7 +12,7 @@ __all__ = ["SETARForest"] -class SETARForest(BaseForecaster): +class SETARForest(BaseForecaster, IterativeForecastingMixin): """ SETAR-Forest: Bagging + random subspace ensemble of SETAR-Tree base learners. diff --git a/aeon/forecasting/machine_learning/_setartree.py b/aeon/forecasting/machine_learning/_setartree.py index 6fa72addc7..a5f3e775fb 100644 --- a/aeon/forecasting/machine_learning/_setartree.py +++ b/aeon/forecasting/machine_learning/_setartree.py @@ -5,13 +5,13 @@ from scipy.stats import f from sklearn.linear_model import LinearRegression -from aeon.forecasting.base import BaseForecaster +from aeon.forecasting.base import BaseForecaster, IterativeForecastingMixin __maintainer__ = ["TinaJin0228"] __all__ = ["SETARTree"] -class SETARTree(BaseForecaster): +class SETARTree(BaseForecaster, IterativeForecastingMixin): """ SETAR-Tree: A tree algorithm for global time series forecasting. 
diff --git a/aeon/forecasting/machine_learning/tests/test_setartree.py b/aeon/forecasting/machine_learning/tests/test_setartree.py index 12f76b1b43..fc031d9976 100644 --- a/aeon/forecasting/machine_learning/tests/test_setartree.py +++ b/aeon/forecasting/machine_learning/tests/test_setartree.py @@ -25,20 +25,6 @@ def test_linear_series(): assert np.isclose(pred, 21.0, atol=0.01), f"Prediction {pred} not close to 21.0" -def test_iterative_forecast_linear(): - """Test iterative forecasting on a linear series (1-D array of length H).""" - y = np.arange(1, 11.0) - f = SETARTree(lag=2) - preds = f.iterative_forecast(y, 3) - expected = np.arange(11.0, 14.0) - assert preds.shape == ( - 3, - ), f"iterative_forecast should return shape (H,), got {preds.shape}" - assert np.allclose( - preds, expected, atol=0.01 - ), f"Predictions {preds} not close to {expected}" - - def test_fixed_lag(): """Test the fixed_lag parameter.""" y = np.arange(1, 21.0) From 0aad2c58fa64131958bfd182763bcae1eebc4819 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 7 Aug 2025 19:33:59 +0100 Subject: [PATCH 2/9] TAR --- aeon/forecasting/stats/__init__.py | 6 +- .../{machine_learning => stats}/_setar.py | 20 +-- aeon/forecasting/stats/_tar.py | 163 ++++++++++++++++++ aeon/forecasting/stats/_tvp.py | 2 +- .../tests/test_setar.py | 2 +- aeon/forecasting/stats/tests/test_tar.py | 67 +++++++ aeon/forecasting/stats/tests/test_tvp.py | 12 +- 7 files changed, 247 insertions(+), 25 deletions(-) rename aeon/forecasting/{machine_learning => stats}/_setar.py (90%) create mode 100644 aeon/forecasting/stats/_tar.py rename aeon/forecasting/{machine_learning => stats}/tests/test_setar.py (98%) create mode 100644 aeon/forecasting/stats/tests/test_tar.py diff --git a/aeon/forecasting/stats/__init__.py b/aeon/forecasting/stats/__init__.py index 77e06e44f2..d67bef5e68 100644 --- a/aeon/forecasting/stats/__init__.py +++ b/aeon/forecasting/stats/__init__.py @@ -3,9 +3,11 @@ __all__ = [ "ETS", "ARIMA", - "TVPForecaster", + "TVP", + "TAR", ] from aeon.forecasting.stats._arima import ARIMA from aeon.forecasting.stats._ets import ETS -from aeon.forecasting.stats._tvp import TVPForecaster +from aeon.forecasting.stats._tar import TAR +from aeon.forecasting.stats._tvp import TVP diff --git a/aeon/forecasting/machine_learning/_setar.py b/aeon/forecasting/stats/_setar.py similarity index 90% rename from aeon/forecasting/machine_learning/_setar.py rename to aeon/forecasting/stats/_setar.py index 98d5f205c8..41c7f93315 100644 --- a/aeon/forecasting/machine_learning/_setar.py +++ b/aeon/forecasting/stats/_setar.py @@ -4,21 +4,10 @@ __all__ = ["SETAR"] import numpy as np +from numba import njit from aeon.forecasting.base import BaseForecaster, IterativeForecastingMixin - - -def _lagmat_1d(y: np.ndarray, maxlag: int) -> np.ndarray: - """Return lag matrix with columns [y_{t-1}, ..., y_{t-maxlag}] (trim='both').""" - y = np.asarray(y, dtype=float).squeeze() - if y.ndim != 1: - raise ValueError("y must be a 1D array for lag construction.") - n = y.shape[0] - if n <= maxlag: - raise ValueError("Series too short for lag construction.") - # Column k (0-based) is y_{t-(k+1)} - cols = [y[maxlag - (k + 1) : -(k + 1) or None] for k in range(maxlag)] - return np.column_stack(cols) # shape: (n - maxlag, maxlag) +from aeon.forecasting.stats._tar import _make_lag_matrix def _add_constant(X: np.ndarray) -> np.ndarray: @@ -28,6 +17,7 @@ def _add_constant(X: np.ndarray) -> np.ndarray: return np.hstack([np.ones((n, 1), dtype=X.dtype), X]) +@njit(cache=True, fastmath=True) 
def _ols_fit(X: np.ndarray, y: np.ndarray): """Least-squares fit: return (intercept, coefs, sse).""" beta, *_ = np.linalg.lstsq(X, y, rcond=None) @@ -106,7 +96,7 @@ def _fit(self, y, exog=None): maxlag = self.lag if len(y) <= maxlag: raise ValueError("Series too short for fallback fitting.") - lagged = _lagmat_1d(y, maxlag) # cols: y_{t-1}..y_{t-maxlag} + lagged = _make_lag_matrix(y, maxlag) # cols: y_{t-1}..y_{t-maxlag} X = _add_constant(lagged) target = y[maxlag:] inter, coefs, _ = _ols_fit(X, target) @@ -127,7 +117,7 @@ def _fit_setar(y: np.ndarray, lag_order: int, delay: int): # caller does the one-time length check; just return None here return None - lagged = _lagmat_1d(y, maxlag) # shape: (T-maxlag, maxlag) + lagged = _make_lag_matrix(y, maxlag) # shape: (T-maxlag, maxlag) trimmed_y = y[maxlag:] th_index = delay - 1 # threshold variable (y_{t-d}) th_var = lagged[:, th_index] # y_{t-1} when delay=1 diff --git a/aeon/forecasting/stats/_tar.py b/aeon/forecasting/stats/_tar.py new file mode 100644 index 0000000000..221031fbb9 --- /dev/null +++ b/aeon/forecasting/stats/_tar.py @@ -0,0 +1,163 @@ +import numpy as np +from numba import njit + +from aeon.forecasting.base import BaseForecaster, IterativeForecastingMixin + + +@njit(cache=True, fastmath=True) +def _make_lag_matrix(y: np.ndarray, maxlag: int) -> np.ndarray: + """Return lag matrix with columns [y_{t-1}, ..., y_{t-maxlag}] (trim='both').""" + n = y.shape[0] + rows = n - maxlag + out = np.empty((rows, maxlag), dtype=np.float64) + + for i in range(rows): + for k in range(maxlag): + out[i, k] = y[maxlag + i - (k + 1)] + return out + + +@njit(cache=True, fastmath=True) +def _make_lags_regimes(y: np.ndarray, maxlag: int, delay: int, threshold: float): + """Create lag matrix and regime flags in one pass.""" + n = y.shape[0] + rows = n - maxlag + X_lag = np.empty((rows, maxlag), dtype=np.float64) + regimes = np.empty(rows, dtype=np.bool_) + + for i in range(rows): + for k in range(maxlag): + X_lag[i, k] = y[maxlag + i - (k + 1)] + regimes[i] = y[maxlag + i - delay] > threshold + + return X_lag, regimes + + +@njit(cache=True, fastmath=True) +def _ols_fit(X: np.ndarray, y: np.ndarray): + """Fit OLS regression using normal equations.""" + n_samples, n_features = X.shape + Xb = np.ones((n_samples, n_features + 1), dtype=np.float64) + Xb[:, 1:] = X + + XtX = Xb.T @ Xb + Xty = Xb.T @ y + beta = np.linalg.solve(XtX, Xty) + + intercept = beta[0] + coef = beta[1:] + return intercept, coef + + +@njit(cache=True, fastmath=True) +def _ols_predict(X: np.ndarray, intercept: float, coef: np.ndarray): + """Predict using intercept and coefficients.""" + return intercept + X @ coef + + +class TAR(BaseForecaster, IterativeForecastingMixin): + """Fast Threshold Autoregressive (TAR) forecaster using Numba OLS. + + A TAR model fits two autoregressive models to different regimes, + depending on whether the value at lag `delay` is above or below + a threshold (default: mean of training series). Each regime has + its own set of AR coefficients estimated via OLS. + + This implementation uses Numba to efficiently compute the lag + matrix, assign regimes, and fit both models without sklearn. + + Parameters + ---------- + threshold : float or None, default=None + Threshold value for regime split. If None, set to mean of training data. + delay : int, default=1 + Delay `d` for threshold variable y_{t-d}. + ar_order : int, default=1 + AR order `p` for both regimes. + + Attributes + ---------- + forecast_ : float + One-step-ahead forecast from end of training series. 
+    params_ : dict
+        Dictionary containing fitted intercepts and AR coefficients for each regime.
+
+    References
+    ----------
+    Tong, H. (1978). "On a threshold model." In: Chen, C.H. (ed.)
+    Pattern Recognition and Signal Processing. Sijthoff & Noordhoff, pp. 575–586.
+
+    Examples
+    --------
+    >>> from aeon.datasets import load_airline
+    >>> from aeon.forecasting.stats import TAR
+    >>> y = load_airline().squeeze()
+    >>> f = TAR(ar_order=2, delay=1)
+    >>> f.fit(y)
+    TAR(...)
+    >>> f.forecast_
+    431.23...
+    >>> f.predict(y[:-1])
+    428.57...
+    """
+
+    def __init__(self, threshold=None, delay=1, ar_order=1):
+        self.threshold = threshold
+        self.delay = delay
+        self.ar_order = ar_order
+        super().__init__(horizon=1, axis=1)
+
+    def _fit(self, y, exog=None):
+        """Fit TAR model to training data."""
+        y = y.squeeze()
+        self.threshold_ = (
+            float(np.mean(y)) if self.threshold is None else float(self.threshold)
+        )
+
+        # Use Numba lag matrix + regime builder
+        X_lag, regimes = _make_lags_regimes(
+            y, self.ar_order, self.delay, self.threshold_
+        )
+        y_resp = y[self.ar_order :]
+
+        # Fit OLS to each regime
+        self.intercept1_, self.coef1_ = _ols_fit(X_lag[~regimes], y_resp[~regimes])
+        self.intercept2_, self.coef2_ = _ols_fit(X_lag[regimes], y_resp[regimes])
+
+        # Store for inspection
+        self.params_ = {
+            "threshold": self.threshold_,
+            "regime_1": {
+                "intercept": self.intercept1_,
+                "coefficients": self.coef1_.tolist(),
+            },
+            "regime_2": {
+                "intercept": self.intercept2_,
+                "coefficients": self.coef2_.tolist(),
+            },
+        }
+
+        # Store training data
+        self._y = list(y)
+
+        # Forecast from end of training
+        lagged_last = np.array(self._y[-self.ar_order :])[::-1].reshape(1, -1)
+        regime_last = self._y[-self.delay] > self.threshold_
+        self.forecast_ = (
+            _ols_predict(lagged_last, self.intercept2_, self.coef2_)[0]
+            if regime_last
+            else _ols_predict(lagged_last, self.intercept1_, self.coef1_)[0]
+        )
+
+        return self
+
+    def _predict(self, y, exog=None) -> float:
+        """Predict the next step ahead given current series y."""
+        y = np.asarray(y).squeeze()
+        lagged = np.array(y[-self.ar_order :])[::-1].reshape(1, -1)
+        regime = y[-self.delay] > self.threshold_
+        return (
+            _ols_predict(lagged, self.intercept2_, self.coef2_)[0]
+            if regime
+            else _ols_predict(lagged, self.intercept1_, self.coef1_)[0]
+        )
diff --git a/aeon/forecasting/stats/_tvp.py b/aeon/forecasting/stats/_tvp.py
index fd0168ceb5..57ddd7309b 100644
--- a/aeon/forecasting/stats/_tvp.py
+++ b/aeon/forecasting/stats/_tvp.py
@@ -9,7 +9,7 @@
 )
 
 
-class TVPForecaster(BaseForecaster, DirectForecastingMixin, IterativeForecastingMixin):
+class TVP(BaseForecaster, DirectForecastingMixin, IterativeForecastingMixin):
     r"""Time-Varying Parameter (TVP) Forecaster using Kalman filter as described in [1].
     This forecaster models the target series using a time-varying linear autoregression:
diff --git a/aeon/forecasting/machine_learning/tests/test_setar.py b/aeon/forecasting/stats/tests/test_setar.py
similarity index 98%
rename from aeon/forecasting/machine_learning/tests/test_setar.py
rename to aeon/forecasting/stats/tests/test_setar.py
index 01ae18a7ea..1cd9b9713b 100644
--- a/aeon/forecasting/machine_learning/tests/test_setar.py
+++ b/aeon/forecasting/stats/tests/test_setar.py
@@ -3,7 +3,7 @@
 import numpy as np
 import pytest
 
-from aeon.forecasting.machine_learning._setar import SETAR
+from aeon.forecasting.stats._setar import SETAR
 
 
 def test_constant_series_scalar_and_close():
diff --git a/aeon/forecasting/stats/tests/test_tar.py b/aeon/forecasting/stats/tests/test_tar.py
new file mode 100644
index 0000000000..f38e52a885
--- /dev/null
+++ b/aeon/forecasting/stats/tests/test_tar.py
@@ -0,0 +1,67 @@
+"""Test the TAR forecaster."""
+
+import numpy as np
+import pytest
+
+from aeon.datasets import load_airline
+from aeon.forecasting.stats._tar import TAR
+
+
+@pytest.fixture
+def airline_series():
+    """Fixture to load a simple test time series."""
+    return load_airline().squeeze()
+
+
+def test_fit_forecast_output(airline_series):
+    """Test that fit runs and forecast_ is a float."""
+    model = TAR(ar_order=2, delay=1)
+    model.fit(airline_series)
+    assert isinstance(model.forecast_, float)
+    assert np.isfinite(model.forecast_)
+
+
+def test_predict_output(airline_series):
+    """Test that _predict returns a float for valid input."""
+    model = TAR(ar_order=2, delay=1)
+    model.fit(airline_series)
+    pred = model._predict(airline_series[:-1])
+    assert isinstance(pred, float)
+    assert np.isfinite(pred)
+
+
+def test_params_structure(airline_series):
+    """Test that params_ dict has correct structure after fitting."""
+    model = TAR(ar_order=2, delay=1)
+    model.fit(airline_series)
+    params = model.params_
+
+    assert "threshold" in params
+    assert "regime_1" in params
+    assert "regime_2" in params
+
+    for regime in ["regime_1", "regime_2"]:
+        assert "intercept" in params[regime]
+        assert "coefficients" in params[regime]
+        assert isinstance(params[regime]["coefficients"], list)
+
+
+def test_custom_threshold(airline_series):
+    """Test that a user-defined threshold is respected."""
+    threshold = 400.0
+    model = TAR(ar_order=2, delay=1, threshold=threshold)
+    model.fit(airline_series)
+    assert model.threshold_ == threshold
+
+
+def test_forecast_changes_with_series(airline_series):
+    """Test that modifying the series changes the forecast."""
+    model1 = TAR(ar_order=2, delay=1)
+    model2 = TAR(ar_order=2, delay=1)
+
+    model1.fit(airline_series)
+    modified_series = airline_series.copy()
+    modified_series[-1] += 100
+    model2.fit(modified_series)
+
+    assert not np.isclose(model1.forecast_, model2.forecast_)
diff --git a/aeon/forecasting/stats/tests/test_tvp.py b/aeon/forecasting/stats/tests/test_tvp.py
index 89e78aa3ff..ad1240c284 100644
--- a/aeon/forecasting/stats/tests/test_tvp.py
+++ b/aeon/forecasting/stats/tests/test_tvp.py
@@ -6,20 +6,20 @@
 
 import numpy as np
 
-from aeon.forecasting.stats._tvp import TVPForecaster
+from aeon.forecasting.stats._tvp import TVP
 
 
 def test_direct():
     """Test aeon TVP Forecaster equivalent to statsmodels."""
     expected = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])
-    tvp = TVPForecaster(window=5, horizon=1, var=0.01, beta_var=0.01)
+    tvp = TVP(window=5, horizon=1, var=0.01, beta_var=0.01)
     p = tvp.forecast(expected)
     p2 =
tvp.direct_forecast(expected, prediction_horizon=5) assert p == p2[0] def test_static_ar1_convergence_to_ols(): - """Test TVPForecaster converges to the OLS solution for a static AR(1) process.""" + """Test TVP converges to the OLS solution for a static AR(1) process.""" # Simulate AR(1) data with constant parameters rng = np.random.RandomState(0) true_phi = 0.6 @@ -32,7 +32,7 @@ def test_static_ar1_convergence_to_ols(): for t in range(1, n): y[t] = true_intercept + true_phi * y[t - 1] + rng.normal(0, noise_std) # Fit with beta_var=0 (no parameter drift) and observation variance = noise_var - forecaster = TVPForecaster(window=1, horizon=1, var=noise_std**2, beta_var=0.0) + forecaster = TVP(window=1, horizon=1, var=noise_std**2, beta_var=0.0) forecaster.fit(y) beta_est = forecaster._beta # [intercept, phi] estimated # Compute static OLS estimates for comparison @@ -67,8 +67,8 @@ def test_tvp_adapts_to_changing_coefficient(): # Second half (t=100 to 199) with phi2 for t in range(100, n): y[t] = intercept + phi2 * y[t - 1] + rng.normal(0, noise_std) - # Fit TVPForecaster with nonzero beta_var to allow parameter drift - forecaster = TVPForecaster(window=1, horizon=1, var=noise_std**2, beta_var=0.1) + # Fit TVP with nonzero beta_var to allow parameter drift + forecaster = TVP(window=1, horizon=1, var=noise_std**2, beta_var=0.1) forecaster.fit(y) beta_final = forecaster._beta # Compute OLS on first and second half segments for reference From ce9c7b3d3240ccfc14624d6487fcd93c10d615df Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 7 Aug 2025 20:48:47 +0100 Subject: [PATCH 3/9] TAR --- aeon/forecasting/machine_learning/_setar.py | 0 aeon/forecasting/stats/__init__.py | 4 +- aeon/forecasting/stats/_setar.py | 195 -------------------- aeon/forecasting/stats/_tar.py | 24 +-- pyproject.toml | 2 +- 5 files changed, 10 insertions(+), 215 deletions(-) create mode 100644 aeon/forecasting/machine_learning/_setar.py delete mode 100644 aeon/forecasting/stats/_setar.py diff --git a/aeon/forecasting/machine_learning/_setar.py b/aeon/forecasting/machine_learning/_setar.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/aeon/forecasting/stats/__init__.py b/aeon/forecasting/stats/__init__.py index d67bef5e68..985bc875bd 100644 --- a/aeon/forecasting/stats/__init__.py +++ b/aeon/forecasting/stats/__init__.py @@ -1,13 +1,15 @@ """Stats based forecasters.""" __all__ = [ - "ETS", "ARIMA", + "ETS", + "SETAR", "TVP", "TAR", ] from aeon.forecasting.stats._arima import ARIMA from aeon.forecasting.stats._ets import ETS +from aeon.forecasting.stats._setar import SETAR from aeon.forecasting.stats._tar import TAR from aeon.forecasting.stats._tvp import TVP diff --git a/aeon/forecasting/stats/_setar.py b/aeon/forecasting/stats/_setar.py deleted file mode 100644 index 41c7f93315..0000000000 --- a/aeon/forecasting/stats/_setar.py +++ /dev/null @@ -1,195 +0,0 @@ -"""SETAR forecaster with 2 regimes and fallback to linear regression.""" - -__maintainer__ = ["TinaJin0228"] -__all__ = ["SETAR"] - -import numpy as np -from numba import njit - -from aeon.forecasting.base import BaseForecaster, IterativeForecastingMixin -from aeon.forecasting.stats._tar import _make_lag_matrix - - -def _add_constant(X: np.ndarray) -> np.ndarray: - """Add an explicit intercept column of ones as the first column.""" - X = np.asarray(X, dtype=float) - n = X.shape[0] - return np.hstack([np.ones((n, 1), dtype=X.dtype), X]) - - -@njit(cache=True, fastmath=True) -def _ols_fit(X: np.ndarray, y: np.ndarray): - """Least-squares fit: return 
(intercept, coefs, sse).""" - beta, *_ = np.linalg.lstsq(X, y, rcond=None) - resid = y - X @ beta - sse = float(resid @ resid) - intercept = float(beta[0]) - coefs = np.asarray(beta[1:], dtype=float) - return intercept, coefs, sse - - -class SETAR(BaseForecaster, IterativeForecastingMixin): - """ - Self-Exciting Threshold AutoRegressive (SETAR) forecaster with 2 regimes. - - Implements a 2-regime SETAR model with fallback to linear regression if fitting - fails. Attempts decreasing lags from the specified max lag down to 1, - using a fixed delay of 1. - - Parameters - ---------- - lag : int, default=10 - The maximum autoregressive order (embedding dimension) to attempt. - """ - - _tags = { - "capability:horizon": False, - "y_inner_type": "np.ndarray", - } - - def __init__(self, lag=10): - self.lag = lag - self.model = None # 'setar' or 'linear' - self.current_lag = None - self.threshold = None - self.intercept_low = None - self.coefs_low = None - self.intercept_high = None - self.coefs_high = None - self.fallback_intercept = None - self.fallback_coefs = None - self.forecast_ = None - super().__init__(horizon=1, axis=1) - - def _fit(self, y, exog=None): - y = y.squeeze().astype(float) - delay = 1 - - # one-time length check - if len(y) <= self.lag + delay: - fallback_needed = True - else: - fallback_needed = False - - fitted = False - if not fallback_needed: - # try decreasing AR orders - for lag_order in range(self.lag, 0, -1): - res = self._fit_setar(y, lag_order, delay) - if res is None: - # no valid threshold/regime split at this lag; try smaller lag - continue - ( - self.threshold, - self.coefs_low, - self.intercept_low, - self.coefs_high, - self.intercept_high, - ) = res - self.model = "setar" - self.current_lag = lag_order - fitted = True - break - - if not fitted: - # Fallback to linear regression AR(self.lag) - maxlag = self.lag - if len(y) <= maxlag: - raise ValueError("Series too short for fallback fitting.") - lagged = _make_lag_matrix(y, maxlag) # cols: y_{t-1}..y_{t-maxlag} - X = _add_constant(lagged) - target = y[maxlag:] - inter, coefs, _ = _ols_fit(X, target) - self.fallback_intercept = inter - self.fallback_coefs = coefs - self.model = "linear" - self.current_lag = len(self.fallback_coefs) - - # set one-step-ahead forecast_ for forecast() - self.forecast_ = float(self._one_step_from_tail(y)) - return self - - @staticmethod - def _fit_setar(y: np.ndarray, lag_order: int, delay: int): - """Fit 2-regime SETAR for a given lag and delay; return params or None.""" - maxlag = lag_order + delay - if len(y) <= maxlag: - # caller does the one-time length check; just return None here - return None - - lagged = _make_lag_matrix(y, maxlag) # shape: (T-maxlag, maxlag) - trimmed_y = y[maxlag:] - th_index = delay - 1 # threshold variable (y_{t-d}) - th_var = lagged[:, th_index] # y_{t-1} when delay=1 - - # Grid search for threshold over 15%-85% quantiles - num_obs = th_var.shape[0] - if num_obs <= 0: - return None - sort_idx = np.argsort(th_var) - th_var_sorted = th_var[sort_idx] - start = int(num_obs * 0.15) - end = int(num_obs * 0.85) - if end <= start: - return None - grid = np.unique(th_var_sorted[start:end]) # Unique to avoid duplicates - if grid.size == 0: - return None - - best_sse = np.inf - best_th = None - best_inter_low = None - best_coefs_low = None - best_inter_high = None - best_coefs_high = None - - # X for AR: lags 1..lag_order (plus intercept) - X_full = _add_constant(lagged[:, :lag_order]) - - min_obs_per_regime = lag_order + 1 # intercept + lag_order params - for th in 
grid: - low_idx = th_var <= th - high_idx = ~low_idx - n_low = int(low_idx.sum()) - n_high = int(high_idx.sum()) - if n_low < min_obs_per_regime or n_high < min_obs_per_regime: - continue - - X_low = X_full[low_idx] - y_low = trimmed_y[low_idx] - X_high = X_full[high_idx] - y_high = trimmed_y[high_idx] - - inter_l, coef_l, sse_l = _ols_fit(X_low, y_low) - inter_h, coef_h, sse_h = _ols_fit(X_high, y_high) - sse = sse_l + sse_h - - if sse < best_sse: - best_sse = sse - best_th = th - best_inter_low = inter_l - best_coefs_low = coef_l - best_inter_high = inter_h - best_coefs_high = coef_h - - if best_th is None: - return None - - return best_th, best_coefs_low, best_inter_low, best_coefs_high, best_inter_high - - def _one_step_from_tail(self, y_1d: np.ndarray): - tail = y_1d[-self.current_lag :] - vector = tail[::-1] # [y_t, y_{t-1}, ..., y_{t-l+1}] - - if self.model == "setar": - th_value = tail[-1] # y_t for delay=1 - if th_value <= self.threshold: - return self.intercept_low + float(np.dot(self.coefs_low, vector)) - else: - return self.intercept_high + float(np.dot(self.coefs_high, vector)) - else: # linear fallback - return self.fallback_intercept + float(np.dot(self.fallback_coefs, vector)) - - def _predict(self, y, exog=None): - y = y.squeeze().astype(float) - return float(self._one_step_from_tail(y)) diff --git a/aeon/forecasting/stats/_tar.py b/aeon/forecasting/stats/_tar.py index 221031fbb9..c14aa7e928 100644 --- a/aeon/forecasting/stats/_tar.py +++ b/aeon/forecasting/stats/_tar.py @@ -56,7 +56,7 @@ def _ols_predict(X: np.ndarray, intercept: float, coef: np.ndarray): class TAR(BaseForecaster, IterativeForecastingMixin): - """Fast Threshold Autoregressive (TAR) forecaster using Numba OLS. + """Fast Threshold Autoregressive (TAR) [1] forecaster using Numba OLS. A TAR model fits two autoregressive models to different regimes, depending on whether the value at lag `delay` is above or below @@ -64,16 +64,17 @@ class TAR(BaseForecaster, IterativeForecastingMixin): its own set of AR coefficients estimated via OLS. This implementation uses Numba to efficiently compute the lag - matrix, assign regimes, and fit both models without sklearn. + matrix, assign regimes, and fit both models without sklearn. It does not yet set + the threshold or the AR parameters automatically based on the training data. Parameters ---------- threshold : float or None, default=None Threshold value for regime split. If None, set to mean of training data. delay : int, default=1 - Delay `d` for threshold variable y_{t-d}. + Delay ``d`` for threshold variable ``y_{t-d}``. ar_order : int, default=1 - AR order `p` for both regimes. + AR order ``p`` for both regimes. Attributes ---------- @@ -84,21 +85,8 @@ class TAR(BaseForecaster, IterativeForecastingMixin): References ---------- - Tong, H. (1978). "On a threshold model." In: Chen, C.H. (ed.) + ..[1] Tong, H. (1978). "On a threshold model." In: Chen, C.H. (ed.) Pattern Recognition and Signal Processing. Sijthoff & Noordhoff, pp. 575–586. - - Examples - -------- - >>> from aeon.datasets import load_airline - >>> from aeon.forecasting.stats import TAR - >>> y = load_airline().squeeze() - >>> f = TAR(ar_order=2, delay=1) - >>> f.fit(y) - TARForecaster(...) - >>> f.forecast_ - 431.23... - >>> f.predict(y[:-1]) - 428.57... 
-
-    Examples
-    --------
-    >>> from aeon.datasets import load_airline
-    >>> from aeon.forecasting.stats import TAR
-    >>> y = load_airline().squeeze()
-    >>> f = TAR(ar_order=2, delay=1)
-    >>> f.fit(y)
-    TAR(...)
-    >>> f.forecast_
-    431.23...
-    >>> f.predict(y[:-1])
-    428.57...
""" def __init__(self, threshold=None, delay=1, ar_order=1): diff --git a/pyproject.toml b/pyproject.toml index b27cc4d00f..8965ccd402 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -169,7 +169,7 @@ testpaths = "aeon" doctest_optionflags = [ "NORMALIZE_WHITESPACE", "ELLIPSIS", - "FLOAT_CMP", +# "FLOAT_CMP", ] addopts = [ "--durations=20", From 0682349e3a1ab3d8dbd7b3ff55765e6e8de05804 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 7 Aug 2025 21:04:40 +0100 Subject: [PATCH 4/9] sync --- aeon/distances/_distance.py | 62 +++++++++++++++++++----- aeon/distances/_mpdist.py | 48 ++---------------- aeon/distances/_sbd.py | 20 ++++++-- aeon/distances/_shift_scale_invariant.py | 13 +++-- aeon/distances/elastic/_adtw.py | 17 +++++-- aeon/distances/elastic/_ddtw.py | 17 +++++-- aeon/distances/elastic/_dtw.py | 17 +++++-- aeon/distances/elastic/_dtw_gi.py | 13 +++-- aeon/distances/elastic/_edr.py | 17 +++++-- aeon/distances/elastic/_erp.py | 17 +++++-- aeon/distances/elastic/_lcss.py | 17 +++++-- aeon/distances/elastic/_msm.py | 21 +++++--- aeon/distances/elastic/_shape_dtw.py | 17 +++++-- aeon/distances/elastic/_soft_dtw.py | 17 +++++-- aeon/distances/elastic/_twe.py | 17 +++++-- aeon/distances/elastic/_wddtw.py | 17 +++++-- aeon/distances/elastic/_wdtw.py | 17 +++++-- aeon/distances/mindist/_dft_sfa.py | 12 +++-- aeon/distances/mindist/_paa_sax.py | 16 ++++-- aeon/distances/mindist/_sax.py | 16 ++++-- aeon/distances/mindist/_sfa.py | 12 +++-- aeon/distances/pointwise/_euclidean.py | 20 ++++++-- aeon/distances/pointwise/_manhattan.py | 20 ++++++-- aeon/distances/pointwise/_minkowski.py | 17 +++++-- aeon/distances/pointwise/_squared.py | 20 ++++++-- aeon/distances/tests/test_pairwise.py | 19 ++++++++ 26 files changed, 355 insertions(+), 161 deletions(-) diff --git a/aeon/distances/_distance.py b/aeon/distances/_distance.py index 575b81c642..d1e790ec9e 100644 --- a/aeon/distances/_distance.py +++ b/aeon/distances/_distance.py @@ -4,6 +4,7 @@ from typing import Any, Callable, Optional, TypedDict, Union import numpy as np +from joblib import Parallel, delayed from typing_extensions import Unpack from aeon.distances._mpdist import mp_distance, mp_pairwise_distance @@ -87,6 +88,7 @@ squared_pairwise_distance, ) from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -177,6 +179,7 @@ def pairwise_distance( y: Optional[np.ndarray] = None, method: Union[str, DistanceFunction, None] = None, symmetric: bool = True, + n_jobs: int = 1, **kwargs: Unpack[DistanceKwargs], ) -> np.ndarray: """Compute the pairwise distance matrix between two time series. @@ -201,6 +204,10 @@ def pairwise_distance( function is provided as the "method" parameter, then it will compute an asymmetric distance matrix, and the entire matrix (including both upper and lower triangles) is returned. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. kwargs : Any Extra arguments for distance. Refer to each distance documentation for a list of possible arguments. 
@@ -244,19 +251,23 @@ def pairwise_distance( [ 48.]]) """ if method in PAIRWISE_DISTANCE: - return DISTANCES_DICT[method]["pairwise_distance"](x, y, **kwargs) + return DISTANCES_DICT[method]["pairwise_distance"]( + x, y, n_jobs=n_jobs, **kwargs + ) elif isinstance(method, Callable): if y is None and not symmetric: - return _custom_func_pairwise(x, x, method, **kwargs) - return _custom_func_pairwise(x, y, method, **kwargs) + return _custom_func_pairwise(x, x, method, n_jobs=n_jobs, **kwargs) + return _custom_func_pairwise(x, y, method, n_jobs=n_jobs, **kwargs) else: raise ValueError("Method must be one of the supported strings or a callable") +@threaded def _custom_func_pairwise( X: Optional[Union[np.ndarray, list[np.ndarray]]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, dist_func: Union[DistanceFunction, None] = None, + n_jobs: int = 1, **kwargs: Unpack[DistanceKwargs], ) -> np.ndarray: if dist_func is None: @@ -264,25 +275,42 @@ def _custom_func_pairwise( multivariate_conversion = _is_numpy_list_multivariate(X, y) X, _ = _convert_collection_to_numba_list(X, "X", multivariate_conversion) + + if n_jobs > 1: + X = np.array(X) + if y is None: # To self - return _custom_pairwise_distance(X, dist_func, **kwargs) + return _custom_pairwise_distance(X, dist_func, n_jobs=n_jobs, **kwargs) y, _ = _convert_collection_to_numba_list(y, "y", multivariate_conversion) - return _custom_from_multiple_to_multiple_distance(X, y, dist_func, **kwargs) + if n_jobs > 1: + y = np.array(y) + return _custom_from_multiple_to_multiple_distance( + X, y, dist_func, n_jobs=n_jobs, **kwargs + ) def _custom_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], dist_func: DistanceFunction, + n_jobs: int = 1, **kwargs: Unpack[DistanceKwargs], ) -> np.ndarray: n_cases = len(X) distances = np.zeros((n_cases, n_cases)) - for i in range(n_cases): - for j in range(i + 1, n_cases): - distances[i, j] = dist_func(X[i], X[j], **kwargs) - distances[j, i] = distances[i, j] + def compute_single_distance(i, j): + return i, j, dist_func(X[i], X[j], **kwargs) + + indices = [(i, j) for i in range(n_cases) for j in range(i + 1, n_cases)] + + results = Parallel(n_jobs=n_jobs)( + delayed(compute_single_distance)(i, j) for i, j in indices + ) + + for i, j, dist in results: + distances[i, j] = dist + distances[j, i] = dist # Mirror for symmetry return distances @@ -291,15 +319,25 @@ def _custom_from_multiple_to_multiple_distance( x: Union[np.ndarray, list[np.ndarray]], y: Union[np.ndarray, list[np.ndarray]], dist_func: DistanceFunction, + n_jobs: int = 1, **kwargs: Unpack[DistanceKwargs], ) -> np.ndarray: n_cases = len(x) m_cases = len(y) distances = np.zeros((n_cases, m_cases)) - for i in range(n_cases): - for j in range(m_cases): - distances[i, j] = dist_func(x[i], y[j], **kwargs) + def compute_single_distance(i, j): + return i, j, dist_func(x[i], y[j], **kwargs) + + indices = [(i, j) for i in range(n_cases) for j in range(m_cases)] + + results = Parallel(n_jobs=n_jobs)( + delayed(compute_single_distance)(i, j) for i, j in indices + ) + + for i, j, dist in results: + distances[i, j] = dist + return distances diff --git a/aeon/distances/_mpdist.py b/aeon/distances/_mpdist.py index c679daef5c..ece204ab39 100644 --- a/aeon/distances/_mpdist.py +++ b/aeon/distances/_mpdist.py @@ -4,10 +4,6 @@ import numpy as np from numba import njit -from numba.typed import List as NumbaList - -from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list -from aeon.utils.validation.collection import 
_is_numpy_list_multivariate def mp_distance(x: np.ndarray, y: np.ndarray, m: int = 0) -> float: @@ -287,6 +283,7 @@ def mp_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, m: int = 0, + n_jobs: int = 1, ) -> np.ndarray: """Compute the mpdist pairwise distance between a set of time series. @@ -339,45 +336,8 @@ def mp_pairwise_distance( [2.82842712], [2.82842712]]) """ - multivariate_conversion = _is_numpy_list_multivariate(X, y) - _X, unequal_length = _convert_collection_to_numba_list( - X, "X", multivariate_conversion - ) - if m == 0: - m = int(_X.shape[2] / 4) - - if y is None: - return _mpdist_pairwise_distance_single(_X, m) - - _y, unequal_length = _convert_collection_to_numba_list( - y, "y", multivariate_conversion - ) - - return _mpdist_pairwise_distance(_X, _y, m) - - -def _mpdist_pairwise_distance_single(x: NumbaList[np.ndarray], m: int) -> np.ndarray: - n_cases = len(x) - distances = np.zeros((n_cases, n_cases)) - - for i in range(n_cases): - for j in range(i + 1, n_cases): - distances[i, j] = mp_distance(x[i], x[j], m) - distances[j, i] = distances[i, j] - - return distances - - -def _mpdist_pairwise_distance( - x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], m: int -) -> np.ndarray: - n_cases = len(x) - m_cases = len(y) - - distances = np.zeros((n_cases, m_cases)) + m = int(X.shape[2] / 4) + from aeon.distances._distance import pairwise_distance - for i in range(n_cases): - for j in range(m_cases): - distances[i, j] = mp_distance(x[i], y[j], m) - return distances + return pairwise_distance(X, y, method=mp_distance, m=m, n_jobs=n_jobs) diff --git a/aeon/distances/_sbd.py b/aeon/distances/_sbd.py index 7625f12e7e..eff73bd2de 100644 --- a/aeon/distances/_sbd.py +++ b/aeon/distances/_sbd.py @@ -5,11 +5,12 @@ from typing import Optional, Union import numpy as np -from numba import njit, objmode +from numba import njit, objmode, prange from numba.typed import List as NumbaList from scipy.signal import correlate from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -113,10 +114,12 @@ def sbd_distance(x: np.ndarray, y: np.ndarray, standardize: bool = True) -> floa raise ValueError("x and y must be 1D or 2D") +@threaded def sbd_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, standardize: bool = True, + n_jobs: int = 1, ) -> np.ndarray: """ Compute the shape-based distance (SBD) between all pairs of time series. @@ -138,6 +141,13 @@ def sbd_pairwise_distance( standardize : bool, default=True Apply z-score to both input time series for standardization before computing the distance. This makes SBD scaling invariant. Default is True. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. + + NOTE: For this distance function unless your data has a large number of time + points, it is recommended to use n_jobs=1. 
Returns ------- @@ -199,14 +209,14 @@ def sbd_pairwise_distance( return _sbd_pairwise_distance(_X, _y, standardize) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _sbd_pairwise_distance_single( x: NumbaList[np.ndarray], standardize: bool ) -> np.ndarray: n_cases = len(x) distances = np.zeros((n_cases, n_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): distances[i, j] = sbd_distance(x[i], x[j], standardize) distances[j, i] = distances[i, j] @@ -214,7 +224,7 @@ def _sbd_pairwise_distance_single( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _sbd_pairwise_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], standardize: bool ) -> np.ndarray: @@ -222,7 +232,7 @@ def _sbd_pairwise_distance( m_cases = len(y) distances = np.zeros((n_cases, m_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): distances[i, j] = sbd_distance(x[i], y[j], standardize) return distances diff --git a/aeon/distances/_shift_scale_invariant.py b/aeon/distances/_shift_scale_invariant.py index 951b7ac560..83cc1014b0 100644 --- a/aeon/distances/_shift_scale_invariant.py +++ b/aeon/distances/_shift_scale_invariant.py @@ -3,10 +3,11 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -156,10 +157,12 @@ def _univariate_shift_scale_invariant_distance( return min_dist, best_shifted_y +@threaded def shift_scale_invariant_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, max_shift: Optional[int] = None, + n_jobs: int = 1, ) -> np.ndarray: r"""Compute the shift-scale invariant pairwise distance between time series. @@ -193,6 +196,10 @@ def shift_scale_invariant_pairwise_distance( Maximum shift allowed in the alignment path. If None, then max_shift is set to min(X.shape[-1], y.shape[-1]) or if y is None, max_shift is set to X.shape[-1]. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. 
Returns ------- @@ -308,7 +315,7 @@ def shift_scale_invariant_best_shift( raise ValueError("x and y must be 1D or 2D") -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _shift_invariant_pairwise_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], max_shift: int ) -> np.ndarray: @@ -316,7 +323,7 @@ def _shift_invariant_pairwise_distance( m_cases = len(y) distances = np.zeros((n_cases, m_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): distances[i, j] = shift_scale_invariant_distance(x[i], y[j], max_shift) return distances diff --git a/aeon/distances/elastic/_adtw.py b/aeon/distances/elastic/_adtw.py index 36a9bec08f..6ae83db1f0 100644 --- a/aeon/distances/elastic/_adtw.py +++ b/aeon/distances/elastic/_adtw.py @@ -5,13 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -197,12 +198,14 @@ def _adtw_cost_matrix( return cost_matrix[1:, 1:] +@threaded def adtw_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, window: Optional[float] = None, itakura_max_slope: Optional[float] = None, warp_penalty: float = 1.0, + n_jobs: int = 1, ) -> np.ndarray: r"""Compute the ADTW pairwise distance between a set of time series. @@ -226,6 +229,10 @@ def adtw_pairwise_distance( Penalty for warping. A high value will mean less warping. warp less and if value is low then will encourage algorithm to warp more. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. 
Returns ------- @@ -290,7 +297,7 @@ def adtw_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _adtw_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -306,7 +313,7 @@ def _adtw_pairwise_distance( bounding_matrix = create_bounding_matrix( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X[i], X[j] if unequal_length: @@ -319,7 +326,7 @@ def _adtw_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _adtw_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -336,7 +343,7 @@ def _adtw_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x[i], y[j] if unequal_length: diff --git a/aeon/distances/elastic/_ddtw.py b/aeon/distances/elastic/_ddtw.py index 2566686318..1c975e982c 100644 --- a/aeon/distances/elastic/_ddtw.py +++ b/aeon/distances/elastic/_ddtw.py @@ -5,7 +5,7 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path @@ -15,6 +15,7 @@ create_bounding_matrix, ) from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.numba.general import slope_derivative_2d from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -168,11 +169,13 @@ def ddtw_cost_matrix( raise ValueError("x and y must be 1D or 2D") +@threaded def ddtw_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, window: Optional[float] = None, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, ) -> np.ndarray: """Compute the DDTW pairwise distance between a set of time series. @@ -192,6 +195,10 @@ def ddtw_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. 
Returns ------- @@ -255,7 +262,7 @@ def ddtw_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _ddtw_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -275,7 +282,7 @@ def _ddtw_pairwise_distance( for i in range(n_cases): X_average_of_slope.append(slope_derivative_2d(X[i])) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X_average_of_slope[i], X_average_of_slope[j] if unequal_length: @@ -288,7 +295,7 @@ def _ddtw_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _ddtw_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -314,7 +321,7 @@ def _ddtw_from_multiple_to_multiple_distance( for i in range(m_cases): y_average_of_slope.append(slope_derivative_2d(y[i])) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x_average_of_slope[i], y_average_of_slope[j] if unequal_length: diff --git a/aeon/distances/elastic/_dtw.py b/aeon/distances/elastic/_dtw.py index 32e45f218d..7aefbde4a7 100644 --- a/aeon/distances/elastic/_dtw.py +++ b/aeon/distances/elastic/_dtw.py @@ -5,13 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -228,11 +229,13 @@ def _dtw_cost_matrix( return cost_matrix[1:, 1:] +@threaded def dtw_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, window: Optional[float] = None, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, ) -> np.ndarray: r"""Compute the DTW pairwise distance between a set of time series. @@ -268,6 +271,10 @@ def dtw_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. 
Returns ------- @@ -330,7 +337,7 @@ def dtw_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _dtw_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -345,7 +352,7 @@ def _dtw_pairwise_distance( bounding_matrix = create_bounding_matrix( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X[i], X[j] if unequal_length: @@ -358,7 +365,7 @@ def _dtw_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _dtw_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -374,7 +381,7 @@ def _dtw_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x[i], y[j] if unequal_length: diff --git a/aeon/distances/elastic/_dtw_gi.py b/aeon/distances/elastic/_dtw_gi.py index f757eaa202..5b29a40e6c 100644 --- a/aeon/distances/elastic/_dtw_gi.py +++ b/aeon/distances/elastic/_dtw_gi.py @@ -5,11 +5,12 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._dtw import dtw_alignment_path, dtw_cost_matrix from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -323,6 +324,7 @@ def dtw_gi_cost_matrix( return dtw_cost_matrix(xnew, y_trans, window, itakura_max_slope) +@threaded def dtw_gi_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -332,6 +334,7 @@ def dtw_gi_pairwise_distance( init_p: np.ndarray = None, max_iter: int = 20, use_bias: bool = False, + n_jobs: int = 1, ) -> np.ndarray: r"""Compute the DTW_GI pairwise distance between a set of time series. @@ -373,6 +376,10 @@ def dtw_gi_pairwise_distance( Maximum number of iterations for the iterative optimization. use_bias : bool, default=False If True, the feature space map is affine (with a bias term). + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. 
Returns ------- @@ -452,7 +459,7 @@ def _dtw_gi_from_multiple_to_multiple_distance( m_cases = len(y) distances = np.zeros((n_cases, m_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x[i], y[j] distances[i, j] = dtw_gi_distance( @@ -474,7 +481,7 @@ def _dtw_gi_pairwise_distance( n_cases = len(X) distances = np.zeros((n_cases, n_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X[i], X[j] distances[i, j] = dtw_gi_distance( diff --git a/aeon/distances/elastic/_edr.py b/aeon/distances/elastic/_edr.py index e14996ef7a..84335cf0da 100644 --- a/aeon/distances/elastic/_edr.py +++ b/aeon/distances/elastic/_edr.py @@ -5,13 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._euclidean import _univariate_euclidean_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -229,12 +230,14 @@ def _edr_cost_matrix( return cost_matrix[1:, 1:] +@threaded def edr_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, window: Optional[float] = None, epsilon: Optional[float] = None, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, ) -> np.ndarray: """Compute the pairwise EDR distance between a set of time series. @@ -258,6 +261,10 @@ def edr_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. 
Returns ------- @@ -322,7 +329,7 @@ def edr_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _edr_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -338,7 +345,7 @@ def _edr_pairwise_distance( bounding_matrix = create_bounding_matrix( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X[i], X[j] if unequal_length: @@ -351,7 +358,7 @@ def _edr_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _edr_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -368,7 +375,7 @@ def _edr_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x[i], y[j] if unequal_length: diff --git a/aeon/distances/elastic/_erp.py b/aeon/distances/elastic/_erp.py index 0c126a4382..b2ec9b898d 100644 --- a/aeon/distances/elastic/_erp.py +++ b/aeon/distances/elastic/_erp.py @@ -5,13 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._euclidean import _univariate_euclidean_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -248,6 +249,7 @@ def _precompute_g( return gx_distance, x_sum +@threaded def erp_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -255,6 +257,7 @@ def erp_pairwise_distance( g: float = 0.0, g_arr: Optional[np.ndarray] = None, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, ) -> np.ndarray: """Compute the ERP pairwise distance between a set of time series. @@ -283,6 +286,10 @@ def erp_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. 
Returns ------- @@ -343,7 +350,7 @@ def erp_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _erp_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -361,7 +368,7 @@ def _erp_pairwise_distance( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X[i], X[j] if unequal_length: @@ -374,7 +381,7 @@ def _erp_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _erp_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -392,7 +399,7 @@ def _erp_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x[i], y[j] if unequal_length: diff --git a/aeon/distances/elastic/_lcss.py b/aeon/distances/elastic/_lcss.py index 23e1eb9fe2..65042a64a7 100644 --- a/aeon/distances/elastic/_lcss.py +++ b/aeon/distances/elastic/_lcss.py @@ -5,13 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_lcss_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._euclidean import _univariate_euclidean_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -222,12 +223,14 @@ def _lcss_cost_matrix( return cost_matrix +@threaded def lcss_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, window: Optional[float] = None, epsilon: float = 1.0, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, ) -> np.ndarray: """Compute the LCSS pairwise distance between a set of time series. @@ -250,6 +253,10 @@ def lcss_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. 
Returns ------- @@ -312,7 +319,7 @@ def lcss_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _lcss_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -327,7 +334,7 @@ def _lcss_pairwise_distance( bounding_matrix = create_bounding_matrix( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X[i], X[j] if unequal_length: @@ -340,7 +347,7 @@ def _lcss_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _lcss_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -357,7 +364,7 @@ def _lcss_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x[i], y[j] if unequal_length: diff --git a/aeon/distances/elastic/_msm.py b/aeon/distances/elastic/_msm.py index a2c12790fd..b4d2417aa0 100644 --- a/aeon/distances/elastic/_msm.py +++ b/aeon/distances/elastic/_msm.py @@ -5,13 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -271,7 +272,7 @@ def _independent_cost_matrix( for i in range(1, y_size): if bounding_matrix[0, i]: - cost = _cost_independent(y[i], y[i - 1], x[0], c) + cost = _cost_independent(y[i], x[0], y[i - 1], c) cost_matrix[0][i] = cost_matrix[0][i - 1] + cost for i in range(1, x_size): @@ -301,7 +302,7 @@ def _msm_dependent_cost_matrix( cost_matrix[i][0] = cost_matrix[i - 1][0] + cost for i in range(1, y_size): if bounding_matrix[0, i]: - cost = _cost_dependent(y[:, i], y[:, i - 1], x[:, 0], c) + cost = _cost_dependent(y[:, i], x[:, 0], y[:, i - 1], c) cost_matrix[0][i] = cost_matrix[0][i - 1] + cost for i in range(1, x_size): @@ -343,6 +344,7 @@ def _cost_independent(x: float, y: float, z: float, c: float) -> float: return c + min(abs(x - y), abs(x - z)) +@threaded def msm_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -350,6 +352,7 @@ def msm_pairwise_distance( independent: bool = True, c: float = 1.0, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, ) -> np.ndarray: """Compute the msm pairwise distance between a set of time series. @@ -374,6 +377,10 @@ def msm_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. 
Returns ------- @@ -438,7 +445,7 @@ def msm_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _msm_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -455,7 +462,7 @@ def _msm_pairwise_distance( bounding_matrix = create_bounding_matrix( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X[i], X[j] if unequal_length: @@ -468,7 +475,7 @@ def _msm_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _msm_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -486,7 +493,7 @@ def _msm_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x[i], y[j] if unequal_length: diff --git a/aeon/distances/elastic/_shape_dtw.py b/aeon/distances/elastic/_shape_dtw.py index c94e5274c3..022fa5232a 100644 --- a/aeon/distances/elastic/_shape_dtw.py +++ b/aeon/distances/elastic/_shape_dtw.py @@ -5,7 +5,7 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path @@ -13,6 +13,7 @@ from aeon.distances.elastic._dtw import _dtw_cost_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -515,6 +516,7 @@ def shape_dtw_alignment_path( return (compute_min_return_path(cost_matrix), shapedtw_dist) +@threaded def shape_dtw_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -525,6 +527,7 @@ def shape_dtw_pairwise_distance( transformation_precomputed: bool = False, transformed_x: Optional[np.ndarray] = None, transformed_y: Optional[np.ndarray] = None, + n_jobs: int = 1, ) -> np.ndarray: """Compute the ShapeDTW pairwise distance among a set of series. @@ -563,6 +566,10 @@ def shape_dtw_pairwise_distance( The transformation of X, ignored if transformation_precomputed is False. transformed_y : np.ndarray, default = None The transformation of y, ignored if transformation_precomputed is False. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. 
Returns ------- @@ -644,7 +651,7 @@ def shape_dtw_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _shape_dtw_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -663,7 +670,7 @@ def _shape_dtw_pairwise_distance( bounding_matrix = create_bounding_matrix( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(len(X)): + for i in prange(len(X)): for j in range(i + 1, n_cases): x1_, x2_ = X[i], X[j] x1 = _pad_ts_edges(x=x1_, reach=reach) @@ -695,7 +702,7 @@ def _shape_dtw_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _shape_dtw_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -716,7 +723,7 @@ def _shape_dtw_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1_, y1_ = x[i], y[j] x1 = _pad_ts_edges(x=x1_, reach=reach) diff --git a/aeon/distances/elastic/_soft_dtw.py b/aeon/distances/elastic/_soft_dtw.py index 4cbbf2507b..efac0ff908 100644 --- a/aeon/distances/elastic/_soft_dtw.py +++ b/aeon/distances/elastic/_soft_dtw.py @@ -5,7 +5,7 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path @@ -13,6 +13,7 @@ from aeon.distances.elastic._dtw import _dtw_cost_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -243,12 +244,14 @@ def _soft_dtw_cost_matrix( return cost_matrix[1:, 1:] +@threaded def soft_dtw_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, gamma: float = 1.0, window: Optional[float] = None, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, ) -> np.ndarray: r"""Compute the soft-DTW pairwise distance between a set of time series. @@ -270,6 +273,10 @@ def soft_dtw_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. 
Returns ------- @@ -334,7 +341,7 @@ def soft_dtw_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _soft_dtw_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -350,7 +357,7 @@ def _soft_dtw_pairwise_distance( bounding_matrix = create_bounding_matrix( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X[i], X[j] if unequal_length: @@ -363,7 +370,7 @@ def _soft_dtw_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _soft_dtw_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -380,7 +387,7 @@ def _soft_dtw_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x[i], y[j] if unequal_length: diff --git a/aeon/distances/elastic/_twe.py b/aeon/distances/elastic/_twe.py index 41e747d3b3..d3c7448403 100644 --- a/aeon/distances/elastic/_twe.py +++ b/aeon/distances/elastic/_twe.py @@ -5,13 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._euclidean import _univariate_euclidean_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -243,6 +244,7 @@ def _pad_arrs(x: np.ndarray) -> np.ndarray: return padded_x +@threaded def twe_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, @@ -250,6 +252,7 @@ def twe_pairwise_distance( nu: float = 0.001, lmbda: float = 1.0, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, ) -> np.ndarray: """Compute the TWE pairwise distance between a set of time series. @@ -274,6 +277,10 @@ def twe_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. 
Returns ------- @@ -336,7 +343,7 @@ def twe_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _twe_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -359,7 +366,7 @@ def _twe_pairwise_distance( for i in range(n_cases): padded_X.append(_pad_arrs(X[i])) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = padded_X[i], padded_X[j] if unequal_length: @@ -372,7 +379,7 @@ def _twe_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _twe_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -399,7 +406,7 @@ def _twe_from_multiple_to_multiple_distance( for i in range(m_cases): padded_y.append(_pad_arrs(y[i])) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = padded_x[i], padded_y[j] if unequal_length: diff --git a/aeon/distances/elastic/_wddtw.py b/aeon/distances/elastic/_wddtw.py index 52e7dd60e4..829fdabc68 100644 --- a/aeon/distances/elastic/_wddtw.py +++ b/aeon/distances/elastic/_wddtw.py @@ -5,13 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.elastic._wdtw import _wdtw_cost_matrix, _wdtw_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.numba.general import slope_derivative_2d from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -171,12 +172,14 @@ def wddtw_cost_matrix( raise ValueError("x and y must be 1D or 2D") +@threaded def wddtw_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, window: Optional[float] = None, g: float = 0.05, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, ) -> np.ndarray: """Compute the WDDTW pairwise distance between a set of time series. @@ -199,6 +202,10 @@ def wddtw_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. 
Raises ------ @@ -258,7 +265,7 @@ def wddtw_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _wddtw_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -278,7 +285,7 @@ def _wddtw_pairwise_distance( for i in range(n_cases): X_average_of_slope.append(slope_derivative_2d(X[i])) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X_average_of_slope[i], X_average_of_slope[j] if unequal_length: @@ -291,7 +298,7 @@ def _wddtw_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _wddtw_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -318,7 +325,7 @@ def _wddtw_from_multiple_to_multiple_distance( for i in range(m_cases): y_average_of_slope.append(slope_derivative_2d(y[i])) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x_average_of_slope[i], y_average_of_slope[j] if unequal_length: diff --git a/aeon/distances/elastic/_wdtw.py b/aeon/distances/elastic/_wdtw.py index d345395383..75677b5aa7 100644 --- a/aeon/distances/elastic/_wdtw.py +++ b/aeon/distances/elastic/_wdtw.py @@ -5,13 +5,14 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.elastic._alignment_paths import compute_min_return_path from aeon.distances.elastic._bounding_matrix import create_bounding_matrix from aeon.distances.pointwise._squared import _univariate_squared_distance from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -235,12 +236,14 @@ def _wdtw_cost_matrix( return cost_matrix[1:, 1:] +@threaded def wdtw_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, window: Optional[float] = None, g: float = 0.05, itakura_max_slope: Optional[float] = None, + n_jobs: int = 1, ) -> np.ndarray: """Compute the WDTW pairwise distance between a set of time series. @@ -263,6 +266,10 @@ def wdtw_pairwise_distance( itakura_max_slope : float, default=None Maximum slope as a proportion of the number of time points used to create Itakura parallelogram on the bounding matrix. Must be between 0. and 1. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. 
Returns ------- @@ -324,7 +331,7 @@ def wdtw_pairwise_distance( ) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _wdtw_pairwise_distance( X: NumbaList[np.ndarray], window: Optional[float], @@ -340,7 +347,7 @@ def _wdtw_pairwise_distance( bounding_matrix = create_bounding_matrix( n_timepoints, n_timepoints, window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): x1, x2 = X[i], X[j] if unequal_length: @@ -353,7 +360,7 @@ def _wdtw_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _wdtw_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -370,7 +377,7 @@ def _wdtw_from_multiple_to_multiple_distance( bounding_matrix = create_bounding_matrix( x[0].shape[1], y[0].shape[1], window, itakura_max_slope ) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): x1, y1 = x[i], y[j] if unequal_length: diff --git a/aeon/distances/mindist/_dft_sfa.py b/aeon/distances/mindist/_dft_sfa.py index d251023ddb..07d291ff14 100644 --- a/aeon/distances/mindist/_dft_sfa.py +++ b/aeon/distances/mindist/_dft_sfa.py @@ -6,6 +6,7 @@ from numba import njit, prange from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -89,8 +90,9 @@ def _univariate_dft_sfa_distance( return np.sqrt(2 * dist) +@threaded def mindist_dft_sfa_pairwise_distance( - X: np.ndarray, y: np.ndarray, breakpoints: np.ndarray + X: np.ndarray, y: np.ndarray, breakpoints: np.ndarray, n_jobs: int = 1 ) -> np.ndarray: """Compute the DFT SFA pairwise distance between a set of SFA representations. @@ -102,6 +104,10 @@ def mindist_dft_sfa_pairwise_distance( A collection of SFA instances of shape ``(n_instances, n_timepoints)``. breakpoints: np.ndarray The breakpoints of the SAX transformation + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. 
Returns ------- @@ -136,7 +142,7 @@ def _dft_sfa_from_multiple_to_multiple_distance( distances = np.zeros((n_instances, n_instances)) for i in prange(n_instances): - for j in prange(i + 1, n_instances): + for j in range(i + 1, n_instances): distances[i, j] = _univariate_dft_sfa_distance(X[i], X[j], breakpoints) distances[j, i] = distances[i, j] else: @@ -145,7 +151,7 @@ def _dft_sfa_from_multiple_to_multiple_distance( distances = np.zeros((n_instances, m_instances)) for i in prange(n_instances): - for j in prange(m_instances): + for j in range(m_instances): distances[i, j] = _univariate_dft_sfa_distance(X[i], y[j], breakpoints) return distances diff --git a/aeon/distances/mindist/_paa_sax.py b/aeon/distances/mindist/_paa_sax.py index a53a8b35aa..2049c45fac 100644 --- a/aeon/distances/mindist/_paa_sax.py +++ b/aeon/distances/mindist/_paa_sax.py @@ -4,6 +4,7 @@ from numba import njit, prange from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -90,8 +91,13 @@ def _univariate_paa_sax_distance( return np.sqrt(dist) +@threaded def mindist_paa_sax_pairwise_distance( - X: np.ndarray, y: np.ndarray, breakpoints: np.ndarray, n: int + X: np.ndarray, + y: np.ndarray, + breakpoints: np.ndarray, + n: int, + n_jobs: int = 1, ) -> np.ndarray: """Compute the PAA SAX pairwise distance between a set of SAX representations. @@ -105,6 +111,10 @@ def mindist_paa_sax_pairwise_distance( The breakpoints of the SAX transformation n : int The original size of the time series + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -139,7 +149,7 @@ def _paa_sax_from_multiple_to_multiple_distance( distances = np.zeros((n_instances, n_instances)) for i in prange(n_instances): - for j in prange(i + 1, n_instances): + for j in range(i + 1, n_instances): distances[i, j] = _univariate_paa_sax_distance( X[i], X[j], breakpoints, n ) @@ -150,7 +160,7 @@ def _paa_sax_from_multiple_to_multiple_distance( distances = np.zeros((n_instances, m_instances)) for i in prange(n_instances): - for j in prange(m_instances): + for j in range(m_instances): distances[i, j] = _univariate_paa_sax_distance( X[i], y[j], breakpoints, n ) diff --git a/aeon/distances/mindist/_sax.py b/aeon/distances/mindist/_sax.py index cdecfb2ebc..fbfe9eca36 100644 --- a/aeon/distances/mindist/_sax.py +++ b/aeon/distances/mindist/_sax.py @@ -6,6 +6,7 @@ from numba import njit, prange from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -84,8 +85,13 @@ def _univariate_sax_distance( return np.sqrt(dist) +@threaded def mindist_sax_pairwise_distance( - X: np.ndarray, y: np.ndarray, breakpoints: np.ndarray, n: int + X: np.ndarray, + y: np.ndarray, + breakpoints: np.ndarray, + n: int, + n_jobs: int = 1, ) -> np.ndarray: """Compute the SAX pairwise distance between a set of SAX representations. @@ -99,6 +105,10 @@ def mindist_sax_pairwise_distance( The breakpoints of the SAX transformation n : int The original size of the time series + n_jobs : int, default=1 + The number of jobs to run in parallel. 
If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. Returns ------- @@ -134,7 +144,7 @@ def _sax_from_multiple_to_multiple_distance( distances = np.zeros((n_instances, n_instances)) for i in prange(n_instances): - for j in prange(i + 1, n_instances): + for j in range(i + 1, n_instances): distances[i, j] = _univariate_sax_distance(X[i], X[j], breakpoints, n) distances[j, i] = distances[i, j] else: @@ -143,7 +153,7 @@ def _sax_from_multiple_to_multiple_distance( distances = np.zeros((n_instances, m_instances)) for i in prange(n_instances): - for j in prange(m_instances): + for j in range(m_instances): distances[i, j] = _univariate_sax_distance(X[i], y[j], breakpoints, n) return distances diff --git a/aeon/distances/mindist/_sfa.py b/aeon/distances/mindist/_sfa.py index de348e6ea2..4c67d05ecb 100644 --- a/aeon/distances/mindist/_sfa.py +++ b/aeon/distances/mindist/_sfa.py @@ -6,6 +6,7 @@ from numba import njit, prange from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -80,8 +81,9 @@ def _univariate_sfa_distance( return np.sqrt(2 * dist) +@threaded def mindist_sfa_pairwise_distance( - X: np.ndarray, y: np.ndarray, breakpoints: np.ndarray + X: np.ndarray, y: np.ndarray, breakpoints: np.ndarray, n_jobs: int = 1 ) -> np.ndarray: """Compute the SFA mindist pairwise distance between a set of SFA representations. @@ -93,6 +95,10 @@ def mindist_sfa_pairwise_distance( A collection of SFA instances of shape ``(n_instances, n_timepoints)``. breakpoints: np.ndarray The breakpoints of the SAX transformation + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. 
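Note the direction of the mindist changes above: the inner loops go from ``prange`` back to ``range``. With ``parallel=True`` numba parallelises the outermost ``prange`` and treats nested ones as ordinary serial loops, so ``range`` simply states what already happens. The pattern, sketched on a plain squared-Euclidean kernel for illustration:

import numpy as np
from numba import njit, prange


@njit(cache=True, parallel=True)
def pairwise_sketch(X):  # X: (n_cases, n_timepoints)
    n = X.shape[0]
    D = np.zeros((n, n))
    for i in prange(n):  # parallel over rows
        for j in range(i + 1, n):  # serial within a row
            d = np.sum((X[i] - X[j]) ** 2)
            # each (i, j) pair is owned by exactly one thread, so the
            # symmetric write is race-free
            D[i, j] = d
            D[j, i] = d
    return D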
Returns ------- @@ -128,7 +134,7 @@ def _sfa_from_multiple_to_multiple_distance( distances = np.zeros((n_instances, n_instances)) for i in prange(n_instances): - for j in prange(i + 1, n_instances): + for j in range(i + 1, n_instances): distances[i, j] = _univariate_sfa_distance(X[i], X[j], breakpoints) distances[j, i] = distances[i, j] else: @@ -137,7 +143,7 @@ def _sfa_from_multiple_to_multiple_distance( distances = np.zeros((n_instances, m_instances)) for i in prange(n_instances): - for j in prange(m_instances): + for j in range(m_instances): distances[i, j] = _univariate_sfa_distance(X[i], y[j], breakpoints) return distances diff --git a/aeon/distances/pointwise/_euclidean.py b/aeon/distances/pointwise/_euclidean.py index f7f0a640d4..58696a4b57 100644 --- a/aeon/distances/pointwise/_euclidean.py +++ b/aeon/distances/pointwise/_euclidean.py @@ -3,7 +3,7 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.distances.pointwise._squared import ( @@ -11,6 +11,7 @@ squared_distance, ) from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -69,9 +70,11 @@ def _univariate_euclidean_distance(x: np.ndarray, y: np.ndarray) -> float: return np.sqrt(_univariate_squared_distance(x, y)) +@threaded def euclidean_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, + n_jobs: int = 1, ) -> np.ndarray: """Compute the Euclidean pairwise distance between a set of time series. @@ -85,6 +88,13 @@ def euclidean_pairwise_distance( ``(m_cases, m_timepoints)`` or ``(m_cases, m_channels, m_timepoints)``. If None, then the euclidean pairwise distance between the instances of X is calculated. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. + + NOTE: For this distance function unless your data has a large number of time + points, it is recommended to use n_jobs=1. 
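The note above is easy to verify empirically; a rough timing sketch (results are machine-dependent and the sizes are arbitrary):

import timeit

import numpy as np

from aeon.distances import euclidean_pairwise_distance

X = np.random.default_rng(1).random((50, 1, 64))
euclidean_pairwise_distance(X)  # warm up: trigger numba compilation first

t1 = timeit.timeit(lambda: euclidean_pairwise_distance(X, n_jobs=1), number=50)
t2 = timeit.timeit(lambda: euclidean_pairwise_distance(X, n_jobs=-1), number=50)
print(f"n_jobs=1: {t1:.3f}s  n_jobs=-1: {t2:.3f}s")  # serial often wins here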
Returns ------- @@ -138,12 +148,12 @@ def euclidean_pairwise_distance( return _euclidean_from_multiple_to_multiple_distance(_X, _y) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _euclidean_pairwise_distance(X: NumbaList[np.ndarray]) -> np.ndarray: n_cases = len(X) distances = np.zeros((n_cases, n_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): distances[i, j] = euclidean_distance(X[i], X[j]) distances[j, i] = distances[i, j] @@ -151,7 +161,7 @@ def _euclidean_pairwise_distance(X: NumbaList[np.ndarray]) -> np.ndarray: return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _euclidean_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray] ) -> np.ndarray: @@ -159,7 +169,7 @@ def _euclidean_from_multiple_to_multiple_distance( m_cases = len(y) distances = np.zeros((n_cases, m_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): distances[i, j] = euclidean_distance(x[i], y[j]) return distances diff --git a/aeon/distances/pointwise/_manhattan.py b/aeon/distances/pointwise/_manhattan.py index 5c4a80e7a2..1f5febdaf7 100644 --- a/aeon/distances/pointwise/_manhattan.py +++ b/aeon/distances/pointwise/_manhattan.py @@ -3,10 +3,11 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -74,9 +75,11 @@ def _univariate_manhattan_distance(x: np.ndarray, y: np.ndarray) -> float: return distance +@threaded def manhattan_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, + n_jobs: int = 1, ) -> np.ndarray: """Compute the manhattan pairwise distance between a set of time series. @@ -90,6 +93,13 @@ def manhattan_pairwise_distance( ``(m_cases, m_timepoints)`` or ``(m_cases, m_channels, m_timepoints)``. If None, then the manhattan pairwise distance between the instances of X is calculated. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. + + NOTE: For this distance function unless your data has a large number of time + points, it is recommended to use n_jobs=1. 
Returns ------- @@ -142,12 +152,12 @@ def manhattan_pairwise_distance( return _manhattan_from_multiple_to_multiple_distance(_X, _y) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _manhattan_pairwise_distance(X: NumbaList[np.ndarray]) -> np.ndarray: n_cases = len(X) distances = np.zeros((n_cases, n_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): distances[i, j] = manhattan_distance(X[i], X[j]) distances[j, i] = distances[i, j] @@ -155,7 +165,7 @@ def _manhattan_pairwise_distance(X: NumbaList[np.ndarray]) -> np.ndarray: return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _manhattan_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray] ) -> np.ndarray: @@ -163,7 +173,7 @@ def _manhattan_from_multiple_to_multiple_distance( m_cases = len(y) distances = np.zeros((n_cases, m_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): distances[i, j] = manhattan_distance(x[i], y[j]) return distances diff --git a/aeon/distances/pointwise/_minkowski.py b/aeon/distances/pointwise/_minkowski.py index d25b504403..224ab71f4a 100644 --- a/aeon/distances/pointwise/_minkowski.py +++ b/aeon/distances/pointwise/_minkowski.py @@ -3,10 +3,11 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -126,11 +127,13 @@ def _multivariate_minkowski_distance( return dist ** (1.0 / p) +@threaded def minkowski_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, p: float = 2.0, w: Optional[np.ndarray] = None, + n_jobs: int = 1, ) -> np.ndarray: """Compute the Minkowski pairwise distance between a set of time series. @@ -150,6 +153,10 @@ def minkowski_pairwise_distance( w : np.ndarray, default=None An array of weights, applied to each pairwise calculation. The weights should match the shape of the time series in X and y. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. 
Returns ------- @@ -211,14 +218,14 @@ def minkowski_pairwise_distance( return _minkowski_from_multiple_to_multiple_distance(_X, _y, p, w) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _minkowski_pairwise_distance( X: NumbaList[np.ndarray], p: float, w: Optional[np.ndarray] = None ) -> np.ndarray: n_cases = len(X) distances = np.zeros((n_cases, n_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): if w is None: distances[i, j] = minkowski_distance(X[i], X[j], p) @@ -232,7 +239,7 @@ def _minkowski_pairwise_distance( return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _minkowski_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray], @@ -243,7 +250,7 @@ def _minkowski_from_multiple_to_multiple_distance( m_cases = len(y) distances = np.zeros((n_cases, m_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): if w is None: distances[i, j] = minkowski_distance(x[i], y[j], p) diff --git a/aeon/distances/pointwise/_squared.py b/aeon/distances/pointwise/_squared.py index 045466ef51..273cb1239e 100644 --- a/aeon/distances/pointwise/_squared.py +++ b/aeon/distances/pointwise/_squared.py @@ -3,10 +3,11 @@ from typing import Optional, Union import numpy as np -from numba import njit +from numba import njit, prange from numba.typed import List as NumbaList from aeon.utils.conversion._convert_collection import _convert_collection_to_numba_list +from aeon.utils.numba._threading import threaded from aeon.utils.validation.collection import _is_numpy_list_multivariate @@ -73,9 +74,11 @@ def _univariate_squared_distance(x: np.ndarray, y: np.ndarray) -> float: return distance +@threaded def squared_pairwise_distance( X: Union[np.ndarray, list[np.ndarray]], y: Optional[Union[np.ndarray, list[np.ndarray]]] = None, + n_jobs: int = 1, ) -> np.ndarray: """Compute the squared pairwise distance between a set of time series. @@ -89,6 +92,13 @@ def squared_pairwise_distance( ``(m_cases, m_timepoints)`` or ``(m_cases, m_channels, m_timepoints)``. If None, then the squared pairwise distance between the instances of X is calculated. + n_jobs : int, default=1 + The number of jobs to run in parallel. If -1, then the number of jobs is set + to the number of CPU cores. If 1, then the function is executed in a single + thread. If greater than 1, then the function is executed in parallel. + + NOTE: For this distance function unless your data has a large number of time + points, it is recommended to use n_jobs=1. 
Returns ------- @@ -143,12 +153,12 @@ def squared_pairwise_distance( return _squared_from_multiple_to_multiple_distance(_X, _y) -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _squared_pairwise_distance(X: NumbaList[np.ndarray]) -> np.ndarray: n_cases = len(X) distances = np.zeros((n_cases, n_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(i + 1, n_cases): distances[i, j] = squared_distance(X[i], X[j]) distances[j, i] = distances[i, j] @@ -156,7 +166,7 @@ def _squared_pairwise_distance(X: NumbaList[np.ndarray]) -> np.ndarray: return distances -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def _squared_from_multiple_to_multiple_distance( x: NumbaList[np.ndarray], y: NumbaList[np.ndarray] ) -> np.ndarray: @@ -164,7 +174,7 @@ def _squared_from_multiple_to_multiple_distance( m_cases = len(y) distances = np.zeros((n_cases, m_cases)) - for i in range(n_cases): + for i in prange(n_cases): for j in range(m_cases): distances[i, j] = squared_distance(x[i], y[j]) return distances diff --git a/aeon/distances/tests/test_pairwise.py b/aeon/distances/tests/test_pairwise.py index 8f20670de9..9037186f67 100644 --- a/aeon/distances/tests/test_pairwise.py +++ b/aeon/distances/tests/test_pairwise.py @@ -20,6 +20,7 @@ make_example_3d_numpy, make_example_3d_numpy_list, ) +from aeon.testing.testing_config import MULTITHREAD_TESTING def _make_3d_series(x: np.ndarray) -> np.ndarray: @@ -562,3 +563,21 @@ def test_pairwise_distance_non_negative(dist, seed): pairwise = dist["pairwise_distance"] Xt2 = pairwise(X2, X) assert Xt2.min() >= 0, f"Distance {dist['name']} is negative" + + +@pytest.mark.skipif(not MULTITHREAD_TESTING, reason="Only run on multithread testing") +@pytest.mark.parametrize("dist", DISTANCES) +@pytest.mark.parametrize("n_jobs", [2, -1]) +def test_pairwise_distance_n_jobs_equals_serial(dist, n_jobs): + """Ensure parallel n_jobs yields same result as serial (n_jobs=1).""" + if dist["name"] in MIN_DISTANCES or dist["name"] in MP_DISTANCES: + return + X1 = make_example_2d_numpy_collection(5, 5, random_state=1, return_y=False) + X2 = make_example_3d_numpy(5, 3, 7, random_state=2, return_y=False) + + for X in (X1, X2): + serial = dist["pairwise_distance"](X, n_jobs=1) + parallel = dist["pairwise_distance"](X, n_jobs=n_jobs) + assert isinstance(parallel, np.ndarray) + assert serial.shape == parallel.shape + assert_almost_equal(serial, parallel) From a91611d84a66766959b0ae7ba3c137c756c0d0ad Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 7 Aug 2025 21:24:02 +0100 Subject: [PATCH 5/9] tests --- .../forecasting/{stats => machine_learning}/tests/test_setar.py | 2 +- aeon/forecasting/stats/__init__.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) rename aeon/forecasting/{stats => machine_learning}/tests/test_setar.py (98%) diff --git a/aeon/forecasting/stats/tests/test_setar.py b/aeon/forecasting/machine_learning/tests/test_setar.py similarity index 98% rename from aeon/forecasting/stats/tests/test_setar.py rename to aeon/forecasting/machine_learning/tests/test_setar.py index 1cd9b9713b..01ae18a7ea 100644 --- a/aeon/forecasting/stats/tests/test_setar.py +++ b/aeon/forecasting/machine_learning/tests/test_setar.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from aeon.forecasting.stats._setar import SETAR +from aeon.forecasting.machine_learning._setar import SETAR def test_constant_series_scalar_and_close(): diff --git a/aeon/forecasting/stats/__init__.py 
b/aeon/forecasting/stats/__init__.py index 985bc875bd..647b50555e 100644 --- a/aeon/forecasting/stats/__init__.py +++ b/aeon/forecasting/stats/__init__.py @@ -3,13 +3,11 @@ __all__ = [ "ARIMA", "ETS", - "SETAR", "TVP", "TAR", ] from aeon.forecasting.stats._arima import ARIMA from aeon.forecasting.stats._ets import ETS -from aeon.forecasting.stats._setar import SETAR from aeon.forecasting.stats._tar import TAR from aeon.forecasting.stats._tvp import TVP From ec9f36d83e07ff3491af3b39b29c195512268ac5 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Fri, 8 Aug 2025 10:07:02 +0100 Subject: [PATCH 6/9] notebook --- docs/api_reference/forecasting.rst | 3 ++- examples/forecasting/regression.ipynb | 21 +++++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/docs/api_reference/forecasting.rst b/docs/api_reference/forecasting.rst index c9e967b682..634c67ea13 100644 --- a/docs/api_reference/forecasting.rst +++ b/docs/api_reference/forecasting.rst @@ -35,4 +35,5 @@ Statistical Models ARIMA ETS - TVPForecaster + TVP + TAR diff --git a/examples/forecasting/regression.ipynb b/examples/forecasting/regression.ipynb index 35e3b847ba..c885458d6d 100644 --- a/examples/forecasting/regression.ipynb +++ b/examples/forecasting/regression.ipynb @@ -344,7 +344,14 @@ "source": [ "## Time-Varying Parameter (TVP) Forecaster\n", "\n", - "The `aeon` RegressionForecaster defaults to fits a standard linear regression on the windowed series using the sklearn `LinearRegression` estimator. This fits the linear regression parameters with the standard least squares algorithm. An alternative approach in the forecasting literature is to fit the linear regression parameters adaptively, so that more recent observations can have a greater weight in setting the parameters. One way of doing this is to use a Kalman filter. This considers the correlation between parameters in addition to the forecasting error. This is implemented in `aeon` as the `TVPForecaster`.\n" + "The `aeon` RegressionForecaster by default fits a standard linear regression on the \n", + "windowed series using the sklearn `LinearRegression` estimator. This fits the linear \n", + "regression parameters with the standard least squares algorithm. An alternative \n", + "approach in the forecasting literature is to fit the linear regression parameters \n", + "adaptively, so that more recent observations can have a greater weight in setting the\n", + " parameters. One way of doing this is to use a Kalman filter. This considers the \n", + " correlation between parameters in addition to the forecasting error. This is \n", + " implemented in `aeon` as the `TVP` forecaster.\n" ], "id": "2850a18027127db0" }, @@ -357,9 +364,9 @@ }, "cell_type": "code", "source": [ - "from aeon.forecasting.stats import TVPForecaster\n", + "from aeon.forecasting.stats import TVP\n", "\n", - "tvp = TVPForecaster(window=50)\n", + "tvp = TVP(window=50)\n", "tvp.fit(y_train)\n", "pred = tvp.predict(y_train)" ], @@ -438,9 +445,11 @@ "source": [ "TVP has two parameters that control the weight given to more recent observations. These are `var` and `coeff_var`. `var` represents the variation in the data. A small value of `var`, such as the default of 0.01 indicates there is less noise in the data and recent values will have a greater effect on parameter updates. It is a bit like the learning rate used in gradient descent or reinforcement learning. A large `var` value (e.g.
greater than 1.0) will mean new values will adjust the parameters less at each update.\n", "\n", - "The second parameter, `beta_var`, estimates the variation in parameters over time. It has a similar effect on updates to `var` in that it effects how much parameters are changed at each stage, but it is modelling a different type on uncertainty. `TVPRegression` maintains an estimate of the covariance of the parameters in `fit` and this is adjusted after each update and then `beta_var` is added to perturb the matrix. Small `var` means the Kalman weights.\n", + "The second parameter, `beta_var`, estimates the variation in parameters over time. It\n", + " has a similar effect on updates to `var` in that it affects how much parameters are \n", + " changed at each stage, but it is modelling a different type of uncertainty. `TVP` regression maintains an estimate of the covariance of the parameters in `fit` and this is adjusted after each update and then `beta_var` is added to perturb the matrix.\n", "\n", - "Add some examples to demonstrate the difference\n" + "\n" ], "id": "ca7da34828be1c2" }, @@ -453,7 +462,7 @@ }, "cell_type": "code", "source": [ - "tvp2 = TVPForecaster(var=1.0, beta_var=1.0, window=50)\n", + "tvp2 = TVP(var=1.0, beta_var=1.0, window=50)\n", "direct2 = tvp.direct_forecast(airline, prediction_horizon=12)\n", "plt.plot(\n", " np.arange(0, len(direct)),\n", From 986dead012969f14ca68327b5095656d5ad61cef Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 14 Aug 2025 19:14:25 +0100 Subject: [PATCH 7/9] tar and autotar --- aeon/forecasting/stats/__init__.py | 2 + aeon/forecasting/stats/_auto_tar.py | 531 +++++++++++++++++++++++ aeon/forecasting/stats/_tar.py | 410 ++++++++++++----- aeon/forecasting/stats/tests/test_tar.py | 359 +++++++++++++-- 4 files changed, 1145 insertions(+), 157 deletions(-) create mode 100644 aeon/forecasting/stats/_auto_tar.py diff --git a/aeon/forecasting/stats/__init__.py b/aeon/forecasting/stats/__init__.py index 647b50555e..eef07a2f73 100644 --- a/aeon/forecasting/stats/__init__.py +++ b/aeon/forecasting/stats/__init__.py @@ -5,9 +5,11 @@ "ETS", "TVP", "TAR", + "AutoTAR", ] from aeon.forecasting.stats._arima import ARIMA +from aeon.forecasting.stats._auto_tar import AutoTAR from aeon.forecasting.stats._ets import ETS from aeon.forecasting.stats._tar import TAR from aeon.forecasting.stats._tvp import TVP diff --git a/aeon/forecasting/stats/_auto_tar.py b/aeon/forecasting/stats/_auto_tar.py new file mode 100644 index 0000000000..3f9c1360fa --- /dev/null +++ b/aeon/forecasting/stats/_auto_tar.py @@ -0,0 +1,531 @@ +from __future__ import annotations + +from collections.abc import Iterable + +import numpy as np +from numba import njit + +from aeon.forecasting.base import BaseForecaster, IterativeForecastingMixin + +# Import all shared helpers from TAR (adjust import path if not packaged together) +# If these files live side-by-side, `from _tar import ...` also works. +from aeon.forecasting.stats._tar import ( + _aic_value, + _numba_predict, + _ols_fit_with_rss, + _prepare_design, + _subset_rows_cols, + _subset_target, +) + + +class AutoTAR(BaseForecaster, IterativeForecastingMixin): + r"""Threshold Autoregressive (AutoTAR) forecaster with fast threshold search. + + AutoTAR [1] assumes two regimes, depending on whether the threshold variable :math:`z_t = y_{t-d}` + is **at or below** a threshold :math:`r` (below/left regime) or **above** it + (above/right regime). The model fits separate AR(p) processes to each regime + by OLS.
If not fixed by the user, the AR orders, delay, and threshold are + chosen by grid search with model selection by **AIC**. By convention, ties + are split as: below/left if :math:`z_t \le r`, above/right if :math:`z_t > r`. + + Parameters + ---------- + threshold : float | None, default=None + Fixed threshold :math:`r`. If ``None``, search over trimmed quantile candidates. + delay : int | None, default=None + Threshold delay :math:`d`. If ``None``, search over ``1..max_delay``. + ar_order : int | tuple[int, int] | None, default=None + If ``int``, same order in both regimes. If tuple, fixed ``(p_below, p_above)``. + If ``None``, search both in ``0..max_order``. + max_order : int, default=3 + Upper bound for per-regime order search when ``ar_order`` is ``None``. + max_delay : int, default=3 + Upper bound for delay search when ``delay`` is ``None``. + threshold_trim : float in [0, 0.5), default=0.15 + Fraction trimmed from each tail of the threshold variable distribution. + min_regime_frac : float in (0, 0.5], default=0.10 + Minimum fraction of rows required in each regime to accept a split. + min_points_offset : int, default=0 + Additional absolute minimum observations required in each regime. + max_threshold_candidates : int | None, default=100 + Maximum number of quantile-like candidates per (p_below,p_above,d). + If ``None``, uses all indices inside the trimmed region. + + Attributes + ---------- + threshold_ : float + Selected/used threshold. + delay_ : int + Selected/used delay. + p_below_ : int + AR order in the below/left regime. + p_above_ : int + AR order in the above/right regime. + intercept_below_, coef_below_ : float, np.ndarray + OLS parameters for the below/left regime. + intercept_above_, coef_above_ : float, np.ndarray + OLS parameters for the above/right regime. + params_ : dict + Snapshot of selected parameters and AIC. + forecast_ : float + One-step-ahead forecast from end of training. + + References + ---------- + .. [1] Tong, H., & Lim, K. S. (1980). + Threshold autoregression, limit cycles and cyclical data. + *Journal of the Royal Statistical Society: Series B*, 42(3), 245–292. 
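A minimal usage sketch for the class (synthetic data; the attribute names follow the docstring above, and fitted values will vary with the input):

import numpy as np

from aeon.forecasting.stats import AutoTAR

rng = np.random.default_rng(0)
y = np.cumsum(rng.standard_normal(300))  # synthetic series for illustration

f = AutoTAR(max_order=2, max_delay=2)
f.fit(y)
print(f.threshold_, f.delay_, f.p_below_, f.p_above_)  # selected by AIC
print(f.forecast_)  # one-step-ahead forecast from the end of training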
+ """ + + def __init__( + self, + threshold: float | None = None, + delay: int | None = None, + ar_order: int | tuple[int, int] | None = None, + *, + max_order: int = 3, + max_delay: int = 3, + threshold_trim: float = 0.15, + min_regime_frac: float = 0.10, + min_points_offset: int = 0, + max_threshold_candidates: int | None = 100, + ) -> None: + self.threshold = threshold + self.delay = delay + self.ar_order = ar_order + self.max_order = int(max_order) + self.max_delay = int(max_delay) + self.threshold_trim = float(threshold_trim) + self.min_regime_frac = float(min_regime_frac) + self.min_points_offset = int(min_points_offset) + self.max_threshold_candidates = max_threshold_candidates + super().__init__(horizon=1, axis=1) + + def _iter_orders(self) -> Iterable[tuple[int, int]]: + if isinstance(self.ar_order, tuple): + yield self.ar_order[0], self.ar_order[1] + return + if isinstance(self.ar_order, int): + p = self.ar_order + yield p, p + return + for pL in range(self.max_order + 1): + for pR in range(self.max_order + 1): + yield pL, pR + + def _iter_delays(self) -> Iterable[int]: + if isinstance(self.delay, int): + yield self.delay + elif self.delay is None: + yield from range(1, self.max_delay + 1) + else: + raise ValueError("delay must be int or None") + + def _fit(self, y: np.ndarray, exog: np.ndarray | None = None) -> AutoTAR: + self._validate_params() + y = np.ascontiguousarray(np.asarray(y, dtype=np.float64)).squeeze() + n = y.shape[0] + + best_aic = np.inf + found = False + fixed_r = self.threshold + + for pL, pR in self._iter_orders(): + base_maxlag = max(pL, pR) + for d in self._iter_delays(): + maxlag = max(base_maxlag, d) + if n <= maxlag + 1: + continue + + if fixed_r is None: + cap = ( + -1 + if (self.max_threshold_candidates is None) + else self.max_threshold_candidates + ) + r, aic_val, iL, bL, iR, bR, _, _ = _numba_threshold_search( + y, + pL, + pR, + d, + self.threshold_trim, + self.min_regime_frac, + self.min_points_offset, + cap, + ) + else: + # Evaluate a single fixed threshold with AIC. + X_full, y_resp, z = _prepare_design(y, maxlag, d) + thr = float(fixed_r) + mask_R = z > thr + nR = int(mask_R.sum()) + nL = y_resp.shape[0] - nR + min_per = max( + int(np.ceil(self.min_regime_frac * y_resp.shape[0])), + self.min_points_offset, + pL + 1, + pR + 1, + ) + if (nL < min_per) or (nR < min_per): + continue + XL = ( + _subset_rows_cols(X_full, mask_R, False, pL) + if pL > 0 + else np.empty((nL, 0)) + ) + XR = ( + _subset_rows_cols(X_full, mask_R, True, pR) + if pR > 0 + else np.empty((nR, 0)) + ) + yL = _subset_target(y_resp, mask_R, False) + yR = _subset_target(y_resp, mask_R, True) + iL, bL, rssL = _ols_fit_with_rss(XL, yL) + iR, bR, rssR = _ols_fit_with_rss(XR, yR) + rss = rssL + rssR + kpars = (1 + pL) + (1 + pR) + aic_val = _aic_value(rss, y_resp.shape[0], kpars) + r = thr + + if aic_val < best_aic: + best_aic = aic_val + self.p_below_ = pL + self.p_above_ = pR + self.delay_ = d + self.threshold_ = r + self.intercept_below_ = iL + self.coef_below_ = bL + self.intercept_above_ = iR + self.coef_above_ = bR + found = True + + if not found: + raise RuntimeError( + "No valid AutoTAR fit found. Consider relaxing trims or widening the " + "search space." 
+ ) + + self.forecast_ = _numba_predict( + y, + self.delay_, + self.threshold_, + self.intercept_below_, + self.coef_below_, + self.p_below_, + self.intercept_above_, + self.coef_above_, + self.p_above_, + ) + + self.params_ = { + "threshold": self.threshold_, + "delay": self.delay_, + "regime_below": { + "order": self.p_below_, + "intercept": self.intercept_below_, + "coef": self.coef_below_, + }, + "regime_above": { + "order": self.p_above_, + "intercept": self.intercept_above_, + "coef": self.coef_above_, + }, + "selection": {"criterion": "AIC", "value": best_aic}, + } + return self + + def _predict(self, y: np.ndarray, exog: np.ndarray | None = None) -> float: + y = np.ascontiguousarray(np.asarray(y, dtype=np.float64)).squeeze() + return _numba_predict( + y, + self.delay_, + self.threshold_, + self.intercept_below_, + self.coef_below_, + self.p_below_, + self.intercept_above_, + self.coef_above_, + self.p_above_, + ) + + def _validate_params(self) -> None: + """Validate constructor parameters for type and value ranges.""" + if not isinstance(self.max_order, int) or self.max_order < 0: + raise TypeError("max_order must be an int >= 0") + if not isinstance(self.max_delay, int) or self.max_delay < 1: + raise TypeError("max_delay must be an int >= 1") + + if self.ar_order is not None: + if isinstance(self.ar_order, int): + if self.ar_order < 0: + raise ValueError("ar_order int must be >= 0") + elif isinstance(self.ar_order, tuple): + if len(self.ar_order) != 2: + raise TypeError( + "ar_order tuple must have length 2: (p_below, p_above)" + ) + pL, pR = self.ar_order + if not (isinstance(pL, int) and isinstance(pR, int)): + raise TypeError("ar_order tuple entries must be ints") + if pL < 0 or pR < 0: + raise ValueError("ar_order tuple entries must be >= 0") + else: + raise TypeError("ar_order must be int, (int,int), or None") + + if self.delay is not None: + if not isinstance(self.delay, int) or self.delay < 1: + raise TypeError("delay must be an int >= 1 or None") + + if self.threshold is not None: + if not isinstance(self.threshold, (int, float, np.floating)): + raise TypeError("threshold must be a real number or None") + if not np.isfinite(self.threshold): + raise ValueError("threshold must be finite") + + if not (0.0 <= self.threshold_trim < 0.5): + raise ValueError("threshold_trim must be in [0, 0.5)") + if not (0.0 < self.min_regime_frac <= 0.5): + raise ValueError("min_regime_frac must be in (0, 0.5]") + if not isinstance(self.min_points_offset, int) or self.min_points_offset < 0: + raise TypeError("min_points_offset must be an int >= 0") + if self.max_threshold_candidates is not None: + if ( + not isinstance(self.max_threshold_candidates, int) + or self.max_threshold_candidates < 1 + ): + raise TypeError("max_threshold_candidates must be int >= 1 or None") + + +# ============================ AutoTAR-specific kernel ============================ + + +@njit(cache=True, fastmath=True) +def _numba_threshold_search( + y: np.ndarray, + p_below: int, + p_above: int, + d: int, + trim_frac: float, + min_frac: float, + min_offset: int, + max_cands: int, # <= 0 → use all candidates in trimmed span +) -> tuple[float, float, float, np.ndarray, float, np.ndarray, int, int]: + """Search threshold using quantile-capped candidates + prefix/suffix stats. + + Below-regime means z_t <= r, above-regime means z_t > r. 
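The search owes its speed to running sufficient statistics: for a regime design X and target y it keeps Sxx = X'X, Sxy = X'y and Syy = y'y, moves one sorted row at a time between regimes, and recovers the regime RSS as Syy - beta'Sxy from the solved normal equations. A standalone check of that identity (toy data, not the aeon kernel):

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((60, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.standard_normal(60)

Sxx, Sxy, Syy = X.T @ X, X.T @ y, y @ y
beta = np.linalg.solve(Sxx, Sxy)  # OLS via the normal equations

# RSS from the statistics alone, no residual vector needed
rss_stats = Syy - beta @ Sxy

residuals = y - X @ beta
assert np.isclose(rss_stats, residuals @ residuals)
# the kernel then ranks candidate thresholds by an AIC computed from this RSS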
+ """ + maxlag = max(p_below, p_above, d) + X_full, y_resp, z = _prepare_design(y, maxlag, d) + rows = y_resp.shape[0] + if rows <= 0: + return ( + 0.0, + np.inf, + 0.0, + np.empty(0), + 0.0, + np.empty(0), + 0, + 0, + ) + + # Augmented per-row designs (intercept + lags for each regime) + X_below_aug = np.empty((rows, p_below + 1), dtype=np.float64) + X_above_aug = np.empty((rows, p_above + 1), dtype=np.float64) + for i in range(rows): + X_below_aug[i, 0] = 1.0 + X_above_aug[i, 0] = 1.0 + for c in range(p_below): + X_below_aug[i, c + 1] = X_full[i, c] + for c in range(p_above): + X_above_aug[i, c + 1] = X_full[i, c] + + order = np.argsort(z) + z_sorted = z[order] + y_sorted = y_resp[order] + X_below_sorted = X_below_aug[order, :] + X_above_sorted = X_above_aug[order, :] + + lower = int(np.floor(trim_frac * rows)) + upper = rows - lower + if upper <= lower + 1: + # Fallback: evaluate median split with full OLS + r_med = np.median(z_sorted) + mask_above = z > r_med + n_above = int(mask_above.sum()) + n_below = rows - n_above + if (n_below == 0) or (n_above == 0): + return ( + r_med, + np.inf, + 0.0, + np.empty(0), + 0.0, + np.empty(0), + n_below, + n_above, + ) + + Xb = ( + _subset_rows_cols(X_full, mask_above, False, p_below) + if p_below > 0 + else np.empty((n_below, 0)) + ) + Xa = ( + _subset_rows_cols(X_full, mask_above, True, p_above) + if p_above > 0 + else np.empty((n_above, 0)) + ) + yb = _subset_target(y_resp, mask_above, False) + ya = _subset_target(y_resp, mask_above, True) + + i_below, b_below, rss_below = _ols_fit_with_rss(Xb, yb) + i_above, b_above, rss_above = _ols_fit_with_rss(Xa, ya) + + k = (1 + p_below) + (1 + p_above) + aic_val = _aic_value(rss_below + rss_above, rows, k) + return ( + r_med, + aic_val, + i_below, + b_below, + i_above, + b_above, + n_below, + n_above, + ) + + min_per = max( + int(np.ceil(min_frac * rows)), + min_offset, + p_below + 1, + p_above + 1, + ) + + # Candidate indices inside trimmed span + span = upper - lower + m = span if (max_cands <= 0) else (max_cands if span > max_cands else span) + idx = np.empty(m, dtype=np.int64) + if m == 1: + idx[0] = (lower + upper - 1) // 2 + else: + for j in range(m): + pos = lower + (j * (span - 1)) / (m - 1) + k = int(np.floor(pos + 0.5)) + if k < lower: + k = lower + if k > (upper - 1): + k = upper - 1 + idx[j] = k + # Deduplicate adjacent duplicates + w = 1 + for j in range(1, m): + if idx[j] != idx[w - 1]: + idx[w] = idx[j] + w += 1 + m = w + + # Prefix/suffix sufficient statistics + Sxx_below = np.zeros((p_below + 1, p_below + 1), dtype=np.float64) + Sxy_below = np.zeros((p_below + 1,), dtype=np.float64) + Syy_below = 0.0 + n_below = 0 + + Sxx_above = np.zeros((p_above + 1, p_above + 1), dtype=np.float64) + Sxy_above = np.zeros((p_above + 1,), dtype=np.float64) + Syy_above = 0.0 + for i in range(rows): + x_above_row = X_above_sorted[i] + yi = y_sorted[i] + for r0 in range(p_above + 1): + v0 = x_above_row[r0] + for r1 in range(p_above + 1): + Sxx_above[r0, r1] += v0 * x_above_row[r1] + for r0 in range(p_above + 1): + Sxy_above[r0] += x_above_row[r0] * yi + Syy_above += yi * yi + n_above = rows + + ridge = 1e-12 + best_aic = np.inf + best_r = 0.0 + + best_i_below = 0.0 + best_b_below = np.empty(0, dtype=np.float64) + best_i_above = 0.0 + best_b_above = np.empty(0, dtype=np.float64) + best_n_below = 0 + best_n_above = 0 + + prev = -1 + for j in range(m): + cut = idx[j] + # Move rows (prev+1 .. 
cut) from ABOVE → BELOW + for t in range(prev + 1, cut + 1): + x_below_row = X_below_sorted[t] + x_above_row = X_above_sorted[t] + yi = y_sorted[t] + + # Add to BELOW + for r0 in range(p_below + 1): + v0 = x_below_row[r0] + for r1 in range(p_below + 1): + Sxx_below[r0, r1] += v0 * x_below_row[r1] + for r0 in range(p_below + 1): + Sxy_below[r0] += x_below_row[r0] * yi + Syy_below += yi * yi + n_below += 1 + + # Remove from ABOVE + for r0 in range(p_above + 1): + v0 = x_above_row[r0] + for r1 in range(p_above + 1): + Sxx_above[r0, r1] -= v0 * x_above_row[r1] + for r0 in range(p_above + 1): + Sxy_above[r0] -= x_above_row[r0] * yi + Syy_above -= yi * yi + n_above -= 1 + + prev = cut + + if (n_below < min_per) or (n_above < min_per): + continue + + # Solve (copies avoid accumulating ridge across candidates) + S_below = Sxx_below.copy() + for r0 in range(p_below + 1): + S_below[r0, r0] += ridge + beta_below = np.linalg.solve(S_below, Sxy_below) + + S_above = Sxx_above.copy() + for r0 in range(p_above + 1): + S_above[r0, r0] += ridge + beta_above = np.linalg.solve(S_above, Sxy_above) + + rss_below = Syy_below - np.dot(beta_below, Sxy_below) + rss_above = Syy_above - np.dot(beta_above, Sxy_above) + rss = rss_below + rss_above + kpars = (1 + p_below) + (1 + p_above) + aic_val = _aic_value(rss, rows, kpars) + + if aic_val < best_aic: + best_aic = aic_val + best_r = z_sorted[cut] + best_i_below = beta_below[0] + best_b_below = beta_below[1:].copy() + best_i_above = beta_above[0] + best_b_above = beta_above[1:].copy() + best_n_below = n_below + best_n_above = n_above + + return ( + best_r, + best_aic, + best_i_below, + best_b_below, + best_i_above, + best_b_above, + best_n_below, + best_n_above, + ) diff --git a/aeon/forecasting/stats/_tar.py b/aeon/forecasting/stats/_tar.py index c14aa7e928..caf5976e4a 100644 --- a/aeon/forecasting/stats/_tar.py +++ b/aeon/forecasting/stats/_tar.py @@ -1,151 +1,341 @@ +from __future__ import annotations + import numpy as np from numba import njit from aeon.forecasting.base import BaseForecaster, IterativeForecastingMixin -@njit(cache=True, fastmath=True) -def _make_lag_matrix(y: np.ndarray, maxlag: int) -> np.ndarray: - """Return lag matrix with columns [y_{t-1}, ..., y_{t-maxlag}] (trim='both').""" - n = y.shape[0] - rows = n - maxlag - out = np.empty((rows, maxlag), dtype=np.float64) - - for i in range(rows): - for k in range(maxlag): - out[i, k] = y[maxlag + i - (k + 1)] - return out - - -@njit(cache=True, fastmath=True) -def _make_lags_regimes(y: np.ndarray, maxlag: int, delay: int, threshold: float): - """Create lag matrix and regime flags in one pass.""" - n = y.shape[0] - rows = n - maxlag - X_lag = np.empty((rows, maxlag), dtype=np.float64) - regimes = np.empty(rows, dtype=np.bool_) - - for i in range(rows): - for k in range(maxlag): - X_lag[i, k] = y[maxlag + i - (k + 1)] - regimes[i] = y[maxlag + i - delay] > threshold - - return X_lag, regimes - - -@njit(cache=True, fastmath=True) -def _ols_fit(X: np.ndarray, y: np.ndarray): - """Fit OLS regression using normal equations.""" - n_samples, n_features = X.shape - Xb = np.ones((n_samples, n_features + 1), dtype=np.float64) - Xb[:, 1:] = X - - XtX = Xb.T @ Xb - Xty = Xb.T @ y - beta = np.linalg.solve(XtX, Xty) - - intercept = beta[0] - coef = beta[1:] - return intercept, coef - - -@njit(cache=True, fastmath=True) -def _ols_predict(X: np.ndarray, intercept: float, coef: np.ndarray): - """Predict using intercept and coefficients.""" - return intercept + X @ coef - - class TAR(BaseForecaster, 
IterativeForecastingMixin): - """Fast Threshold Autoregressive (TAR) [1] forecaster using Numba OLS. + r"""Threshold Autoregressive (TAR) forecaster with fixed parameters. - A TAR model fits two autoregressive models to different regimes, - depending on whether the value at lag `delay` is above or below - a threshold (default: mean of training series). Each regime has - its own set of AR coefficients estimated via OLS. + Two regimes split by a threshold :math:`r` on the variable :math:`z_t=y_{t-d}`: + observations with :math:`z_t \le r` follow the **below/left** AR model, and + those with :math:`z_t > r` follow the **above/right** AR model. Each regime is + fit by OLS. **No parameter optimisation/search** is performed. - This implementation uses Numba to efficiently compute the lag - matrix, assign regimes, and fit both models without sklearn. It does not yet set - the threshold or the AR parameters automatically based on the training data. + Defaults: + - ``delay=1`` + - ``ar_order=(2, 2)`` (AR(2) in each regime) + - ``threshold=None`` → set to the **median** of the aligned threshold variable + computed on the training window. Parameters ---------- - threshold : float or None, default=None - Threshold value for regime split. If None, set to mean of training data. + threshold : float | None, default=None + Fixed threshold :math:`r`. If ``None``, it is set in ``fit`` to the median + of :math:`z_t=y_{t-d}` over the aligned training rows. delay : int, default=1 - Delay ``d`` for threshold variable ``y_{t-d}``. - ar_order : int, default=1 - AR order ``p`` for both regimes. + Threshold delay :math:`d \ge 1` for :math:`z_t = y_{t-d}`. + ar_order : int | tuple[int, int], default=(2, 2) + If ``int``, use the same AR order in both regimes. + If tuple, use ``(p_below, p_above)`` for the two regimes. Attributes ---------- + threshold_ : float + The threshold actually used (either provided or the computed median). + delay_ : int + The fixed delay actually used. + p_below_, p_above_ : int + AR orders used in the below/left and above/right regimes, respectively. + intercept_below_, coef_below_ : float, np.ndarray + OLS parameters for the below/left regime (:math:`z_t \le r`). + intercept_above_, coef_above_ : float, np.ndarray + OLS parameters for the above/right regime (:math:`z_t > r`). forecast_ : float - One-step-ahead forecast from end of training series. + One-step-ahead forecast from the end of training. params_ : dict - Dictionary containing fitted intercepts and AR coefficients for each regime. + Snapshot of configuration and a simple AIC diagnostic. References ---------- - ..[1] Tong, H. (1978). "On a threshold model." In: Chen, C.H. (ed.) - Pattern Recognition and Signal Processing. Sijthoff & Noordhoff, pp. 575–586. + Tong, H., & Lim, K. S. (1980). + Threshold autoregression, limit cycles and cyclical data. + *JRSS-B*, 42(3), 245–292. 
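+
+    Examples
+    --------
+    A minimal usage sketch (illustrative data, not taken from the library's
+    documented examples):
+
+    >>> import numpy as np
+    >>> from aeon.forecasting.stats import TAR
+    >>> rng = np.random.default_rng(0)
+    >>> y = rng.standard_normal(200).cumsum()
+    >>> f = TAR(delay=1, ar_order=(2, 2)).fit(y)  # doctest: +SKIP
+    >>> float(f.forecast_)  # one-step-ahead forecast  # doctest: +SKIP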
""" - def __init__(self, threshold=None, delay=1, ar_order=1): + def __init__( + self, + threshold: float | None = None, + delay: int = 1, + ar_order: int | tuple[int, int] = (2, 2), + ) -> None: self.threshold = threshold self.delay = delay self.ar_order = ar_order super().__init__(horizon=1, axis=1) - def _fit(self, y, X=None): - """Fit TAR model to training data.""" + def _fit(self, y: np.ndarray, exog: np.ndarray | None = None) -> TAR: + self._validate_params() y = y.squeeze() - self.threshold_ = ( - float(np.mean(y)) if self.threshold is None else float(self.threshold) - ) + y = np.ascontiguousarray(np.asarray(y, dtype=np.float64)) + n = y.shape[0] - # Use Numba lag matrix + regime builder - X_lag, regimes = _make_lags_regimes( - y, self.ar_order, self.delay, self.threshold_ - ) - y_resp = y[self.ar_order :] + # Resolve orders + if isinstance(self.ar_order, int): + p_below = p_above = int(self.ar_order) + else: + p_below = int(self.ar_order[0]) + p_above = int(self.ar_order[1]) + + maxlag = max(p_below, p_above, self.delay) + if n <= maxlag: + raise RuntimeError( + f"Not enough observations (n={n}) for maxlag={maxlag}. " + "Provide more data or lower delay/order." + ) + + # Design matrices aligned to t = maxlag .. n-1 + X_full = _make_lag_matrix(y, maxlag) # shape (rows, maxlag) + y_resp = y[maxlag:] # shape (rows,) + rows = y_resp.shape[0] + + # Threshold variable z_t = y_{t-d} + base = maxlag - self.delay + z = y[base : base + rows] + + # Default threshold to the median of z if not provided + if self.threshold is not None: + thr = self.threshold + else: + thr = np.median(z) + + # Regime mask and sizes + mask_R = z > thr + nR = int(mask_R.sum()) + nL = rows - nR + + minL = p_below + 1 + minR = p_above + 1 + if nL < minL or nR < minR: + raise RuntimeError( + "Insufficient data per regime at the chosen threshold: " + f"below n={nL} (need ≥ {minL}), above n={nR} (need ≥ {minR}). " + "Consider providing a different threshold, delay, or orders." + ) + + # Per-regime designs + if p_below > 0: + XL = X_full[~mask_R, :p_below] + else: + XL = np.empty((nL, 0), dtype=np.float64) + if p_above > 0: + XR = X_full[mask_R, :p_above] + else: + XR = np.empty((nR, 0), dtype=np.float64) + yL = y_resp[~mask_R] + yR = y_resp[mask_R] + + # OLS fits + iL, bL, rssL = _ols_fit_with_rss(XL, yL) + iR, bR, rssR = _ols_fit_with_rss(XR, yR) - # Fit OLS to each regime - self.intercept1_, self.coef1_ = _ols_fit(X_lag[~regimes], y_resp[~regimes]) - self.intercept2_, self.coef2_ = _ols_fit(X_lag[regimes], y_resp[regimes]) + # Persist learned params + self.threshold_ = thr + self.delay_ = int(self.delay) + self.p_below_ = p_below + self.p_above_ = p_above + self.intercept_below_ = float(iL) + self.coef_below_ = np.ascontiguousarray(bL, dtype=np.float64) + self.intercept_above_ = float(iR) + self.coef_above_ = np.ascontiguousarray(bR, dtype=np.float64) - # Store for inspection + # 1-step forecast + self.forecast_ = _numba_predict( + y, + self.delay_, + self.threshold_, + self.intercept_below_, + self.coef_below_, + self.p_below_, + self.intercept_above_, + self.coef_above_, + self.p_above_, + ) + + # Simple AIC diagnostic (sum RSS; k counts both regimes incl. 
intercepts) + rss = rssL + rssR + k = (1 + p_below) + (1 + p_above) + aic = _aic_value(rss, rows, k) self.params_ = { "threshold": self.threshold_, - "regime_1": { - "intercept": self.intercept1_, - "coefficients": self.coef1_.tolist(), + "delay": self.delay_, + "regime_below": { + "order": self.p_below_, + "intercept": self.intercept_below_, + "coef": self.coef_below_, + "n": int(nL), }, - "regime_2": { - "intercept": self.intercept2_, - "coefficients": self.coef2_.tolist(), + "regime_above": { + "order": self.p_above_, + "intercept": self.intercept_above_, + "coef": self.coef_above_, + "n": int(nR), }, + "selection": {"criterion": "AIC", "value": float(aic)}, } + return self - # Store training data - self._y = list(y) - - # Forecast from end of training - lagged_last = np.array(self._y[-self.ar_order :])[::-1].reshape(1, -1) - regime_last = self._y[-self.delay] > self.threshold_ - self.forecast_ = ( - _ols_predict(lagged_last, self.intercept2_, self.coef2_)[0] - if regime_last - else _ols_predict(lagged_last, self.intercept1_, self.coef1_)[0] + def _predict(self, y: np.ndarray, exog: np.ndarray | None = None) -> float: + y = np.ascontiguousarray(np.asarray(y, dtype=np.float64)).squeeze() + return _numba_predict( + y, + self.delay_, + self.threshold_, + self.intercept_below_, + self.coef_below_, + self.p_below_, + self.intercept_above_, + self.coef_above_, + self.p_above_, ) - return self + def _validate_params(self) -> None: + """Validate fixed-parameter configuration for types and ranges.""" + if self.threshold is not None: + if not isinstance( + self.threshold, (int, float, np.floating) + ) or not np.isfinite(self.threshold): + raise TypeError("threshold must be a finite real number or None") + if not isinstance(self.delay, int) or self.delay < 1: + raise TypeError("delay must be an int >= 1") + if isinstance(self.ar_order, int): + if self.ar_order < 0: + raise ValueError("ar_order int must be >= 0") + elif isinstance(self.ar_order, tuple): + if len(self.ar_order) != 2: + raise TypeError("ar_order tuple must be (p_below, p_above)") + pL, pR = self.ar_order + if not (isinstance(pL, int) and isinstance(pR, int)): + raise TypeError("ar_order tuple entries must be ints") + if pL < 0 or pR < 0: + raise ValueError("ar_order tuple entries must be >= 0") + else: + raise TypeError("ar_order must be int or (int, int)") - def _predict(self, y, exog=None) -> float: - """Predict the next step ahead given current series y.""" - y = np.asarray(y).squeeze() - lagged = np.array(y[-self.ar_order :])[::-1].reshape(1, -1) - regime = y[-self.delay] > self.threshold_ - return ( - _ols_predict(lagged, self.intercept2_, self.coef2_)[0] - if regime - else _ols_predict(lagged, self.intercept1_, self.coef1_)[0] - ) + +# ============================ shared Numba utilities ============================ + + +@njit(cache=True, fastmath=True) +def _make_lag_matrix(y: np.ndarray, maxlag: int) -> np.ndarray: + """Build lag matrix with columns [y_{t-1}, ..., y_{t-maxlag}] (trim='both').""" + n = y.shape[0] + rows = n - maxlag + out = np.empty((rows, maxlag), dtype=np.float64) + for i in range(rows): + base = maxlag + i + for k in range(maxlag): + out[i, k] = y[base - (k + 1)] + return out + + +@njit(cache=True, fastmath=True) +def _prepare_design( + y: np.ndarray, maxlag: int, d: int +) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + """Build lagged design X, response y_resp, and threshold var z=y_{t-d} (aligned).""" + X_full = _make_lag_matrix(y, maxlag) + y_resp = y[maxlag:] + rows = y_resp.shape[0] + z = 
np.empty(rows, dtype=np.float64) + base = maxlag - d + for i in range(rows): + z[i] = y[base + i] # y_{t-d} + return X_full, y_resp, z + + +@njit(cache=True, fastmath=True) +def _ols_fit_with_rss(X: np.ndarray, y: np.ndarray) -> tuple[float, np.ndarray, float]: + """OLS via normal equations; return (intercept, coef, rss).""" + n_samples, n_features = X.shape + Xb = np.empty((n_samples, n_features + 1), dtype=np.float64) + Xb[:, 0] = 1.0 + if n_features: + Xb[:, 1:] = X + XtX = Xb.T @ Xb + Xty = Xb.T @ y + beta = np.linalg.solve(XtX, Xty) + pred = Xb @ beta + resid = y - pred + rss = float(resid @ resid) + return float(beta[0]), beta[1:], rss + + +@njit(cache=True, fastmath=True) +def _subset_rows_cols( + X: np.ndarray, mask_true: np.ndarray, choose_true: bool, keep_cols: int +) -> np.ndarray: + """Select rows by mask and first keep_cols columns (Numba-friendly).""" + rows = 0 + for i in range(mask_true.size): + if mask_true[i] == choose_true: + rows += 1 + out = np.empty((rows, keep_cols), dtype=np.float64) + r = 0 + for i in range(mask_true.size): + if mask_true[i] == choose_true: + for c in range(keep_cols): + out[r, c] = X[i, c] + r += 1 + return out + + +@njit(cache=True, fastmath=True) +def _subset_target( + y: np.ndarray, mask_true: np.ndarray, choose_true: bool +) -> np.ndarray: + """Select target rows by mask (Numba-friendly).""" + rows = 0 + for i in range(mask_true.size): + if mask_true[i] == choose_true: + rows += 1 + out = np.empty(rows, dtype=np.float64) + r = 0 + for i in range(mask_true.size): + if mask_true[i] == choose_true: + out[r] = y[i] + r += 1 + return out + + +@njit(cache=True, fastmath=True) +def _aic_value(rss: float, n_eff: int, k: int) -> float: + """AIC ∝ n*log(max(RSS/n, tiny)) + 2k.""" + if n_eff <= 0: + return np.inf + sigma2 = rss / n_eff + if sigma2 <= 1e-300: + sigma2 = 1e-300 + return n_eff * np.log(sigma2) + 2.0 * k + + +@njit(cache=True, fastmath=True) +def _numba_predict( + y: np.ndarray, + delay: int, + thr: float, + iL: float, + bL: np.ndarray, + pL: int, + iR: float, + bR: np.ndarray, + pR: int, +) -> float: + """One-step forecast from end of y with fitted TAR params.""" + regime_right = y[-delay] > thr + if regime_right: + if pR == 0: + return iR + val = iR + for j in range(pR): + val += bR[j] * y[-(j + 1)] + return val + else: + if pL == 0: + return iL + val = iL + for j in range(pL): + val += bL[j] * y[-(j + 1)] + return val diff --git a/aeon/forecasting/stats/tests/test_tar.py b/aeon/forecasting/stats/tests/test_tar.py index f38e52a885..825ac7bcdd 100644 --- a/aeon/forecasting/stats/tests/test_tar.py +++ b/aeon/forecasting/stats/tests/test_tar.py @@ -1,67 +1,332 @@ -"""Test the TAR forecaster.""" +"""Tests for TAR (fixed) and AutoTAR (search) forecasters.""" + +from __future__ import annotations + +import math import numpy as np import pytest -from aeon.datasets import load_airline -from aeon.forecasting.stats._tar import TAR # adjust if it's in another module +from aeon.forecasting.stats import TAR, AutoTAR + +# --------------------------- helpers --------------------------- + + +def _gen_tar_series( + n: int, + phi_L: float = 0.2, + phi_R: float = 0.9, + r: float = 0.0, + d: int = 1, + sigma: float = 1.0, + seed: int = 123, +) -> np.ndarray: + """Generate a synthetic TAR(1,1) time series.""" + rng = np.random.default_rng(seed) + y = np.zeros(n, dtype=float) + eps = rng.normal(scale=sigma, size=n) + for t in range(1, n): + z = y[t - d] if t - d >= 0 else -np.inf + phi = phi_R if z > r else phi_L + y[t] = phi * y[t - 1] + eps[t] + return y + + 
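+# _gen_tar_series above simulates the two-regime TAR(1,1) process
+#     y_t = phi_L * y_{t-1} + eps_t   if y_{t-d} <= r
+#     y_t = phi_R * y_{t-1} + eps_t   if y_{t-d} >  r
+# i.e. exactly the switching rule the fitted forecasters should recover.
+
+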
+def _aligned_z(y: np.ndarray, delay: int, pL: int, pR: int) -> np.ndarray: + """Compute aligned threshold variable z_t = y_{t-d} for given (pL, pR, d).""" + maxlag = max(delay, pL, pR) + rows = y.shape[0] - maxlag + base = maxlag - delay + return y[base : base + rows] + + +def _compute_trim_band( + y: np.ndarray, delay: int, pL: int, pR: int, trim: float +) -> tuple[float, float]: + """Compute the [low, high] values of the trimmed threshold candidate band.""" + z = _aligned_z(y, delay, pL, pR) + z_sorted = np.sort(z) + rows = z_sorted.shape[0] + lower = int(np.floor(trim * rows)) + upper = rows - lower + lower_val = z_sorted[lower] if rows > 0 and lower < rows else -np.inf + upper_val = z_sorted[upper - 1] if rows > 0 and (upper - 1) >= 0 else np.inf + return lower_val, upper_val + + +# ============================ AutoTAR (search) ============================ + + +def test_autotar_fit_and_basic_attrs_exist(): + """AutoTAR fitting sets core learned attributes and forecast_.""" + y = _gen_tar_series(500, phi_L=0.2, phi_R=0.8, r=0.0, d=1, sigma=0.8, seed=7) + f = AutoTAR(threshold=None, delay=None, ar_order=None, max_order=3, max_delay=3) + f.fit(y) + for attr in [ + "threshold_", + "delay_", + "p_below_", + "p_above_", + "intercept_below_", + "coef_below_", + "intercept_above_", + "coef_above_", + ]: + assert hasattr(f, attr) + assert isinstance(f.forecast_, float) + assert isinstance(f.params_, dict) + assert f.params_["selection"]["criterion"] == "AIC" + + +def test_autotar_search_ranges_respected(): + """AutoTAR search-selected orders and delay stay within bounds.""" + y = _gen_tar_series(500, seed=1) + f = AutoTAR(threshold=None, delay=None, ar_order=None, max_order=2, max_delay=2) + f.fit(y) + assert 0 <= f.p_below_ <= 2 + assert 0 <= f.p_above_ <= 2 + assert 1 <= f.delay_ <= 2 + + +def test_autotar_fixed_params_branch_respected(): + """AutoTAR fixed threshold + fixed (pL,pR,d) branch is respected.""" + y = _gen_tar_series(600, phi_L=0.1, phi_R=0.7, r=0.25, d=2, seed=3) + f = AutoTAR(threshold=0.25, delay=2, ar_order=(1, 1)) + f.fit(y) + assert math.isclose(f.threshold_, 0.25, rel_tol=0, abs_tol=0) + assert f.delay_ == 2 + assert f.p_below_ == 1 and f.p_above_ == 1 + + +def test_autotar_threshold_within_trim_band(): + """AutoTAR learned threshold lies within the trimmed candidate range.""" + y = _gen_tar_series(400, phi_L=0.3, phi_R=0.85, r=0.0, d=1, sigma=0.9, seed=11) + f = AutoTAR( + threshold=None, + delay=None, + ar_order=None, + max_order=3, + max_delay=3, + threshold_trim=0.15, + ) + f.fit(y) + low, high = _compute_trim_band( + y, delay=f.delay_, pL=f.p_below_, pR=f.p_above_, trim=0.15 + ) + assert low <= f.threshold_ <= high + + +def test_autotar_predict_matches_internal_one_step(): + """AutoTAR one-step prediction matches forecast_ from fit.""" + y = _gen_tar_series(150, seed=9) + f = AutoTAR(threshold=None, delay=None, ar_order=None) + f.fit(y) + yhat_internal = f._predict(y) + assert isinstance(yhat_internal, float) + assert np.isfinite(f.forecast_) + assert math.isclose(f.forecast_, yhat_internal, rel_tol=1e-12, abs_tol=1e-12) + + +@pytest.mark.parametrize( + "phi_L,phi_R,r,d", + [ + (0.1, 0.8, 0.0, 1), + (0.3, 0.9, 0.25, 1), + (0.2, 0.7, -0.2, 2), + ], +) +def test_autotar_parameter_recovery_is_reasonable( + phi_L: float, phi_R: float, r: float, d: int +): + """AutoTAR fitted coefficients are close for synthetic TAR(1,1).""" + y = _gen_tar_series(600, phi_L=phi_L, phi_R=phi_R, r=r, d=d, sigma=0.8, seed=123) + f = AutoTAR( + threshold=None, delay=None, ar_order=None, 
max_order=1, max_delay=max(1, d) + ) + f.fit(y) + if f.p_below_ >= 1: + assert abs(float(f.coef_below_[0]) - phi_L) < 0.30 + if f.p_above_ >= 1: + assert abs(float(f.coef_above_[0]) - phi_R) < 0.30 + assert 1 <= f.delay_ <= max(3, d) + + +def test_autotar_max_threshold_candidates_caps_workload(): + """AutoTAR: setting a tiny candidate cap still fits and records AIC.""" + y = _gen_tar_series(400, seed=21) + f = AutoTAR(threshold=None, delay=None, ar_order=None, max_threshold_candidates=10) + f.fit(y) + assert hasattr(f, "threshold_") + assert np.isfinite(f.params_["selection"]["value"]) + + +def test_autotar_fixed_threshold_branch_and_tie_rule(): + """AutoTAR: when threshold is fixed and z == r, tie goes to below/left regime.""" + y = _gen_tar_series(800, seed=5) + delay = 2 + thr = float(y[-delay]) # force tie at prediction time + f = AutoTAR(threshold=thr, delay=delay, ar_order=(1, 1)) + f.fit(y) + + y_arr = np.asarray(y, dtype=float) + below_pred = f.intercept_below_ + if f.p_below_ >= 1: + below_pred += float(f.coef_below_[0]) * y_arr[-1] + if f.p_below_ >= 2: + below_pred += float(f.coef_below_[1]) * y_arr[-2] + yhat = f._predict(y) + assert math.isclose(yhat, below_pred, rel_tol=1e-12, abs_tol=1e-12) + + +def test_autotar_none_max_threshold_candidates_uses_full_span(): + """AutoTAR: max_threshold_candidates=None uses all trimmed candidates.""" + y = _gen_tar_series(450, seed=17) + f = AutoTAR( + threshold=None, + delay=None, + ar_order=None, + max_threshold_candidates=None, + threshold_trim=0.1, + ) + f.fit(y) + assert np.isfinite(f.params_["selection"]["value"]) + + +def test_autotar_no_valid_fit_raises(): + """AutoTAR: if every combo is invalid (e.g., delay too large), fit raises.""" + y = _gen_tar_series(50, seed=2) + f = AutoTAR(threshold=None, delay=200, ar_order=(0, 0)) + with pytest.raises(RuntimeError): + f.fit(y) + + +@pytest.mark.parametrize( + "kwargs,exc", + [ + (dict(max_order=-1), TypeError), + (dict(max_delay=0), TypeError), + (dict(ar_order=(-1, 1)), ValueError), + (dict(ar_order=(1.5, 1)), TypeError), + (dict(delay=0), TypeError), + (dict(threshold=float("nan")), ValueError), + (dict(threshold="bad"), TypeError), + (dict(threshold_trim=0.55), ValueError), + (dict(min_regime_frac=0.0), ValueError), + (dict(min_points_offset=-1), TypeError), + (dict(max_threshold_candidates=0), TypeError), + ], +) +def test_autotar_validation_errors(kwargs, exc): + """AutoTAR: constructor parameter validation raises clear errors.""" + y = _gen_tar_series(200, seed=3) + f = AutoTAR(**kwargs) + with pytest.raises(exc): + f.fit(y) + + +def test_autotar_predict_accepts_list_and_ndarray(): + """AutoTAR: _predict should accept list input and ndarray input seamlessly.""" + y = _gen_tar_series(300, seed=19) + f = AutoTAR(threshold=None, delay=None, ar_order=None) + f.fit(y) + yhat1 = f._predict(y) # ndarray + yhat2 = f._predict(list(y)) # list + assert math.isclose(yhat1, yhat2, rel_tol=0, abs_tol=0) -@pytest.fixture -def airline_series(): - """Fixture to load a simple test time series.""" - return load_airline().squeeze() +def test_autotar_median_fallback_path_runs(): + """AutoTAR: force trimmed span so small it hits the median-fallback path.""" + # Make rows small relative to trim: rows ≈ n - maxlag; pick n modest, trim high. 
+ y = _gen_tar_series(12, seed=42) + f = AutoTAR( + threshold=None, + delay=1, + ar_order=(1, 1), + threshold_trim=0.49, + max_order=1, + max_delay=1, + ) + f.fit(y) # should not crash; exercises the fallback branch + # The threshold should be close to the median of aligned z + z = _aligned_z(y, delay=f.delay_, pL=f.p_below_, pR=f.p_above_) + assert math.isclose(f.threshold_, float(np.median(z)), rel_tol=0, abs_tol=1e-12) -def test_fit_forecast_output(airline_series): - """Test that fit runs and forecast_ is a float.""" - model = TAR(ar_order=2, delay=1) - model.fit(airline_series) - assert isinstance(model.forecast_, float) - assert np.isfinite(model.forecast_) +def test_autotar_single_candidate_path_runs(): + """AutoTAR: force single-candidate path (m==1) in threshold search.""" + y = _gen_tar_series(60, seed=7) + # Use a trim that makes a tiny span, then cap to 1 candidate + f = AutoTAR( + threshold=None, + delay=1, + ar_order=None, + max_order=1, + max_delay=1, + threshold_trim=0.25, + max_threshold_candidates=1, + ) + f.fit(y) + assert hasattr(f, "threshold_") and np.isfinite(f.params_["selection"]["value"]) -def test_predict_output(airline_series): - """Test that _predict returns a float for valid input.""" - model = TAR(ar_order=2, delay=1) - model.fit(airline_series) - pred = model._predict(airline_series[:-1]) - assert isinstance(pred, float) - assert np.isfinite(pred) +# ============================ TAR (fixed) ============================ -def test_params_structure(airline_series): - """Test that params_ dict has correct structure after fitting.""" - model = TAR(ar_order=2, delay=1) - model.fit(airline_series) - params = model.params_ +def test_tar_defaults_use_median_threshold_and_ar22(): + """TAR defaults: delay=1, ar_order=(2,2), threshold=median(z).""" + y = _gen_tar_series(200, seed=10) + f = TAR() # defaults + f.fit(y) + assert f.delay_ == 1 + assert f.p_below_ == 2 and f.p_above_ == 2 + # Check threshold equals median of aligned z + z = _aligned_z(y, delay=f.delay_, pL=f.p_below_, pR=f.p_above_) + assert math.isclose(f.threshold_, float(np.median(z)), rel_tol=0, abs_tol=1e-12) - assert "threshold" in params - assert "regime_1" in params - assert "regime_2" in params - for regime in ["regime_1", "regime_2"]: - assert "intercept" in params[regime] - assert "coefficients" in params[regime] - assert isinstance(params[regime]["coefficients"], list) +def test_tar_ar_int_sets_both_and_fixed_threshold(): + """TAR: ar_order=int sets both regimes; explicit threshold respected.""" + y = _gen_tar_series(250, seed=22) + thr = 0.0 + f = TAR(threshold=thr, delay=1, ar_order=1) + f.fit(y) + assert f.p_below_ == 1 and f.p_above_ == 1 + assert math.isclose(f.threshold_, thr, rel_tol=0, abs_tol=0) -def test_custom_threshold(airline_series): - """Test that a user-defined threshold is respected.""" - threshold = 400.0 - model = TAR(ar_order=2, delay=1, threshold=threshold) - model.fit(airline_series) - assert model.threshold_ == threshold +def test_tar_predict_matches_internal_one_step_and_types(): + """TAR: one-step prediction matches forecast_ and handles list input.""" + y = _gen_tar_series(220, seed=5) + f = TAR() # defaults use median threshold, (2,2) + f.fit(y) + yhat_nd = f._predict(y) + yhat_list = f._predict(list(y)) + assert math.isclose(f.forecast_, yhat_nd, rel_tol=0, abs_tol=0) + assert math.isclose(yhat_nd, yhat_list, rel_tol=0, abs_tol=0) -def test_forecast_changes_with_series(airline_series): - """Test that modifying the series changes the forecast.""" - model1 = 
TAR(ar_order=2, delay=1) - model2 = TAR(ar_order=2, delay=1) +def test_tar_insufficient_data_per_regime_raises(): + """TAR: threshold causing empty regime should raise (identifiability).""" + y = _gen_tar_series(120, seed=99) + # Choose a very large threshold so mask_R is all False → above regime empty + f = TAR(threshold=1e9, delay=1, ar_order=(2, 2)) + with pytest.raises(RuntimeError): + f.fit(y) - model1.fit(airline_series) - modified_series = airline_series.copy() - modified_series[-1] += 100 - model2.fit(modified_series) - assert not np.isclose(model1.forecast_, model2.forecast_) +@pytest.mark.parametrize( + "kwargs,exc", + [ + (dict(threshold="bad"), TypeError), + (dict(delay=0), TypeError), + (dict(ar_order=(-1, 1)), ValueError), + (dict(ar_order=(1,)), TypeError), + (dict(ar_order=(1.5, 1)), TypeError), + ], +) +def test_tar_validation_errors(kwargs, exc): + """TAR: validation errors for bad config.""" + y = _gen_tar_series(120, seed=3) + f = TAR(**kwargs) # type: ignore[arg-type] + with pytest.raises(exc): + f.fit(y) From d39be96eeb434dd2d92a06b178998b01fae7315d Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Fri, 15 Aug 2025 12:09:20 +0100 Subject: [PATCH 8/9] init --- aeon/forecasting/stats/__init__.py | 6 +++++- docs/api_reference/forecasting.rst | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/aeon/forecasting/stats/__init__.py b/aeon/forecasting/stats/__init__.py index 79f3acab48..6f6d38c200 100644 --- a/aeon/forecasting/stats/__init__.py +++ b/aeon/forecasting/stats/__init__.py @@ -1,13 +1,17 @@ """Stats based forecasters.""" __all__ = [ - "ETS", "ARIMA", + "AutoTAR", + "ETS", + "TAR", "Theta", "TVPForecaster", ] from aeon.forecasting.stats._arima import ARIMA +from aeon.forecasting.stats._auto_tar import AutoTAR from aeon.forecasting.stats._ets import ETS +from aeon.forecasting.stats._tar import TAR from aeon.forecasting.stats._theta import Theta from aeon.forecasting.stats._tvp import TVPForecaster diff --git a/docs/api_reference/forecasting.rst b/docs/api_reference/forecasting.rst index c721c969cf..780dc53944 100644 --- a/docs/api_reference/forecasting.rst +++ b/docs/api_reference/forecasting.rst @@ -35,5 +35,7 @@ Statistical Models ARIMA ETS + TAR + AutoTAR Theta TVPForecaster From 3854af7fa73be98fd98b6748e18cad58321c795f Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Fri, 15 Aug 2025 12:59:28 +0100 Subject: [PATCH 9/9] AR Forecaster --- aeon/forecasting/stats/_ar.py | 282 ++++++++++++++++++++++++++++++++++ 1 file changed, 282 insertions(+) create mode 100644 aeon/forecasting/stats/_ar.py diff --git a/aeon/forecasting/stats/_ar.py b/aeon/forecasting/stats/_ar.py new file mode 100644 index 0000000000..a60d5fed62 --- /dev/null +++ b/aeon/forecasting/stats/_ar.py @@ -0,0 +1,282 @@ +from __future__ import annotations + +import numpy as np +from numba import njit + +from aeon.forecasting.base import BaseForecaster, IterativeForecastingMixin + + +class AR(BaseForecaster, IterativeForecastingMixin): + r"""Autoregressive (AR) forecaster fit by OLS. + + Parameters are estimated by ordinary least squares on + the lag-matrix design. Optionally, the AR order can be *selected* by scanning + orders ``0..p_max`` with an information criterion (AIC by default), reusing + the same lag matrix and OLS routine for efficiency (no nonlinear optimisation). + + Parameters + ---------- + ar_order : int | None, default=None + If an ``int``, fit a fixed-order AR(p) with that order. 
+ If ``None``, the order is selected by scanning ``0..p_max`` and choosing + the minimum information criterion (``criterion``). + p_max : int, default=10 + Maximum order to consider when ``ar_order is None``. + criterion : {"AIC", "BIC", "AICc"}, default="AIC" + Information criterion for order selection when scanning. + demean : bool, default=True + If ``True``, subtract the training mean before fitting (common practice). + If ``False``, an intercept is estimated in OLS. + + Attributes + ---------- + p_ : int + Selected/used AR order. + intercept_ : float + Estimated intercept term (0.0 if ``demean=True``). + coef_ : np.ndarray of shape (p_,) + AR coefficients for lags 1..p_. + forecast_ : float + One-step-ahead forecast from the end of training. + params_ : dict + Snapshot including order-selection diagnostics. + + Notes + ----- + *Design alignment.* For a chosen maximum lag ``maxlag``, the design rows + correspond to times ``t = maxlag .. n-1``. For order ``p``, we use the first + ``p`` columns of the lag matrix ``[y_{t-1}, ..., y_{t-maxlag}]``. + + The OLS implementation uses normal equations with an explicit intercept term + (unless ``demean=True``), returning the residual sum of squares for criterion + computation: + + ``AIC = n_eff * log(max(RSS/n_eff, tiny)) + 2*k``, where ``k = p + 1`` if an + intercept is fit, else ``k = p``. + + """ + + def __init__( + self, + ar_order: int | None = None, + *, + p_max: int = 10, + criterion: str = "AIC", + demean: bool = True, + ) -> None: + self.ar_order = ar_order + self.p_max = p_max + self.criterion = criterion + self.demean = demean + super().__init__(horizon=1, axis=1) + + # --------------------------------------------------------------------- + # aeon required internals + # --------------------------------------------------------------------- + def _fit(self, y: np.ndarray, exog: np.ndarray | None = None) -> AR: + self._validate_params() + y = np.asarray(y, dtype=np.float64).squeeze() + if y.ndim != 1: + raise ValueError("y must be a 1D array-like") + y = np.ascontiguousarray(y) + n = y.shape[0] + + # centring (if requested) + if self.demean: + self._y_mean_ = float(y.mean()) + yc = y - self._y_mean_ + fit_intercept = False + else: + self._y_mean_ = 0.0 + yc = y + fit_intercept = True + + # Resolve order and build lag matrix up to needed maxlag + if self.ar_order is None: + if self.p_max < 0: + raise ValueError("p_max must be >= 0 when ar_order=None") + maxlag = int(self.p_max) + else: + if self.ar_order < 0: + raise ValueError("ar_order must be >= 0") + maxlag = int(self.ar_order) + + if n <= maxlag: + raise RuntimeError( + f"Not enough observations (n={n}) for maxlag={maxlag}. Provide more " + f"data or lower order." 
+            )
+
+        X_full = _make_lag_matrix(yc, maxlag)  # shape (rows, maxlag)
+        y_resp = yc[maxlag:]
+        rows = y_resp.shape[0]
+
+        # If fixed order
+        if self.ar_order is not None:
+            p = int(self.ar_order)
+            if p == 0:
+                # intercept-only (if fit_intercept) or mean-zero (demeaned)
+                i, b, rss = _ols_fit_with_rss(
+                    np.empty((rows, 0)), y_resp, fit_intercept
+                )
+            else:
+                i, b, rss = _ols_fit_with_rss(X_full[:, :p], y_resp, fit_intercept)
+            self.p_ = p
+            self.intercept_ = float(i)
+            self.coef_ = np.ascontiguousarray(b, dtype=np.float64)
+            crit_value = _criterion_value(self.criterion, rss, rows, p, fit_intercept)
+            self.params_ = {
+                "selection": {
+                    "mode": "fixed",
+                    "criterion": self.criterion,
+                    "value": float(crit_value),
+                },
+                "order": int(self.p_),
+            }
+        else:
+            # Scan 0..p_max using shared design and OLS
+            best = (
+                np.inf,
+                0,
+                0.0,
+                np.zeros(0, dtype=np.float64),
+                np.inf,
+            )  # (crit, p, i, b, rss)
+            for p in range(0, maxlag + 1):
+                if p == 0:
+                    i, b, rss = _ols_fit_with_rss(
+                        np.empty((rows, 0)), y_resp, fit_intercept
+                    )
+                else:
+                    i, b, rss = _ols_fit_with_rss(X_full[:, :p], y_resp, fit_intercept)
+                crit = _criterion_value(self.criterion, rss, rows, p, fit_intercept)
+                if crit < best[0]:
+                    best = (crit, p, i, b.copy(), rss)
+            crit, p, i, b, rss = best
+            self.p_ = int(p)
+            self.intercept_ = float(i)
+            self.coef_ = np.ascontiguousarray(b, dtype=np.float64)
+            self.params_ = {
+                "selection": {
+                    "mode": "scan",
+                    "criterion": self.criterion,
+                    "value": float(crit),
+                    "p_max": int(maxlag),
+                },
+                "order": int(self.p_),
+            }
+
+        # one-step forecast from end of training; if the series was demeaned,
+        # add the training mean back so the forecast is on the original scale
+        self.forecast_ = self._y_mean_ + _ar_predict(
+            yc, self.intercept_, self.coef_, self.p_
+        )
+        return self
+
+    def _predict(self, y: np.ndarray, exog: np.ndarray | None = None) -> float:
+        y = np.asarray(y, dtype=np.float64).squeeze()
+        if y.ndim != 1:
+            raise ValueError("y must be a 1D array-like")
+        # apply the same centring used in fit, then undo it on the output
+        yc = y - self._y_mean_
+        return self._y_mean_ + _ar_predict(yc, self.intercept_, self.coef_, self.p_)
+
+    # ---------------------------------------------------------------------
+    # validation helpers
+    # ---------------------------------------------------------------------
+    def _validate_params(self) -> None:
+        if self.ar_order is not None and not isinstance(self.ar_order, int):
+            raise TypeError("ar_order must be an int or None")
+        if not isinstance(self.p_max, int) or self.p_max < 0:
+            raise TypeError("p_max must be a non-negative int")
+        if self.criterion not in {"AIC", "BIC", "AICc"}:
+            raise ValueError("criterion must be one of {'AIC','BIC','AICc'}")
+        if not isinstance(self.demean, (bool, np.bool_)):
+            raise TypeError("demean must be a bool")
+
+
+# ============================ shared Numba utilities ============================
+
+
+@njit(cache=True, fastmath=True)
+def _make_lag_matrix(y: np.ndarray, maxlag: int) -> np.ndarray:
+    """Lag matrix with columns [y_{t-1}, ..., y_{t-maxlag}] (trim='both')."""
+    n = y.shape[0]
+    rows = n - maxlag
+    out = np.empty((rows, maxlag), dtype=np.float64)
+    for i in range(rows):
+        base = maxlag + i
+        for k in range(maxlag):
+            out[i, k] = y[base - (k + 1)]
+    return out
+
+
+@njit(cache=True, fastmath=True)
+def _ols_fit_with_rss(
+    X: np.ndarray, y: np.ndarray, fit_intercept: bool
+) -> tuple[float, np.ndarray, float]:
+    """OLS via normal equations; return (intercept, coef, rss).
+
+    If ``fit_intercept`` is ``False``, the intercept is forced to 0 and the
+    returned value is 0.0. Coefficients will have shape ``(n_features,)``.
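+
+    A minimal illustration (indicative values; exact under perfect
+    arithmetic):
+
+    >>> X = np.array([[0.0], [1.0], [2.0], [3.0]])
+    >>> y = 1.0 + 2.0 * X[:, 0]
+    >>> _ols_fit_with_rss(X, y, True)  # doctest: +SKIP
+    (1.0, array([2.]), 0.0)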
+ """ + n_samples, n_features = X.shape + + if fit_intercept: + Xb = np.empty((n_samples, n_features + 1), dtype=np.float64) + Xb[:, 0] = 1.0 + if n_features: + Xb[:, 1:] = X + XtX = Xb.T @ Xb + Xty = Xb.T @ y + beta = np.linalg.solve(XtX, Xty) + pred = Xb @ beta + resid = y - pred + rss = float(resid @ resid) + return float(beta[0]), beta[1:], rss + else: + if n_features: + XtX = X.T @ X + Xty = X.T @ y + beta = np.linalg.solve(XtX, Xty) + pred = X @ beta + resid = y - pred + rss = float(resid @ resid) + return 0.0, beta, rss + else: + # no features, no intercept: y is mean-zero => model predicts 0 + rss = float(y @ y) + return 0.0, np.zeros(0, dtype=np.float64), rss + + +@njit(cache=True, fastmath=True) +def _criterion_value( + name: str, rss: float, n_eff: int, p: int, fit_intercept: bool +) -> float: + """Compute AIC/BIC/AICc from RSS for an AR(p) regression. + + k = number of free parameters = p + (1 if fit_intercept else 0) + """ + if n_eff <= 0: + return np.inf + sigma2 = rss / n_eff + if sigma2 <= 1e-300: + sigma2 = 1e-300 + + k = p + (1 if fit_intercept else 0) + + if name == "AIC": + return n_eff * np.log(sigma2) + 2.0 * k + elif name == "BIC": + return n_eff * np.log(sigma2) + k * np.log(max(2, n_eff)) + else: # AICc + denom = max(1.0, (n_eff - k - 1)) + return n_eff * np.log(sigma2) + 2.0 * k + (2.0 * k * (k + 1)) / denom + + +@njit(cache=True, fastmath=True) +def _ar_predict(y: np.ndarray, intercept: float, coef: np.ndarray, p: int) -> float: + """One-step-ahead forecast from the end of ``y`` for an AR(p) model.""" + if p == 0: + return intercept + val = intercept + for j in range(p): + val += coef[j] * y[-(j + 1)] + return val
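+
+
+# A minimal usage sketch (illustrative only; AR is not exported from
+# aeon.forecasting.stats in this patch, so the import path assumes direct
+# access to the ``_ar`` module):
+#
+#     import numpy as np
+#     from aeon.forecasting.stats._ar import AR
+#
+#     y = np.random.default_rng(0).standard_normal(300).cumsum()
+#     f = AR(ar_order=None, p_max=8, criterion="AIC").fit(y)
+#     f.p_          # order selected by the AIC scan over 0..8
+#     f.forecast_   # one-step-ahead forecast from the end of y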